Update 1-data-exploration.R
This commit is contained in:
parent
5bc94ee52f
commit
e939a6eaa7
1 changed file with 25 additions and 41 deletions
|
@@ -12,6 +12,7 @@ box::use(
|
||||||
,tidyr
|
,tidyr
|
||||||
,gp2 = ggplot2[ggplot, aes]
|
,gp2 = ggplot2[ggplot, aes]
|
||||||
,gtsummary
|
,gtsummary
|
||||||
|
,GGally
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -59,30 +60,6 @@ ds_recode <- ds1 %>%
|
||||||
|
|
||||||
#summary Table
|
#summary Table
|
||||||
|
|
||||||
summary_tbl <- ds1 %>%
|
|
||||||
dplyr$select(-subject_id, -charttime) %>%
|
|
||||||
gtsummary$tbl_summary(
|
|
||||||
by = ft4_dia
|
|
||||||
,missing = "no"
|
|
||||||
,type = gtsummary$all_continuous() ~ "continuous2"
|
|
||||||
,label = list(
|
|
||||||
gender ~ "Gender"
|
|
||||||
,anchor_age ~ "Age"
|
|
||||||
)
|
|
||||||
,statistic = gtsummary$all_continuous() ~ c(
|
|
||||||
"{p_miss}"
|
|
||||||
,"{median} ({p25}, {p75})"
|
|
||||||
,"{min}, {max}"
|
|
||||||
)
|
|
||||||
) %>%
|
|
||||||
# gtsummary$bold_labels() %>%
|
|
||||||
gtsummary$add_stat_label(
|
|
||||||
label = gtsummary$all_continuous() ~ c("% Missing", "Median (IQR)", "Range")
|
|
||||||
) %>%
|
|
||||||
gtsummary$modify_header(label = "**Variable**") %>%
|
|
||||||
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Outcome**")
|
|
||||||
|
|
||||||
|
|
||||||
summary_tbl <- ds1 %>%
|
summary_tbl <- ds1 %>%
|
||||||
dplyr$select(-subject_id, -charttime) %>%
|
dplyr$select(-subject_id, -charttime) %>%
|
||||||
gtsummary$tbl_summary(
|
gtsummary$tbl_summary(
|
||||||
|
@@ -93,31 +70,38 @@ summary_tbl <- ds1 %>%
|
||||||
gender ~ "Gender"
|
gender ~ "Gender"
|
||||||
,anchor_age ~ "Age"
|
,anchor_age ~ "Age"
|
||||||
)
|
)
|
||||||
,statistic = gtsummary$all_continuous() ~ c("{p_miss}{median}" )
|
,statistic = gtsummary$all_continuous() ~ c("{median} ({p25}, {p75})")
|
||||||
) %>%
|
) %>%
|
||||||
# gtsummary$bold_labels() %>%
|
# gtsummary$bold_labels() %>%
|
||||||
|
gtsummary$add_n(statistic = "{p_miss}", col_label = "**% Missing**") %>%
|
||||||
gtsummary$modify_header(label = "**Variable**") %>%
|
gtsummary$modify_header(label = "**Variable**") %>%
|
||||||
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Outcome**") %>%
|
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Outcome**")
|
||||||
|
|
||||||
|
|
||||||
# summary_tbl
|
# summary_tbl
|
||||||
|
|
||||||
#code for saving corr plot
|
|
||||||
devEMF::emf(here("figures","corrplot.emf"))
|
# corr-plot ---------------------------------------------------------------
|
||||||
corr_data <- cor(
|
|
||||||
ds1 %>% dplyr$select(-gender,-ft4_dia, -subject_id, -charttime)
|
corr_plot <- ds1 %>%
|
||||||
,use = "complete.obs"
|
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
|
||||||
|
dplyr$rename(Age = anchor_age) %>%
|
||||||
|
GGally$ggcorr(nbreaks = 5, palette = "Greys"
|
||||||
|
,label = TRUE, label_size = 3, label_color = "white"
|
||||||
|
,label_round = 2
|
||||||
|
,hjust = 0.75
|
||||||
|
,layout.exp = 1)
|
||||||
|
|
||||||
|
# corr_plot
|
||||||
|
|
||||||
|
gp2$ggsave(
|
||||||
|
here("figures","corr_plot.emf")
|
||||||
|
,width = 7
|
||||||
|
,height = 7
|
||||||
|
,dpi = 300
|
||||||
|
,device = devEMF::emf
|
||||||
)
|
)
|
||||||
corrplot::corrplot(corr = corr_data,
|
|
||||||
method = "color"
|
|
||||||
,type = "lower"
|
|
||||||
,tl.col = "black"
|
|
||||||
,tl.srt = 45
|
|
||||||
,number.font =
|
|
||||||
,col = corrplot::COL1("Greys")
|
|
||||||
,addCoef.col = 'white'
|
|
||||||
)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
#quick recode of gender, will still do recoding during feature engineering
|
#quick recode of gender, will still do recoding during feature engineering
|
||||||
g1 <- ds1 %>%
|
g1 <- ds1 %>%
|
||||||
|
|
Loading…
Reference in a new issue