Update 1-data-exploration.R
This commit is contained in:
parent
eb770c9d77
commit
1cefc2c306
1 changed files with 46 additions and 26 deletions
|
@ -77,7 +77,8 @@ ds_low_tsh <- ds_low_tsh_raw %>%
|
||||||
|
|
||||||
#summary Table
|
#summary Table
|
||||||
#use this instead of making myself
|
#use this instead of making myself
|
||||||
high_table_summary <- ds_high_tsh %>%
|
summary_table <- function(ds){
|
||||||
|
table <- ds %>%
|
||||||
gtsummary$tbl_summary(
|
gtsummary$tbl_summary(
|
||||||
by = ft4_dia
|
by = ft4_dia
|
||||||
,missing = "no"
|
,missing = "no"
|
||||||
|
@ -87,9 +88,9 @@ high_table_summary <- ds_high_tsh %>%
|
||||||
,anchor_age ~ "Age"
|
,anchor_age ~ "Age"
|
||||||
)
|
)
|
||||||
,statistic = gtsummary$all_continuous() ~ c(
|
,statistic = gtsummary$all_continuous() ~ c(
|
||||||
"{N_miss}",
|
"{N_miss}"
|
||||||
"{median} ({p25}, {p75})",
|
,"{median} ({p25}, {p75})"
|
||||||
"{min}, {max}"
|
,"{min}, {max}"
|
||||||
)
|
)
|
||||||
) %>%
|
) %>%
|
||||||
gtsummary$bold_labels() %>%
|
gtsummary$bold_labels() %>%
|
||||||
|
@ -99,12 +100,32 @@ high_table_summary <- ds_high_tsh %>%
|
||||||
gtsummary$modify_header(label = "**Variable**") %>%
|
gtsummary$modify_header(label = "**Variable**") %>%
|
||||||
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Diagnostic**")
|
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Diagnostic**")
|
||||||
|
|
||||||
high_table_summary
|
return(table)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
# create both tables
|
||||||
|
high_table_summary <- summary_table(ds_high_tsh)
|
||||||
|
low_table_summary <- summary_table(ds_low_tsh)
|
||||||
|
|
||||||
|
# merge tables
|
||||||
|
merged_summary_table <- gtsummary$tbl_merge(
|
||||||
|
tbls = list(high_table_summary, low_table_summary)
|
||||||
|
,tab_spanner = c(
|
||||||
|
"**Elevated TSH** \n Free T4 Diagnostic"
|
||||||
|
,"**Decreased TSH** \n Free T4 Diagnostic"
|
||||||
|
)
|
||||||
|
) %>%
|
||||||
|
gtsummary$as_flex_table()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# correlation plot
|
# correlation plot
|
||||||
ds_corr <- cor(ds_high_tsh %>% dplyr$select(-subject_id, - charttime)
|
ds_corr <- cor(ds_high_tsh %>%
|
||||||
%>% dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2)))
|
dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2)))
|
||||||
,use = "complete.obs")
|
,use = "complete.obs")
|
||||||
|
|
||||||
|
|
||||||
|
@ -116,7 +137,6 @@ dev.off()
|
||||||
|
|
||||||
#quick recode of gender, will still do recoding during feature engineering
|
#quick recode of gender, will still do recoding during feature engineering
|
||||||
g1 <- ds_high_tsh %>%
|
g1 <- ds_high_tsh %>%
|
||||||
dplyr$select(-subject_id, - charttime) %>%
|
|
||||||
dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2))) %>%
|
dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2))) %>%
|
||||||
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
||||||
ggplot(aes(x = value)) +
|
ggplot(aes(x = value)) +
|
||||||
|
@ -127,7 +147,7 @@ g1
|
||||||
|
|
||||||
# this takes a bit to load. No discernable paterns in the data
|
# this takes a bit to load. No discernable paterns in the data
|
||||||
g2 <- ds_high_tsh %>%
|
g2 <- ds_high_tsh %>%
|
||||||
dplyr$select(-gender, -subject_id, - charttime) %>%
|
dplyr$select(-gender) %>%
|
||||||
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
||||||
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
||||||
gp2$geom_boxplot(outlier.shape = NA, na.rm = TRUE) +
|
gp2$geom_boxplot(outlier.shape = NA, na.rm = TRUE) +
|
||||||
|
|
Loading…
Reference in a new issue