2023-01-09 09:07:17 -05:00
|
|
|
rm(list = ls(all.names = TRUE)) # Clear the memory of variables from previous run.
|
|
|
|
cat("\014") # Clear the console
|
|
|
|
|
|
|
|
|
|
|
|
# load packages -----------------------------------------------------------
|
|
|
|
|
|
|
|
box::use(
|
|
|
|
magrittr[`%>%`]
|
|
|
|
,here[here]
|
|
|
|
,dplyr
|
|
|
|
,readr
|
|
|
|
,tidyr
|
2023-01-11 12:48:02 -05:00
|
|
|
,gp2 = ggplot2[ggplot, aes]
|
2023-01-19 07:49:38 -05:00
|
|
|
,gtsummary
|
2023-01-26 07:39:20 -05:00
|
|
|
,GGally
|
2023-01-09 09:07:17 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2023-01-09 09:37:37 -05:00
|
|
|
# globals -----------------------------------------------------------------
|
|
|
|
|
2023-01-09 09:07:17 -05:00
|
|
|
|
|
|
|
# load data ---------------------------------------------------------------
|
|
|
|
|
2023-01-21 07:44:35 -05:00
|
|
|
ds0 <- readr$read_rds(here("ML","data-unshared","ds_final.RDS"))
|
2023-01-09 09:07:17 -05:00
|
|
|
|
|
|
|
# data manipulation -------------------------------------------------------
|
|
|
|
|
|
|
|
#here I am adding a column to determine if the Free T4 Value is diagnostic or not
|
|
|
|
# using the FT4 Referance range low as the cut off (0.93)
|
|
|
|
|
|
|
|
|
2023-01-21 07:44:35 -05:00
|
|
|
ds1 <- ds0 %>%
|
|
|
|
dplyr$mutate(dplyr$across(
|
|
|
|
ft4_dia
|
2023-01-24 08:00:21 -05:00
|
|
|
, ~factor(., levels = c("Hypo", "Non-Hypo","Hyper", "Non-Hyper")
|
2023-01-21 07:44:35 -05:00
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
2023-01-09 09:07:17 -05:00
|
|
|
|
2023-01-22 08:11:50 -05:00
|
|
|
ds_recode <- ds1 %>%
|
|
|
|
dplyr$mutate(
|
|
|
|
dplyr$across(
|
|
|
|
gender
|
|
|
|
,~dplyr$recode(.,"M" = 1, "F" = 2)
|
|
|
|
)
|
|
|
|
,dplyr$across(
|
|
|
|
ft4_dia
|
|
|
|
,~dplyr$recode(.
|
|
|
|
,"Hypo" = 1
|
|
|
|
,"Non-Hypo" = 2
|
2023-01-24 08:00:21 -05:00
|
|
|
,"Hyper" = 3
|
|
|
|
,"Non-Hyper" = 4
|
2023-01-22 08:11:50 -05:00
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
2023-01-18 19:45:10 -05:00
|
|
|
|
2023-01-09 09:07:17 -05:00
|
|
|
# basic visualization -----------------------------------------------------
|
|
|
|
|
2023-01-19 07:49:38 -05:00
|
|
|
#summary Table
|
2023-01-21 07:44:35 -05:00
|
|
|
|
2023-01-25 16:39:47 -05:00
|
|
|
summary_tbl <- ds1 %>%
|
|
|
|
dplyr$select(-subject_id, -charttime) %>%
|
|
|
|
gtsummary$tbl_summary(
|
|
|
|
by = ft4_dia
|
|
|
|
,missing = "no"
|
|
|
|
,type = gtsummary$all_continuous() ~ "continuous"
|
|
|
|
,label = list(
|
|
|
|
gender ~ "Gender"
|
|
|
|
,anchor_age ~ "Age"
|
|
|
|
)
|
2023-01-26 07:39:20 -05:00
|
|
|
,statistic = gtsummary$all_continuous() ~ c("{median} ({p25}, {p75})")
|
2023-01-25 16:39:47 -05:00
|
|
|
) %>%
|
|
|
|
# gtsummary$bold_labels() %>%
|
2023-01-26 07:39:20 -05:00
|
|
|
gtsummary$add_n(statistic = "{p_miss}", col_label = "**% Missing**") %>%
|
2023-01-25 16:39:47 -05:00
|
|
|
gtsummary$modify_header(label = "**Variable**") %>%
|
2023-01-26 07:39:20 -05:00
|
|
|
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Outcome**")
|
2023-01-25 16:39:47 -05:00
|
|
|
|
|
|
|
|
2023-01-21 07:44:35 -05:00
|
|
|
# summary_tbl
|
|
|
|
|
2023-01-26 07:39:20 -05:00
|
|
|
|
|
|
|
# corr-plot ---------------------------------------------------------------
|
|
|
|
|
|
|
|
corr_plot <- ds1 %>%
|
|
|
|
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
|
|
|
|
dplyr$rename(Age = anchor_age) %>%
|
|
|
|
GGally$ggcorr(nbreaks = 5, palette = "Greys"
|
|
|
|
,label = TRUE, label_size = 3, label_color = "white"
|
|
|
|
,label_round = 2
|
|
|
|
,hjust = 0.75
|
|
|
|
,layout.exp = 1)
|
|
|
|
|
|
|
|
# corr_plot
|
|
|
|
|
|
|
|
gp2$ggsave(
|
|
|
|
here("figures","corr_plot.emf")
|
|
|
|
,width = 7
|
|
|
|
,height = 7
|
|
|
|
,dpi = 300
|
|
|
|
,device = devEMF::emf
|
2023-01-25 16:39:47 -05:00
|
|
|
)
|
2023-01-26 08:15:47 -05:00
|
|
|
gp2$ggsave(
|
|
|
|
here("figures","corr_plot.png")
|
|
|
|
,width = 7
|
|
|
|
,height = 7
|
|
|
|
,dpi = 300
|
|
|
|
)
|
2023-01-26 07:39:20 -05:00
|
|
|
|
2023-01-13 07:47:50 -05:00
|
|
|
|
2023-01-11 12:48:02 -05:00
|
|
|
#quick recode of gender, will still do recoding during feature engineering
|
2023-01-22 08:11:50 -05:00
|
|
|
g1 <- ds1 %>%
|
2023-01-24 13:58:35 -05:00
|
|
|
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
|
2023-01-11 12:48:02 -05:00
|
|
|
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
|
|
|
ggplot(aes(x = value)) +
|
2023-01-15 08:14:52 -05:00
|
|
|
gp2$geom_histogram(na.rm = TRUE) +
|
2023-01-22 08:11:50 -05:00
|
|
|
gp2$facet_wrap(~name, scales = "free") +
|
|
|
|
gp2$theme_bw() +
|
|
|
|
gp2$labs(
|
|
|
|
x = NULL
|
|
|
|
,y = NULL
|
|
|
|
)
|
2023-01-22 11:35:06 -05:00
|
|
|
|
|
|
|
# g1
|
2023-01-11 12:58:06 -05:00
|
|
|
|
2023-01-24 08:00:21 -05:00
|
|
|
gp2$ggsave(
|
|
|
|
here("figures","distrubution_histo.emf")
|
|
|
|
,width = 7
|
|
|
|
,height = 7
|
|
|
|
,dpi = 300
|
|
|
|
,device = devEMF::emf
|
|
|
|
)
|
2023-01-26 08:15:47 -05:00
|
|
|
gp2$ggsave(
|
|
|
|
here("figures","distrubution_histo.png")
|
|
|
|
,width = 7
|
|
|
|
,height = 7
|
|
|
|
,dpi = 300
|
|
|
|
)
|
2023-01-11 12:58:06 -05:00
|
|
|
|
2023-01-24 08:00:21 -05:00
|
|
|
# this takes a bit to load. No discernible patterns in the data
|
2023-01-22 08:11:50 -05:00
|
|
|
g2 <- ds_recode %>%
|
2023-01-24 13:58:35 -05:00
|
|
|
dplyr$select(-gender, -subject_id, -charttime) %>%
|
2023-01-22 15:26:39 -05:00
|
|
|
dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
|
2023-01-11 12:58:06 -05:00
|
|
|
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
|
|
|
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
2023-01-22 15:26:39 -05:00
|
|
|
gp2$stat_boxplot(geom = "errorbar", na.rm = TRUE) +
|
|
|
|
gp2$geom_boxplot(na.rm = TRUE, outlier.shape = NA) +
|
2023-01-22 08:11:50 -05:00
|
|
|
gp2$facet_wrap(~name, scales = "free") +
|
|
|
|
gp2$theme_bw() +
|
2023-01-22 11:35:06 -05:00
|
|
|
gp2$scale_fill_brewer(
|
|
|
|
palette = "Greys"
|
2023-01-24 08:00:21 -05:00
|
|
|
,labels = c("1 - Hypo","2 - Non-Hypo","3 - Hyper","4 - Non-Hyper")
|
2023-01-22 15:26:39 -05:00
|
|
|
) +
|
2023-01-22 11:35:06 -05:00
|
|
|
gp2$labs(
|
|
|
|
x = NULL
|
|
|
|
,y = NULL
|
|
|
|
,fill = "Lab Diagnosis"
|
2023-01-24 08:00:21 -05:00
|
|
|
,caption = "Note. All values log transformed"
|
|
|
|
) +
|
|
|
|
gp2$theme(
|
|
|
|
plot.caption = gp2$element_text(hjust = 0)
|
2023-01-22 11:35:06 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
# g2
|
2023-01-22 15:26:39 -05:00
|
|
|
gp2$ggsave(
|
|
|
|
here("figures","boxplot.emf")
|
|
|
|
,width = 7
|
|
|
|
,height = 7
|
|
|
|
,dpi = 300
|
|
|
|
,device = devEMF::emf
|
|
|
|
)
|
2023-01-26 08:15:47 -05:00
|
|
|
gp2$ggsave(
|
|
|
|
here("figures","boxplot.png")
|
|
|
|
,width = 7
|
|
|
|
,height = 7
|
|
|
|
,dpi = 300
|
|
|
|
)
|
2023-01-22 15:26:39 -05:00
|
|
|
|
|
|
|
|
2023-01-22 15:38:41 -05:00
|
|
|
# save-data ---------------------------------------------------------------
|
|
|
|
|
|
|
|
ds1 %>% readr$write_rds(here("ML","data-unshared","model_data.RDS"))
|
2023-01-15 08:14:52 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|