Update 1-data-exploration.R

This commit is contained in:
Kyle Belanger 2023-01-21 07:44:35 -05:00
parent 92c8e68f85
commit 66f213bc5a

View file

@ -17,36 +17,11 @@ box::use(
# globals ----------------------------------------------------------------- # globals -----------------------------------------------------------------
test_list_names <- c(
"BUN" = "51006"
,"CA" = "50893"
,"CO2" = "50882"
,"CL" = "50902"
,"CREA" = "50912"
,"GLU" = "50931"
,"K" = "50971"
,"NA" = "50983"
,"TSH" = "50993"
,"FT4" = "50995"
,"RBC" = "51279"
,"WBC" = "51300"
,"HCT" = "51221"
,"HGB" = "51222"
,"PLT" = "51265"
)
# load data --------------------------------------------------------------- # load data ---------------------------------------------------------------
ds_high_tsh_raw <- readr$read_rds( ds0 <- readr$read_rds(here("ML","data-unshared","ds_final.RDS"))
here("ML","data-unshared","ds_high_tsh.RDS")
)
ds_low_tsh_raw <- readr$read_rds(
here("ML","data-unshared","ds_low_tsh.RDS")
)
# data manipulation ------------------------------------------------------- # data manipulation -------------------------------------------------------
@ -54,31 +29,21 @@ ds_low_tsh_raw <- readr$read_rds(
# using the FT4 Reference range low as the cutoff (0.93) # using the FT4 Reference range low as the cutoff (0.93)
ds_high_tsh <- ds_high_tsh_raw %>% ds1 <- ds0 %>%
dplyr$mutate(ft4_dia = dplyr$if_else(`50995` < 0.93, TRUE, FALSE)) %>%
#can rename with a vector using either of these
# dplyr$rename_with(~names(test_list_names), dplyr$all_of(test_list_names))
dplyr$rename(!!!test_list_names) %>%
dplyr$select(-FT4, -subject_id, -charttime) %>% dplyr$select(-FT4, -subject_id, -charttime) %>%
dplyr$relocate(gender, anchor_age) dplyr$mutate(dplyr$across(
ft4_dia
, ~factor(., levels = c("Hypo", "Non-Hypo", "Normal TSH", "Hyper", "Non-Hyper")
ds_low_tsh <- ds_low_tsh_raw %>% )
dplyr$mutate(ft4_dia = dplyr$if_else(`50995` > 1.7, TRUE, FALSE)) %>% )
#can rename with a vector using either of these )
# dplyr$rename_with(~names(test_list_names), dplyr$all_of(test_list_names))
dplyr$rename(!!!test_list_names) %>%
dplyr$select(-FT4, -subject_id, -charttime) %>%
dplyr$relocate(gender, anchor_age)
# basic visualization ----------------------------------------------------- # basic visualization -----------------------------------------------------
#summary Table #summary Table
#use this instead of making myself
summary_table <- function(ds){ summary_tbl <- ds1 %>%
table <- ds %>%
gtsummary$tbl_summary( gtsummary$tbl_summary(
by = ft4_dia by = ft4_dia
,missing = "no" ,missing = "no"
@ -100,23 +65,8 @@ summary_table <- function(ds){
gtsummary$modify_header(label = "**Variable**") %>% gtsummary$modify_header(label = "**Variable**") %>%
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Diagnostic**") gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Diagnostic**")
return(table) # summary_tbl
}
# create both tables
high_table_summary <- summary_table(ds_high_tsh)
low_table_summary <- summary_table(ds_low_tsh)
# merge tables
merged_summary_table <- gtsummary$tbl_merge(
tbls = list(high_table_summary, low_table_summary)
,tab_spanner = c(
"**Elevated TSH** \n Free T4 Diagnostic"
,"**Decreased TSH** \n Free T4 Diagnostic"
)
) %>%
gtsummary$as_flex_table()
# correlation plot # correlation plot