diff --git a/ML/0-data_prep.R b/ML/0-data_prep.R
index 83183d6..0083d28 100644
--- a/ML/0-data_prep.R
+++ b/ML/0-data_prep.R
@@ -12,6 +12,7 @@ box::use(
   ,dplyr
   ,dbplyr
   ,tidyr
+  ,readr
 )
 
 # globals -----------------------------------------------------------------
@@ -65,12 +66,6 @@ patients <- dplyr$tbl(db, "patients") %>%
   dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
   dplyr$collect()
 
-#this function is failing if run as part of the DB query
-# Recoding Male = 1, Female = 2
-
-patients <- patients %>%
-  dplyr$mutate(dplyr$across(gender, ~dplyr$recode(gender, "M" = 1, "F" = 2)))
-
 # most likely will not use this as there are not as many complete rows. However
 # gathering it just in case.
 # first is using specimen id, usable data set is using chart time as it appears
@@ -123,6 +118,20 @@ ds_bmp <- patients %>%
   dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
 
 
+# save data ---------------------------------------------------------------
+
+
+ds_high_tsh <- ds_bmp %>%
+  dplyr$filter(`50993` > 4.2) %>%
+  readr$write_rds(
+    here("ML","data-unshared","ds_high_tsh.RDS")
+  )
+
+ds_low_tsh <- ds_bmp %>%
+  dplyr$filter(`50993` < 0.27) %>%
+  readr$write_rds(
+    here("ML","data-unshared","ds_low_tsh.RDS")
+  )
 
 
 
diff --git a/ML/0-greeter-dbcreation.R b/ML/0-greeter-dbcreation.R
index 101982a..1c9f540 100644
--- a/ML/0-greeter-dbcreation.R
+++ b/ML/0-greeter-dbcreation.R
@@ -39,7 +39,7 @@ readr$read_csv_chunked(
     dbWriteTable(mimicDB, "labevents", chunk, append = TRUE)
   }
   ,chunk_size = 10000
-  ,col_types = "_d_ddTT_d______"
+  ,col_types = "_d_dd_TT_d______"
 )
 
 