Update 0-data_prep.R

This commit is contained in:
Kyle Belanger 2023-01-07 09:04:42 -05:00
parent 3abba4a904
commit 2d634f568d
2 changed files with 16 additions and 7 deletions

View file

@ -12,6 +12,7 @@ box::use(
,dplyr
,dbplyr
,tidyr
,readr
)
# globals -----------------------------------------------------------------
@ -65,12 +66,6 @@ patients <- dplyr$tbl(db, "patients") %>%
dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
dplyr$collect()
# The recode below is applied to `patients` as its own step because, per the
# original note, it fails if run as part of the DB query.
# Recode gender to numeric: "M" = 1, "F" = 2.
# NOTE(review): the lambda refers to `gender` directly rather than `.x`; this
# is only equivalent because across() selects that single column -- confirm
# before extending the selection to more columns.
patients <- patients %>%
dplyr$mutate(dplyr$across(gender, ~dplyr$recode(gender, "M" = 1, "F" = 2)))
# This will most likely not be used, as there are not as many complete rows;
# it is gathered anyway, just in case.
# first is using specimen id, usable data set is using chart time as it appears
@ -123,6 +118,20 @@ ds_bmp <- patients %>%
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
# save data ---------------------------------------------------------------
# High-TSH subset: rows of ds_bmp whose `50993` column is above 4.2, written
# to ds_high_tsh.RDS under ML/data-unshared.
# NOTE(review): `50993` is presumably the TSH lab item id and 4.2 the upper
# reference limit -- confirm the id and units against the lab item dictionary.
# write_rds() returns its input invisibly, so ds_high_tsh also holds the
# filtered data after the file is written.
ds_high_tsh <- ds_bmp %>%
dplyr$filter(`50993` > 4.2) %>%
readr$write_rds(
here("ML","data-unshared","ds_high_tsh.RDS")
)
# Low-TSH subset: rows of ds_bmp whose `50993` column is below 0.27, written
# to ds_low_tsh.RDS under ML/data-unshared.
# NOTE(review): `50993` is presumably the TSH lab item id and 0.27 the lower
# reference limit -- confirm the id and units against the lab item dictionary.
# write_rds() returns its input invisibly, so ds_low_tsh also holds the
# filtered data after the file is written.
ds_low_tsh <- ds_bmp %>%
dplyr$filter(`50993` < 0.27) %>%
readr$write_rds(
here("ML","data-unshared","ds_low_tsh.RDS")
)

View file

@ -39,7 +39,7 @@ readr$read_csv_chunked(
dbWriteTable(mimicDB, "labevents", chunk, append = TRUE)
}
,chunk_size = 10000
,col_types = "_d_ddTT_d______"
,col_types = "_d_dd_TT_d______"
)