Update 0-data_prep.R

This commit is contained in:
Kyle Belanger 2023-01-08 08:35:25 -05:00
parent 74067adea5
commit cbaf895c21

View file

@ -66,8 +66,6 @@ patients <- dplyr$tbl(db, "patients") %>%
dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
dplyr$collect()
# most likely will not use this as there are not as many complete rows. However
# gathering it just in case.
# first is using specimen id, usable data set is using chart time as it appears
# LIS uses different IDs for groups of tests
#
@ -95,39 +93,39 @@ ds_cmp <- dplyr$tbl(db, "labevents") %>%
dplyr$collect()
# This keeps failing if run as part of the above query, so it was moved here to keep going.
# keeps only rows that have values for all columns
# keeps only rows that have no more than three NAs
ds_cmp <- patients %>%
dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
dplyr$filter(rowSums(is.na(.)) <= 3)
# Build the BMP lab data set: take the labevents rows whose itemid is in
# test_list_bmp and reshape them so that each subject_id/charttime pair is one
# row with one column per itemid (cell values come from valuenum).
ds_bmp <- dplyr$tbl(db, "labevents") %>%
dplyr$filter(itemid %in% test_list_bmp) %>%
# storetime is dropped before the pivot
dplyr$select(-storetime) %>%
tidyr$pivot_wider(
id_cols = c(subject_id,charttime)
,names_from = itemid
,values_from = valuenum
) %>%
# keep only rows where both the 50993 and the 50995 columns have a value
# NOTE(review): 50993 is the column thresholded further down to build the
# high/low TSH data sets, so it is presumably TSH; the meaning of 50995 is not
# shown in this section -- confirm against test_list_bmp.
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
dplyr$collect()
# Attach the lab columns to the patient rows, then keep only rows that have a
# value in every column. Patients with no matching lab row get NA lab columns
# from the left join and are therefore removed by this filter.
ds_bmp <- patients %>%
dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
# No longer using this, but saving in case
# ds_bmp <- dplyr$tbl(db, "labevents") %>%
# dplyr$filter(itemid %in% test_list_bmp) %>%
# dplyr$select(-storetime) %>%
# tidyr$pivot_wider(
# id_cols = c(subject_id,charttime)
# ,names_from = itemid
# ,values_from = valuenum
# ) %>%
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
# dplyr$collect()
#
# ds_bmp <- patients %>%
# dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
# dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
# save data ---------------------------------------------------------------
ds_high_tsh <- ds_bmp %>%
ds_high_tsh <- ds_cmp %>%
dplyr$filter(`50993` > 4.2) %>%
readr$write_rds(
here("ML","data-unshared","ds_high_tsh.RDS")
)
ds_low_tsh <- ds_bmp %>%
ds_low_tsh <- ds_cmp %>%
dplyr$filter(`50993` < 0.27) %>%
readr$write_rds(
here("ML","data-unshared","ds_low_tsh.RDS")