Update 0-data_prep.R
This commit is contained in:
parent
74067adea5
commit
cbaf895c21
1 changed files with 19 additions and 21 deletions
|
@ -66,8 +66,6 @@ patients <- dplyr$tbl(db, "patients") %>%
|
|||
dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
|
||||
dplyr$collect()
|
||||
|
||||
# most likely will not use this as there are not as many complete rows. However
|
||||
# gathering it just in case.
|
||||
# first is using specimen id, usable data set is using chart time as it appears
|
||||
# LIS uses different IDs for groups of tests
|
||||
#
|
||||
|
@ -95,39 +93,39 @@ ds_cmp <- dplyr$tbl(db, "labevents") %>%
|
|||
dplyr$collect()
|
||||
|
||||
# this keeps failing if run as part of the above query. Moving here to keep going
|
||||
# keeps only rows that have values for all columns
|
||||
# keeps only rows that have no more than three NAs
|
||||
ds_cmp <- patients %>%
|
||||
dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
|
||||
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
||||
dplyr$filter(rowSums(is.na(.)) <= 3)
|
||||
|
||||
|
||||
|
||||
ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
||||
dplyr$filter(itemid %in% test_list_bmp) %>%
|
||||
dplyr$select(-storetime) %>%
|
||||
tidyr$pivot_wider(
|
||||
id_cols = c(subject_id,charttime)
|
||||
,names_from = itemid
|
||||
,values_from = valuenum
|
||||
) %>%
|
||||
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||
dplyr$collect()
|
||||
|
||||
ds_bmp <- patients %>%
|
||||
dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
|
||||
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
||||
# No longer using this, but saving in case
|
||||
# ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
||||
# dplyr$filter(itemid %in% test_list_bmp) %>%
|
||||
# dplyr$select(-storetime) %>%
|
||||
# tidyr$pivot_wider(
|
||||
# id_cols = c(subject_id,charttime)
|
||||
# ,names_from = itemid
|
||||
# ,values_from = valuenum
|
||||
# ) %>%
|
||||
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||
# dplyr$collect()
|
||||
#
|
||||
# ds_bmp <- patients %>%
|
||||
# dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
|
||||
# dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
||||
|
||||
|
||||
# save data ---------------------------------------------------------------
|
||||
|
||||
|
||||
ds_high_tsh <- ds_bmp %>%
|
||||
ds_high_tsh <- ds_cmp %>%
|
||||
dplyr$filter(`50993` > 4.2) %>%
|
||||
readr$write_rds(
|
||||
here("ML","data-unshared","ds_high_tsh.RDS")
|
||||
)
|
||||
|
||||
ds_low_tsh <- ds_bmp %>%
|
||||
ds_low_tsh <- ds_cmp %>%
|
||||
dplyr$filter(`50993` < 0.27) %>%
|
||||
readr$write_rds(
|
||||
here("ML","data-unshared","ds_low_tsh.RDS")
|
||||
|
|
Loading…
Reference in a new issue