Update 0-data_prep.R
This commit is contained in:
parent
74067adea5
commit
cbaf895c21
1 changed files with 19 additions and 21 deletions
|
@ -66,8 +66,6 @@ patients <- dplyr$tbl(db, "patients") %>%
|
||||||
dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
|
dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
|
||||||
dplyr$collect()
|
dplyr$collect()
|
||||||
|
|
||||||
# most likely will not use this as there are not as many complete rows. However
|
|
||||||
# gathering it just in case.
|
|
||||||
# first is using specimen id, usable data set is using chart time as it appears
|
# first is using specimen id, usable data set is using chart time as it appears
|
||||||
# LIS uses different id's for groups of tests
|
# LIS uses different id's for groups of tests
|
||||||
#
|
#
|
||||||
|
@ -95,39 +93,39 @@ ds_cmp <- dplyr$tbl(db, "labevents") %>%
|
||||||
dplyr$collect()
|
dplyr$collect()
|
||||||
|
|
||||||
#this keeps failing if run as part of the above query. Moving here to keep going
|
#this keeps failing if run as part of the above query. Moving here to keep going
|
||||||
# keeps only rows that have values for all columns
|
# keeps only rows that have no more than three NAs
|
||||||
ds_cmp <- patients %>%
|
ds_cmp <- patients %>%
|
||||||
dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
|
dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
|
||||||
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
dplyr$filter(rowSums(is.na(.)) <= 3)
|
||||||
|
|
||||||
|
|
||||||
|
# No longer using this, but saving in case
|
||||||
ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
# ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
||||||
dplyr$filter(itemid %in% test_list_bmp) %>%
|
# dplyr$filter(itemid %in% test_list_bmp) %>%
|
||||||
dplyr$select(-storetime) %>%
|
# dplyr$select(-storetime) %>%
|
||||||
tidyr$pivot_wider(
|
# tidyr$pivot_wider(
|
||||||
id_cols = c(subject_id,charttime)
|
# id_cols = c(subject_id,charttime)
|
||||||
,names_from = itemid
|
# ,names_from = itemid
|
||||||
,values_from = valuenum
|
# ,values_from = valuenum
|
||||||
) %>%
|
# ) %>%
|
||||||
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||||
dplyr$collect()
|
# dplyr$collect()
|
||||||
|
#
|
||||||
ds_bmp <- patients %>%
|
# ds_bmp <- patients %>%
|
||||||
dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
|
# dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
|
||||||
dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
# dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
||||||
|
|
||||||
|
|
||||||
# save data ---------------------------------------------------------------
|
# save data ---------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
ds_high_tsh <- ds_bmp %>%
|
ds_high_tsh <- ds_cmp %>%
|
||||||
dplyr$filter(`50993` > 4.2) %>%
|
dplyr$filter(`50993` > 4.2) %>%
|
||||||
readr$write_rds(
|
readr$write_rds(
|
||||||
here("ML","data-unshared","ds_high_tsh.RDS")
|
here("ML","data-unshared","ds_high_tsh.RDS")
|
||||||
)
|
)
|
||||||
|
|
||||||
ds_low_tsh <- ds_bmp %>%
|
ds_low_tsh <- ds_cmp %>%
|
||||||
dplyr$filter(`50993` < 0.27) %>%
|
dplyr$filter(`50993` < 0.27) %>%
|
||||||
readr$write_rds(
|
readr$write_rds(
|
||||||
here("ML","data-unshared","ds_low_tsh.RDS")
|
here("ML","data-unshared","ds_low_tsh.RDS")
|
||||||
|
|
Loading…
Reference in a new issue