Update 0-data_prep.R
This commit is contained in:
parent
cbaf895c21
commit
b19d1f25eb
1 changed files with 39 additions and 28 deletions
|
@ -43,6 +43,7 @@ test_list_cmp <- c(
|
|||
,50995 #Thyroxine (T4), Free
|
||||
)
|
||||
|
||||
# 51301 and 51300 looks like test name may have changed
|
||||
test_list_bmp <- c(
|
||||
51006 #Urea Nitrogen
|
||||
,50893 #Calcium, Total
|
||||
|
@ -54,7 +55,13 @@ test_list_bmp <- c(
|
|||
,50983 #Sodium
|
||||
,50993 #Thyroid Stimulating Hormone
|
||||
,50995 #Thyroxine (T4), Free
|
||||
)
|
||||
,51279 #RBC
|
||||
,51301 #White Blood Cells
|
||||
,51300 #WBC Count
|
||||
,51221 #Hematocrit
|
||||
,51222 #Hemoglobin
|
||||
,51265 #Platelet Count
|
||||
)
|
||||
|
||||
# TSH Ref Range from File 0.27 - 4.2 uIU/mL
|
||||
# Free T4 Ref Range from File 0.93 - 1.7 ng/dL
|
||||
|
@ -80,28 +87,9 @@ patients <- dplyr$tbl(db, "patients") %>%
|
|||
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||
# dplyr$filter(dplyr$across(where(is.numeric), ~!is.na(.x))) %>%
|
||||
# dplyr$collect()
|
||||
|
||||
ds_cmp <- dplyr$tbl(db, "labevents") %>%
|
||||
dplyr$filter(itemid %in% test_list_cmp) %>%
|
||||
dplyr$select(-storetime) %>%
|
||||
tidyr$pivot_wider(
|
||||
id_cols = c(subject_id,charttime)
|
||||
,names_from = itemid
|
||||
,values_from = valuenum
|
||||
) %>%
|
||||
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||
dplyr$collect()
|
||||
|
||||
#this keeps failing if run as part of the above query. Moving here to keep going
|
||||
# keeps only rows that have no more then three NA's
|
||||
ds_cmp <- patients %>%
|
||||
dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
|
||||
dplyr$filter(rowSums(is.na(.)) <= 3)
|
||||
|
||||
|
||||
# No longer using this, but saving incase
|
||||
# ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
||||
# dplyr$filter(itemid %in% test_list_bmp) %>%
|
||||
#No longer using this, but saving incase
|
||||
# ds_cmp <- dplyr$tbl(db, "labevents") %>%
|
||||
# dplyr$filter(itemid %in% test_list_cmp) %>%
|
||||
# dplyr$select(-storetime) %>%
|
||||
# tidyr$pivot_wider(
|
||||
# id_cols = c(subject_id,charttime)
|
||||
|
@ -111,21 +99,44 @@ ds_cmp <- patients %>%
|
|||
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||
# dplyr$collect()
|
||||
#
|
||||
# ds_bmp <- patients %>%
|
||||
# dplyr$left_join(ds_bmp, by = c("subject_id" = "subject_id")) %>%
|
||||
# dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
||||
# #this keeps failing if run as part of the above query. Moving here to keep going
|
||||
# # keeps only rows that have no more then three NA's
|
||||
# ds_cmp <- patients %>%
|
||||
# dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
|
||||
# dplyr$filter(rowSums(is.na(.)) <= 3)
|
||||
|
||||
# BMP and CBC Results together
|
||||
|
||||
ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
||||
dplyr$filter(itemid %in% test_list_bmp) %>%
|
||||
dplyr$select(-storetime) %>%
|
||||
tidyr$pivot_wider(
|
||||
id_cols = c(subject_id,charttime)
|
||||
,names_from = itemid
|
||||
,values_from = valuenum
|
||||
) %>%
|
||||
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||
dplyr$collect()
|
||||
|
||||
|
||||
ds_bmp <- ds_bmp %>%
|
||||
dplyr$left_join(patients, by = c("subject_id" = "subject_id")) %>%
|
||||
dplyr$mutate(dplyr$across(`51300`, ~dplyr$if_else(!is.na(.),`51300`,`51301`))) %>%
|
||||
dplyr$select(-`51301`) %>%
|
||||
# dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
||||
dplyr$filter(rowSums(is.na(.)) <= 2) #allows for 2 missing test
|
||||
|
||||
|
||||
# save data ---------------------------------------------------------------
|
||||
|
||||
|
||||
ds_high_tsh <- ds_cmp %>%
|
||||
ds_high_tsh <- ds_bmp %>%
|
||||
dplyr$filter(`50993` > 4.2) %>%
|
||||
readr$write_rds(
|
||||
here("ML","data-unshared","ds_high_tsh.RDS")
|
||||
)
|
||||
|
||||
ds_low_tsh <- ds_cmp %>%
|
||||
ds_low_tsh <- ds_bmp %>%
|
||||
dplyr$filter(`50993` < 0.27) %>%
|
||||
readr$write_rds(
|
||||
here("ML","data-unshared","ds_low_tsh.RDS")
|
||||
|
|
Loading…
Reference in a new issue