2023-01-05 08:18:18 -05:00
|
|
|
rm(list = ls(all.names = TRUE)) # Clear the memory of variables from previous run.
|
|
|
|
cat("\014") # Clear the console
|
|
|
|
|
|
|
|
|
|
|
|
# load packages -----------------------------------------------------------
|
|
|
|
|
|
|
|
box::use(
|
|
|
|
magrittr[`%>%`]
|
|
|
|
,RSQLite
|
|
|
|
,DBI[dbConnect,dbDisconnect]
|
|
|
|
,here[here]
|
2023-01-05 15:27:20 -05:00
|
|
|
,dplyr
|
2023-01-06 07:40:03 -05:00
|
|
|
,dbplyr
|
|
|
|
,tidyr
|
2023-01-07 09:04:42 -05:00
|
|
|
,readr
|
2023-01-05 08:18:18 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
# globals -----------------------------------------------------------------
|
|
|
|
|
|
|
|
db <- dbConnect(
|
|
|
|
RSQLite$SQLite()
|
|
|
|
,here("ML","data-unshared","mimicDB.sqlite")
|
|
|
|
)
|
|
|
|
|
2023-01-06 08:46:52 -05:00
|
|
|
#item list shows two different numbers for a few tests, second set of items do not have
|
|
|
|
# any results that are on the same samples as TSH and Free T4
|
2023-01-06 14:48:53 -05:00
|
|
|
test_list_cmp <- c(
|
2023-01-05 15:27:20 -05:00
|
|
|
50862 #Albumin
|
|
|
|
,50863 #Alkaline Phosphatase
|
|
|
|
,50861 #Alanine Aminotransferase (ALT)
|
|
|
|
,50878 #Asparate Aminotransferase (AST)
|
|
|
|
,51006 #Urea Nitrogen
|
|
|
|
,50893 #Calcium, Total
|
|
|
|
,50882 #Bicarbonate
|
|
|
|
,50902 #Chloride
|
|
|
|
,50912 #Creatinine
|
|
|
|
,50931 #Glucose
|
|
|
|
,50971 #Potassium
|
|
|
|
,50983 #Sodium
|
|
|
|
,50885 #Bilirubin, Total
|
|
|
|
,50976 #Protein, Total
|
|
|
|
,50993 #Thyroid Stimulating Hormone
|
2023-01-06 08:46:52 -05:00
|
|
|
,50995 #Thyroxine (T4), Free
|
2023-01-05 15:27:20 -05:00
|
|
|
)
|
2023-01-05 08:18:18 -05:00
|
|
|
|
2023-01-09 09:07:14 -05:00
|
|
|
# 51301 and 51300 looks like test name may have changed
|
2023-01-06 14:48:53 -05:00
|
|
|
test_list_bmp <- c(
|
|
|
|
51006 #Urea Nitrogen
|
|
|
|
,50893 #Calcium, Total
|
|
|
|
,50882 #Bicarbonate
|
|
|
|
,50902 #Chloride
|
|
|
|
,50912 #Creatinine
|
|
|
|
,50931 #Glucose
|
|
|
|
,50971 #Potassium
|
|
|
|
,50983 #Sodium
|
|
|
|
,50993 #Thyroid Stimulating Hormone
|
|
|
|
,50995 #Thyroxine (T4), Free
|
2023-01-09 09:07:14 -05:00
|
|
|
,51279 #RBC
|
|
|
|
,51301 #White Blood Cells
|
|
|
|
,51300 #WBC Count
|
|
|
|
,51221 #Hematocrit
|
|
|
|
,51222 #Hemoglobin
|
|
|
|
,51265 #Platelet Count
|
|
|
|
)
|
2023-01-05 08:18:18 -05:00
|
|
|
|
2023-01-06 14:48:53 -05:00
|
|
|
# TSH Ref Range from File 0.27 - 4.2 uIU/mL
|
|
|
|
# Free T4 Ref Range from File 0.93 - 1.7 ng/dL
|
2023-01-05 08:18:18 -05:00
|
|
|
|
2023-01-05 15:27:20 -05:00
|
|
|
# load data ---------------------------------------------------------------
|
2023-01-05 08:18:18 -05:00
|
|
|
|
2023-01-06 15:48:47 -05:00
|
|
|
# load patients first to add to lab values
|
|
|
|
patients <- dplyr$tbl(db, "patients") %>%
|
|
|
|
dplyr$select(-anchor_year, -anchor_year_group, -dod) %>%
|
|
|
|
dplyr$collect()
|
|
|
|
|
2023-01-06 14:48:53 -05:00
|
|
|
# first is using specimen id, usable data set is using chart time as it appears
|
|
|
|
# LIS uses different id's for groups of tests
|
|
|
|
#
|
|
|
|
# ds_cmp <- dplyr$tbl(db, "labevents") %>%
|
|
|
|
# dplyr$filter(itemid %in% test_list_cmp) %>%
|
|
|
|
# dplyr$select(-charttime,-storetime) %>%
|
|
|
|
# tidyr$pivot_wider(
|
|
|
|
# id_cols = c(subject_id,specimen_id)
|
|
|
|
# ,names_from = itemid
|
|
|
|
# ,values_from = valuenum
|
|
|
|
# ) %>%
|
|
|
|
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
|
|
|
# dplyr$filter(dplyr$across(where(is.numeric), ~!is.na(.x))) %>%
|
|
|
|
# dplyr$collect()
|
2023-01-09 09:07:14 -05:00
|
|
|
#No longer using this, but saving incase
|
|
|
|
# ds_cmp <- dplyr$tbl(db, "labevents") %>%
|
|
|
|
# dplyr$filter(itemid %in% test_list_cmp) %>%
|
|
|
|
# dplyr$select(-storetime) %>%
|
|
|
|
# tidyr$pivot_wider(
|
|
|
|
# id_cols = c(subject_id,charttime)
|
|
|
|
# ,names_from = itemid
|
|
|
|
# ,values_from = valuenum
|
|
|
|
# ) %>%
|
|
|
|
# dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
|
|
|
# dplyr$collect()
|
|
|
|
#
|
|
|
|
# #this keeps failing if run as part of the above query. Moving here to keep going
|
|
|
|
# # keeps only rows that have no more then three NA's
|
|
|
|
# ds_cmp <- patients %>%
|
|
|
|
# dplyr$left_join(ds_cmp, by = c("subject_id" = "subject_id")) %>%
|
|
|
|
# dplyr$filter(rowSums(is.na(.)) <= 3)
|
|
|
|
|
|
|
|
# BMP and CBC Results together
|
2023-01-06 14:48:53 -05:00
|
|
|
|
2023-01-09 09:07:14 -05:00
|
|
|
ds_bmp <- dplyr$tbl(db, "labevents") %>%
|
|
|
|
dplyr$filter(itemid %in% test_list_bmp) %>%
|
2023-01-06 14:48:53 -05:00
|
|
|
dplyr$select(-storetime) %>%
|
2023-01-06 07:40:03 -05:00
|
|
|
tidyr$pivot_wider(
|
2023-01-06 14:48:53 -05:00
|
|
|
id_cols = c(subject_id,charttime)
|
2023-01-06 07:40:03 -05:00
|
|
|
,names_from = itemid
|
|
|
|
,values_from = valuenum
|
2023-01-06 14:48:53 -05:00
|
|
|
) %>%
|
2023-01-06 08:46:52 -05:00
|
|
|
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
2023-01-06 07:40:03 -05:00
|
|
|
dplyr$collect()
|
|
|
|
|
2023-01-06 08:46:52 -05:00
|
|
|
|
2023-01-09 09:07:14 -05:00
|
|
|
ds_bmp <- ds_bmp %>%
|
|
|
|
dplyr$left_join(patients, by = c("subject_id" = "subject_id")) %>%
|
|
|
|
dplyr$mutate(dplyr$across(`51300`, ~dplyr$if_else(!is.na(.),`51300`,`51301`))) %>%
|
|
|
|
dplyr$select(-`51301`) %>%
|
|
|
|
# dplyr$filter(dplyr$if_all(.fns = ~!is.na(.)))
|
|
|
|
dplyr$filter(rowSums(is.na(.)) <= 2) #allows for 2 missing test
|
2023-01-05 08:18:18 -05:00
|
|
|
|
2023-01-06 08:49:22 -05:00
|
|
|
|
2023-01-07 09:04:42 -05:00
|
|
|
# save data ---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
2023-01-09 09:07:14 -05:00
|
|
|
ds_high_tsh <- ds_bmp %>%
|
2023-01-07 09:04:42 -05:00
|
|
|
dplyr$filter(`50993` > 4.2) %>%
|
|
|
|
readr$write_rds(
|
|
|
|
here("ML","data-unshared","ds_high_tsh.RDS")
|
|
|
|
)
|
|
|
|
|
2023-01-09 09:07:14 -05:00
|
|
|
ds_low_tsh <- ds_bmp %>%
|
2023-01-07 09:04:42 -05:00
|
|
|
dplyr$filter(`50993` < 0.27) %>%
|
|
|
|
readr$write_rds(
|
|
|
|
here("ML","data-unshared","ds_low_tsh.RDS")
|
|
|
|
)
|
2023-01-07 09:06:39 -05:00
|
|
|
|
2023-01-05 08:18:18 -05:00
|
|
|
# close database ----------------------------------------------------------
|
|
|
|
|
|
|
|
dbDisconnect(db)
|