rm(list = ls(all.names = TRUE)) # Clear the memory of variables from previous run. cat("\014") # Clear the console # load packages ----------------------------------------------------------- box::use( magrittr[`%>%`] ,RSQLite ,DBI[dbConnect,dbDisconnect] ,here[here] ,dplyr ,dbplyr ,tidyr ,readr ) # globals ----------------------------------------------------------------- db <- dbConnect( RSQLite$SQLite() ,here("ML","data-unshared","mimicDB.sqlite") ) #item list shows two different numbers for a few tests, second set of items do not have # any results that are on the same samples as TSH and Free T4 # 51301 and 51300 looks like test name may have changed test_list_bmp <- c( 51006 #Urea Nitrogen ,50893 #Calcium, Total ,50882 #Bicarbonate ,50902 #Chloride ,50912 #Creatinine ,50931 #Glucose ,50971 #Potassium ,50983 #Sodium ,50993 #Thyroid Stimulating Hormone ,50995 #Thyroxine (T4), Free ,51279 #RBC ,51301 #White Blood Cells ,51300 #WBC Count ,51221 #Hematocrit ,51222 #Hemoglobin ,51265 #Platelet Count ) test_list_names <- c( "BUN" = "51006" ,"CA" = "50893" ,"CO2" = "50882" ,"CL" = "50902" ,"CREA" = "50912" ,"GLU" = "50931" ,"K" = "50971" ,"NA" = "50983" ,"TSH" = "50993" ,"FT4" = "50995" ,"RBC" = "51279" ,"WBC" = "51300" ,"HCT" = "51221" ,"HGB" = "51222" ,"PLT" = "51265" ) # TSH Ref Range from File 0.27 - 4.2 uIU/mL # Free T4 Ref Range from File 0.93 - 1.7 ng/dL # load data --------------------------------------------------------------- # load patients first to add to lab values patients <- dplyr$tbl(db, "patients") %>% dplyr$select(-anchor_year, -anchor_year_group, -dod) %>% dplyr$collect() # usable data set is using chart time as it appears # LIS uses different id's for groups of tests # BMP and CBC Results together ds_bmp <- dplyr$tbl(db, "labevents") %>% dplyr$filter(itemid %in% test_list_bmp) %>% dplyr$select(-storetime) %>% tidyr$pivot_wider( id_cols = c(subject_id,charttime) ,names_from = itemid ,values_from = valuenum ) %>% dplyr$collect() ds1 <- ds_bmp %>% dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>% dplyr$left_join(patients, by = c("subject_id" = "subject_id")) %>% dplyr$mutate(dplyr$across(`51300`, ~dplyr$if_else(!is.na(.),`51300`,`51301`))) %>% dplyr$select(-`51301`) %>% # dplyr$filter(dplyr$if_all(.fns = ~!is.na(.))) dplyr$filter(rowSums(is.na(.)) <= 3) #allows for 3 missing test ds_final <- ds1 %>% dplyr$mutate( ft4_dia = dplyr$case_when( `50993` > 4.2 & `50995` < 0.93 ~ "Hypo" ,`50993` > 4.2 & `50995` > 0.93 ~ "Non-Hypo" ,`50993` < 0.27 & `50995` > 1.7 ~ "Hyper" ,`50993` < 0.27 & `50995` < 1.7 ~ "Non-Hyper" ,TRUE ~ "Normal TSH" ) ) %>% dplyr$filter(ft4_dia != "Normal TSH") %>% dplyr$rename(!!!test_list_names) %>% # dplyr$select(-FT4) %>% dplyr$relocate(gender, anchor_age) # save data --------------------------------------------------------------- ds_final %>% readr$write_rds(here("ML","data-unshared","ds_final.RDS")) # close database ---------------------------------------------------------- dbDisconnect(db)