Update 0-data_prep.R
This commit is contained in:
parent
f10ee3fcd7
commit
427d365908
1 changed file with 34 additions and 4 deletions
|
@ -21,7 +21,8 @@ db <- dbConnect(
|
||||||
,here("ML","data-unshared","mimicDB.sqlite")
|
,here("ML","data-unshared","mimicDB.sqlite")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# The item list shows two different numbers for a few tests; the second set of items does not have
|
||||||
|
# any results that are on the same samples as TSH and Free T4
|
||||||
test_list <- c(
|
test_list <- c(
|
||||||
50862 #Albumin
|
50862 #Albumin
|
||||||
,50863 #Alkaline Phosphatase
|
,50863 #Alkaline Phosphatase
|
||||||
|
@ -38,7 +39,7 @@ test_list <- c(
|
||||||
,50885 #Bilirubin, Total
|
,50885 #Bilirubin, Total
|
||||||
,50976 #Protein, Total
|
,50976 #Protein, Total
|
||||||
,50993 #Thyroid Stimulating Hormone
|
,50993 #Thyroid Stimulating Hormone
|
||||||
,50995 #Thyroxine (T4), FreE
|
,50995 #Thyroxine (T4), Free
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,10 +54,39 @@ ds <- dplyr$tbl(db, "labevents") %>%
|
||||||
,names_from = itemid
|
,names_from = itemid
|
||||||
,values_from = valuenum
|
,values_from = valuenum
|
||||||
) %>%
|
) %>%
|
||||||
dplyr$filter(!is.na(`50993`)) %>%
|
dplyr$filter(!is.na(`50993`) & !is.na(`50995`)) %>%
|
||||||
dplyr$collect()
|
dplyr$collect()
|
||||||
|
|
||||||
# Make sure `ds` is a local, in-memory table.
# NOTE(review): the pipeline that builds `ds` already ends in dplyr$collect(), so this call looks redundant — confirm.
ds <- dplyr$collect(ds)
|
|
||||||
|
# Keep only rows of `ds` with no missing value in any numeric column (the result is not assigned). if_all() replaces the deprecated use of across() inside filter(); both keep a row only when the predicate holds for every selected column.
ds %>% dplyr$filter(dplyr$if_all(where(is.numeric), ~ !is.na(.x)))
|
||||||
|
|
||||||
|
# Number of missing values in each column of `ds`; the column names of `ds` are moved from row names into a regular column.
count <- tibble::rownames_to_column(data.frame(colSums(is.na(ds))))
|
||||||
|
|
||||||
|
|
||||||
|
# Read a 100-row sample of the raw labevents CSV.
# col_types is a compact readr column specification: one character per column,
# where "_" skips the column, "d" parses it as double and "T" as date-time.
testds <- readr::read_csv(
  here("ML", "data-unshared", "labevents.csv"),
  col_types = "_d_ddTT_d______",
  n_max = 100
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Build a second wide lab table from the labevents table in the database:
# restrict to the tests in `test_list`, drop storetime, spread one column per
# itemid keyed on subject_id + charttime, and keep only rows that have both a
# TSH (50993) and a Free T4 (50995) value before pulling the data into memory.
ds1 <- dplyr$tbl(db, "labevents") %>%
  dplyr$filter(itemid %in% test_list) %>%
  dplyr$select(-storetime) %>%
  tidyr$pivot_wider(
    id_cols = c(subject_id, charttime),
    names_from = itemid,
    values_from = valuenum
  ) %>%
  dplyr$filter(
    !is.na(`50993`),
    !is.na(`50995`)
  ) %>%
  dplyr$collect()
|
||||||
|
|
||||||
|
# Number of missing values in each column of `ds1`; the column names of `ds1` are moved from row names into a regular column.
count2 <- tibble::rownames_to_column(data.frame(colSums(is.na(ds1))))
|
||||||
|
|
||||||
|
# Put the per-column NA counts for `ds` and `ds1` side by side.
# Both inputs get their key column from tibble::rownames_to_column(), whose
# default column name is "rowname"; naming the key explicitly avoids relying on
# the implicit natural join (and the message it emits).
counts <- count %>%
  dplyr$left_join(count2, by = "rowname")
|
||||||
|
|
||||||
# close database ----------------------------------------------------------
|
# close database ----------------------------------------------------------
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue