diff --git a/ML/0-data_prep.R b/ML/0-data_prep.R index 63ff9d3..f3d09b0 100644 --- a/ML/0-data_prep.R +++ b/ML/0-data_prep.R @@ -70,7 +70,8 @@ testds <- readr::read_csv( ) - +# using chart time instead of specimen id results in fewer NA values. +# total protein still has very few results ds1 <- dplyr$tbl(db, "labevents") %>% dplyr$filter(itemid %in% test_list) %>% @@ -88,6 +89,9 @@ count2 <- data.frame(colSums(is.na(ds1))) %>% tibble::rownames_to_column() counts <- count %>% dplyr$left_join(count2) +# using charttime, total of 5,424 rows with all values filled in +ds1 %>% dplyr$filter(dplyr$across(where(is.numeric), ~!is.na(.x))) + # close database ---------------------------------------------------------- dbDisconnect(db)