From ff36565211f2bc85f86d2f5cced9d983f9d0a1fc Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Fri, 13 Jan 2023 14:27:55 -0500 Subject: [PATCH] Update 1-data-exploration.R --- ML/1-data-exploration.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ML/1-data-exploration.R b/ML/1-data-exploration.R index c0254e2..ab5ad3b 100644 --- a/ML/1-data-exploration.R +++ b/ML/1-data-exploration.R @@ -62,6 +62,7 @@ ds_high_tsh <- ds_high_tsh_raw %>% # basic visualization ----------------------------------------------------- +# count of missing values g_count <- dplyr$as_tibble(colSums(is.na(ds_high_tsh)), rownames = NA ) %>% tibble::rownames_to_column() %>% ggplot(aes(x = rowname, y = value)) + @@ -72,6 +73,7 @@ g_count <- dplyr$as_tibble(colSums(is.na(ds_high_tsh)), rownames = NA ) %>% g_count +#correlation plot ds_corr <- cor(ds_high_tsh %>% dplyr$select(-subject_id, - charttime) %>% dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2))) ,use = "complete.obs") @@ -86,9 +88,7 @@ dev.off() - - - +# quick view of distribution of features #quick recode of gender, will still do recoding during feature engineering g1 <- ds_high_tsh %>% dplyr$select(-subject_id, - charttime) %>%