Update 1-data-exploration.R
This commit is contained in:
parent
4b794c104d
commit
ff36565211
1 changed files with 3 additions and 3 deletions
|
@ -62,6 +62,7 @@ ds_high_tsh <- ds_high_tsh_raw %>%
|
||||||
|
|
||||||
# basic visualization -----------------------------------------------------
|
# basic visualization -----------------------------------------------------
|
||||||
|
|
||||||
|
# count of missing values
|
||||||
g_count <- dplyr$as_tibble(colSums(is.na(ds_high_tsh)), rownames = NA ) %>%
|
g_count <- dplyr$as_tibble(colSums(is.na(ds_high_tsh)), rownames = NA ) %>%
|
||||||
tibble::rownames_to_column() %>%
|
tibble::rownames_to_column() %>%
|
||||||
ggplot(aes(x = rowname, y = value)) +
|
ggplot(aes(x = rowname, y = value)) +
|
||||||
|
@ -72,6 +73,7 @@ g_count <- dplyr$as_tibble(colSums(is.na(ds_high_tsh)), rownames = NA ) %>%
|
||||||
|
|
||||||
g_count
|
g_count
|
||||||
|
|
||||||
|
# correlation plot
|
||||||
ds_corr <- cor(ds_high_tsh %>% dplyr$select(-subject_id, - charttime)
|
ds_corr <- cor(ds_high_tsh %>% dplyr$select(-subject_id, - charttime)
|
||||||
%>% dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2)))
|
%>% dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2)))
|
||||||
,use = "complete.obs")
|
,use = "complete.obs")
|
||||||
|
@ -86,9 +88,7 @@ dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# quick view of distribution of features
|
||||||
|
|
||||||
|
|
||||||
#quick recode of gender, will still do recoding during feature engineering
|
#quick recode of gender, will still do recoding during feature engineering
|
||||||
g1 <- ds_high_tsh %>%
|
g1 <- ds_high_tsh %>%
|
||||||
dplyr$select(-subject_id, - charttime) %>%
|
dplyr$select(-subject_id, - charttime) %>%
|
||||||
|
|
Loading…
Reference in a new issue