Update 1-data-exploration.R
This commit is contained in:
parent
538f9f76e5
commit
bbe5cd1ae8
1 changed file with 4 additions and 4 deletions
|
@@ -30,7 +30,6 @@ ds0 <- readr$read_rds(here("ML","data-unshared","ds_final.RDS"))
|
||||||
|
|
||||||
|
|
||||||
ds1 <- ds0 %>%
|
ds1 <- ds0 %>%
|
||||||
dplyr$select(-subject_id, -charttime) %>%
|
|
||||||
dplyr$mutate(dplyr$across(
|
dplyr$mutate(dplyr$across(
|
||||||
ft4_dia
|
ft4_dia
|
||||||
, ~factor(., levels = c("Hypo", "Non-Hypo","Hyper", "Non-Hyper")
|
, ~factor(., levels = c("Hypo", "Non-Hypo","Hyper", "Non-Hyper")
|
||||||
|
@@ -61,6 +60,7 @@ ds_recode <- ds1 %>%
|
||||||
#summary Table
|
#summary Table
|
||||||
|
|
||||||
summary_tbl <- ds1 %>%
|
summary_tbl <- ds1 %>%
|
||||||
|
dplyr$select(-subject_id, -charttime) %>%
|
||||||
gtsummary$tbl_summary(
|
gtsummary$tbl_summary(
|
||||||
by = ft4_dia
|
by = ft4_dia
|
||||||
,missing = "no"
|
,missing = "no"
|
||||||
|
@@ -88,7 +88,7 @@ summary_tbl <- ds1 %>%
|
||||||
|
|
||||||
# correlation plot
|
# correlation plot
|
||||||
corr_plot <- cor(
|
corr_plot <- cor(
|
||||||
ds1 %>% dplyr$select(-gender,-ft4_dia)
|
ds1 %>% dplyr$select(-gender,-ft4_dia, -subject_id, -charttime)
|
||||||
,use = "complete.obs"
|
,use = "complete.obs"
|
||||||
) %>%
|
) %>%
|
||||||
corrplot::corrplot(method = "number", type = "lower", tl.col = "black", tl.srt = 45
|
corrplot::corrplot(method = "number", type = "lower", tl.col = "black", tl.srt = 45
|
||||||
|
@@ -105,7 +105,7 @@ dev.off()
|
||||||
|
|
||||||
#quick recode of gender, will still do recoding during feature engineering
|
#quick recode of gender, will still do recoding during feature engineering
|
||||||
g1 <- ds1 %>%
|
g1 <- ds1 %>%
|
||||||
dplyr$select(-gender, -ft4_dia) %>%
|
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
|
||||||
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
||||||
ggplot(aes(x = value)) +
|
ggplot(aes(x = value)) +
|
||||||
gp2$geom_histogram(na.rm = TRUE) +
|
gp2$geom_histogram(na.rm = TRUE) +
|
||||||
|
@@ -128,7 +128,7 @@ gp2$ggsave(
|
||||||
|
|
||||||
# this takes a bit to load. No discernible patterns in the data
|
# this takes a bit to load. No discernible patterns in the data
|
||||||
g2 <- ds_recode %>%
|
g2 <- ds_recode %>%
|
||||||
dplyr$select(-gender) %>%
|
dplyr$select(-gender, -subject_id, -charttime) %>%
|
||||||
dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
|
dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
|
||||||
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
||||||
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
||||||
|
|
Loading…
Reference in a new issue