Update 1-data-exploration.R
This commit is contained in:
parent
66f213bc5a
commit
4486218a6e
1 changed file with 33 additions and 10 deletions
|
@ -38,6 +38,24 @@ ds1 <- ds0 %>%
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ds_recode <- ds1 %>%
|
||||||
|
dplyr$mutate(
|
||||||
|
dplyr$across(
|
||||||
|
gender
|
||||||
|
,~dplyr$recode(.,"M" = 1, "F" = 2)
|
||||||
|
)
|
||||||
|
,dplyr$across(
|
||||||
|
ft4_dia
|
||||||
|
,~dplyr$recode(.
|
||||||
|
,"Hypo" = 1
|
||||||
|
,"Non-Hypo" = 2
|
||||||
|
,"Normal TSH" = 3
|
||||||
|
,"Hyper" = 4
|
||||||
|
,"Non-Hyper" = 5
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# basic visualization -----------------------------------------------------
|
# basic visualization -----------------------------------------------------
|
||||||
|
|
||||||
|
@ -70,39 +88,44 @@ summary_tbl <- ds1 %>%
|
||||||
|
|
||||||
|
|
||||||
# correlation plot
|
# correlation plot
|
||||||
ds_corr <- cor(ds_high_tsh %>%
|
ds_corr <- cor(ds_recode,use = "complete.obs")
|
||||||
dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2)))
|
|
||||||
,use = "complete.obs")
|
|
||||||
|
|
||||||
|
|
||||||
#code for saving corr plot
|
#code for saving corr plot
|
||||||
png(here("figures","corrplot_high.png"), type = 'cairo')
|
png(here("figures","corrplot.png"), type = 'cairo')
|
||||||
corrplot::corrplot(ds_corr, method = "number")
|
corrplot::corrplot(ds_corr, method = "number")
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
#quick recode of gender, will still do recoding during feature engineering
|
#quick recode of gender, will still do recoding during feature engineering
|
||||||
g1 <- ds_high_tsh %>%
|
g1 <- ds1 %>%
|
||||||
dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2))) %>%
|
dplyr$select(-gender, -ft4_dia) %>%
|
||||||
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
||||||
ggplot(aes(x = value)) +
|
ggplot(aes(x = value)) +
|
||||||
gp2$geom_histogram(na.rm = TRUE) +
|
gp2$geom_histogram(na.rm = TRUE) +
|
||||||
gp2$facet_wrap(~name, scales = "free")
|
gp2$facet_wrap(~name, scales = "free") +
|
||||||
|
gp2$theme_bw() +
|
||||||
|
gp2$labs(
|
||||||
|
x = NULL
|
||||||
|
,y = NULL
|
||||||
|
)
|
||||||
g1
|
g1
|
||||||
|
|
||||||
|
|
||||||
# this takes a bit to load. No discernible patterns in the data
|
# this takes a bit to load. No discernible patterns in the data
|
||||||
g2 <- ds_high_tsh %>%
|
g2 <- ds_recode %>%
|
||||||
dplyr$select(-gender) %>%
|
dplyr$select(-gender) %>%
|
||||||
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
||||||
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
||||||
gp2$geom_boxplot(outlier.shape = NA, na.rm = TRUE) +
|
gp2$geom_boxplot(outlier.shape = NA, na.rm = TRUE) +
|
||||||
gp2$geom_jitter(size=.7, width=.1, alpha=.5, na.rm = TRUE) +
|
gp2$geom_jitter(size=.7, width=.1, alpha=.5, na.rm = TRUE) +
|
||||||
gp2$facet_wrap(~name, scales = "free")
|
gp2$facet_wrap(~name, scales = "free") +
|
||||||
|
gp2$theme_bw() +
|
||||||
|
gp2$scale_fill_brewer(palette = "Greys")
|
||||||
|
|
||||||
g2
|
g2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue