Update 1-data-exploration.R
This commit is contained in:
parent
4a4e617918
commit
c331ab3f38
1 changed files with 25 additions and 41 deletions
|
@ -11,6 +11,7 @@ box::use(
|
|||
,readr
|
||||
,tidyr
|
||||
,gp2 = ggplot2[ggplot, aes]
|
||||
,gtsummary
|
||||
)
|
||||
|
||||
|
||||
|
@ -54,64 +55,47 @@ ds_low_tsh_raw <- readr$read_rds(
|
|||
|
||||
|
||||
ds_high_tsh <- ds_high_tsh_raw %>%
|
||||
dplyr$mutate(ft4_dia = dplyr$if_else(`50995` < 0.93, 1, 0)) %>%
|
||||
dplyr$mutate(ft4_dia = dplyr$if_else(`50995` < 0.93, TRUE, FALSE)) %>%
|
||||
#can rename with a vector using either of these
|
||||
# dplyr$rename_with(~names(test_list_names), dplyr$all_of(test_list_names))
|
||||
dplyr$rename(!!!test_list_names) %>%
|
||||
dplyr$select(-FT4)
|
||||
dplyr$select(-FT4, -subject_id, -charttime) %>%
|
||||
dplyr$relocate(gender, anchor_age)
|
||||
|
||||
|
||||
ds_low_tsh <- ds_low_tsh_raw %>%
|
||||
dplyr$mutate(ft4_dia = dplyr$if_else(`50995` > 1.7, 1, 0)) %>%
|
||||
dplyr$mutate(ft4_dia = dplyr$if_else(`50995` > 1.7, TRUE, FALSE)) %>%
|
||||
#can rename with a vector using either of these
|
||||
# dplyr$rename_with(~names(test_list_names), dplyr$all_of(test_list_names))
|
||||
dplyr$rename(!!!test_list_names) %>%
|
||||
dplyr$select(-FT4)
|
||||
dplyr$select(-FT4, -subject_id, -charttime) %>%
|
||||
dplyr$relocate(gender, anchor_age)
|
||||
|
||||
|
||||
|
||||
# basic visualization -----------------------------------------------------
|
||||
|
||||
#graph and table of missing tests
|
||||
|
||||
missing_count <- function(ds){
|
||||
|
||||
df <- dplyr$as_tibble(colSums(is.na(ds)), rownames = NA ) %>%
|
||||
tibble::rownames_to_column()
|
||||
|
||||
graph <- df %>%
|
||||
ggplot(aes(x = rowname, y = value)) +
|
||||
gp2$geom_col() +
|
||||
gp2$theme(
|
||||
axis.text.x = gp2$element_text(angle = 90)
|
||||
)
|
||||
|
||||
return(
|
||||
list(
|
||||
df = df
|
||||
,graph = graph
|
||||
)
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
high_missing <- missing_count(ds_high_tsh)
|
||||
low_missing <- missing_count(ds_low_tsh)
|
||||
|
||||
missing_table <- high_missing$df %>%
|
||||
dplyr$left_join(low_missing$df, by = "rowname")
|
||||
|
||||
#summary Table
|
||||
#use this instead of making myself
|
||||
ds_high_tsh %>% gtsummary::tbl_summary(by = ft4_dia)
|
||||
high_table_summary <- ds_high_tsh %>%
|
||||
gtsummary$tbl_summary(
|
||||
by = ft4_dia
|
||||
,missing = "no"
|
||||
,label = list(
|
||||
gender ~ "Gender"
|
||||
,anchor_age ~ "Age"
|
||||
)
|
||||
,statistic = list(
|
||||
gtsummary$all_continuous() ~ "{median} ({min}-{max})
|
||||
/n Missing: {N_miss} "
|
||||
)
|
||||
) %>%
|
||||
gtsummary$add_p() %>%
|
||||
gtsummary$modify_header(label = "**Variable**") %>%
|
||||
gtsummary$modify_spanning_header(gtsummary$all_stat_cols() ~ "**Free T4 Diagnostic**")
|
||||
|
||||
high_table_summary
|
||||
|
||||
# count of diagnostics ft4 and freq
|
||||
t1 <- ds_high_tsh %>%
|
||||
dplyr$count(ft4_dia) %>%
|
||||
dplyr$mutate(freq = n/sum(n)) %>%
|
||||
knitr::kable()
|
||||
|
||||
t1
|
||||
|
||||
# correlation plot
|
||||
ds_corr <- cor(ds_high_tsh %>% dplyr$select(-subject_id, - charttime)
|
||||
|
|
Loading…
Reference in a new issue