update data
This commit is contained in:
parent
1b72be4248
commit
c7aa0b2da1
3 changed files with 17 additions and 19 deletions
|
@ -107,7 +107,7 @@ ds_final <- ds1 %>%
|
|||
) %>%
|
||||
dplyr$filter(ft4_dia != "Normal TSH") %>%
|
||||
dplyr$rename(!!!test_list_names) %>%
|
||||
dplyr$select(-FT4) %>%
|
||||
# dplyr$select(-FT4) %>%
|
||||
dplyr$relocate(gender, anchor_age)
|
||||
|
||||
# save data ---------------------------------------------------------------
|
||||
|
|
|
@ -60,7 +60,7 @@ ds_recode <- ds1 %>%
|
|||
#summary Table
|
||||
|
||||
summary_tbl <- ds1 %>%
|
||||
dplyr$select(-subject_id, -charttime) %>%
|
||||
dplyr$select(-subject_id, -charttime, -FT4) %>%
|
||||
gtsummary$tbl_summary(
|
||||
by = ft4_dia
|
||||
,missing = "no"
|
||||
|
@ -83,7 +83,7 @@ summary_tbl <- ds1 %>%
|
|||
# corr-plot ---------------------------------------------------------------
|
||||
|
||||
corr_plot <- ds1 %>%
|
||||
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
|
||||
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime, -FT4) %>%
|
||||
dplyr$rename(Age = anchor_age) %>%
|
||||
GGally$ggcorr(nbreaks = 5, palette = "Greys"
|
||||
,label = TRUE, label_size = 3, label_color = "white"
|
||||
|
@ -110,7 +110,7 @@ gp2$ggsave(
|
|||
|
||||
#quick recode of gender, will still do recoding during feature engineering
|
||||
g1 <- ds1 %>%
|
||||
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
|
||||
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime, -FT4) %>%
|
||||
tidyr$pivot_longer(cols = dplyr$everything()) %>%
|
||||
ggplot(aes(x = value)) +
|
||||
gp2$geom_histogram(na.rm = TRUE) +
|
||||
|
@ -139,7 +139,7 @@ gp2$ggsave(
|
|||
|
||||
# this takes a bit to load. No discernible patterns in the data
|
||||
g2 <- ds_recode %>%
|
||||
dplyr$select(-gender, -subject_id, -charttime) %>%
|
||||
dplyr$select(-gender, -subject_id, -charttime, -FT4) %>%
|
||||
dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
|
||||
tidyr$pivot_longer(cols = !ft4_dia) %>%
|
||||
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
|
||||
|
|
|
@ -51,12 +51,13 @@ table(ds_test$ft4_dia) %>% prop.table()
|
|||
|
||||
|
||||
rf_model <- p$rand_forest(trees = 1900) %>%
|
||||
p$set_engine("ranger") %>% p$set_mode("classification")
|
||||
p$set_engine("ranger") %>% p$set_mode("regression")
|
||||
|
||||
rf_recipe <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
|
||||
rf_recipe <- r$recipe(FT4 ~ . , data = ds_train) %>%
|
||||
r$update_role(subject_id, new_role = "id") %>%
|
||||
r$update_role(charttime, new_role = "time") %>%
|
||||
r$step_impute_bag(r$all_numeric())
|
||||
r$update_role(ft4_dia, new_role = "class") %>%
|
||||
r$step_impute_bag(r$all_predictors())
|
||||
|
||||
|
||||
|
||||
|
@ -67,18 +68,15 @@ rf_workflow <- wf$workflow() %>%
|
|||
rf_fit <- p$fit(rf_workflow, ds_train)
|
||||
|
||||
rf_predict <- ds_train %>%
|
||||
dplyr::select(ft4_dia) %>%
|
||||
dplyr::bind_cols(
|
||||
predict(rf_fit, ds_train)
|
||||
,predict(rf_fit, ds_train, type = "prob")
|
||||
)
|
||||
dplyr::select(FT4) %>%
|
||||
dplyr::bind_cols(predict(rf_fit, ds_train))
|
||||
|
||||
ys$accuracy(rf_predict, ft4_dia, .pred_class)
|
||||
ys$conf_mat(rf_predict, ft4_dia, .pred_class)
|
||||
|
||||
ggplot2::autoplot(ys$roc_curve(rf_predict, ft4_dia, `.pred_Hypo`:`.pred_Non-Hyper`))
|
||||
gp2$ggplot(rf_predict, gp2$aes(x = FT4, y = .pred)) +
|
||||
gp2$geom_point()
|
||||
|
||||
ys$sensitivity(rf_predict,ft4_dia, .pred_class, estimator = "macro_weighted")
|
||||
ys$sensitivity(rf_predict,ft4_dia, .pred_class, estimator = "micro")
|
||||
ys$roc_auc(rf_predict, ft4_dia, `.pred_Hypo`, `.pred_Non-Hypo`, `.pred_Hyper`, `.pred_Non-Hyper`)
|
||||
ys$rmse(rf_predict, FT4, .pred)
|
||||
|
||||
metrics <- ys$metric_set(ys$rmse, ys$rsq, ys$mae)
|
||||
|
||||
metrics(rf_predict, FT4, .pred)
|
||||
|
|
Loading…
Reference in a new issue