update data

This commit is contained in:
Kyle Belanger 2023-01-31 19:33:24 -05:00
parent 1b72be4248
commit c7aa0b2da1
3 changed files with 17 additions and 19 deletions

View file

@ -107,7 +107,7 @@ ds_final <- ds1 %>%
) %>%
dplyr$filter(ft4_dia != "Normal TSH") %>%
dplyr$rename(!!!test_list_names) %>%
dplyr$select(-FT4) %>%
# dplyr$select(-FT4) %>%
dplyr$relocate(gender, anchor_age)
# save data ---------------------------------------------------------------

View file

@ -60,7 +60,7 @@ ds_recode <- ds1 %>%
#summary Table
summary_tbl <- ds1 %>%
dplyr$select(-subject_id, -charttime) %>%
dplyr$select(-subject_id, -charttime, -FT4) %>%
gtsummary$tbl_summary(
by = ft4_dia
,missing = "no"
@ -83,7 +83,7 @@ summary_tbl <- ds1 %>%
# corr-plot ---------------------------------------------------------------
corr_plot <- ds1 %>%
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime, -FT4) %>%
dplyr$rename(Age = anchor_age) %>%
GGally$ggcorr(nbreaks = 5, palette = "Greys"
,label = TRUE, label_size = 3, label_color = "white"
@ -110,7 +110,7 @@ gp2$ggsave(
#quick recode of gender, will still do recoding during feature engineering
g1 <- ds1 %>%
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
dplyr$select(-gender,-ft4_dia, -subject_id, -charttime, -FT4) %>%
tidyr$pivot_longer(cols = dplyr$everything()) %>%
ggplot(aes(x = value)) +
gp2$geom_histogram(na.rm = TRUE) +
@ -139,7 +139,7 @@ gp2$ggsave(
# this takes a bit to load. No discernible patterns in the data
g2 <- ds_recode %>%
dplyr$select(-gender, -subject_id, -charttime) %>%
dplyr$select(-gender, -subject_id, -charttime, -FT4) %>%
dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
tidyr$pivot_longer(cols = !ft4_dia) %>%
ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +

View file

@ -51,12 +51,13 @@ table(ds_test$ft4_dia) %>% prop.table()
rf_model <- p$rand_forest(trees = 1900) %>%
p$set_engine("ranger") %>% p$set_mode("classification")
p$set_engine("ranger") %>% p$set_mode("regression")
rf_recipe <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
rf_recipe <- r$recipe(FT4 ~ . , data = ds_train) %>%
r$update_role(subject_id, new_role = "id") %>%
r$update_role(charttime, new_role = "time") %>%
r$step_impute_bag(r$all_numeric())
r$update_role(ft4_dia, new_role = "class") %>%
r$step_impute_bag(r$all_predictors())
@ -67,18 +68,15 @@ rf_workflow <- wf$workflow() %>%
rf_fit <- p$fit(rf_workflow, ds_train)
rf_predict <- ds_train %>%
dplyr::select(ft4_dia) %>%
dplyr::bind_cols(
predict(rf_fit, ds_train)
,predict(rf_fit, ds_train, type = "prob")
)
dplyr::select(FT4) %>%
dplyr::bind_cols(predict(rf_fit, ds_train))
ys$accuracy(rf_predict, ft4_dia, .pred_class)
ys$conf_mat(rf_predict, ft4_dia, .pred_class)
ggplot2::autoplot(ys$roc_curve(rf_predict, ft4_dia, `.pred_Hypo`:`.pred_Non-Hyper`))
gp2$ggplot(rf_predict, gp2$aes(x = FT4, y = .pred)) +
gp2$geom_point()
ys$sensitivity(rf_predict,ft4_dia, .pred_class, estimator = "macro_weighted")
ys$sensitivity(rf_predict,ft4_dia, .pred_class, estimator = "micro")
ys$roc_auc(rf_predict, ft4_dia, `.pred_Hypo`, `.pred_Non-Hypo`, `.pred_Hyper`, `.pred_Non-Hyper`)
ys$rmse(rf_predict, FT4, .pred)
metrics <- ys$metric_set(ys$rmse, ys$rsq, ys$mae)
metrics(rf_predict, FT4, .pred)