From bebaa34e7cd295932cfe1cba58b27e259a70402b Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Wed, 7 Jun 2023 14:08:53 -0400 Subject: [PATCH] update --- ML/2-modeling.R | 24 ++++++++++++++++++++++++ chapter4.qmd | 16 +++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/ML/2-modeling.R b/ML/2-modeling.R index 1d6b52c..b48e024 100644 --- a/ML/2-modeling.R +++ b/ML/2-modeling.R @@ -335,6 +335,30 @@ reg_test_results <- final_rf_reg_fit %>% tune::last_fit(split = model_data_split) + +reg_metrics(reg_test_results %>% tune::collect_predictions(), truth = FT4, estimate = .pred) + +final_reg_result_pred <- reg_test_results %>% tune::collect_predictions() + +ggplot(reg_test_results %>% tune::collect_predictions(), aes(x = FT4, y = .pred)) + + gp2$geom_abline(lty = 2) + + gp2$geom_point(alpha = 0.5) + + tune::coord_obs_pred() + +gp2$ggsave( + here("figures","reggression_pred.emf") + ,width = 7 + ,height = 7 + ,dpi = 300 + ,device = devEMF::emf +) +gp2$ggsave( + here("figures","reggression_pred.png") + ,width = 7 + ,height = 7 + ,dpi = 300 +) + ds_reg_class_pred <- reg_test_results %>% tune::collect_predictions() %>% dplyr::select(-id, -.config) %>% diff --git a/chapter4.qmd b/chapter4.qmd index 1c03466..9a663aa 100644 --- a/chapter4.qmd +++ b/chapter4.qmd @@ -68,8 +68,22 @@ the extent to which a feature has a \"meaningful\" impact on the predicted outcome [@laan2006]. As expected, TSH is the leading variable in importance rankings, leading all other variables by over 2000's points. The following three variables are all parts of a Complete Blood -Count (CBC), followed by the patients glucose value. +Count (CBC), followed by the patient's glucose value. ![Variable Importance Plot](figures/vip_class){#fig-vip-class} ## Predictability of Free T4 Results (Regression) + +Today, it has become widely accepted that a more sound approach to +assessing model performance is to assess the predictive accuracy via +loss functions. 
Loss functions are metrics that compare the predicted +values to the actual value (the output of a loss function is often +referred to as the error or pseudo residual) [@boehmke2020]. The loss +function selected to evaluate the final model was the Root Mean +Square Error (RMSE); on the final testing data the model achieved an RMSE of 0.334. +@fig-reg-pred shows the plotted results. The predicted results were also +used to add the diagnostic classification of Free T4. These results +achieved an accuracy of 0.790, and are thus very similar to those of the +classification model. + +![Regression Predictions Plot](figures/reggression_pred){#fig-reg-pred}