From a5fc4dd7d647d541398137863749cc0161bdde9b Mon Sep 17 00:00:00 2001
From: Kyle Belanger
Date: Mon, 5 Jun 2023 16:44:39 -0400
Subject: [PATCH] Update 2-modeling.R

---
 ML/2-modeling.R | 133 +++++++++++++++++++++++++-----------------------
 1 file changed, 69 insertions(+), 64 deletions(-)

diff --git a/ML/2-modeling.R b/ML/2-modeling.R
index 1096549..aa05e52 100644
--- a/ML/2-modeling.R
+++ b/ML/2-modeling.R
@@ -106,6 +106,8 @@ ys$accuracy(final_rf_predict,truth = ft4_dia, estimate = .pred_class )
 
 final_conf_rf <- ys$conf_mat(final_rf_predict, ft4_dia, .pred_class)
 
+
+
 # fitting test data
 
 class_test_results <-
@@ -120,75 +122,78 @@ class_test_result_conf_matrix <- ys$conf_mat(
 ys$accuracy(class_test_results %>% tune::collect_predictions()
             ,truth = ft4_dia, estimate = .pred_class )
 
+class_test_results %>%
+  workflows::extract_fit_parsnip() %>%
+  vip::vip(num_features = 10)
 
 
 # x-boost- class ----------------------------------------------------------
 
-x_boost_rec <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
-  r$step_rm(FT4) %>%
-  r$step_impute_bag(r$all_predictors()) %>%
-  r$step_dummy(gender)
-
-xgb_spec <-
-  p$boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(),
-               min_n = tune(), sample_size = tune(), trees = tune()) %>%
-  p$set_engine("xgboost") %>%
-  p$set_mode("classification")
-
-xboost_wf <- wf$workflow() %>%
-  wf$add_model(xgb_spec) %>%
-  wf$add_recipe(x_boost_rec)
-
-xboost_parms <- p$extract_parameter_set_dials(xgb_spec)
-
-# takes around 6 hours to tune
-# xboost_tune <- xboost_wf %>%
-#   tune::tune_grid(
-#     data_fold
-#     ,grid = xboost_parms%>% d$grid_regular()
-#     ,control = tune::control_grid(verbose = TRUE)
+# x_boost_rec <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
+#   r$step_rm(FT4) %>%
+#   r$step_impute_bag(r$all_predictors()) %>%
+#   r$step_dummy(gender)
+#
+# xgb_spec <-
+#   p$boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(),
+#                min_n = tune(), sample_size = tune(), trees = tune()) %>%
+#   p$set_engine("xgboost") %>%
+#   p$set_mode("classification")
+#
+# xboost_wf <- wf$workflow() %>%
+#   wf$add_model(xgb_spec) %>%
+#   wf$add_recipe(x_boost_rec)
+#
+# xboost_parms <- p$extract_parameter_set_dials(xgb_spec)
+#
+# # takes around 6 hours to tune
+# # xboost_tune <- xboost_wf %>%
+# #   tune::tune_grid(
+# #     data_fold
+# #     ,grid = xboost_parms%>% d$grid_regular()
+# #     ,control = tune::control_grid(verbose = TRUE)
+# # )
+#
+#
+# xboost_best_params <- readRDS(here::here("ML", "outputs", "xboosttune_class.rds")) %>%
+#   tune::select_best(metric = "accuracy")
+#
+#
+# final_xboost_wf <- xboost_wf %>%
+#   tune::finalize_workflow(xboost_best_params)
+#
+# # fit training data to best model
+#
+# final_xboost_fit <- p$fit(final_xboost_wf, ds_train)
+#
+# final_xboost_predict <- ds_train %>%
+#   dplyr::select(ft4_dia) %>%
+#   dplyr::bind_cols(
+#     predict(final_xboost_fit, ds_train)
+#     ,predict(final_xboost_fit, ds_train, type = "prob")
 # )
-
-
-xboost_best_params <- readRDS(here::here("ML", "outputs", "xboosttune_class.rds")) %>%
-  tune::select_best(metric = "accuracy")
-
-
-final_xboost_wf <- xboost_wf %>%
-  tune::finalize_workflow(xboost_best_params)
-
-# fit training data to best model
-
-final_xboost_fit <- p$fit(final_xboost_wf, ds_train)
-
-final_xboost_predict <- ds_train %>%
-  dplyr::select(ft4_dia) %>%
-  dplyr::bind_cols(
-    predict(final_xboost_fit, ds_train)
-    ,predict(final_xboost_fit, ds_train, type = "prob")
-  )
-
-ys$accuracy(final_xboost_predict,truth = ft4_dia, estimate = .pred_class )
-
-final_conf_xboost <- ys$conf_mat(final_xboost_predict, ft4_dia, .pred_class)
-
-
-# fitting test data
-
-class_test_results_boost <-
-  final_xboost_fit %>%
-  tune::last_fit(split = model_data_split)
-
-
-ys$accuracy(class_test_results_boost %>% tune::collect_predictions()
-            ,truth = ft4_dia, estimate = .pred_class )
-
-class_test_result_conf_matrix <- ys$conf_mat(
-  class_test_results_boost %>% tune::collect_predictions()
-  ,truth = ft4_dia
-  ,estimate = .pred_class
-)
-
+#
+# ys$accuracy(final_xboost_predict,truth = ft4_dia, estimate = .pred_class )
+#
+# final_conf_xboost <- ys$conf_mat(final_xboost_predict, ft4_dia, .pred_class)
+#
+#
+# # fitting test data
+#
+# class_test_results_boost <-
+#   final_xboost_fit %>%
+#   tune::last_fit(split = model_data_split)
+#
+#
+# ys$accuracy(class_test_results_boost %>% tune::collect_predictions()
+#             ,truth = ft4_dia, estimate = .pred_class )
+#
+# class_test_result_conf_matrix <- ys$conf_mat(
+#   class_test_results_boost %>% tune::collect_predictions()
+#   ,truth = ft4_dia
+#   ,estimate = .pred_class
+#
+# )
+#
 # random forest regression ------------------------------------------------
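Note on the three lines added in the second hunk: class_test_results is the result of tune::last_fit(), so workflows::extract_fit_parsnip() first pulls the fitted parsnip model out of the finalized workflow, and vip::vip() then plots variable importance from that fit. The sketch below reproduces the same pattern end to end, outside this repo's r$/p$/wf$/ys$ package aliases. It is a minimal sketch under stated assumptions, not the project's code: the example data (two_class_dat from modeldata), the object names (toy_split, rf_wf, rf_last_fit), and a ranger random forest with importance = "impurity" are illustrative choices, not taken from 2-modeling.R.

# Minimal sketch (assumed data and object names, not from 2-modeling.R)
library(tidymodels)
library(vip)

data(two_class_dat, package = "modeldata")
toy_split <- initial_split(two_class_dat, strata = Class)

rf_wf <- workflow() %>%
  add_formula(Class ~ .) %>%
  add_model(
    rand_forest(mode = "classification", trees = 500) %>%
      # ranger only records importance scores if asked to at engine time;
      # without this argument, vip() has nothing to plot
      set_engine("ranger", importance = "impurity")
  )

# last_fit() refits on the training portion and evaluates on the held-out test set
rf_last_fit <- last_fit(rf_wf, split = toy_split)
collect_metrics(rf_last_fit)

# same pattern as the patch's added lines: extract the parsnip fit, then plot importance
# (two_class_dat has only two predictors, so the plot shows just two bars)
rf_last_fit %>%
  extract_fit_parsnip() %>%
  vip(num_features = 10)

The extraction step matters because vip() expects a fitted model object, not the last_fit results tibble or the workflow wrapper; dropping extract_fit_parsnip() would hand vip() an object it cannot interpret.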