Update 2-modeling.R

This commit is contained in:
Kyle Belanger 2023-06-05 16:44:39 -04:00
parent cbcd76da52
commit a5fc4dd7d6

View file

@ -106,6 +106,8 @@ ys$accuracy(final_rf_predict,truth = ft4_dia, estimate = .pred_class )
final_conf_rf <- ys$conf_mat(final_rf_predict, ft4_dia, .pred_class) final_conf_rf <- ys$conf_mat(final_rf_predict, ft4_dia, .pred_class)
# fitting test data # fitting test data
class_test_results <- class_test_results <-
@ -120,75 +122,78 @@ class_test_result_conf_matrix <- ys$conf_mat(
ys$accuracy(class_test_results %>% tune::collect_predictions() ,truth = ft4_dia, estimate = .pred_class ) ys$accuracy(class_test_results %>% tune::collect_predictions() ,truth = ft4_dia, estimate = .pred_class )
class_test_results %>%
workflows::extract_fit_parsnip() %>%
vip::vip(num_features = 10)
# x-boost- class ---------------------------------------------------------- # x-boost- class ----------------------------------------------------------
x_boost_rec <- r$recipe(ft4_dia ~ . , data = ds_train) %>% # x_boost_rec <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
r$step_rm(FT4) %>% # r$step_rm(FT4) %>%
r$step_impute_bag(r$all_predictors()) %>% # r$step_impute_bag(r$all_predictors()) %>%
r$step_dummy(gender) # r$step_dummy(gender)
#
xgb_spec <- # xgb_spec <-
p$boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(), # p$boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(),
min_n = tune(), sample_size = tune(), trees = tune()) %>% # min_n = tune(), sample_size = tune(), trees = tune()) %>%
p$set_engine("xgboost") %>% # p$set_engine("xgboost") %>%
p$set_mode("classification") # p$set_mode("classification")
#
xboost_wf <- wf$workflow() %>% # xboost_wf <- wf$workflow() %>%
wf$add_model(xgb_spec) %>% # wf$add_model(xgb_spec) %>%
wf$add_recipe(x_boost_rec) # wf$add_recipe(x_boost_rec)
#
xboost_parms <- p$extract_parameter_set_dials(xgb_spec) # xboost_parms <- p$extract_parameter_set_dials(xgb_spec)
#
# takes around 6 hours to tune # # takes around 6 hours to tune
# xboost_tune <- xboost_wf %>% # # xboost_tune <- xboost_wf %>%
# tune::tune_grid( # # tune::tune_grid(
# data_fold # # data_fold
# ,grid = xboost_parms%>% d$grid_regular() # # ,grid = xboost_parms%>% d$grid_regular()
# ,control = tune::control_grid(verbose = TRUE) # # ,control = tune::control_grid(verbose = TRUE)
# # )
#
#
# xboost_best_params <- readRDS(here::here("ML", "outputs", "xboosttune_class.rds")) %>%
# tune::select_best(metric = "accuracy")
#
#
# final_xboost_wf <- xboost_wf %>%
# tune::finalize_workflow(xboost_best_params)
#
# # fit training data to best model
#
# final_xboost_fit <- p$fit(final_xboost_wf, ds_train)
#
# final_xboost_predict <- ds_train %>%
# dplyr::select(ft4_dia) %>%
# dplyr::bind_cols(
# predict(final_xboost_fit, ds_train)
# ,predict(final_xboost_fit, ds_train, type = "prob")
# ) # )
#
# ys$accuracy(final_xboost_predict,truth = ft4_dia, estimate = .pred_class )
xboost_best_params <- readRDS(here::here("ML", "outputs", "xboosttune_class.rds")) %>% #
tune::select_best(metric = "accuracy") # final_conf_xboost <- ys$conf_mat(final_xboost_predict, ft4_dia, .pred_class)
#
#
final_xboost_wf <- xboost_wf %>% # # fitting test data
tune::finalize_workflow(xboost_best_params) #
# class_test_results_boost <-
# fit training data to best model # final_xboost_fit %>%
# tune::last_fit(split = model_data_split)
final_xboost_fit <- p$fit(final_xboost_wf, ds_train) #
#
final_xboost_predict <- ds_train %>% # ys$accuracy(class_test_results_boost %>% tune::collect_predictions()
dplyr::select(ft4_dia) %>% # ,truth = ft4_dia, estimate = .pred_class )
dplyr::bind_cols( #
predict(final_xboost_fit, ds_train) # class_test_result_conf_matrix <- ys$conf_mat(
,predict(final_xboost_fit, ds_train, type = "prob") # class_test_results_boost %>% tune::collect_predictions()
) # ,truth = ft4_dia
# ,estimate = .pred_class
ys$accuracy(final_xboost_predict,truth = ft4_dia, estimate = .pred_class ) # )
#
final_conf_xboost <- ys$conf_mat(final_xboost_predict, ft4_dia, .pred_class)
# fitting test data
class_test_results_boost <-
final_xboost_fit %>%
tune::last_fit(split = model_data_split)
ys$accuracy(class_test_results_boost %>% tune::collect_predictions()
,truth = ft4_dia, estimate = .pred_class )
class_test_result_conf_matrix <- ys$conf_mat(
class_test_results_boost %>% tune::collect_predictions()
,truth = ft4_dia
,estimate = .pred_class
)
# random forest regression ------------------------------------------------ # random forest regression ------------------------------------------------