Update 2-modeling.R
This commit is contained in:
parent
cbcd76da52
commit
a5fc4dd7d6
1 changed files with 69 additions and 64 deletions
133
ML/2-modeling.R
133
ML/2-modeling.R
|
@ -106,6 +106,8 @@ ys$accuracy(final_rf_predict,truth = ft4_dia, estimate = .pred_class )
|
||||||
|
|
||||||
final_conf_rf <- ys$conf_mat(final_rf_predict, ft4_dia, .pred_class)
|
final_conf_rf <- ys$conf_mat(final_rf_predict, ft4_dia, .pred_class)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# fitting test data
|
# fitting test data
|
||||||
|
|
||||||
class_test_results <-
|
class_test_results <-
|
||||||
|
@ -120,75 +122,78 @@ class_test_result_conf_matrix <- ys$conf_mat(
|
||||||
|
|
||||||
ys$accuracy(class_test_results %>% tune::collect_predictions() ,truth = ft4_dia, estimate = .pred_class )
|
ys$accuracy(class_test_results %>% tune::collect_predictions() ,truth = ft4_dia, estimate = .pred_class )
|
||||||
|
|
||||||
|
class_test_results %>%
|
||||||
|
workflows::extract_fit_parsnip() %>%
|
||||||
|
vip::vip(num_features = 10)
|
||||||
|
|
||||||
|
|
||||||
# x-boost- class ----------------------------------------------------------
|
# x-boost- class ----------------------------------------------------------
|
||||||
|
|
||||||
x_boost_rec <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
|
# x_boost_rec <- r$recipe(ft4_dia ~ . , data = ds_train) %>%
|
||||||
r$step_rm(FT4) %>%
|
# r$step_rm(FT4) %>%
|
||||||
r$step_impute_bag(r$all_predictors()) %>%
|
# r$step_impute_bag(r$all_predictors()) %>%
|
||||||
r$step_dummy(gender)
|
# r$step_dummy(gender)
|
||||||
|
#
|
||||||
xgb_spec <-
|
# xgb_spec <-
|
||||||
p$boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(),
|
# p$boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(),
|
||||||
min_n = tune(), sample_size = tune(), trees = tune()) %>%
|
# min_n = tune(), sample_size = tune(), trees = tune()) %>%
|
||||||
p$set_engine("xgboost") %>%
|
# p$set_engine("xgboost") %>%
|
||||||
p$set_mode("classification")
|
# p$set_mode("classification")
|
||||||
|
#
|
||||||
xboost_wf <- wf$workflow() %>%
|
# xboost_wf <- wf$workflow() %>%
|
||||||
wf$add_model(xgb_spec) %>%
|
# wf$add_model(xgb_spec) %>%
|
||||||
wf$add_recipe(x_boost_rec)
|
# wf$add_recipe(x_boost_rec)
|
||||||
|
#
|
||||||
xboost_parms <- p$extract_parameter_set_dials(xgb_spec)
|
# xboost_parms <- p$extract_parameter_set_dials(xgb_spec)
|
||||||
|
#
|
||||||
# takes around 6 hours to tune
|
# # takes around 6 hours to tune
|
||||||
# xboost_tune <- xboost_wf %>%
|
# # xboost_tune <- xboost_wf %>%
|
||||||
# tune::tune_grid(
|
# # tune::tune_grid(
|
||||||
# data_fold
|
# # data_fold
|
||||||
# ,grid = xboost_parms%>% d$grid_regular()
|
# # ,grid = xboost_parms%>% d$grid_regular()
|
||||||
# ,control = tune::control_grid(verbose = TRUE)
|
# # ,control = tune::control_grid(verbose = TRUE)
|
||||||
|
# # )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# xboost_best_params <- readRDS(here::here("ML", "outputs", "xboosttune_class.rds")) %>%
|
||||||
|
# tune::select_best(metric = "accuracy")
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# final_xboost_wf <- xboost_wf %>%
|
||||||
|
# tune::finalize_workflow(xboost_best_params)
|
||||||
|
#
|
||||||
|
# # fit training data to best model
|
||||||
|
#
|
||||||
|
# final_xboost_fit <- p$fit(final_xboost_wf, ds_train)
|
||||||
|
#
|
||||||
|
# final_xboost_predict <- ds_train %>%
|
||||||
|
# dplyr::select(ft4_dia) %>%
|
||||||
|
# dplyr::bind_cols(
|
||||||
|
# predict(final_xboost_fit, ds_train)
|
||||||
|
# ,predict(final_xboost_fit, ds_train, type = "prob")
|
||||||
# )
|
# )
|
||||||
|
#
|
||||||
|
# ys$accuracy(final_xboost_predict,truth = ft4_dia, estimate = .pred_class )
|
||||||
xboost_best_params <- readRDS(here::here("ML", "outputs", "xboosttune_class.rds")) %>%
|
#
|
||||||
tune::select_best(metric = "accuracy")
|
# final_conf_xboost <- ys$conf_mat(final_xboost_predict, ft4_dia, .pred_class)
|
||||||
|
#
|
||||||
|
#
|
||||||
final_xboost_wf <- xboost_wf %>%
|
# # fitting test data
|
||||||
tune::finalize_workflow(xboost_best_params)
|
#
|
||||||
|
# class_test_results_boost <-
|
||||||
# fit training data to best model
|
# final_xboost_fit %>%
|
||||||
|
# tune::last_fit(split = model_data_split)
|
||||||
final_xboost_fit <- p$fit(final_xboost_wf, ds_train)
|
#
|
||||||
|
#
|
||||||
final_xboost_predict <- ds_train %>%
|
# ys$accuracy(class_test_results_boost %>% tune::collect_predictions()
|
||||||
dplyr::select(ft4_dia) %>%
|
# ,truth = ft4_dia, estimate = .pred_class )
|
||||||
dplyr::bind_cols(
|
#
|
||||||
predict(final_xboost_fit, ds_train)
|
# class_test_result_conf_matrix <- ys$conf_mat(
|
||||||
,predict(final_xboost_fit, ds_train, type = "prob")
|
# class_test_results_boost %>% tune::collect_predictions()
|
||||||
)
|
# ,truth = ft4_dia
|
||||||
|
# ,estimate = .pred_class
|
||||||
ys$accuracy(final_xboost_predict,truth = ft4_dia, estimate = .pred_class )
|
# )
|
||||||
|
#
|
||||||
final_conf_xboost <- ys$conf_mat(final_xboost_predict, ft4_dia, .pred_class)
|
|
||||||
|
|
||||||
|
|
||||||
# fitting test data
|
|
||||||
|
|
||||||
class_test_results_boost <-
|
|
||||||
final_xboost_fit %>%
|
|
||||||
tune::last_fit(split = model_data_split)
|
|
||||||
|
|
||||||
|
|
||||||
ys$accuracy(class_test_results_boost %>% tune::collect_predictions()
|
|
||||||
,truth = ft4_dia, estimate = .pred_class )
|
|
||||||
|
|
||||||
class_test_result_conf_matrix <- ys$conf_mat(
|
|
||||||
class_test_results_boost %>% tune::collect_predictions()
|
|
||||||
,truth = ft4_dia
|
|
||||||
,estimate = .pred_class
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# random forest regression ------------------------------------------------
|
# random forest regression ------------------------------------------------
|
||||||
|
|
Loading…
Reference in a new issue