diff --git a/.gitignore b/.gitignore index e3c9989..fa89596 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,4 @@ ML/outputs Final Paper/ +test.Rda diff --git a/ML/2-modeling.R b/ML/2-modeling.R index b9735d8..a3cf251 100644 --- a/ML/2-modeling.R +++ b/ML/2-modeling.R @@ -47,9 +47,12 @@ ds_train <- rsample$training(model_data_split) ds_test <- rsample$testing(model_data_split) # verify distribution of data -table(ds_train$ft4_dia) %>% prop.table() -table(ds_test$ft4_dia) %>% prop.table() +strata1 <- table(ds_train$ft4_dia) %>% prop.table() %>% tibble::enframe() %>% dplyr::rename(Train = value) +strata2 <- table(ds_test$ft4_dia) %>% prop.table() %>% tibble::enframe() %>% dplyr::rename(Test = value) +strata_table <- strata1 %>% + dplyr::left_join(strata2) %>% + dplyr::rename(Class = name) # random forest classification ----------------------------------------------------------- diff --git a/_quarto.yml b/_quarto.yml index a5a5489..71a5344 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -10,6 +10,8 @@ book: - chapter1.qmd - chapter2.qmd - chapter3.qmd + - chapter4.qmd + - chapter5.qmd - references.qmd abstract: "This is a test to see what happens with this" diff --git a/chapter4.qmd b/chapter4.qmd new file mode 100644 index 0000000..980ab16 --- /dev/null +++ b/chapter4.qmd @@ -0,0 +1,45 @@ +# Results + +```{r} +#| include: false +#| cache: true + +library(magrittr) +load("test.Rda") + +``` + +The final dataset used for this analysis consisted of 11,340 +observations. All observations contained a TSH and Free T4 result and +fewer than three missing results from all other analytes selected for the +study. The dataset was then randomly split into a training set +containing 9,071 observations and a testing set containing 2,269 +observations. The data was split using stratification of the Free T4 +laboratory diagnostic value. @tbl-strata shows the split percentages. 
+ +```{r} +#| label: tbl-strata +#| tbl-cap: Data Stratification +#| echo: false + +strata_table %>% knitr::kable() + +``` + +First, the report shows the ability of classification algorithms to +predict whether Free T4 will be diagnostic, with the prediction quality +measured by Area Under Curve (AUC) and accuracy. Data regarding the +univariate association between each predictor analyte and the Free T4 +Diagnostic value is then presented. Finally, the report presents the +extent to which Free T4 can be predicted, examining the correlation +statistics denoting the relationship between measured and predicted Free +T4 values. + +## Predictability of Free T4 Classifications + +In clinical decision-making, a key consideration in interpreting +numerical laboratory results is often just whether the results fall +within the normal reference range [@luo2016]. In the case of Free T4 +reflex testing, the results will either fall within the normal range, +indicating the Free T4 is not diagnostic of hyper- or hypothyroidism, or +they will fall outside that range, indicating they are diagnostic. diff --git a/chapter5.qmd b/chapter5.qmd new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/chapter5.qmd @@ -0,0 +1 @@ + diff --git a/references.bib b/references.bib index 60cb331..526df30 100644 --- a/references.bib +++ b/references.bib @@ -335,3 +335,18 @@ DOI: 10.13026/S6N6-XD98} url = {https://dl.acm.org/doi/10.1145/2939672.2939785}, address = {New York, NY, USA} } + +@article{luo2016, + title = {Using Machine Learning to Predict Laboratory Test Results}, + author = {Luo, Yuan and Szolovits, Peter and Dighe, Anand S. and Baron, Jason M.}, + year = {2016}, + month = {06}, + date = {2016-06}, + journal = {American Journal of Clinical Pathology}, + pages = {778--788}, + volume = {145}, + number = {6}, + doi = {10.1093/ajcp/aqw064}, + note = {PMID: 27329638}, + langid = {eng} +}