From c629848502edd4ce1da5ecba4b3433b99d050576 Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Mon, 9 Jan 2023 09:07:17 -0500 Subject: [PATCH] Create 1-data-exploration.R --- ML/1-data-exploration.R | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 ML/1-data-exploration.R diff --git a/ML/1-data-exploration.R b/ML/1-data-exploration.R new file mode 100644 index 0000000..8052b6d --- /dev/null +++ b/ML/1-data-exploration.R @@ -0,0 +1,44 @@ +rm(list = ls(all.names = TRUE)) # Clear the memory of variables from previous run. +cat("\014") # Clear the console + + +# load packages ----------------------------------------------------------- + +box::use( + magrittr[`%>%`] + ,here[here] + ,dplyr + ,readr + ,tidyr + ,ggplot2 +) + + + + +# load data --------------------------------------------------------------- + +ds_high_tsh <- readr$read_rds( + here("ML","data-unshared","ds_high_tsh.RDS") + ) + + + +# data manipulation ------------------------------------------------------- + +#here I am adding a column to determine if the Free T4 Value is diagnostic or not +# using the FT4 Reference range low as the cut off (0.93) + + +ds_high_tsh <- ds_high_tsh %>% + dplyr$mutate(ft4_dia = dplyr$if_else(`50995` < 0.93, 1, 0)) + + + +# basic visualization ----------------------------------------------------- + +g1 <- ds_high_tsh %>% + dplyr$select(-subject_id, - charttime) %>% + dplyr$mutate(dplyr$across(gender, ~dplyr$recode(.,M = 1, F = 2))) %>% + tidyr$pivot_longer(cols = dplyr$everything()) %>% +