Update 1-data-exploration.R
This commit is contained in:
		
							parent
							
								
									538f9f76e5
								
							
						
					
					
						commit
						bbe5cd1ae8
					
				
					 1 changed files with 4 additions and 4 deletions
				
			
		| 
						 | 
					@ -30,7 +30,6 @@ ds0 <- readr$read_rds(here("ML","data-unshared","ds_final.RDS"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ds1 <- ds0 %>%
 | 
					ds1 <- ds0 %>%
 | 
				
			||||||
  dplyr$select(-subject_id, -charttime) %>%
 | 
					 | 
				
			||||||
  dplyr$mutate(dplyr$across(
 | 
					  dplyr$mutate(dplyr$across(
 | 
				
			||||||
    ft4_dia
 | 
					    ft4_dia
 | 
				
			||||||
    , ~factor(., levels = c("Hypo", "Non-Hypo","Hyper", "Non-Hyper")
 | 
					    , ~factor(., levels = c("Hypo", "Non-Hypo","Hyper", "Non-Hyper")
 | 
				
			||||||
| 
						 | 
					@ -61,6 +60,7 @@ ds_recode <- ds1 %>%
 | 
				
			||||||
#summary Table
 | 
					#summary Table
 | 
				
			||||||
 | 
					
 | 
				
			||||||
summary_tbl <- ds1 %>%
 | 
					summary_tbl <- ds1 %>%
 | 
				
			||||||
 | 
					  dplyr$select(-subject_id, -charttime) %>%
 | 
				
			||||||
  gtsummary$tbl_summary(
 | 
					  gtsummary$tbl_summary(
 | 
				
			||||||
    by = ft4_dia
 | 
					    by = ft4_dia
 | 
				
			||||||
    ,missing = "no"
 | 
					    ,missing = "no"
 | 
				
			||||||
| 
						 | 
					@ -88,7 +88,7 @@ summary_tbl <- ds1 %>%
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# correlation plot
 | 
					# correlation plot
 | 
				
			||||||
corr_plot <- cor(
 | 
					corr_plot <- cor(
 | 
				
			||||||
  ds1 %>% dplyr$select(-gender,-ft4_dia)
 | 
					  ds1 %>% dplyr$select(-gender,-ft4_dia, -subject_id, -charttime)
 | 
				
			||||||
  ,use = "complete.obs"
 | 
					  ,use = "complete.obs"
 | 
				
			||||||
  ) %>%
 | 
					  ) %>%
 | 
				
			||||||
  corrplot::corrplot(method = "number", type = "lower", tl.col = "black", tl.srt = 45
 | 
					  corrplot::corrplot(method = "number", type = "lower", tl.col = "black", tl.srt = 45
 | 
				
			||||||
| 
						 | 
					@ -105,7 +105,7 @@ dev.off()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#quick recode of gender, will still do recoding during feature engineering
 | 
					#quick recode of gender, will still do recoding during feature engineering
 | 
				
			||||||
g1 <- ds1 %>%
 | 
					g1 <- ds1 %>%
 | 
				
			||||||
  dplyr$select(-gender, -ft4_dia) %>%
 | 
					  dplyr$select(-gender,-ft4_dia, -subject_id, -charttime) %>%
 | 
				
			||||||
  tidyr$pivot_longer(cols = dplyr$everything()) %>%
 | 
					  tidyr$pivot_longer(cols = dplyr$everything()) %>%
 | 
				
			||||||
  ggplot(aes(x = value)) +
 | 
					  ggplot(aes(x = value)) +
 | 
				
			||||||
  gp2$geom_histogram(na.rm = TRUE) +
 | 
					  gp2$geom_histogram(na.rm = TRUE) +
 | 
				
			||||||
| 
						 | 
					@ -128,7 +128,7 @@ gp2$ggsave(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# this takes a bit to load.  No discernible patterns in the data
 | 
					# this takes a bit to load.  No discernible patterns in the data
 | 
				
			||||||
g2 <- ds_recode %>%
 | 
					g2 <- ds_recode %>%
 | 
				
			||||||
  dplyr$select(-gender) %>%
 | 
					  dplyr$select(-gender, -subject_id, -charttime) %>%
 | 
				
			||||||
  dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
 | 
					  dplyr$mutate(dplyr$across(-ft4_dia, log)) %>%
 | 
				
			||||||
  tidyr$pivot_longer(cols = !ft4_dia) %>%
 | 
					  tidyr$pivot_longer(cols = !ft4_dia) %>%
 | 
				
			||||||
  ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
 | 
					  ggplot(aes(x = factor(ft4_dia), y = value, fill = factor(ft4_dia))) +
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue