# ---- Clean Environment ---- rm(list = ls()) # Load Packages ---- box::use( readr[read_csv], ) # Load Data ---- url <- "https://bit.ly/gacttCSV" ds_raw <- read_csv(url) # Count NAs ---- nrow(ds_raw) na_count <- ds_raw |> dplyr::mutate(num_na = rowSums(is.na(ds))) |> dplyr::summarise( n = dplyr::n(), .by = num_na ) # don't show this in blog but note that I did it ds_raw |> dplyr::summarise(dplyr::across(dplyr::everything(), ~ sum(is.na(.)), .names = "{.col}")) |> tidyr::pivot_longer(tidyr::everything()) |> dplyr::arrange(desc(value)) |> print(n = 50) # Clean Data ---- ds <- ds_raw |> dplyr::select( ID = `Submission ID`, age = `What is your age?`, cups = `How many cups of coffee do you typically drink per day?`, where_drink = `Where do you typically drink coffee?`, brew_method = `How do you brew coffee at home?`, favorite = `What is your favorite coffee drink?`, additions = `Do you usually add anything to your coffee?`, style = `Before today's tasting, which of the following best described what kind of coffee you like?`, strength = `How strong do you like your coffee?`, roast_level = `What roast level of coffee do you prefer?`, why_drink = `Why do you drink coffee?`, taste = `Do you like the taste of coffee?`, gender = Gender, education_level = `Education Level`, ethnicity = `Ethnicity/Race`, employment = `Employment Status`, political_view = `Political Affiliation` ) |> tidyr::drop_na()