From 2c62fcd14d001334afd7a42bc8c6c3184603b7dd Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Fri, 17 May 2024 16:19:09 -0400 Subject: [PATCH] add new draft post --- .gitignore | 1 + .lintr | 9 + .../cdc_wonder_request.xml | 313 ++++++++++++++++++ posts/2024-05-15-US-NHL-Birthrate/index.qmd | 57 ++++ .../nhl_usa_births.R | 116 +++++++ 5 files changed, 496 insertions(+) create mode 100644 .lintr create mode 100644 posts/2024-05-15-US-NHL-Birthrate/cdc_wonder_request.xml create mode 100644 posts/2024-05-15-US-NHL-Birthrate/index.qmd create mode 100644 posts/2024-05-15-US-NHL-Birthrate/nhl_usa_births.R diff --git a/.gitignore b/.gitignore index b7a4307..ea15bce 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ po/*~ rsconnect/ /.quarto/ +cspell.json diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..a6bb6eb --- /dev/null +++ b/.lintr @@ -0,0 +1,9 @@ +linters: linters_with_defaults( + line_length_linter(120) + , commented_code_linter = NULL + , object_usage_linter = NULL + , indentation_linter( + hanging_indent_style = "never" + ) + ) # see vignette("lintr") +encoding: "UTF-8" diff --git a/posts/2024-05-15-US-NHL-Birthrate/cdc_wonder_request.xml b/posts/2024-05-15-US-NHL-Birthrate/cdc_wonder_request.xml new file mode 100644 index 0000000..161c1ed --- /dev/null +++ b/posts/2024-05-15-US-NHL-Birthrate/cdc_wonder_request.xml @@ -0,0 +1,313 @@ + + + B_1 + D66.V20 + + + B_2 + D66.V25 + + + B_3 + *None* + + + B_4 + *None* + + + B_5 + *None* + + + F_D66.V21 + *All* + + + F_D66.V22 + *All* + + + F_D66.V37 + *All* + + + I_D66.V21 + *All* (The United States) + + + + I_D66.V22 + *All* (The United States) + + + + I_D66.V37 + *All* (The United States) + + + + M_1 + D66.M1 + + + O_V21_fmode + freg + + + O_V22_fmode + freg + + + O_V37_fmode + freg + + + O_age + D66.V1 + + + O_gestation + D66.V23 + + + O_javascript + on + + + O_location + D66.V21 + + + O_oc-sect1-request + close + + + O_oe_gestation + D66.V33 + + + O_precision + 2 + + + O_rate_per + 1000 + + + O_show_totals + true + + + O_timeout + 600 + + + O_title + + + + O_urban + D66.V154 + + + O_weight + D66.V9 + + + V_D66.V1 + *All* + + + V_D66.V10 + *All* + + + V_D66.V14 + *All* + + + V_D66.V154 + *All* + + + V_D66.V155 + *All* + + + V_D66.V156 + *All* + + + V_D66.V157 + *All* + + + V_D66.V16 + *All* + + + V_D66.V17 + *All* + + + V_D66.V18 + *All* + + + V_D66.V2 + *All* + + + V_D66.V20 + *All* + + + V_D66.V21 + + + + V_D66.V22 + + + + V_D66.V23 + *All* + + + V_D66.V24 + *All* + + + V_D66.V25 + *All* + + + V_D66.V26 + *All* + + + V_D66.V27 + *All* + + + V_D66.V28 + *All* + + + V_D66.V29 + *All* + + + V_D66.V3 + *All* + + + V_D66.V30 + *All* + + + V_D66.V31 + *All* + + + V_D66.V32 + *All* + + + V_D66.V33 + *All* + + + V_D66.V34 + *All* + + + V_D66.V35 + *All* + + + V_D66.V37 + + + + V_D66.V38 + *All* + + + V_D66.V39 + *All* + + + V_D66.V4 + *All* + + + V_D66.V41 + *All* + + + V_D66.V42 + *All* + + + V_D66.V43 + *All* + + + V_D66.V5 + *All* + + + V_D66.V6 + *All* + + + V_D66.V7 + *All* + + + V_D66.V8 + *All* + + + V_D66.V9 + *All* + + + action-Send + Send + + + dataset_code + D66 + + + dataset_label + Natality, 2007-2022 + + + dataset_vintage + 2022 + + + finder-stage-D66.V21 + codeset + + + finder-stage-D66.V22 + codeset + + + finder-stage-D66.V37 + codeset + + + saved_id + + + + stage + request + + \ No newline at end of file diff --git a/posts/2024-05-15-US-NHL-Birthrate/index.qmd b/posts/2024-05-15-US-NHL-Birthrate/index.qmd new file mode 100644 index 0000000..bab0b4a --- /dev/null +++ b/posts/2024-05-15-US-NHL-Birthrate/index.qmd @@ -0,0 +1,57 @@ +--- +title: "Does a US Born Players Birthdate affect their shot at the NHL" +description: "Inspired by TidyTuesday Week 2 - 2024 dataset about Candian Players, lets look at the same anaylyis for American Born Players" +date: "5/15/2024" #Update when live +categories: + - tidytuesday + - R + - dataViz +draft: TRUE +--- + +```{r} +#| include: false + +knitr::read_chunk(here::here("posts", "2024-05-15-US-NHL-Birthrate", "nhl_usa_births.R")) + +``` + +```{r } +#| label: Setup +#| echo: FALSE +#| warning: false +#| message: false +``` +This post is inspired by this fantastic [blog post](https://jlaw.netlify.app/2023/12/04/are-birth-dates-still-destiny-for-canadian-nhl-players/) on Jlaws Blog. In it they explore how in the first chapter Malcolm Gladwell’s Outliers he discusses how in Canadian Junior Hockey there is a higher likelihood for players to be born in the first quarter of the year. As it appears cutoff dates for USA hockey are different and they are currently using June 1st (if my internet searches are to be believed), I wondered if the same analysis would hold true for American Born Players. + +## Distribution of Births by Month in the United States + +The data for US Birth Rates can be pulled from [CDC Wonder](https://wonder.cdc.gov/). The particular table of interest is the Natality, 2007 - 2022. CDC Wonder has a quite interesting API that requires a request with quite a few XML parameters. Thankfully you can build the request on the website and a nice package already exists to send the query. Check out the [Wonderapi Page](https://socdatar.github.io/wonderapi/index.html) for more info. + +```{r} +#| label: USA_Birth_Data +``` + +### Graph Distribution of Births Compared to Expected + +note use of month.name to sort + +```{r} +#| label: USA_Distro +``` + +## Hockey Data + +```{r} +#| label: Roster_Data +``` + + +## Graph It + + +```{r} +#| label: Graph_It +#| fig-height: 7 +#| fig-width: 7 +``` \ No newline at end of file diff --git a/posts/2024-05-15-US-NHL-Birthrate/nhl_usa_births.R b/posts/2024-05-15-US-NHL-Birthrate/nhl_usa_births.R new file mode 100644 index 0000000..ce7807a --- /dev/null +++ b/posts/2024-05-15-US-NHL-Birthrate/nhl_usa_births.R @@ -0,0 +1,116 @@ +#These first few lines run only when the file is run in RStudio, !!NOT when an Rmd/Rnw file calls it!! +rm(list = ls(all = TRUE)) #Clear the variables from previous runs. +cat("\f") # clear console + +# cSpell:disable disable spell checking for this document + +## ---- Setup ---- +library(magrittr) +library(ggplot2) +library(ggtext) +library(ggimage) + +## ---- USA_Birth_Data ---- +usa_raw <- wonderapi::send_query("D66", here::here("posts", "2024-05-15-US-NHL-Birthrate", "cdc_wonder_request.xml")) + +usa_births <- usa_raw %>% + dplyr::group_by(Month) %>% + dplyr::summarise(country_births = sum(Births), .groups = "drop") %>% + dplyr::mutate(country_pct = country_births / sum(country_births)) + +## ---- USA_Distro ---- + +usa_births %>% + dplyr::mutate(expected_births = dplyr::case_when( + Month %in% c("April", "June", "September", "November") ~ 30 / 365 + , Month == "February" ~ 28 / 365 + , .default = 31 / 365 + ) + , difference = country_pct - expected_births + , dplyr::across(Month, ~factor(., levels = month.name)) + , dplyr::across(c(country_pct, expected_births, difference), ~scales::percent(., accuracy = .1)) + ) %>% + dplyr::arrange(Month) %>% + dplyr::rename_with(~stringr::str_replace_all(., "_", " ")) %>% + dplyr::rename_with(stringr::str_to_title) %>% + kableExtra::kbl() %>% + kableExtra::kable_styling() + +## ---- Roster_Data ---- + +teams <- httr::GET("https://api.nhle.com/stats/rest/en/team") %>% + httr::content() %>% + .[["data"]] %>% + tibble::tibble(data = .) %>% + tidyr::unnest_wider(data) + +get_roster <- function(team){ + httr::GET(glue::glue("https://api-web.nhle.com/v1/roster/{team}/20232024")) %>% + httr::content() %>% + purrr::flatten() %>% + tibble::tibble(data = .) %>% + tidyr::hoist( + .col = "data" + , "firstName" = list("firstName", 1L) + , "lastName" = list("lastName", 1L) + , "positionCode" + , "birthDate" + , "birthCountry" + ) +} + +usa_roster <- purrr::map(teams$triCode, get_roster) %>% + purrr::list_rbind() %>% + dplyr::filter(!is.na(firstName)) %>% + dplyr::filter(birthCountry == "USA") %>% + dplyr::mutate( + mob = lubridate::month(lubridate::ymd(birthDate), label = TRUE, abbr = FALSE) + , mob_id = lubridate::month(lubridate::ymd(birthDate)) + ) %>% + dplyr::count(mob_id, mob, name = "players") %>% + dplyr::mutate(player_pct = players / sum(players)) + +## ---- Graph_It ---- + +nhl_icon <- "https://pbs.twimg.com/media/F9sTTAYakAAkRv6.png" +usa_icon <- "https://cdn-icons-png.flaticon.com/512/197/197484.png" + +combined <- usa_roster %>% + dplyr::left_join(usa_births, by = c("mob" = "Month")) %>% + dplyr::mutate( + random = dplyr::case_when( + mob_id %in% c(4, 6, 9, 11) ~ 30 / 365, + mob_id %in% c(1, 3, 5, 7, 8, 10, 12) ~ 31 / 365, + mob_id == 2 ~ 28 / 365 + ) + ) + +g1 <- combined %>% + ggplot(aes(x = forcats::fct_reorder(mob, -mob_id))) + + geom_line(aes(y = random, group = 1), linetype = 2, color = "grey60") + + geom_linerange(aes(ymin = country_pct, ymax = player_pct)) + + geom_image(aes(image = nhl_icon, y = player_pct)) + + geom_image(aes(image = usa_icon, y = country_pct), size = 0.04) + + geom_text(aes(label = scales::percent(player_pct, accuracy = .1), + y = dplyr::if_else(player_pct > country_pct, player_pct + .005, player_pct - .005))) + + geom_text(aes(label = scales::percent(country_pct, accuracy = .1), + y = dplyr::if_else(country_pct > player_pct, country_pct + .005, country_pct - .005))) + + scale_y_continuous(labels = scales::percent) + + coord_flip() + + labs( + x = "Month of Birth" + , y = "Percentage of Births" + , title = "Are United States Born NHL Players More Likely to be Born Early in the Year?" + , subtitle = "Comparing the distribution of birth months between Canadian NHL players and Canada in general" + , caption = glue::glue( + " - US NHL Players Birth Month Distribution
+ - US Birth Month (2007-2022) Distribution" + ) + ) + + theme_minimal() + + theme( + plot.caption = element_markdown() + ,plot.title.position = "plot" + ) + +g1