add new draft post

This commit is contained in:
Kyle Belanger 2024-05-17 16:19:09 -04:00
parent 733c3acdcb
commit 2c62fcd14d
5 changed files with 496 additions and 0 deletions

1
.gitignore vendored
View file

@ -49,3 +49,4 @@ po/*~
rsconnect/
/.quarto/
cspell.json

9
.lintr Normal file
View file

@ -0,0 +1,9 @@
linters: linters_with_defaults(
line_length_linter(120)
, commented_code_linter = NULL
, object_usage_linter = NULL
, indentation_linter(
hanging_indent_style = "never"
)
) # see vignette("lintr")
encoding: "UTF-8"

View file

@ -0,0 +1,313 @@
<?xml version="1.0" encoding="UTF-8"?><request-parameters>
<parameter>
<name>B_1</name>
<value>D66.V20</value>
</parameter>
<parameter>
<name>B_2</name>
<value>D66.V25</value>
</parameter>
<parameter>
<name>B_3</name>
<value>*None*</value>
</parameter>
<parameter>
<name>B_4</name>
<value>*None*</value>
</parameter>
<parameter>
<name>B_5</name>
<value>*None*</value>
</parameter>
<parameter>
<name>F_D66.V21</name>
<value>*All*</value>
</parameter>
<parameter>
<name>F_D66.V22</name>
<value>*All*</value>
</parameter>
<parameter>
<name>F_D66.V37</name>
<value>*All*</value>
</parameter>
<parameter>
<name>I_D66.V21</name>
<value>*All* (The United States)
</value>
</parameter>
<parameter>
<name>I_D66.V22</name>
<value>*All* (The United States)
</value>
</parameter>
<parameter>
<name>I_D66.V37</name>
<value>*All* (The United States)
</value>
</parameter>
<parameter>
<name>M_1</name>
<value>D66.M1</value>
</parameter>
<parameter>
<name>O_V21_fmode</name>
<value>freg</value>
</parameter>
<parameter>
<name>O_V22_fmode</name>
<value>freg</value>
</parameter>
<parameter>
<name>O_V37_fmode</name>
<value>freg</value>
</parameter>
<parameter>
<name>O_age</name>
<value>D66.V1</value>
</parameter>
<parameter>
<name>O_gestation</name>
<value>D66.V23</value>
</parameter>
<parameter>
<name>O_javascript</name>
<value>on</value>
</parameter>
<parameter>
<name>O_location</name>
<value>D66.V21</value>
</parameter>
<parameter>
<name>O_oc-sect1-request</name>
<value>close</value>
</parameter>
<parameter>
<name>O_oe_gestation</name>
<value>D66.V33</value>
</parameter>
<parameter>
<name>O_precision</name>
<value>2</value>
</parameter>
<parameter>
<name>O_rate_per</name>
<value>1000</value>
</parameter>
<parameter>
<name>O_show_totals</name>
<value>true</value>
</parameter>
<parameter>
<name>O_timeout</name>
<value>600</value>
</parameter>
<parameter>
<name>O_title</name>
<value/>
</parameter>
<parameter>
<name>O_urban</name>
<value>D66.V154</value>
</parameter>
<parameter>
<name>O_weight</name>
<value>D66.V9</value>
</parameter>
<parameter>
<name>V_D66.V1</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V10</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V14</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V154</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V155</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V156</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V157</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V16</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V17</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V18</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V2</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V20</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V21</name>
<value/>
</parameter>
<parameter>
<name>V_D66.V22</name>
<value/>
</parameter>
<parameter>
<name>V_D66.V23</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V24</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V25</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V26</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V27</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V28</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V29</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V3</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V30</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V31</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V32</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V33</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V34</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V35</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V37</name>
<value/>
</parameter>
<parameter>
<name>V_D66.V38</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V39</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V4</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V41</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V42</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V43</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V5</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V6</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V7</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V8</name>
<value>*All*</value>
</parameter>
<parameter>
<name>V_D66.V9</name>
<value>*All*</value>
</parameter>
<parameter>
<name>action-Send</name>
<value>Send</value>
</parameter>
<parameter>
<name>dataset_code</name>
<value>D66</value>
</parameter>
<parameter>
<name>dataset_label</name>
<value>Natality, 2007-2022</value>
</parameter>
<parameter>
<name>dataset_vintage</name>
<value>2022</value>
</parameter>
<parameter>
<name>finder-stage-D66.V21</name>
<value>codeset</value>
</parameter>
<parameter>
<name>finder-stage-D66.V22</name>
<value>codeset</value>
</parameter>
<parameter>
<name>finder-stage-D66.V37</name>
<value>codeset</value>
</parameter>
<parameter>
<name>saved_id</name>
<value/>
</parameter>
<parameter>
<name>stage</name>
<value>request</value>
</parameter>
</request-parameters>

View file

@ -0,0 +1,57 @@
---
title: "Does a US Born Players Birthdate affect their shot at the NHL"
description: "Inspired by TidyTuesday Week 2 - 2024 dataset about Candian Players, lets look at the same anaylyis for American Born Players"
date: "5/15/2024" #Update when live
categories:
- tidytuesday
- R
- dataViz
draft: TRUE
---
```{r}
#| include: false
knitr::read_chunk(here::here("posts", "2024-05-15-US-NHL-Birthrate", "nhl_usa_births.R"))
```
```{r }
#| label: Setup
#| echo: FALSE
#| warning: false
#| message: false
```
This post is inspired by this fantastic [blog post](https://jlaw.netlify.app/2023/12/04/are-birth-dates-still-destiny-for-canadian-nhl-players/) on Jlaws Blog. In it they explore how in the first chapter Malcolm Gladwells Outliers he discusses how in Canadian Junior Hockey there is a higher likelihood for players to be born in the first quarter of the year. As it appears cutoff dates for USA hockey are different and they are currently using June 1st (if my internet searches are to be believed), I wondered if the same analysis would hold true for American Born Players.
## Distribution of Births by Month in the United States
The data for US Birth Rates can be pulled from [CDC Wonder](https://wonder.cdc.gov/). The particular table of interest is the Natality, 2007 - 2022. CDC Wonder has a quite interesting API that requires a request with quite a few XML parameters. Thankfully you can build the request on the website and a nice package already exists to send the query. Check out the [Wonderapi Page](https://socdatar.github.io/wonderapi/index.html) for more info.
```{r}
#| label: USA_Birth_Data
```
### Graph Distribution of Births Compared to Expected
note use of month.name to sort
```{r}
#| label: USA_Distro
```
## Hockey Data
```{r}
#| label: Roster_Data
```
## Graph It
```{r}
#| label: Graph_It
#| fig-height: 7
#| fig-width: 7
```

View file

@ -0,0 +1,116 @@
#These first few lines run only when the file is run in RStudio, !!NOT when an Rmd/Rnw file calls it!!
rm(list = ls(all = TRUE)) #Clear the variables from previous runs.
cat("\f") # clear console
# cSpell:disable disable spell checking for this document
## ---- Setup ----
library(magrittr)
library(ggplot2)
library(ggtext)
library(ggimage)
## ---- USA_Birth_Data ----
usa_raw <- wonderapi::send_query("D66", here::here("posts", "2024-05-15-US-NHL-Birthrate", "cdc_wonder_request.xml"))
usa_births <- usa_raw %>%
dplyr::group_by(Month) %>%
dplyr::summarise(country_births = sum(Births), .groups = "drop") %>%
dplyr::mutate(country_pct = country_births / sum(country_births))
## ---- USA_Distro ----
usa_births %>%
dplyr::mutate(expected_births = dplyr::case_when(
Month %in% c("April", "June", "September", "November") ~ 30 / 365
, Month == "February" ~ 28 / 365
, .default = 31 / 365
)
, difference = country_pct - expected_births
, dplyr::across(Month, ~factor(., levels = month.name))
, dplyr::across(c(country_pct, expected_births, difference), ~scales::percent(., accuracy = .1))
) %>%
dplyr::arrange(Month) %>%
dplyr::rename_with(~stringr::str_replace_all(., "_", " ")) %>%
dplyr::rename_with(stringr::str_to_title) %>%
kableExtra::kbl() %>%
kableExtra::kable_styling()
## ---- Roster_Data ----
teams <- httr::GET("https://api.nhle.com/stats/rest/en/team") %>%
httr::content() %>%
.[["data"]] %>%
tibble::tibble(data = .) %>%
tidyr::unnest_wider(data)
get_roster <- function(team){
httr::GET(glue::glue("https://api-web.nhle.com/v1/roster/{team}/20232024")) %>%
httr::content() %>%
purrr::flatten() %>%
tibble::tibble(data = .) %>%
tidyr::hoist(
.col = "data"
, "firstName" = list("firstName", 1L)
, "lastName" = list("lastName", 1L)
, "positionCode"
, "birthDate"
, "birthCountry"
)
}
usa_roster <- purrr::map(teams$triCode, get_roster) %>%
purrr::list_rbind() %>%
dplyr::filter(!is.na(firstName)) %>%
dplyr::filter(birthCountry == "USA") %>%
dplyr::mutate(
mob = lubridate::month(lubridate::ymd(birthDate), label = TRUE, abbr = FALSE)
, mob_id = lubridate::month(lubridate::ymd(birthDate))
) %>%
dplyr::count(mob_id, mob, name = "players") %>%
dplyr::mutate(player_pct = players / sum(players))
## ---- Graph_It ----
nhl_icon <- "https://pbs.twimg.com/media/F9sTTAYakAAkRv6.png"
usa_icon <- "https://cdn-icons-png.flaticon.com/512/197/197484.png"
combined <- usa_roster %>%
dplyr::left_join(usa_births, by = c("mob" = "Month")) %>%
dplyr::mutate(
random = dplyr::case_when(
mob_id %in% c(4, 6, 9, 11) ~ 30 / 365,
mob_id %in% c(1, 3, 5, 7, 8, 10, 12) ~ 31 / 365,
mob_id == 2 ~ 28 / 365
)
)
g1 <- combined %>%
ggplot(aes(x = forcats::fct_reorder(mob, -mob_id))) +
geom_line(aes(y = random, group = 1), linetype = 2, color = "grey60") +
geom_linerange(aes(ymin = country_pct, ymax = player_pct)) +
geom_image(aes(image = nhl_icon, y = player_pct)) +
geom_image(aes(image = usa_icon, y = country_pct), size = 0.04) +
geom_text(aes(label = scales::percent(player_pct, accuracy = .1),
y = dplyr::if_else(player_pct > country_pct, player_pct + .005, player_pct - .005))) +
geom_text(aes(label = scales::percent(country_pct, accuracy = .1),
y = dplyr::if_else(country_pct > player_pct, country_pct + .005, country_pct - .005))) +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
labs(
x = "Month of Birth"
, y = "Percentage of Births"
, title = "Are United States Born NHL Players More Likely to be Born Early in the Year?"
, subtitle = "Comparing the distribution of birth months between Canadian NHL players and Canada in general"
, caption = glue::glue(
"<img src = {nhl_icon} width = '15' height=' 15' /> - US NHL Players Birth Month Distribution <br />
<img src = {usa_icon} width = '15' height=' 15' /> - US Birth Month (2007-2022) Distribution"
)
) +
theme_minimal() +
theme(
plot.caption = element_markdown()
,plot.title.position = "plot"
)
g1