add new draft post
This commit is contained in:
parent
733c3acdcb
commit
2c62fcd14d
5 changed files with 496 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -49,3 +49,4 @@ po/*~
|
|||
rsconnect/
|
||||
|
||||
/.quarto/
|
||||
cspell.json
|
||||
|
|
9
.lintr
Normal file
9
.lintr
Normal file
|
@ -0,0 +1,9 @@
|
|||
linters: linters_with_defaults(
|
||||
line_length_linter(120)
|
||||
, commented_code_linter = NULL
|
||||
, object_usage_linter = NULL
|
||||
, indentation_linter(
|
||||
hanging_indent_style = "never"
|
||||
)
|
||||
) # see vignette("lintr")
|
||||
encoding: "UTF-8"
|
313
posts/2024-05-15-US-NHL-Birthrate/cdc_wonder_request.xml
Normal file
313
posts/2024-05-15-US-NHL-Birthrate/cdc_wonder_request.xml
Normal file
|
@ -0,0 +1,313 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?><request-parameters>
|
||||
<parameter>
|
||||
<name>B_1</name>
|
||||
<value>D66.V20</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>B_2</name>
|
||||
<value>D66.V25</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>B_3</name>
|
||||
<value>*None*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>B_4</name>
|
||||
<value>*None*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>B_5</name>
|
||||
<value>*None*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>F_D66.V21</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>F_D66.V22</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>F_D66.V37</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>I_D66.V21</name>
|
||||
<value>*All* (The United States)
|
||||
</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>I_D66.V22</name>
|
||||
<value>*All* (The United States)
|
||||
</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>I_D66.V37</name>
|
||||
<value>*All* (The United States)
|
||||
</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>M_1</name>
|
||||
<value>D66.M1</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_V21_fmode</name>
|
||||
<value>freg</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_V22_fmode</name>
|
||||
<value>freg</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_V37_fmode</name>
|
||||
<value>freg</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_age</name>
|
||||
<value>D66.V1</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_gestation</name>
|
||||
<value>D66.V23</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_javascript</name>
|
||||
<value>on</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_location</name>
|
||||
<value>D66.V21</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_oc-sect1-request</name>
|
||||
<value>close</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_oe_gestation</name>
|
||||
<value>D66.V33</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_precision</name>
|
||||
<value>2</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_rate_per</name>
|
||||
<value>1000</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_show_totals</name>
|
||||
<value>true</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_timeout</name>
|
||||
<value>600</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_title</name>
|
||||
<value/>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_urban</name>
|
||||
<value>D66.V154</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>O_weight</name>
|
||||
<value>D66.V9</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V1</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V10</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V14</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V154</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V155</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V156</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V157</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V16</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V17</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V18</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V2</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V20</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V21</name>
|
||||
<value/>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V22</name>
|
||||
<value/>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V23</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V24</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V25</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V26</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V27</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V28</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V29</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V3</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V30</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V31</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V32</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V33</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V34</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V35</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V37</name>
|
||||
<value/>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V38</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V39</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V4</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V41</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V42</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V43</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V5</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V6</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V7</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V8</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>V_D66.V9</name>
|
||||
<value>*All*</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>action-Send</name>
|
||||
<value>Send</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>dataset_code</name>
|
||||
<value>D66</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>dataset_label</name>
|
||||
<value>Natality, 2007-2022</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>dataset_vintage</name>
|
||||
<value>2022</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>finder-stage-D66.V21</name>
|
||||
<value>codeset</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>finder-stage-D66.V22</name>
|
||||
<value>codeset</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>finder-stage-D66.V37</name>
|
||||
<value>codeset</value>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>saved_id</name>
|
||||
<value/>
|
||||
</parameter>
|
||||
<parameter>
|
||||
<name>stage</name>
|
||||
<value>request</value>
|
||||
</parameter>
|
||||
</request-parameters>
|
57
posts/2024-05-15-US-NHL-Birthrate/index.qmd
Normal file
57
posts/2024-05-15-US-NHL-Birthrate/index.qmd
Normal file
|
@ -0,0 +1,57 @@
|
|||
---
|
||||
title: "Does a US Born Players Birthdate affect their shot at the NHL"
|
||||
description: "Inspired by TidyTuesday Week 2 - 2024 dataset about Candian Players, lets look at the same anaylyis for American Born Players"
|
||||
date: "5/15/2024" #Update when live
|
||||
categories:
|
||||
- tidytuesday
|
||||
- R
|
||||
- dataViz
|
||||
draft: TRUE
|
||||
---
|
||||
|
||||
```{r}
|
||||
#| include: false
|
||||
|
||||
knitr::read_chunk(here::here("posts", "2024-05-15-US-NHL-Birthrate", "nhl_usa_births.R"))
|
||||
|
||||
```
|
||||
|
||||
```{r }
|
||||
#| label: Setup
|
||||
#| echo: FALSE
|
||||
#| warning: false
|
||||
#| message: false
|
||||
```
|
||||
This post is inspired by this fantastic [blog post](https://jlaw.netlify.app/2023/12/04/are-birth-dates-still-destiny-for-canadian-nhl-players/) on Jlaws Blog. In it they explore how in the first chapter Malcolm Gladwell’s Outliers he discusses how in Canadian Junior Hockey there is a higher likelihood for players to be born in the first quarter of the year. As it appears cutoff dates for USA hockey are different and they are currently using June 1st (if my internet searches are to be believed), I wondered if the same analysis would hold true for American Born Players.
|
||||
|
||||
## Distribution of Births by Month in the United States
|
||||
|
||||
The data for US Birth Rates can be pulled from [CDC Wonder](https://wonder.cdc.gov/). The particular table of interest is the Natality, 2007 - 2022. CDC Wonder has a quite interesting API that requires a request with quite a few XML parameters. Thankfully you can build the request on the website and a nice package already exists to send the query. Check out the [Wonderapi Page](https://socdatar.github.io/wonderapi/index.html) for more info.
|
||||
|
||||
```{r}
|
||||
#| label: USA_Birth_Data
|
||||
```
|
||||
|
||||
### Graph Distribution of Births Compared to Expected
|
||||
|
||||
note use of month.name to sort
|
||||
|
||||
```{r}
|
||||
#| label: USA_Distro
|
||||
```
|
||||
|
||||
## Hockey Data
|
||||
|
||||
```{r}
|
||||
#| label: Roster_Data
|
||||
```
|
||||
|
||||
|
||||
## Graph It
|
||||
|
||||
|
||||
```{r}
|
||||
#| label: Graph_It
|
||||
#| fig-height: 7
|
||||
#| fig-width: 7
|
||||
```
|
116
posts/2024-05-15-US-NHL-Birthrate/nhl_usa_births.R
Normal file
116
posts/2024-05-15-US-NHL-Birthrate/nhl_usa_births.R
Normal file
|
@ -0,0 +1,116 @@
|
|||
#These first few lines run only when the file is run in RStudio, !!NOT when an Rmd/Rnw file calls it!!
|
||||
rm(list = ls(all = TRUE)) #Clear the variables from previous runs.
|
||||
cat("\f") # clear console
|
||||
|
||||
# cSpell:disable disable spell checking for this document
|
||||
|
||||
## ---- Setup ----
|
||||
library(magrittr)
|
||||
library(ggplot2)
|
||||
library(ggtext)
|
||||
library(ggimage)
|
||||
|
||||
## ---- USA_Birth_Data ----
|
||||
usa_raw <- wonderapi::send_query("D66", here::here("posts", "2024-05-15-US-NHL-Birthrate", "cdc_wonder_request.xml"))
|
||||
|
||||
usa_births <- usa_raw %>%
|
||||
dplyr::group_by(Month) %>%
|
||||
dplyr::summarise(country_births = sum(Births), .groups = "drop") %>%
|
||||
dplyr::mutate(country_pct = country_births / sum(country_births))
|
||||
|
||||
## ---- USA_Distro ----
|
||||
|
||||
usa_births %>%
|
||||
dplyr::mutate(expected_births = dplyr::case_when(
|
||||
Month %in% c("April", "June", "September", "November") ~ 30 / 365
|
||||
, Month == "February" ~ 28 / 365
|
||||
, .default = 31 / 365
|
||||
)
|
||||
, difference = country_pct - expected_births
|
||||
, dplyr::across(Month, ~factor(., levels = month.name))
|
||||
, dplyr::across(c(country_pct, expected_births, difference), ~scales::percent(., accuracy = .1))
|
||||
) %>%
|
||||
dplyr::arrange(Month) %>%
|
||||
dplyr::rename_with(~stringr::str_replace_all(., "_", " ")) %>%
|
||||
dplyr::rename_with(stringr::str_to_title) %>%
|
||||
kableExtra::kbl() %>%
|
||||
kableExtra::kable_styling()
|
||||
|
||||
## ---- Roster_Data ----
|
||||
|
||||
teams <- httr::GET("https://api.nhle.com/stats/rest/en/team") %>%
|
||||
httr::content() %>%
|
||||
.[["data"]] %>%
|
||||
tibble::tibble(data = .) %>%
|
||||
tidyr::unnest_wider(data)
|
||||
|
||||
get_roster <- function(team){
|
||||
httr::GET(glue::glue("https://api-web.nhle.com/v1/roster/{team}/20232024")) %>%
|
||||
httr::content() %>%
|
||||
purrr::flatten() %>%
|
||||
tibble::tibble(data = .) %>%
|
||||
tidyr::hoist(
|
||||
.col = "data"
|
||||
, "firstName" = list("firstName", 1L)
|
||||
, "lastName" = list("lastName", 1L)
|
||||
, "positionCode"
|
||||
, "birthDate"
|
||||
, "birthCountry"
|
||||
)
|
||||
}
|
||||
|
||||
usa_roster <- purrr::map(teams$triCode, get_roster) %>%
|
||||
purrr::list_rbind() %>%
|
||||
dplyr::filter(!is.na(firstName)) %>%
|
||||
dplyr::filter(birthCountry == "USA") %>%
|
||||
dplyr::mutate(
|
||||
mob = lubridate::month(lubridate::ymd(birthDate), label = TRUE, abbr = FALSE)
|
||||
, mob_id = lubridate::month(lubridate::ymd(birthDate))
|
||||
) %>%
|
||||
dplyr::count(mob_id, mob, name = "players") %>%
|
||||
dplyr::mutate(player_pct = players / sum(players))
|
||||
|
||||
## ---- Graph_It ----
|
||||
|
||||
nhl_icon <- "https://pbs.twimg.com/media/F9sTTAYakAAkRv6.png"
|
||||
usa_icon <- "https://cdn-icons-png.flaticon.com/512/197/197484.png"
|
||||
|
||||
combined <- usa_roster %>%
|
||||
dplyr::left_join(usa_births, by = c("mob" = "Month")) %>%
|
||||
dplyr::mutate(
|
||||
random = dplyr::case_when(
|
||||
mob_id %in% c(4, 6, 9, 11) ~ 30 / 365,
|
||||
mob_id %in% c(1, 3, 5, 7, 8, 10, 12) ~ 31 / 365,
|
||||
mob_id == 2 ~ 28 / 365
|
||||
)
|
||||
)
|
||||
|
||||
g1 <- combined %>%
|
||||
ggplot(aes(x = forcats::fct_reorder(mob, -mob_id))) +
|
||||
geom_line(aes(y = random, group = 1), linetype = 2, color = "grey60") +
|
||||
geom_linerange(aes(ymin = country_pct, ymax = player_pct)) +
|
||||
geom_image(aes(image = nhl_icon, y = player_pct)) +
|
||||
geom_image(aes(image = usa_icon, y = country_pct), size = 0.04) +
|
||||
geom_text(aes(label = scales::percent(player_pct, accuracy = .1),
|
||||
y = dplyr::if_else(player_pct > country_pct, player_pct + .005, player_pct - .005))) +
|
||||
geom_text(aes(label = scales::percent(country_pct, accuracy = .1),
|
||||
y = dplyr::if_else(country_pct > player_pct, country_pct + .005, country_pct - .005))) +
|
||||
scale_y_continuous(labels = scales::percent) +
|
||||
coord_flip() +
|
||||
labs(
|
||||
x = "Month of Birth"
|
||||
, y = "Percentage of Births"
|
||||
, title = "Are United States Born NHL Players More Likely to be Born Early in the Year?"
|
||||
, subtitle = "Comparing the distribution of birth months between Canadian NHL players and Canada in general"
|
||||
, caption = glue::glue(
|
||||
"<img src = {nhl_icon} width = '15' height=' 15' /> - US NHL Players Birth Month Distribution <br />
|
||||
<img src = {usa_icon} width = '15' height=' 15' /> - US Birth Month (2007-2022) Distribution"
|
||||
)
|
||||
) +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
plot.caption = element_markdown()
|
||||
,plot.title.position = "plot"
|
||||
)
|
||||
|
||||
g1
|
Loading…
Reference in a new issue