quarto-blog/_site/posts/2024-08-09-learning-Julia/index.html

949 lines
59 KiB
HTML
Raw Normal View History

2024-08-23 16:30:17 -04:00
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.5.56">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Kyle Belanger">
2024-08-27 11:43:02 -04:00
<meta name="dcterms.date" content="2024-08-27">
2024-08-23 16:30:17 -04:00
<title>Learning Julia by WebScraping Amtrak Data – Kyle Belanger</title>
<style>
/* Inline base styles for the rendered document: generic text/layout helpers
   first, then the rules that lay out syntax-highlighted source code. */
/* Generic helpers: wrapping inline code, small caps, flex columns,
   hanging indents, and task-list checkboxes. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
/* Each source line is a <span> child of code.sourceCode; whitespace is
   preserved and empty line spans are given a height so they do not collapse. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
/* On screen, code blocks scroll instead of overflowing their container. */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, long lines wrap; continuation lines get a 5em hanging indent. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
}
/* Optional line numbers (pre.numberSource): a CSS counter is reset per code
   block, incremented per line span, and rendered through the ::before of the
   first anchor in each line. */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
/* user-select: none (with vendor prefixes) keeps the generated numbers out of text selections */
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
/* Intentionally empty rule emitted by the generator. */
div.sourceCode
{ }
/* On screen, the per-line anchor's generated content is underlined. */
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.5.1/jquery.min.js" integrity="sha512-bLT0Qm9VnAYZDflyKcBaQ2gg0hSYNQrJ8RilYldYQ1FxQYoCLtUjuuRuZo+fjqhx/qtq/1itJ0C2ejDxltZVFg==" crossorigin="anonymous"></script><script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
<script src="../../site_libs/quarto-nav/headroom.min.js"></script>
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="../../">
<script src="../../site_libs/quarto-html/quarto.js"></script>
<script src="../../site_libs/quarto-html/popper.min.js"></script>
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "navbar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "end",
"type": "overlay",
"limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" integrity="sha512-c3Nl8+7g4LMSTdrm621y7kf9v3SDPnhxLNhcjFJbKECVnmZHTdo+IRO05sNLTH/D3vA6u1X32ehoLC7WFVdheg==" crossorigin="anonymous"></script>
<script type="application/javascript">define('jquery', [],function() {return window.jQuery;})</script>
<link rel="stylesheet" href="../../styles.css">
</head>
<body class="floating nav-fixed">
<div id="quarto-search-results"></div>
2024-08-27 11:43:02 -04:00
<header id="quarto-header" class="headroom fixed-top quarto-banner">
2024-08-23 16:30:17 -04:00
<nav class="navbar navbar-expand-lg " data-bs-theme="dark">
<div class="navbar-container container-fluid">
<div class="navbar-brand-container mx-auto">
<a class="navbar-brand" href="../../index.html">
<span class="navbar-title">Kyle Belanger</span>
</a>
</div>
<div id="quarto-search" class="" title="Search"></div>
<!-- Navbar collapse toggle. This is a native <button> that expands/collapses #navbarCollapse
     (state exposed via aria-expanded / aria-controls), so it keeps its implicit button role;
     role="menu" was removed because it overrode the button semantics without providing
     menuitem children or the menu keyboard pattern. -->
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav navbar-nav-scroll ms-auto">
<li class="nav-item">
<a class="nav-link" href="../../blog.html">
<span class="menu-text">Posts</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../kyle_resume.pdf">
<span class="menu-text">Resume</span></a>
</li>
<!-- Icon-only GitHub link: the menu-text span is empty, so the icon (role="img") carries
     the accessible name via aria-label; otherwise the link would be announced with no name. -->
<li class="nav-item compact">
<a class="nav-link" href="https://github.com/mmmmtoasty19"> <i class="bi bi-github" role="img" aria-label="GitHub">
</i>
<span class="menu-text"></span></a>
</li>
</ul>
</div> <!-- /navcollapse -->
<div class="quarto-navbar-tools">
</div>
</div> <!-- /container-fluid -->
</nav>
</header>
<!-- content -->
<header id="title-block-header" class="quarto-title-block default toc-left page-columns page-full">
<div class="quarto-title-banner page-columns page-full">
<div class="quarto-title column-body">
<h1 class="title">Learning Julia by WebScraping Amtrak Data</h1>
<div class="quarto-categories">
<div class="quarto-category">Julia</div>
<div class="quarto-category">dataViz</div>
</div>
</div>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p><a href="https://kyleb.rbind.io/">Kyle Belanger</a> </p>
</div>
</div>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
2024-08-27 11:43:02 -04:00
<p class="date">August 27, 2024</p>
2024-08-23 16:30:17 -04:00
</div>
</div>
</div>
</header><div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#load-packages" id="toc-load-packages" class="nav-link active" data-scroll-target="#load-packages">Load Packages</a></li>
<li><a href="#setting-up-the-web-scraping" id="toc-setting-up-the-web-scraping" class="nav-link" data-scroll-target="#setting-up-the-web-scraping">Setting up the Web Scraping</a></li>
<li><a href="#creating-the-dataframe" id="toc-creating-the-dataframe" class="nav-link" data-scroll-target="#creating-the-dataframe">Creating the DataFrame</a></li>
<li><a href="#cleaning-the-dataframe" id="toc-cleaning-the-dataframe" class="nav-link" data-scroll-target="#cleaning-the-dataframe">Cleaning the DataFrame</a></li>
<li><a href="#grouping-and-summarizing" id="toc-grouping-and-summarizing" class="nav-link" data-scroll-target="#grouping-and-summarizing">Grouping and Summarizing</a></li>
<li><a href="#plotting" id="toc-plotting" class="nav-link" data-scroll-target="#plotting">Plotting</a></li>
<li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion">Conclusion</a></li>
</ul>
</nav>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar zindex-bottom">
</div>
<!-- main -->
<main class="content quarto-banner-title-block" id="quarto-document-content">
<p>Recently two things happened quite close together that started me on the journey to this post.</p>
<ol type="1">
<li>First I have been planning on fiddling around and learning Julia for a while. I love R and that love will not change but I thought it was good to try something different.</li>
<li>My mom took a train and it was super late! I started looking at the station and it seemed like it was always late.</li>
</ol>
<p>So these two things led me to this: pulling Amtrak data from the web using Julia. I do not claim to be an expert on Julia but I am learning and I wanted to share my journey, nor do I claim to be an expert at web scraping. Taking those things into account, let's follow along.</p>
<section id="load-packages" class="level2">
<h2 class="anchored" data-anchor-id="load-packages">Load Packages</h2>
<p>First off I will load the Julia packages I am going to use. The first three all have to do with web scraping, and getting the data off the website. CairoMakie will be used to make the plot. All of the rest are for data wrangling. I already have all of these packages in this project environment so I just need to let the Julia REPL know to load them. If you are brand new to Julia this <a href="https://towardsdatascience.com/how-to-setup-project-environments-in-julia-ec8ae73afe9c">site</a> really helped explain the idea of project environments to me. I also use <a href="https://code.visualstudio.com/">VSCode</a> along with the <a href="https://marketplace.visualstudio.com/items?itemName=julialang.language-julia">Julia extension</a> which does a great job of handling the project environment.</p>
<div id="2" class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode julia code-with-copy"><code class="sourceCode julia"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">HTTP</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">Gumbo</span></span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">Cascadia</span></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">DataFrames</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">DataFramesMeta</span></span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">Dates</span></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">Statistics</span></span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">CategoricalArrays</span></span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a><span class="im">using</span> <span class="bu">CairoMakie</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="setting-up-the-web-scraping" class="level2">
<h2 class="anchored" data-anchor-id="setting-up-the-web-scraping">Setting up the Web Scraping</h2>
<p>Now that the packages are loaded, we can start setting up the web scraping. From my internet searching I found that Amtrak does have an API but it is quite challenging to use. I found this website <a href="https://juckins.net/amtrak_status/archive/html/home.php">Amtrak Status</a> which does a great job of showing the data I was looking for. In this example I am just going to pull data for two trains, train 97 and train 98. You can see in the link I set those as the train numbers, and if you follow the link you will see it sets it up in a nice table to view the historical data. We then use the HTTP package to get the raw website data and then use Gumbo to parse the HTML into a document tree. The Cascadia package gives the various CSS selectors to help pull the info I want out of the entire page. The page table does not have any ids but it is also the only table on the page. I was able to use the CSS Selector “tr” to get each row of the table into a vector. If we examine the third item in the rows vector we see that it has the information we want (the first two rows are headers for the table).</p>
<!-- cspell: disable -->
<div id="4" class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode julia code-with-copy"><code class="sourceCode julia"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>url <span class="op">=</span> <span class="st">"https://juckins.net/amtrak_status/archive/html/history.php?train_num=97%2C98&amp;station=&amp;date_start=07%2F01%2F2024&amp;date_end=07%2F31%2F2024"</span>;</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>resp <span class="op">=</span> HTTP.<span class="fu">get</span>(url);</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>page <span class="op">=</span> <span class="fu">parsehtml</span>(<span class="fu">String</span>(resp.body));</span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a>rows <span class="op">=</span> <span class="fu">eachmatch</span>(sel<span class="st">"tr"</span>,page.root);</span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a>rows[<span class="fl">3</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<!-- cspell: enable -->
</section>
<section id="creating-the-dataframe" class="level2">
<h2 class="anchored" data-anchor-id="creating-the-dataframe">Creating the DataFrame</h2>
<p>Now that each row of the table is stored in a vector we need to rebuild the table into a dataframe in Julia. First I am initializing an empty dataframe by creating each column that will hold data. The column names match those of the header in the table on the website. Then I loop through each item in the rows vector. The text variable is a vector of all the td elements in the row. If the text vector is not empty and has more than one item in it, then we loop through the items and push the text into the row_data vector. Finally we push the row_data vector into the dataframe created prior to the loop. By having the if nested inside the loop I can skip the footer row at the end of the table from the website. The website table header uses a different CSS selector than the rest of the table but the footer does not. At the end of the loop I now have the same table that is on the website but stored as a dataframe in Julia.</p>
<div id="6" class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode julia code-with-copy"><code class="sourceCode julia"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># create empty DataFrame and then populate it with the table from website</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> <span class="fu">DataFrame</span>(train <span class="op">=</span> <span class="dt">String</span>[], origin_date <span class="op">=</span> [], station <span class="op">=</span> <span class="dt">String</span>[], sch_dp <span class="op">=</span> [], act_dp <span class="op">=</span> <span class="dt">String</span>[], comments <span class="op">=</span> [], s_disrupt <span class="op">=</span> [], cancellations <span class="op">=</span> [])</span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> rows</span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> text <span class="op">=</span> <span class="fu">eachmatch</span>(<span class="fu">Selector</span>(<span class="st">"td"</span>), i)</span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> row_data <span class="op">=</span> []</span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> !<span class="fu">isempty</span>(text) <span class="op">&amp;&amp;</span> <span class="fu">length</span>(text) <span class="op">&gt;</span> <span class="fl">1</span></span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="cf">for</span> item <span class="kw">in</span> text</span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">push!</span>(row_data, <span class="fu">nodeText</span>(item))</span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> <span class="cf">end</span></span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">push!</span>(df, row_data)</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> <span class="cf">end</span></span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a><span class="cf">end</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="cleaning-the-dataframe" class="level2">
<h2 class="anchored" data-anchor-id="cleaning-the-dataframe">Cleaning the DataFrame</h2>
<p>Coming from R I am quite familiar with data cleaning using dplyr and the rest of the tidyverse packages. When looking at options I really liked what the DataFramesMeta package brings, so I have used that here to get the data where I want it. I first filter out any trains that have a service disruption as well as any that are blank in the departure column. Next I select only the station, train, and the comments column. I originally tried using the two departure columns but was having an issue with trains that arrived at the station on one day but then left the next. These were causing the delay to be quite large as it was calculating as if it actually left before arriving. The comments column has what I needed; I just had to pull the string out and convert it to a numeric. After selecting the columns I first create the delay column. This pulls the comment string out of the comment column only if it contains Dp: as this indicates how late or early the train left. Next I am pulling out the time in minutes and hours from the delay string and converting those numbers to integers. The total delay column adds the minutes and hours together and if the word late is not in the column it will convert the number to negative. A negative delay in this case means the train left early. Finally I transform the columns to categorical so that they are easier to work with in the future. You can notice that for the last transformation I could not figure out how to select two columns using the transform macro, so I used the plain transform function instead. Also for those coming from R note the .=&gt; syntax: this is the broadcast operator and it lets Julia know to perform the action on the entire vector (I think I am explaining this right!). I end the block by showing the first 5 rows of the modified dataframe.</p>
<div id="8" class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode julia code-with-copy"><code class="sourceCode julia"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>mod_df <span class="op">=</span> <span class="pp">@chain</span> df <span class="cf">begin</span></span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="pp">@rsubset</span> <span class="op">:</span>act_dp <span class="op">!=</span> <span class="st">""</span> <span class="op">&amp;&amp;</span> <span class="op">:</span>s_disrupt <span class="op">!=</span> <span class="st">"SD"</span></span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="pp">@select</span> <span class="op">:</span>train <span class="op">:</span>station <span class="op">:</span>comments</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="co">#can't perform match if there is nothing there</span></span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> <span class="pp">@rtransform</span> <span class="op">:</span>delay <span class="op">=</span> <span class="fu">occursin</span>(<span class="st">r"Dp:"</span>, <span class="op">:</span>comments) ? <span class="fu">match</span>(<span class="st">r"Dp:</span><span class="sc">.*</span><span class="st">"</span>, <span class="op">:</span>comments).match <span class="op">:</span> <span class="st">""</span></span>
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a> <span class="pp">@rtransform</span> <span class="op">:</span>min <span class="op">=</span> <span class="fu">occursin</span>(<span class="st">r"min"</span>, <span class="op">:</span>delay) ? <span class="fu">parse</span>(<span class="dt">Int</span>,<span class="fu">match</span>(<span class="st">r"</span><span class="ch">([</span><span class="st">0-9</span><span class="ch">]</span><span class="sc">*</span><span class="ch">)</span><span class="st"> min"</span>, <span class="op">:</span>delay)[<span class="fl">1</span>]) <span class="op">:</span> <span class="fu">Int</span>(<span class="fl">0</span>)</span>
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> <span class="pp">@rtransform</span> <span class="op">:</span>hour <span class="op">=</span> <span class="fu">occursin</span>(<span class="st">r"hr"</span>, <span class="op">:</span>delay) ? <span class="fu">parse</span>(<span class="dt">Int</span>,<span class="fu">match</span>(<span class="st">r"</span><span class="ch">([</span><span class="st">0-9</span><span class="ch">]</span><span class="sc">*</span><span class="ch">)</span><span class="st"> hr"</span>, <span class="op">:</span>delay)[<span class="fl">1</span>]) <span class="op">*</span><span class="fl">60</span> <span class="op">:</span> <span class="fu">Int</span>(<span class="fl">0</span>)</span>
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a> <span class="pp">@rtransform</span> <span class="op">:</span>total_delay_mins <span class="op">=</span> <span class="op">:</span>min <span class="op">+</span> <span class="op">:</span>hour <span class="op">|&gt;</span> x <span class="op">-&gt;</span> <span class="fu">occursin</span>(<span class="st">r"late"</span>, <span class="op">:</span>delay) ? x <span class="op">:</span> x <span class="op">*-</span><span class="fl">1</span> <span class="co">#if word late does not appear, train left early</span></span>
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">transform</span>([<span class="op">:</span>station, <span class="op">:</span>train] <span class="op">.=&gt;</span> categorical, renamecols <span class="op">=</span> <span class="cn">false</span>)</span>
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a><span class="cf">end</span></span>
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a><span class="fu">first</span>(mod_df, <span class="fl">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-display" data-execution_count="1">
<div><div style="float: left;"><span>5×7 DataFrame</span></div><div style="clear: both;"></div></div><div class="data-frame" style="overflow-x: scroll;">
<table class="data-frame caption-top table table-sm table-striped small" data-quarto-postprocess="true">
<thead>
<tr class="header">
<th class="rowNumber" data-quarto-table-cell-role="th" style="text-align: right; font-weight: bold;">Row</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">train</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">station</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">comments</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">delay</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">min</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">hour</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">total_delay_mins</th>
</tr>
<tr class="odd subheader headerLastRow">
<th class="rowNumber" data-quarto-table-cell-role="th" style="text-align: right; font-weight: bold;"></th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="CategoricalArrays.CategoricalValue{String, UInt32}">Cat…</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="CategoricalArrays.CategoricalValue{String, UInt32}">Cat…</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Any">Any</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="AbstractString">Abstract…</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td class="rowNumber" style="text-align: right; font-weight: bold;">1</td>
<td style="text-align: left;">97</td>
<td style="text-align: left;">RMT</td>
<td style="text-align: left;">Dp: 1 min late.</td>
<td style="text-align: left;">Dp: 1 min late.</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">1</td>
</tr>
<tr class="even">
<td class="rowNumber" style="text-align: right; font-weight: bold;">2</td>
<td style="text-align: left;">98</td>
<td style="text-align: left;">FLO</td>
<td style="text-align: left;">Ar: 7 min early. | Dp: On time.</td>
<td style="text-align: left;">Dp: On time.</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="odd">
<td class="rowNumber" style="text-align: right; font-weight: bold;">3</td>
<td style="text-align: left;">98</td>
<td style="text-align: left;">KTR</td>
<td style="text-align: left;">Dp: 12 min late.</td>
<td style="text-align: left;">Dp: 12 min late.</td>
<td style="text-align: right;">12</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">12</td>
</tr>
<tr class="even">
<td class="rowNumber" style="text-align: right; font-weight: bold;">4</td>
<td style="text-align: left;">97</td>
<td style="text-align: left;">PTB</td>
<td style="text-align: left;">Dp: 6 min late.</td>
<td style="text-align: left;">Dp: 6 min late.</td>
<td style="text-align: right;">6</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">6</td>
</tr>
<tr class="odd">
<td class="rowNumber" style="text-align: right; font-weight: bold;">5</td>
<td style="text-align: left;">97</td>
<td style="text-align: left;">RVR</td>
<td style="text-align: left;">Ar: 8 min late. | Dp: 5 min late.</td>
<td style="text-align: left;">Dp: 5 min late.</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">5</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</section>
<section id="grouping-and-summarizing" class="level2">
<h2 class="anchored" data-anchor-id="grouping-and-summarizing">Grouping and Summarizing</h2>
<p>Now that I have the data I want, I want to group and summarize to create some graphs. Again using DataFramesMeta and the by keyword I can group by the train and station columns and then create the mean, median, max, and min columns. This action felt very similar to summarize in dplyr. DataFramesMeta does allow you to do the grouping and combining as two separate steps, but the by keyword combines it into one step. I then ordered by the station column and then by the train column. I then created a column that shows the difference in the mean delay between the two trains. I didn't end up using this for now but I might make something with it later. Last I created two columns that contain the level code for the station and train columns. I will talk about the reason for this in the next section. The function levelcode is from the CategoricalArrays package and it creates an integer column that matches the level of the categorical name. Last I display the first 5 rows of the dataframe.</p>
<div id="10" class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode julia code-with-copy"><code class="sourceCode julia"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>gd <span class="op">=</span> <span class="pp">@chain</span> mod_df <span class="cf">begin</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> <span class="pp">@by</span> _ [<span class="op">:</span>train,<span class="op">:</span>station] <span class="cf">begin</span></span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> <span class="op">:</span>mean <span class="op">=</span> <span class="dt">Float32</span>[<span class="bu">Statistics</span>.<span class="fu">mean</span>(<span class="op">:</span>total_delay_mins)]</span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> <span class="op">:</span>median <span class="op">=</span> <span class="bu">Statistics</span>.<span class="fu">median</span>(<span class="op">:</span>total_delay_mins)</span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> <span class="op">:</span>max <span class="op">=</span> <span class="fu">maximum</span>(<span class="op">:</span>total_delay_mins)</span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a> <span class="op">:</span>min <span class="op">=</span> <span class="fu">minimum</span>(<span class="op">:</span>total_delay_mins) </span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a> <span class="cf">end</span> </span>
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a> <span class="pp">@orderby</span> <span class="op">:</span>station <span class="op">:</span>train</span>
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a> <span class="pp">@groupby</span> <span class="op">:</span>station</span>
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a> <span class="pp">@transform</span> <span class="op">:</span>diff <span class="op">=</span> [<span class="cn">missing</span>; <span class="fu">diff</span>(<span class="op">:</span>mean)]</span>
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a> <span class="pp">@rtransform</span> _ <span class="cf">begin</span></span>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a> <span class="op">:</span>station_code <span class="op">=</span> <span class="fu">levelcode</span>(<span class="op">:</span>station)</span>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a> <span class="op">:</span>train_code <span class="op">=</span> <span class="fu">levelcode</span>(<span class="op">:</span>train)</span>
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a> <span class="cf">end</span></span>
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a><span class="cf">end</span></span>
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a><span class="fu">first</span>(gd, <span class="fl">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-display" data-execution_count="1">
<div><div style="float: left;"><span>5×9 DataFrame</span></div><div style="clear: both;"></div></div><div class="data-frame" style="overflow-x: scroll;">
<table class="data-frame caption-top table table-sm table-striped small" data-quarto-postprocess="true">
<thead>
<tr class="header">
<th class="rowNumber" data-quarto-table-cell-role="th" style="text-align: right; font-weight: bold;">Row</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">train</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">station</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">mean</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">median</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">max</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">min</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">diff</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">station_code</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">train_code</th>
</tr>
<tr class="odd subheader headerLastRow">
<th class="rowNumber" data-quarto-table-cell-role="th" style="text-align: right; font-weight: bold;"></th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="CategoricalArrays.CategoricalValue{String, UInt32}">Cat…</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="CategoricalArrays.CategoricalValue{String, UInt32}">Cat…</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Float32">Float32</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Float64">Float64</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Union{Missing, Float32}">Float32?</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
<th style="text-align: left;" data-quarto-table-cell-role="th" title="Int64">Int64</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td class="rowNumber" style="text-align: right; font-weight: bold;">1</td>
<td style="text-align: left;">97</td>
<td style="text-align: left;">ALX</td>
<td style="text-align: right;">70.4</td>
<td style="text-align: right;">50.0</td>
<td style="text-align: right;">287</td>
<td style="text-align: right;">0</td>
<td style="text-align: right; font-style: italic;">missing</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">1</td>
</tr>
<tr class="even">
<td class="rowNumber" style="text-align: right; font-weight: bold;">2</td>
<td style="text-align: left;">98</td>
<td style="text-align: left;">ALX</td>
<td style="text-align: right;">101.387</td>
<td style="text-align: right;">77.0</td>
<td style="text-align: right;">399</td>
<td style="text-align: right;">-16</td>
<td style="text-align: right;">30.9871</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">2</td>
</tr>
<tr class="odd">
<td class="rowNumber" style="text-align: right; font-weight: bold;">3</td>
<td style="text-align: left;">97</td>
<td style="text-align: left;">BAL</td>
<td style="text-align: right;">53.3333</td>
<td style="text-align: right;">27.0</td>
<td style="text-align: right;">267</td>
<td style="text-align: right;">3</td>
<td style="text-align: right; font-style: italic;">missing</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">1</td>
</tr>
<tr class="even">
<td class="rowNumber" style="text-align: right; font-weight: bold;">4</td>
<td style="text-align: left;">98</td>
<td style="text-align: left;">BAL</td>
<td style="text-align: right;">120.226</td>
<td style="text-align: right;">104.0</td>
<td style="text-align: right;">414</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">66.8925</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">2</td>
</tr>
<tr class="odd">
<td class="rowNumber" style="text-align: right; font-weight: bold;">5</td>
<td style="text-align: left;">97</td>
<td style="text-align: left;">CHS</td>
<td style="text-align: right;">71.1</td>
<td style="text-align: right;">53.0</td>
<td style="text-align: right;">286</td>
<td style="text-align: right;">0</td>
<td style="text-align: right; font-style: italic;">missing</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">1</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</section>
<section id="plotting" class="level2">
<h2 class="anchored" data-anchor-id="plotting">Plotting</h2>
<p>Coming from R and the ggplot package (also having played around a bit in Plotly for R), there was a rather steep learning curve to Makie! I do feel there is a ton of flexibility in Makie, but learning to use it is a beast, and was probably the hardest part of this whole thing. The first challenge was that Makie does not like categorical variables (at least for barplots; I don't know if this is always true), thus the need for using the level codes so I could pass a numerical vector to the x axis. I am then able to label that axis with the categorical labels. Makie does also allow you to just call the barplot function without all the other setup, and it will automatically create the figure and axis; however, I wanted to do it manually and really build up the graph. The first step was setting a color gradient. I used Dark2 from the ColorBrewer schemes, just as a personal preference for one I really like. Next up I create the figure. Directly from the Makie docs, “The Figure is the outermost container object.” I could pass some arguments to the Figure constructor, and change size or colors, but for this one I just left everything as the defaults. Next up is creating the axis. I placed it at position 1,1 within the previously created figure. I also pass labels for the x and y axes, a title, and then the labels for the xticks. The label rotation is in radians, so pi/2 rotates the labels 90 degrees. Next I generate the barplot. Note that the ! in the function call allows for plotting on an existing axis. (<a href="https://docs.julialang.org/en/v1/manual/style-guide/#bang-convention">More info on the Bang Operator</a>) Last, I set up Labels and Colors for the Legend, and then place the Legend at position 1,2 of the existing figure.</p>
<div id="12" class="cell" data-execution_count="1">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode julia code-with-copy"><code class="sourceCode julia"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>colors <span class="op">=</span> <span class="fu">cgrad</span>(<span class="op">:</span>Dark2_6)</span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>f <span class="op">=</span> <span class="fu">Figure</span>();</span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>ax <span class="op">=</span> <span class="fu">Axis</span>(f[<span class="fl">1</span>,<span class="fl">1</span>], xlabel <span class="op">=</span> <span class="st">"Station"</span>, ylabel <span class="op">=</span> <span class="st">"Mean Delay (mins)"</span>, title <span class="op">=</span> <span class="st">"Mean Delay by Station"</span>, xticks <span class="op">=</span> (<span class="fl">1</span><span class="op">:</span><span class="fu">length</span>(<span class="fu">levels</span>(gd.station_code)), <span class="fu">levels</span>(gd.station)), xticklabelrotation <span class="op">=</span> <span class="cn">pi</span><span class="op">/</span><span class="fl">2</span>)</span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="fu">barplot!</span>(ax, gd.station_code, gd.mean, dodge <span class="op">=</span> gd.train_code, color <span class="op">=</span> colors[gd.train_code]) </span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>labels <span class="op">=</span> [<span class="st">"</span><span class="sc">$</span>i<span class="st">"</span> for i <span class="kw">in</span> <span class="fu">unique</span>(gd.train)]</span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>elements <span class="op">=</span> [<span class="fu">PolyElement</span>(polycolor <span class="op">=</span> colors[i]) for i <span class="kw">in</span> <span class="fu">unique</span>(gd.train_code)]</span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a><span class="fu">Legend</span>(f[<span class="fl">1</span>,<span class="fl">2</span>],elements, labels, <span class="st">"Train Number"</span>)</span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a>f</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-display" data-execution_count="1">
<div>
<figure class="figure">
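<p><img src="index_files/figure-html/cell-7-output-1.svg" class="img-fluid figure-img" alt="Bar chart titled Mean Delay by Station showing mean delay in minutes at each station, with bars grouped by train number"></p>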
</figure>
</div>
</div>
</div>
</section>
<section id="conclusion" class="level2">
<h2 class="anchored" data-anchor-id="conclusion">Conclusion</h2>
<p>There is still a lot that could be done with this data set, and I am interested to keep playing around with it to see what kind of insights I could gather. Overall, I learned a lot about Julia, but as I learned with R, there is always more to learn! I look forward to seeing where this journey takes me.</p>
</section>
<div id="quarto-appendix" class="default"><section class="quarto-appendix-contents" id="quarto-reuse"><h2 class="anchored quarto-appendix-heading">Reuse</h2><div class="quarto-appendix-contents"><div><a rel="license" href="https://creativecommons.org/licenses/by/4.0/">CC BY 4.0</a></div></div></section><section class="quarto-appendix-contents" id="quarto-citation"><h2 class="anchored quarto-appendix-heading">Citation</h2><div><div class="quarto-appendix-secondary-label">BibTeX citation:</div><pre class="sourceCode code-with-copy quarto-appendix-bibtex"><code class="sourceCode bibtex">@online{belanger2024,
author = {Belanger, Kyle},
title = {Learning {Julia} by {WebScraping} {Amtrak} {Data}},
date = {2024-08-27},
langid = {en}
}
</code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre><div class="quarto-appendix-secondary-label">For attribution, please cite this work as:</div><div id="ref-belanger2024" class="csl-entry quarto-appendix-citeas" role="listitem">
Belanger, Kyle. 2024. <span>“Learning Julia by WebScraping Amtrak
Data.”</span> August 27, 2024.
</div></div></section></div></main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Mirror the colour mode recorded on a stylesheet element onto <body>.
// Reads the element's `data-mode` attribute: "dark" marks the body as
// `quarto-dark`; any other value (including a missing attribute) marks it
// as `quarto-light`. The opposite class is removed in both cases.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  const [classToAdd, classToDrop] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  bodyClasses.add(classToAdd);
  bodyClasses.remove(classToDrop);
}
// Apply the body colour-mode classes based on the primary Bootstrap
// stylesheet (`link#quarto-bootstrap`). Does nothing when that link element
// is not in the document.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
}
toggleBodyColorPrimary();
const icon = "";
// Add AnchorJS links to every element carrying the `anchored` class,
// placing the anchor on the right and using the `icon` string defined above.
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// Report whether an element carries any class whose name begins with
// `code-annotation-`.
const isCodeAnnotation = (el) => {
  return Array.from(el.classList).some((className) => className.startsWith('code-annotation-'));
}
// ClipboardJS "success" handler: briefly flashes a "Copied!" state on the
// copy button, then restores it.
//   e.trigger         - the button that was clicked
//   e.clearSelection  - called last to clear the code selection
// While flashing, the button has the `code-copy-button-checked` class and a
// "Copied!" title; when `window.bootstrap` is available a manual Bootstrap
// tooltip is shown too. Everything is reverted after 1000 ms.
const onCopySuccess = function(e) {
  const copyButton = e.trigger;
  // don't keep focus on the button
  copyButton.blur();
  copyButton.classList.add('code-copy-button-checked');
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");

  let copiedTooltip;
  if (window.bootstrap) {
    const tooltipAttributes = [
      ["data-bs-toggle", "tooltip"],
      ["data-bs-placement", "left"],
      ["data-bs-title", "Copied!"]
    ];
    for (const [attrName, attrValue] of tooltipAttributes) {
      copyButton.setAttribute(attrName, attrValue);
    }
    copiedTooltip = new bootstrap.Tooltip(copyButton,
      { trigger: "manual",
        customClass: "code-copy-button-tooltip",
        offset: [0, -8]});
    copiedTooltip.show();
  }

  // Undo the flash: tooltip (if any) first, then title and class.
  const restoreButton = function() {
    if (copiedTooltip) {
      copiedTooltip.hide();
      for (const attrName of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
        copyButton.removeAttribute(attrName);
      }
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // clear code selection
  e.clearSelection();
}
// Produce the text that a copy button places on the clipboard.
//   trigger - the clicked copy button; the element immediately before it is
//             treated as the code element to copy from.
// Works on a deep clone so the page itself is not modified. Direct children
// recognised by isCodeAnnotation() are removed from the clone, and the
// clone's `innerText` is returned.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live collection: removing an entry while iterating it
  // directly shifts the remaining entries and the next sibling is skipped.
  // Iterate over a snapshot so adjacent annotation elements are all removed.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
// Wire ClipboardJS to every copy button that is not inside the embedded
// source-code modal.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);

// Copy buttons inside the embedded source-code modal get their own instance,
// because ClipboardJS needs the modal passed as its `container` option there.
// TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModal
  });
  clipboardModal.on('success', onCopySuccess);
}
// Patterns used to decide whether a link target counts as internal:
// a localhost URL (optional port), a mailto: link, or a URL containing
// "/<current host>/".
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
var mailtoRegex = new RegExp(/^mailto:/);
// Escape regex metacharacters in the host before building the pattern.
// Unescaped, the dots in a host such as "example.com" match any character,
// so a different host like "exampleXcom" would be treated as internal.
var escapedHost = window.location.host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
var filterRegex = new RegExp('/' + escapedHost + '/');
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (var i=0; i<links.length; i++) {
  const link = links[i];
  if (!isInternal(link.href)) {
    // undo the damage that might have been done by quarto-nav.js in the case of
    // links that we want to consider external
    if (link.dataset.originalHref !== undefined) {
      link.href = link.dataset.originalHref;
    }
  }
}
// Attach a tippy tooltip with this page's standard options to `el`.
//   el            - element the tooltip is bound to
//   contentFn     - optional; passed to tippy as `content`
//   onTriggerFn   - optional; passed to tippy as `onTrigger`
//   onUntriggerFn - optional; passed to tippy as `onUntrigger`
// A hook is only added to the configuration when it is truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const tippyConfig = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // render the tooltip inside the reference element's parent
    appendTo: function(el) {
      return el.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalHooks = [
    ["content", contentFn],
    ["onTrigger", onTriggerFn],
    ["onUntrigger", onUntriggerFn],
  ];
  for (const [optionName, hook] of optionalHooks) {
    if (hook) {
      tippyConfig[optionName] = hook;
    }
  }
  window.tippy(el, tippyConfig);
}
// Give every footnote reference a hover tooltip showing the footnote body.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  // Resolved lazily by tippy: look up the note the reference points at and
  // return its markup, or an empty string when no such element exists.
  const footnoteContent = function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // Reduce an absolute URL to its fragment; other values are kept as-is.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    return note ? note.innerHTML : "";
  };
  tippyHover(ref, footnoteContent);
}
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Turn a cross-reference target into the HTML shown in its hover preview.
//   id   - id of the referenced element, or null when the whole page body
//          is being previewed
//   note - the referenced element; it is modified in place
// Returns an HTML string. Section targets (id null or starting with "sec-")
// with more than two children are trimmed to their first child plus the
// first following child that is not an empty paragraph.
const processXRef = (id, note) => {
  // Strip column container classes from the node and all its descendants
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  }
  stripColumnClz(note)

  // Run Quarto's math typesetting on a node when the hook is available.
  const typesetIfAvailable = (target) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(target);
    }
  };

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typesetIfAvailable(note);
    // TODO in 1.5, we should make sure this works without a callout special case
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  const hasManyChildren = note.children && note.children.length > 2;
  if (!hasManyChildren) {
    typesetIfAvailable(note);
    return note.innerHTML;
  }
  container.appendChild(note.children[0].cloneNode(true));
  const firstContentChild = Array.from(note.children)
    .slice(1)
    .find((child) => !(child.tagName === "P" && child.innerText === ""));
  if (firstContentChild) {
    container.appendChild(firstContentChild.cloneNode(true));
  }
  typesetIfAvailable(container);
  return container.innerHTML
}
// Attach a hover preview to every cross-reference link. No static content is
// supplied; the onTrigger hook below resolves the target each time the
// tooltip is triggered and fills the tooltip in.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
// Disabled while content is resolved; each path below that goes on to load
// content re-enables and shows the instance when it finishes.
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// A value that cannot be parsed as an absolute URL leaves `hash` undefined.
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
// Target is on this page: preview a clone so the page itself is untouched.
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
// (requests the URL without its fragment and looks the id up in the result)
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
// A null id makes processXRef apply its section handling.
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
// The onUntrigger hook is intentionally empty.
}, function(instance) {
});
}
let selectedAnnoteEl;
// Build the CSS selector for the <span> that matches both the given
// `data-code-cell` value and the given `data-code-annotation` value.
const selectorForAnnotation = ( cell, annotation) => {
  return `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
}
// Highlight the code lines that belong to a clicked annotation entry.
//   annoteEl - element carrying `data-target-cell` and
//              `data-target-annotation` attributes
// Looks up the annotation <span> for that cell/annotation pair, reads its
// comma-separated `data-code-lines`, and positions two absolutely placed
// <div>s (one over the code, one in the annotation gutter) so they span from
// the first listed line to the last. On success `selectedAnnoteEl` is set to
// `annoteEl`.
// Returns without doing anything when the annotation span, its line list, or
// the first line element is missing, instead of throwing on a null lookup.
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
  if (!codeLines) {
    return;
  }
  // Line elements have ids of the form "<cell>-<line>".
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);

  // compute the position of the first line (top and height)
  const el = window.document.getElementById(lineIds[0]);
  if (!el) {
    return;
  }
  const top = el.offsetTop;
  let height = el.offsetHeight;
  const parent = el.parentElement?.parentElement;
  if (lineIds.length > 1) {
    // Stretch the highlight down to the bottom of the last listed line.
    const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
    if (lastEl) {
      height = lastEl.offsetTop + lastEl.offsetHeight - top;
    }
  }

  if (parent) {
    // cook up a div (if necessary) and position it
    let div = window.document.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = window.document.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      // Only create the gutter marker when there is a gutter to attach it to.
      const codeCell = window.document.getElementById(targetCell);
      const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
      if (gutter) {
        gutterDiv = window.document.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};
// Remove both annotation highlight <div>s (code and gutter) if they exist
// and clear the record of the currently selected annotation.
const unselectCodeLines = () => {
  const highlightIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  for (const highlightId of highlightIds) {
    const highlightDiv = window.document.getElementById(highlightId);
    if (highlightDiv) {
      highlightDiv.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle: when the window is resized, re-run the
// highlight placement for the currently selected annotation (if any).
// The handler is wrapped in throttle() with a 10 ms delay.
const reapplyAnnotationHighlight = () => {
  // NOTE(review): `elRect` is not declared anywhere in the visible script, so
  // this assignment targets an implicit global — confirm whether it is still needed.
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
};
window.addEventListener("resize", throttle(reapplyAnnotationHighlight, 10));
// Wrap `fn` so that bursts of calls are collapsed.
//   fn - function to invoke
//   ms - delay, in milliseconds, applied to calls after the first
// The first call runs `fn` immediately. Every later call cancels any pending
// timer and schedules `fn` (with that call's arguments) `ms` later; once a
// scheduled call has run, the wrapper is back in its initial state and the
// next call runs immediately again.
function throttle(fn, ms) {
  let hasFired = false;
  let pendingTimer;
  return (...args) => {
    if (hasFired) {
      // later calls: replace whatever was pending with this one
      if (pendingTimer) clearTimeout(pendingTimer);
      pendingTimer = setTimeout(() => {
        fn.apply(this, args);
        hasFired = false;
        pendingTimer = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    hasFired = true;
  };
}
// Attach click handler to the DT of every code-annotation entry.
// Clicking an entry highlights its code lines and marks it active; clicking
// the already-selected entry clears the highlight again.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDlNode) => {
  annoteDlNode.addEventListener('click', (event) => {
    const clickedEl = event.target;
    if (clickedEl === selectedAnnoteEl) {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }
    // Switch selection: clear the old highlight and active marker first.
    unselectCodeLines();
    const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    if (activeEl) {
      activeEl.classList.remove('code-annotation-active');
    }
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
// Walk up from `el` looking for the nearest ancestor whose `data-cites`
// value is non-empty.
// Returns `{ el, cites }`, where `el` is the child of that ancestor on the
// path walked (possibly the starting element itself) and `cites` is the
// space-separated value split into an array. Returns undefined when no
// ancestor carries cites.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};
// Give every bibliography reference a hover tooltip listing the entries it
// cites. References for which findCites() finds no cite list are skipped.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    // Build one entry per cited key, copying the markup of the element with
    // id `ref-<key>`; an entry stays empty when no such element exists.
    var popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      var citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent', 'csl-entry');
      var biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>