quarto-blog/_site/posts/2020-02-13_basic-who-TB-data/basic-exploration-of-who-tuberculosis-data.html
2024-06-08 08:28:40 -04:00

925 lines
No EOL
65 KiB
HTML

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.4.553">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Kyle Belanger">
<meta name="dcterms.date" content="2020-02-13">
<title>Kyle Belanger - Basic Exploration of WHO Tuberculosis Data</title>
<!-- Inline stylesheet: general helpers first (code wrapping, small caps, flex columns, hanging indents, task-list checkboxes), then rules for syntax-highlighted code blocks, including screen/print overrides and CSS-counter line numbers on pre.numberSource. -->
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
<script src="../../site_libs/quarto-nav/headroom.min.js"></script>
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="../../">
<script src="../../site_libs/quarto-html/quarto.js"></script>
<script src="../../site_libs/quarto-html/popper.min.js"></script>
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "navbar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "end",
"type": "overlay",
"limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
<link rel="stylesheet" href="../../styles.css">
</head>
<body class="floating nav-fixed">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top quarto-banner">
<nav class="navbar navbar-expand-lg " data-bs-theme="dark">
<div class="navbar-container container-fluid">
<div class="navbar-brand-container mx-auto">
<a class="navbar-brand" href="../../index.html">
<span class="navbar-title">Kyle Belanger</span>
</a>
</div>
<div id="quarto-search" class="" title="Search"></div>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse" aria-expanded="false" aria-label="Toggle navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav navbar-nav-scroll ms-auto">
<li class="nav-item">
<a class="nav-link" href="../../blog.html">
<span class="menu-text">Posts</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../kyle_resume.pdf">
<span class="menu-text">Resume</span></a>
</li>
<li class="nav-item compact">
<!-- Icon-only link: the aria-label on the icon gives the link its accessible name, since menu-text is empty. -->
<a class="nav-link" href="https://github.com/mmmmtoasty19"> <i class="bi bi-github" role="img" aria-label="GitHub">
</i>
<span class="menu-text"></span></a>
</li>
</ul>
</div> <!-- /navcollapse -->
<div class="quarto-navbar-tools">
</div>
</div> <!-- /container-fluid -->
</nav>
</header>
<!-- content -->
<header id="title-block-header" class="quarto-title-block default toc-left page-columns page-full">
<div class="quarto-title-banner page-columns page-full">
<div class="quarto-title column-body">
<h1 class="title">Basic Exploration of WHO Tuberculosis Data</h1>
<p class="subtitle lead">Today I am going to dive into some real-life data from the World Health Organization (WHO), exploring new and relapse cases of tuberculosis. I clean up the data, and then make a few graphs to explore different variables.</p>
</div>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p><a href="https://kyleb.rbind.io/">Kyle Belanger</a> </p>
</div>
</div>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">February 13, 2020</p>
</div>
</div>
</div>
</header><div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#tldr" id="toc-tldr" class="nav-link active" data-scroll-target="#tldr">TL;DR</a></li>
<li><a href="#load-packages" id="toc-load-packages" class="nav-link" data-scroll-target="#load-packages">Load Packages</a></li>
<li><a href="#load-in-data" id="toc-load-in-data" class="nav-link" data-scroll-target="#load-in-data">Load in Data</a></li>
<li><a href="#transform" id="toc-transform" class="nav-link" data-scroll-target="#transform">Transform</a></li>
<li><a href="#join-data" id="toc-join-data" class="nav-link" data-scroll-target="#join-data">Join Data</a></li>
<li><a href="#analyze" id="toc-analyze" class="nav-link" data-scroll-target="#analyze">Analyze</a>
<ul class="collapse">
<li><a href="#subset" id="toc-subset" class="nav-link" data-scroll-target="#subset">Subset</a></li>
<li><a href="#a-different-way-to-look" id="toc-a-different-way-to-look" class="nav-link" data-scroll-target="#a-different-way-to-look">A different way to look</a>
<ul class="collapse">
<li><a href="#further-exploration" id="toc-further-exploration" class="nav-link" data-scroll-target="#further-exploration">Further Exploration</a></li>
</ul></li>
<li><a href="#last-exploration" id="toc-last-exploration" class="nav-link" data-scroll-target="#last-exploration">Last Exploration</a></li>
</ul></li>
<li><a href="#next-steps" id="toc-next-steps" class="nav-link" data-scroll-target="#next-steps">Next Steps</a></li>
</ul>
</nav>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
</div>
<!-- main -->
<main class="content quarto-banner-title-block" id="quarto-document-content">
<section id="tldr" class="level1">
<h1>TL;DR</h1>
<p>Today I am going to dive into some real-life data from the World Health Organization (WHO), exploring new and relapse cases of tuberculosis. I clean up the data, and then make a few graphs to explore different variables.</p>
</section>
<section id="load-packages" class="level1">
<h1>Load Packages</h1>
<p>Since I am going to use quite a few packages from the tidyverse, I am going to load them all at once instead of individually.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyverse)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="load-in-data" class="level1">
<h1>Load in Data</h1>
<p>We are using the WHO data set, which contains tuberculosis (TB) cases broken down by year. This data set is included in the tidyr package; however, that copy is only current through 2014. For a little added fun, I have downloaded the latest data from the WHO website, <a href="https://www.who.int/tb/country/data/download/en/">found here</a>. I have also included GDP per capita data from the World Bank, <a href="https://data.worldbank.org/indicator/NY.GDP.PCAP.CD">found here</a>.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>who_raw <span class="ot">&lt;-</span> <span class="fu">read.csv</span>(<span class="st">"TB_notifications_2020-02-11.csv"</span>)</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="co">#GDP file contains 4 rows of instructions above the actual data, we can tell</span></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="co">#read.csv to skip these using the skip argument</span></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a>gpd_raw <span class="ot">&lt;-</span> <span class="fu">read.csv</span>(<span class="st">"API_NY.GDP.PCAP.CD_DS2_en_csv_v2_713080.csv"</span>,</span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a> <span class="at">skip =</span> <span class="dv">4</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="transform" class="level1">
<h1>Transform</h1>
<p>This data set is very ugly looking! The first three columns are all country identifiers, with column four indicating the WHO region. This is redundant and can be reduced to one identifier and the region. As we can see, there are quite a few variable columns that are in fact values and not true variables. According to the data dictionary for this data set, WHO has changed its reporting over the years, so for our purposes we can strip a lot of the extra data out. Let's try to look at three types of TB: extrapulmonary, lab diagnosed, and clinician diagnosed. We will also try to look at the breakdowns by age and sex of new and relapse cases (post 2012). Lots of cleaning to do, so let's get to it!</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>who1 <span class="ot">&lt;-</span> who_raw <span class="sc">%&gt;%</span> </span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="co">#lets drop some columns not needed for our exploration, what each column means can be found in the CSV Data dictionary file</span></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>iso2</span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>iso_numeric</span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>(rdx_data_available<span class="sc">:</span>hiv_reg_new2)</span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>(new_sp<span class="sc">:</span>rel_in_agesex_flg)</span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">%&gt;%</span></span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="co">#Lets just look at new date</span></span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(year <span class="sc">&gt;=</span> <span class="dv">2013</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> <span class="co">#Move the values that are currently stored as variables to observations</span></span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_longer</span>(<span class="at">cols =</span> newrel_m04<span class="sc">:</span>newrel_sexunkageunk</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> ,<span class="at">names_to =</span> <span class="st">"key"</span></span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> ,<span class="at">values_to =</span> <span class="st">"values"</span></span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">%&gt;%</span> </span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">separate</span>(<span class="at">col =</span> key</span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> ,<span class="at">into =</span> <span class="fu">c</span>(<span class="st">"new"</span>,<span class="st">"sexage"</span>)</span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> ,<span class="at">sep =</span> <span class="st">"_"</span></span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">%&gt;%</span> </span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a> <span class="co">#the data set contains male, female and unknown</span></span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate_if</span>(is.character</span>
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a> ,str_replace_all</span>
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a> ,<span class="at">pattern =</span> <span class="st">"sexunk"</span></span>
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a> , <span class="at">replacement =</span> <span class="st">"u"</span></span>
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">%&gt;%</span> </span>
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a> <span class="fu">separate</span>(<span class="at">col =</span> sexage</span>
<span id="cb3-26"><a href="#cb3-26" aria-hidden="true" tabindex="-1"></a> ,<span class="at">into =</span> <span class="fu">c</span>(<span class="st">"sex"</span>,<span class="st">"age"</span>)</span>
<span id="cb3-27"><a href="#cb3-27" aria-hidden="true" tabindex="-1"></a> ,<span class="at">sep =</span> <span class="dv">1</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb3-28"><a href="#cb3-28" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">age_start =</span> <span class="fu">case_when</span>(</span>
<span id="cb3-29"><a href="#cb3-29" aria-hidden="true" tabindex="-1"></a> <span class="fu">str_detect</span>(age, <span class="st">"65"</span>) <span class="sc">~</span> <span class="st">"65"</span></span>
<span id="cb3-30"><a href="#cb3-30" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_length</span>(age) <span class="sc">==</span> <span class="dv">2</span>) <span class="sc">~</span> <span class="fu">str_match</span>(age, <span class="st">"</span><span class="sc">\\</span><span class="st">S"</span>)</span>
<span id="cb3-31"><a href="#cb3-31" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_length</span>(age) <span class="sc">==</span> <span class="dv">3</span>) <span class="sc">~</span> <span class="fu">str_match</span>(age, <span class="st">"</span><span class="sc">\\</span><span class="st">S"</span>)</span>
<span id="cb3-32"><a href="#cb3-32" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_length</span>(age) <span class="sc">==</span> <span class="dv">4</span>) <span class="sc">~</span> <span class="fu">str_match</span>(age, <span class="st">"</span><span class="sc">\\</span><span class="st">S</span><span class="sc">\\</span><span class="st">S"</span>)</span>
<span id="cb3-33"><a href="#cb3-33" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb3-34"><a href="#cb3-34" aria-hidden="true" tabindex="-1"></a> ,<span class="cn">TRUE</span> <span class="sc">~</span> <span class="st">""</span></span>
<span id="cb3-35"><a href="#cb3-35" aria-hidden="true" tabindex="-1"></a> )</span>
<span id="cb3-36"><a href="#cb3-36" aria-hidden="true" tabindex="-1"></a> ,<span class="at">age_end =</span> <span class="fu">case_when</span>(</span>
<span id="cb3-37"><a href="#cb3-37" aria-hidden="true" tabindex="-1"></a> <span class="fu">str_detect</span>(age, <span class="st">"65"</span>) <span class="sc">~</span> <span class="st">"&amp; Over"</span></span>
<span id="cb3-38"><a href="#cb3-38" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_length</span>(age) <span class="sc">==</span> <span class="dv">2</span>) <span class="sc">~</span> <span class="fu">str_match</span>(age, <span class="st">"</span><span class="sc">\\</span><span class="st">S$"</span>)</span>
<span id="cb3-39"><a href="#cb3-39" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_length</span>(age) <span class="sc">==</span> <span class="dv">3</span>) <span class="sc">~</span> <span class="fu">str_match</span>(age, <span class="st">"</span><span class="sc">\\</span><span class="st">S</span><span class="sc">\\</span><span class="st">S$"</span>)</span>
<span id="cb3-40"><a href="#cb3-40" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_length</span>(age) <span class="sc">==</span> <span class="dv">4</span>) <span class="sc">~</span> <span class="fu">str_match</span>(age, <span class="st">"</span><span class="sc">\\</span><span class="st">S</span><span class="sc">\\</span><span class="st">S$"</span>)</span>
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a> ,<span class="cn">TRUE</span> <span class="sc">~</span> <span class="st">""</span></span>
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a> ))</span>
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a><span class="co">#overall WHO data is now cleaned and tidy. </span></span>
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a><span class="co"># Lets tidy up the GPD data so we can match it to our WHO data set</span></span>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a>gdp1 <span class="ot">&lt;-</span> gpd_raw <span class="sc">%&gt;%</span> </span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>(Indicator.Name<span class="sc">:</span>X2012)</span>
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>X2019</span>
<span id="cb3-50"><a href="#cb3-50" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>X) <span class="sc">%&gt;%</span> </span>
<span id="cb3-51"><a href="#cb3-51" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_longer</span>(<span class="at">cols =</span> X2013<span class="sc">:</span>X2018</span>
<span id="cb3-52"><a href="#cb3-52" aria-hidden="true" tabindex="-1"></a> ,<span class="at">names_to =</span> <span class="st">"year"</span> </span>
<span id="cb3-53"><a href="#cb3-53" aria-hidden="true" tabindex="-1"></a> ,<span class="at">values_to =</span> <span class="st">"gdp"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb3-54"><a href="#cb3-54" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate_if</span>(is.character</span>
<span id="cb3-55"><a href="#cb3-55" aria-hidden="true" tabindex="-1"></a> ,str_remove_all</span>
<span id="cb3-56"><a href="#cb3-56" aria-hidden="true" tabindex="-1"></a> ,<span class="at">pattern =</span> <span class="st">"X(?=</span><span class="sc">\\</span><span class="st">d*)"</span>) <span class="co"># regex to check for an X followed by a digit</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="join-data" class="level1">
<h1>Join Data</h1>
<p>Let's combine the data sets so we can later visualize TB cases based on a country's GDP per capita.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>who_combined <span class="ot">&lt;-</span> who1 <span class="sc">%&gt;%</span> </span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">rename</span>(<span class="at">Country.Code =</span> iso3) <span class="sc">%&gt;%</span> </span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">year =</span> <span class="fu">as.character</span>(year)) <span class="sc">%&gt;%</span> </span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">left_join</span>(<span class="at">y =</span> gdp1) <span class="sc">%&gt;%</span> </span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>Country.Name)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="analyze" class="level1">
<h1>Analyze</h1>
<p>Let's first explore 2018 and see if GDP has any effect on the number of TB cases in a particular country.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>g1 <span class="ot">&lt;-</span> who_combined <span class="sc">%&gt;%</span> </span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(<span class="fu">str_detect</span>(age,<span class="st">"014|15plus|u"</span>),year <span class="sc">==</span> <span class="dv">2018</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(country) <span class="sc">%&gt;%</span> </span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">sum_tb_cases =</span> (<span class="fu">sum</span>(values,<span class="at">na.rm =</span> <span class="cn">TRUE</span>)<span class="sc">/</span><span class="dv">10000</span>)</span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> ,<span class="at">gdp =</span> <span class="fu">first</span>(gdp)<span class="sc">/</span><span class="dv">1000</span></span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a> ,<span class="at">who_region =</span> <span class="fu">first</span>(g_whoregion)) <span class="sc">%&gt;%</span> </span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a> <span class="at">label =</span> <span class="fu">ifelse</span>((sum_tb_cases<span class="sc">&gt;</span><span class="dv">50</span>), <span class="at">yes =</span> <span class="fu">as.character</span>(country),<span class="at">no =</span> <span class="st">""</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> gdp, <span class="at">y =</span> sum_tb_cases )) <span class="sc">+</span></span>
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">color =</span> who_region)) <span class="sc">+</span></span>
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a> ggrepel<span class="sc">::</span><span class="fu">geom_text_repel</span>(<span class="fu">aes</span>(<span class="at">x =</span> gdp, <span class="at">y =</span> sum_tb_cases, <span class="at">label =</span> label)) <span class="sc">+</span></span>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Total TB Cases by Country compared to Gross Domestic Product (GDP)"</span></span>
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a> ,<span class="at">x =</span> <span class="st">"GDP (per 1,000USD)"</span></span>
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a> ,<span class="at">y =</span> <span class="st">"Total TB Case (per 10,000 cases)"</span></span>
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a> ,<span class="at">color =</span> <span class="st">"WHO Region"</span></span>
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_bw</span>() </span>
<span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a>g1</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<!-- The alt text summarises what the plotting code above encodes: year, axes, color mapping, and labeling threshold. -->
<p><img src="basic-exploration-of-who-tuberculosis-data_files/figure-html/unnamed-chunk-5-1.png" class="img-fluid figure-img" alt="Scatter plot for 2018 of each country's total TB cases (in tens of thousands) against GDP per capita (in thousands of US dollars), with points colored by WHO region and countries above 500,000 cases labeled by name." width="672"></p>
</figure>
</div>
</div>
</div>
<section id="subset" class="level3">
<h3 class="anchored" data-anchor-id="subset">Subset</h3>
<p>Let's subset the above data to remove some of the outliers.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>g2 <span class="ot">&lt;-</span> who_combined <span class="sc">%&gt;%</span> </span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(<span class="fu">str_detect</span>(age,<span class="st">"014|15plus|u"</span>),year <span class="sc">==</span> <span class="dv">2018</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(country) <span class="sc">%&gt;%</span> </span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">sum_tb_cases =</span> (<span class="fu">sum</span>(values,<span class="at">na.rm =</span> <span class="cn">TRUE</span>)<span class="sc">/</span><span class="dv">10000</span>)</span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> ,<span class="at">gdp =</span> <span class="fu">first</span>(gdp)<span class="sc">/</span><span class="dv">1000</span></span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> ,<span class="at">who_region =</span> <span class="fu">first</span>(g_whoregion)) <span class="sc">%&gt;%</span> </span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> <span class="at">label =</span> <span class="fu">ifelse</span>((sum_tb_cases<span class="sc">&gt;</span><span class="dv">50</span>), <span class="at">yes =</span> <span class="fu">as.character</span>(country),<span class="at">no =</span> <span class="st">""</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> gdp, <span class="at">y =</span> sum_tb_cases )) <span class="sc">+</span></span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">color =</span> who_region)) <span class="sc">+</span></span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> ggrepel<span class="sc">::</span><span class="fu">geom_text_repel</span>(<span class="fu">aes</span>(<span class="at">x =</span> gdp, <span class="at">y =</span> sum_tb_cases, <span class="at">label =</span> label)) <span class="sc">+</span></span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Total TB Cases by Country compared to Gross Domestic Product (GDP)"</span></span>
<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a> ,<span class="at">x =</span> <span class="st">"GDP (per 1,000USD)"</span></span>
<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a> ,<span class="at">y =</span> <span class="st">"Total TB Case (per 10,000 cases)"</span></span>
<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a> ,<span class="at">color =</span> <span class="st">"WHO Region"</span></span>
<span id="cb6-17"><a href="#cb6-17" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
<span id="cb6-18"><a href="#cb6-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">xlim</span>(<span class="dv">0</span>,<span class="dv">50</span>) <span class="sc">+</span></span>
<span id="cb6-19"><a href="#cb6-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylim</span>(<span class="dv">0</span>,<span class="dv">50</span>) <span class="sc">+</span></span>
<span id="cb6-20"><a href="#cb6-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_bw</span>() </span>
<span id="cb6-21"><a href="#cb6-21" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-22"><a href="#cb6-22" aria-hidden="true" tabindex="-1"></a>g2</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="basic-exploration-of-who-tuberculosis-data_files/figure-html/unnamed-chunk-6-1.png" class="img-fluid figure-img" width="672" alt="Scatter plot of total TB cases by country against GDP, colored by WHO region"></p>
</figure>
</div>
</div>
</div>
<p>We can see in the graph above that there seems to be a small correlation between lower GDP and the number of TB cases.</p>
</section>
<section id="a-different-way-to-look" class="level2">
<h2 class="anchored" data-anchor-id="a-different-way-to-look">A different way to look</h2>
<p>Could there be any correlation between a country's population and the number of TB cases? Maybe it's just as simple as more people meaning more people to get sick? Let's bring in another data set, again from the World Bank (<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">found here</a>), which contains total population data by country.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>pop_raw <span class="ot">&lt;-</span> <span class="fu">read.csv</span>(<span class="st">"API_SP.POP.TOTL_DS2_en_csv_v2_713131.csv"</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> ,<span class="at">skip =</span> <span class="dv">4</span>)</span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co">#If this looks familiar it's because it is, the data set looks very similar to the GDP data</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="co">#In the future this could be moved to a function to allow cleaning much easier</span></span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>pop1 <span class="ot">&lt;-</span> pop_raw <span class="sc">%&gt;%</span> </span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>(Indicator.Name<span class="sc">:</span>X2012)</span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>X2019</span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">-</span>X) <span class="sc">%&gt;%</span> </span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_longer</span>(<span class="at">cols =</span> X2013<span class="sc">:</span>X2018</span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a> ,<span class="at">names_to =</span> <span class="st">"year"</span> </span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a> ,<span class="at">values_to =</span> <span class="st">"population"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate_if</span>(is.character</span>
<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a> ,str_remove_all</span>
<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a> ,<span class="at">pattern =</span> <span class="st">"X(?=</span><span class="sc">\\</span><span class="st">d*)"</span>)</span>
<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a><span class="co">#now let's combine this into our overall data set</span></span>
<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a>who_combined <span class="ot">&lt;-</span> who_combined <span class="sc">%&gt;%</span> </span>
<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">year =</span> <span class="fu">as.character</span>(year)) <span class="sc">%&gt;%</span> </span>
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">left_join</span>(<span class="at">y =</span> pop1) <span class="sc">%&gt;%</span> </span>
<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>Country.Name)</span>
<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a><span class="co">#now lets Graph again</span></span>
<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a>g3 <span class="ot">&lt;-</span> who_combined <span class="sc">%&gt;%</span> </span>
<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(<span class="fu">str_detect</span>(age,<span class="st">"014|15plus|u"</span>),year <span class="sc">==</span> <span class="dv">2018</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(country) <span class="sc">%&gt;%</span> </span>
<span id="cb7-28"><a href="#cb7-28" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">sum_tb_cases =</span> (<span class="fu">sum</span>(values,<span class="at">na.rm =</span> <span class="cn">TRUE</span>)<span class="sc">/</span><span class="dv">10000</span>)</span>
<span id="cb7-29"><a href="#cb7-29" aria-hidden="true" tabindex="-1"></a> ,<span class="at">population =</span> <span class="fu">first</span>(population)<span class="sc">/</span><span class="dv">1000000</span></span>
<span id="cb7-30"><a href="#cb7-30" aria-hidden="true" tabindex="-1"></a> ,<span class="at">who_region =</span> <span class="fu">first</span>(g_whoregion)) <span class="sc">%&gt;%</span> </span>
<span id="cb7-31"><a href="#cb7-31" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb7-32"><a href="#cb7-32" aria-hidden="true" tabindex="-1"></a> <span class="at">label =</span> <span class="fu">ifelse</span>((population<span class="sc">&gt;</span><span class="dv">250</span>), <span class="at">yes =</span> <span class="fu">as.character</span>(country),<span class="at">no =</span> <span class="st">""</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb7-33"><a href="#cb7-33" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> population, <span class="at">y =</span> sum_tb_cases )) <span class="sc">+</span></span>
<span id="cb7-34"><a href="#cb7-34" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">color =</span> who_region)) <span class="sc">+</span></span>
<span id="cb7-35"><a href="#cb7-35" aria-hidden="true" tabindex="-1"></a> ggrepel<span class="sc">::</span><span class="fu">geom_text_repel</span>(<span class="fu">aes</span>(<span class="at">x =</span> population, <span class="at">y =</span> sum_tb_cases, <span class="at">label =</span> label)) <span class="sc">+</span></span>
<span id="cb7-36"><a href="#cb7-36" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
<span id="cb7-37"><a href="#cb7-37" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"Total TB Cases by Country compared to Gross Domestic Product (GDP)"</span></span>
<span id="cb7-38"><a href="#cb7-38" aria-hidden="true" tabindex="-1"></a> ,<span class="at">x =</span> <span class="st">"Population (in Millions)"</span></span>
<span id="cb7-39"><a href="#cb7-39" aria-hidden="true" tabindex="-1"></a> ,<span class="at">y =</span> <span class="st">"Total TB Case (per 10,000 cases)"</span></span>
<span id="cb7-40"><a href="#cb7-40" aria-hidden="true" tabindex="-1"></a> ,<span class="at">color =</span> <span class="st">"WHO Region"</span></span>
<span id="cb7-41"><a href="#cb7-41" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">+</span></span>
<span id="cb7-42"><a href="#cb7-42" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_bw</span>() </span>
<span id="cb7-43"><a href="#cb7-43" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb7-44"><a href="#cb7-44" aria-hidden="true" tabindex="-1"></a> g3 </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
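<p><img src="basic-exploration-of-who-tuberculosis-data_files/figure-html/unnamed-chunk-7-1.png" class="img-fluid figure-img" width="672" alt="Scatter plot of 2018 total TB cases by country against population in millions, colored by WHO region"></p>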
</figure>
</div>
</div>
</div>
<section id="further-exploration" class="level3">
<h3 class="anchored" data-anchor-id="further-exploration">Further Exploration</h3>
<p>Maybe we are on to something: the more people, the more likely they are to get sick! However, India seems to have a very large number of cases, so let's break these cases down further by age group for 2018.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>g4 <span class="ot">&lt;-</span> who_combined <span class="sc">%&gt;%</span> </span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(year <span class="sc">==</span> <span class="dv">2018</span></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> ,country <span class="sc">==</span> <span class="st">"India"</span></span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a> ,<span class="sc">!</span>(<span class="fu">str_detect</span>(age,<span class="st">"15plus|ageunk|u|014"</span>))</span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a> ,(<span class="fu">str_detect</span>(sex,<span class="st">"m|f"</span>))</span>
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">%&gt;%</span> </span>
<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">age_range =</span> glue<span class="sc">::</span><span class="fu">glue</span>(<span class="st">"{age_start} -- {age_end}"</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">reorder</span>(age_range, <span class="fu">as.numeric</span>(age_start)), <span class="at">y =</span> (values<span class="sc">/</span><span class="dv">1000</span>), <span class="at">fill =</span> sex)) <span class="sc">+</span></span>
<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_col</span>(<span class="at">position =</span> <span class="st">"dodge"</span>) <span class="sc">+</span></span>
<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"TB Case in India by age and gender 2018"</span></span>
<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a> ,<span class="at">x =</span> <span class="cn">NULL</span></span>
<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a> ,<span class="at">y =</span> <span class="st">"Total Cases (per 1000)"</span></span>
<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a> ,<span class="at">fill =</span> <span class="st">"Gender"</span>) <span class="sc">+</span></span>
<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_fill_manual</span>(<span class="at">labels =</span> <span class="fu">c</span>(<span class="st">"Female"</span>,<span class="st">"Male"</span>), <span class="at">values =</span> <span class="fu">c</span>(<span class="st">"#e9a3c9"</span>,<span class="st">"#67a9cf"</span>) )</span>
<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a>g4</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="basic-exploration-of-who-tuberculosis-data_files/figure-html/unnamed-chunk-8-1.png" class="img-fluid figure-img" width="672"></p>
</figure>
</div>
</div>
</div>
<p>There seems to be a huge spike in cases after adolescence. Females have a sharp decline the older they get, whereas male cases stay elevated with a slight decrease at 55.</p>
</section>
</section>
<section id="last-exploration" class="level2">
<h2 class="anchored" data-anchor-id="last-exploration">Last Exploration</h2>
<p>Let's look at overall cases in India, going back to 1980, and see if there have been any trends. To get these numbers we will go back to our raw data and strip everything out except the total count.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>g5 <span class="ot">&lt;-</span> who_raw <span class="sc">%&gt;%</span> </span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(country <span class="sc">==</span> <span class="st">"India"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(year, c_newinc) <span class="sc">%&gt;%</span> </span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> year, <span class="at">y =</span> c_newinc<span class="sc">/</span><span class="dv">1000000</span>)) <span class="sc">+</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>() <span class="sc">+</span></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">labs</span>(</span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a> <span class="at">title =</span> <span class="st">"New and Relapse Tuberculosis Cases In India </span><span class="sc">\n</span><span class="st">1980 -- 2018"</span></span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a> ,<span class="at">x =</span> <span class="cn">NULL</span></span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a> ,<span class="at">y =</span> <span class="st">"Total Cases (in millions)"</span>) <span class="sc">+</span></span>
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme_bw</span>() <span class="sc">+</span></span>
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">hjust =</span> <span class="fl">0.5</span>)) <span class="sc">+</span> <span class="co">#center title </span></span>
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_continuous</span>(<span class="at">breaks =</span> <span class="fu">seq</span>(<span class="dv">1980</span>,<span class="dv">2020</span>,<span class="dv">5</span>)) <span class="sc">+</span></span>
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_y_continuous</span>(<span class="at">breaks =</span> scales<span class="sc">::</span><span class="fu">pretty_breaks</span>(<span class="at">n=</span><span class="dv">10</span>)) <span class="co">#different way to add tick marks</span></span>
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a>g5</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="basic-exploration-of-who-tuberculosis-data_files/figure-html/unnamed-chunk-9-1.png" class="img-fluid figure-img" width="672" alt="Line chart of new and relapse tuberculosis cases in India, 1980 to 2018"></p>
</figure>
</div>
</div>
</div>
<p>Cases were steadily rising from 1980 to 1990, then suddenly fell off. Starting in the early 2010s there was a sharp increase, and the number of new and relapse cases just keeps growing.</p>
</section>
</section>
<section id="next-steps" class="level1">
<h1>Next Steps</h1>
<p>While no other country has the number of cases that India does, the sudden spike in cases at adolescence raises the question: do other countries follow this same trend? We can also see the sudden spike in the 2010s; again, is this specific to India, or do we see this trend in other countries? There is much more exploration we can do with this data set at a later time!</p>
</section>
<div id="quarto-appendix" class="default"><section class="quarto-appendix-contents" id="quarto-reuse"><h2 class="anchored quarto-appendix-heading">Reuse</h2><div class="quarto-appendix-contents"><div><a rel="license" href="https://creativecommons.org/licenses/by/4.0/">CC BY 4.0</a></div></div></section><section class="quarto-appendix-contents" id="quarto-citation"><h2 class="anchored quarto-appendix-heading">Citation</h2><div><div class="quarto-appendix-secondary-label">BibTeX citation:</div><pre class="sourceCode code-with-copy quarto-appendix-bibtex"><code class="sourceCode bibtex">@online{belanger2020,
author = {Belanger, Kyle},
title = {Basic {Exploration} of {WHO} {Tuberculosis} {Data}},
date = {2020-02-13},
langid = {en}
}
</code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre><div class="quarto-appendix-secondary-label">For attribution, please cite this work as:</div><div id="ref-belanger2020" class="csl-entry quarto-appendix-citeas" role="listitem">
Belanger, Kyle. 2020. <span>“Basic Exploration of WHO Tuberculosis
Data.”</span> February 13, 2020.
</div></div></section></div></main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Mirror a stylesheet's colour mode onto <body>: when the element's
// `data-mode` attribute is "dark" the body gets `quarto-dark` and loses
// `quarto-light`; for any other value the two classes are swapped.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const [activeClass, inactiveClass] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  const pageBody = window.document.querySelector("body");
  pageBody.classList.add(activeClass);
  pageBody.classList.remove(inactiveClass);
};
// Sync the body colour-mode classes with the `link#quarto-bootstrap`
// stylesheet element; does nothing when the page has no such element.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
// Run once as soon as the DOM is ready.
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
// True when any class on `el` starts with the `code-annotation-` prefix.
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((clz) => clz.startsWith('code-annotation-'));
// Wire every `.code-copy-button` to ClipboardJS. The copied text is the
// content of the element immediately preceding the button, with any direct
// child that carries a `code-annotation-*` class removed first.
const clipboard = new window.ClipboardJS('.code-copy-button', {
  text: function(trigger) {
    // Work on a clone so the rendered code block is never modified.
    const codeEl = trigger.previousElementSibling.cloneNode(true);
    // `children` is a live HTMLCollection: removing an element while
    // iterating it directly shifts the remaining items and skips the next
    // sibling, so iterate over a static snapshot instead.
    for (const childEl of Array.from(codeEl.children)) {
      if (isCodeAnnotation(childEl)) {
        childEl.remove();
      }
    }
    return codeEl.innerText;
  }
});
// After a successful copy: flash the button into its "checked" state with a
// "Copied!" title (plus a Bootstrap tooltip when `window.bootstrap` exists),
// then restore the button one second later.
clipboard.on('success', function(e) {
  // button target
  const copyButton = e.trigger;
  // don't keep focus
  copyButton.blur();
  // flash "checked"
  copyButton.classList.add('code-copy-button-checked');
  const originalTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");

  // Attributes set on the button before the tooltip is created.
  const tooltipAttrs = [
    ["data-bs-toggle", "tooltip"],
    ["data-bs-placement", "left"],
    ["data-bs-title", "Copied!"],
  ];
  let copiedTooltip;
  if (window.bootstrap) {
    tooltipAttrs.forEach(([attrName, attrValue]) => {
      copyButton.setAttribute(attrName, attrValue);
    });
    copiedTooltip = new bootstrap.Tooltip(copyButton, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    copiedTooltip.show();
  }

  const restoreButton = function() {
    if (copiedTooltip) {
      copiedTooltip.hide();
      ["data-bs-title", "data-bs-toggle", "data-bs-placement"].forEach((attrName) => {
        copyButton.removeAttribute(attrName);
      });
    }
    copyButton.setAttribute("title", originalTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // clear code selection
  e.clearSelection();
});
// Link classification: an href counts as internal when it contains
// "/<this page's host>/", points at localhost over http(s), or is a
// mailto: link.
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
// The host must be matched literally. Escape regex metacharacters (notably
// the dots in a domain name) so that "example.com" cannot match
// "exampleXcom".
var escapedHost = window.location.host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
var filterRegex = new RegExp('/' + escapedHost + '/');
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
for (var i=0; i<links.length; i++) {
  const link = links[i];
  if (!isInternal(link.href)) {
    // undo the damage that might have been done by quarto-nav.js in the case of
    // links that we want to consider external
    if (link.dataset.originalHref !== undefined) {
      link.href = link.dataset.originalHref;
    }
  }
}
// Attach a tippy.js hover popup to `el` using the shared Quarto tooltip
// settings.
//   contentFn     - optional; forwarded as tippy's `content`
//   onTriggerFn   - optional; forwarded as tippy's `onTrigger`
//   onUntriggerFn - optional; forwarded as tippy's `onUntrigger`
// Falsy callbacks are left out of the config entirely.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const optionalHandlers = {
    content: contentFn,
    onTrigger: onTriggerFn,
    onUntrigger: onUntriggerFn,
  };
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // Mount the popup next to the reference element.
    appendTo: function(el) {
      return el.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  for (const [optionName, handler] of Object.entries(optionalHandlers)) {
    if (handler) {
      config[optionName] = handler;
    }
  }
  window.tippy(el, config);
}
// Footnote references: hovering one shows the inner HTML of the footnote it
// points to, or an empty popup when the target element is not on the page.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
noterefs.forEach((noteref) => {
  tippyHover(noteref, function() {
    // use id or data attribute instead here
    let target = noteref.getAttribute('data-footnote-href') || noteref.getAttribute('href');
    try {
      target = new URL(target).hash;
    } catch {
      // Not parseable as an absolute URL: keep the raw value.
    }
    const noteId = target.replace(/^#\/?/, "");
    const noteEl = window.document.getElementById(noteId);
    return noteEl ? noteEl.innerHTML : "";
  });
});
// All cross-reference links on the page (anchors with class `quarto-xref`).
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Build the HTML string shown in a cross-reference hover popup.
//   id   - id of the referenced element, or null when there is no id
//   note - the referenced element. It is modified in place (column classes
//          stripped, anchor link removed), so pass a clone or a node that
//          belongs to a separately parsed document.
// Returns innerHTML of the (possibly trimmed) content, or outerHTML when the
// element has the `callout` class.
const processXRef = (id, note) => {
// Strip column container classes
// (recursively, from the element and all of its descendants)
const stripColumnClz = (el) => {
el.classList.remove("page-full", "page-columns");
if (el.children) {
for (const child of el.children) {
stripColumnClz(child);
}
}
}
stripColumnClz(note)
if (id === null || id.startsWith('sec-')) {
// Special case sections, only their first couple elements
const container = document.createElement("div");
if (note.children && note.children.length > 2) {
// Keep the first child, then the first following child that is not an
// empty <p>; everything after that is dropped from the preview.
container.appendChild(note.children[0].cloneNode(true));
for (let i = 1; i < note.children.length; i++) {
const child = note.children[i];
if (child.tagName === "P" && child.innerText === "") {
continue;
} else {
container.appendChild(child.cloneNode(true));
break;
}
}
// Math typesetting is optional: only called when window.Quarto provides it.
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(container);
}
return container.innerHTML
} else {
// Two or fewer children: use the element's content as-is.
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
return note.innerHTML;
}
} else {
// Remove any anchor links if they are present
const anchorLink = note.querySelector('a.anchorjs-link');
if (anchorLink) {
anchorLink.remove();
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
// TODO in 1.5, we should make sure this works without a callout special case
if (note.classList.contains("callout")) {
return note.outerHTML;
} else {
return note.innerHTML;
}
}
}
// Give every cross-reference link a hover popup whose content is resolved
// lazily in the trigger callback: from the current page when the target id
// exists here, otherwise by fetching and parsing the linked page.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
// Disable the popup while its content is being resolved; every path below
// re-enables and shows it again in a finally step.
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// An href that cannot be parsed as an absolute URL leaves `hash` undefined.
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
// Target is on this page: render from a clone so the live element is
// left untouched by processXRef.
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
// (the URL without its fragment), then look the id up in the parsed result
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
// The untrigger callback is intentionally empty.
}, function(instance) {
});
}
let selectedAnnoteEl;
// CSS selector for the <span> that carries both the given `data-code-cell`
// and `data-code-annotation` attribute values.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlight the code lines that an annotation term refers to.
//   annoteEl - element carrying `data-target-cell` and
//              `data-target-annotation` attributes
// The matching annotation <span> lists its lines in `data-code-lines`
// (comma separated); each line element has the id "<cell>-<line>". Two
// absolutely positioned divs, one over the code and one in the annotation
// gutter, are created on first use and then repositioned to span from the
// first to the last listed line. On success `annoteEl` is remembered in
// `selectedAnnoteEl`.
// Returns without changing anything when the annotation span, its
// `data-code-lines` value, or the first/last line element cannot be found.
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  // Incomplete markup should leave the page as it is rather than throw from
  // inside an event handler.
  const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
  if (!codeLines) {
    return;
  }
  const lineIds = codeLines.split(",").map((line) => {
    return targetCell + "-" + line;
  });
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
  if (firstEl === null || lastEl === null) {
    return;
  }
  //compute the position of the single el (top and bottom and make a div)
  const top = firstEl.offsetTop;
  let height = firstEl.offsetHeight;
  const parent = firstEl.parentElement.parentElement;
  if (lineIds.length > 1) {
    // Several lines: stretch the highlight down to the bottom of the last one.
    const bottom = lastEl.offsetTop + lastEl.offsetHeight;
    height = bottom - top;
  }
  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = window.document.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = window.document.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    // Pad the highlight by 2px above and below the lines.
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;
    let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      gutterDiv = window.document.createElement("div");
      gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
      gutterDiv.style.position = 'absolute';
      const codeCell = window.document.getElementById(targetCell);
      const gutter = codeCell.querySelector('.code-annotation-gutter');
      gutter.appendChild(gutterDiv);
    }
    gutterDiv.style.top = top - 2 + "px";
    gutterDiv.style.height = height + 4 + "px";
  }
  selectedAnnoteEl = annoteEl;
};
// Remove both highlight overlays (code and gutter) if they exist, and forget
// the currently selected annotation.
const unselectCodeLines = () => {
  const highlightIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  for (const highlightId of highlightIds) {
    window.document.getElementById(highlightId)?.remove();
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
// Line offsets change when the window is resized, so recompute the
// highlight for the currently selected annotation (throttled).
window.addEventListener(
  "resize",
  throttle(() => {
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);
// Wrap `fn` so that bursts of calls are collapsed.
//   fn - function to invoke
//   ms - delay, in milliseconds, before the trailing invocation
// The first call runs `fn` immediately. Every later call (re)starts a timer;
// when the timer fires, `fn` runs with the arguments of the most recent call
// and the wrapper returns to its initial state.
function throttle(fn, ms) {
  let engaged = false;
  let pendingTimer;
  return (...args) => {
    if (!engaged) {
      // first call gets through
      fn.apply(this, args);
      engaged = true;
      return;
    }
    // all the others get throttled: replace any pending trailing call
    if (pendingTimer) clearTimeout(pendingTimer);
    pendingTimer = setTimeout(() => {
      fn.apply(this, args);
      pendingTimer = false;
      engaged = false;
    }, ms);
  };
}
// Attach click handler to the DT
// Clicking an annotation term highlights its code lines and marks the term
// active; clicking the currently selected term again clears the highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDlNode) => {
  annoteDlNode.addEventListener('click', (event) => {
    const clickedEl = event.target;
    if (clickedEl === selectedAnnoteEl) {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }
    // Switch the selection: clear the old highlight and active marker first.
    unselectCodeLines();
    const previouslyActive = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    previouslyActive?.classList.remove('code-annotation-active');
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
// Walk up from `el` until an element whose parent carries a `data-cites`
// value is found. Returns `{ el, cites }`, where `el` is that child element
// and `cites` is the parent's space-separated value split into an array, or
// undefined when no ancestor has one.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};
// Bibliography references: hovering a citation shows one entry per cited key,
// each filled from the page element with id "ref-<key>" when it exists.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const biblioref of bibliorefs) {
  const citeInfo = findCites(biblioref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent', 'csl-entry');
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      // An entry div is appended even when no matching reference was found.
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>