EVOLUTION-MANAGER
Edit File: rectangle.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Rectangling</title>
<script>
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
//
// NOTE: every `//` comment in this script must stay on its own line. A `//`
// comment runs to the end of the physical line, so sharing a line with code
// comments that code out.
document.addEventListener('DOMContentLoaded', function(e) {
  // First child of every div whose class list contains "section" and "level".
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
    // `attributes` is a live collection: keep removing entry 0 until it is empty.
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
</script>
<script>
// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) -->
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
document.addEventListener('DOMContentLoaded', function() {
  // Visit every <a> inside each element carrying the "sourceCode" class.
  const blocks = document.getElementsByClassName("sourceCode");
  for (var b = 0; b < blocks.length; b++) {
    var anchors = blocks[b].getElementsByTagName('a');
    for (var k = 0; k < anchors.length; k++) {
      var anchor = anchors[k];
      // Anchors that have any content are left untouched.
      if (anchor.innerHTML !== "") continue;
      anchor.setAttribute('aria-hidden', 'true');
    }
  }
});
</script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
  div.sourceCode { overflow: auto; }
}
@media print {
  pre > code.sourceCode { white-space: pre-wrap; }
  pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code { counter-reset: source-line 0; }
pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before {
  content: counter(source-line);
  position: relative;
  left: -1em;
  text-align: right;
  vertical-align: baseline;
  border: none;
  display: inline-block;
  -webkit-touch-callout: none;
  -webkit-user-select: none;
  -khtml-user-select: none;
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
  padding: 0 4px;
  width: 4em;
  color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode { }
@media screen {
  pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code
span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
//
// NOTE: every `//` comment in this script must stay on its own line. A `//`
// comment runs to the end of the physical line, so sharing a line with code
// comments that code out (and leaves the remainder as a syntax error).
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    // Only the stylesheet tagged data-origin="pandoc" is inspected.
    if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules can throw; such sheets are skipped.
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
      // replace div.sourceCode by a pre.sourceCode rule
      // (same index j, so the rule order of the sheet is preserved)
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">
body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; }
#TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; }
#TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; }
#TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; }
#TOC ul ul { margin-left: -2em; }
#TOC li { line-height: 16px; }
table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; }
table th { border-width: 2px; padding: 5px; border-style: inset; }
table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; }
table, table th, table td { border-left-style: none; border-right-style: none; }
table thead, table tr.even { background-color: #f7f7f7; }
p { margin: 0.5em 0; }
blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; }
hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; }
dl { margin-left: 0; }
dl dd { margin-bottom: 13px; margin-left: 13px; }
dl dt { font-weight: bold; }
ul { margin-top: 0; }
ul li { list-style: circle outside; }
ul ul { margin-bottom: 0; }
pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; }
pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; }
pre:not([class]) { background-color: #f7f7f7; }
code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; }
p > code, li > code { padding: 2px 0px; }
div.figure { text-align: center; }
img
{ background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; }
/* Headings */
h1 { margin-top: 0; font-size: 35px; line-height: 40px; }
h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; }
h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; }
h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; }
h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; }
/* Links */
a { color: #0033dd; text-decoration: none; }
a:hover { color: #6666ff; }
a:visited { color: #800080; }
a:visited:hover { color: #BB00BB; }
a[href^="http:"] { text-decoration: underline; }
a[href^="https:"] { text-decoration: underline; }
/* Token colours for spans that are direct children of <code> */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Rectangling</h1>
<div id="introduction" class="section level2">
<h2>Introduction</h2>
<p>Rectangling is the art and craft of taking a deeply nested list (often sourced from wild caught JSON or XML) and taming it into a tidy data set of rows and columns.
There are four functions from tidyr that are particularly useful for rectangling:</p> <ul> <li><code>unnest_longer()</code> takes each element of a list-column and makes a new row.</li> <li><code>unnest_wider()</code> takes each element of a list-column and makes a new column.</li> <li><code>unnest_auto()</code> guesses whether you want <code>unnest_longer()</code> or <code>unnest_wider()</code>.</li> <li><code>hoist()</code> is similar to <code>unnest_wider()</code> but only plucks out selected components, and can reach down multiple levels.</li> </ul> <p>A very large number of data rectangling problems can be solved by combining these functions with a splash of dplyr (largely eliminating prior approaches that combined <code>mutate()</code> with multiple <code>purrr::map()</code>s).</p> <p>To illustrate these techniques, we’ll use the repurrrsive package, which provides a number of deeply nested lists originally mostly captured from web APIs.</p> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(tidyr)</span> <span id="cb1-2"><a href="#cb1-2"></a><span class="kw">library</span>(dplyr)</span> <span id="cb1-3"><a href="#cb1-3"></a><span class="kw">library</span>(repurrrsive)</span></code></pre></div> </div> <div id="github-users" class="section level2"> <h2>GitHub users</h2> <p>We’ll start with <code>gh_users</code>, a list which contains information about six GitHub users. To begin, we put the <code>gh_users</code> list into a data frame:</p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>users <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">user =</span> gh_users)</span></code></pre></div> <p>This seems a bit counter-intuitive: why is the first step in making a list simpler to make it more complicated? 
But a data frame has a big advantage: it bundles together multiple vectors so that everything is tracked together in a single object.</p> <p>Each <code>user</code> is a named list, where each element represents a column.</p> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a><span class="kw">names</span>(users<span class="op">$</span>user[[<span class="dv">1</span>]])</span> <span id="cb3-2"><a href="#cb3-2"></a><span class="co">#> [1] "login" "id" "avatar_url" </span></span> <span id="cb3-3"><a href="#cb3-3"></a><span class="co">#> [4] "gravatar_id" "url" "html_url" </span></span> <span id="cb3-4"><a href="#cb3-4"></a><span class="co">#> [7] "followers_url" "following_url" "gists_url" </span></span> <span id="cb3-5"><a href="#cb3-5"></a><span class="co">#> [10] "starred_url" "subscriptions_url" "organizations_url" </span></span> <span id="cb3-6"><a href="#cb3-6"></a><span class="co">#> [13] "repos_url" "events_url" "received_events_url"</span></span> <span id="cb3-7"><a href="#cb3-7"></a><span class="co">#> [16] "type" "site_admin" "name" </span></span> <span id="cb3-8"><a href="#cb3-8"></a><span class="co">#> [19] "company" "blog" "location" </span></span> <span id="cb3-9"><a href="#cb3-9"></a><span class="co">#> [22] "email" "hireable" "bio" </span></span> <span id="cb3-10"><a href="#cb3-10"></a><span class="co">#> [25] "public_repos" "public_gists" "followers" </span></span> <span id="cb3-11"><a href="#cb3-11"></a><span class="co">#> [28] "following" "created_at" "updated_at"</span></span></code></pre></div> <p>There are two ways to turn the list components into columns. 
<code>unnest_wider()</code> takes every component and makes a new column:</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>users <span class="op">%>%</span><span class="st"> </span><span class="kw">unnest_wider</span>(user)</span> <span id="cb4-2"><a href="#cb4-2"></a><span class="co">#> # A tibble: 6 x 30</span></span> <span id="cb4-3"><a href="#cb4-3"></a><span class="co">#> login id avatar_url gravatar_id url html_url followers_url following_url</span></span> <span id="cb4-4"><a href="#cb4-4"></a><span class="co">#> <chr> <int> <chr> <chr> <chr> <chr> <chr> <chr> </span></span> <span id="cb4-5"><a href="#cb4-5"></a><span class="co">#> 1 gabo… 6.60e5 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb4-6"><a href="#cb4-6"></a><span class="co">#> 2 jenn… 5.99e5 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb4-7"><a href="#cb4-7"></a><span class="co">#> 3 jtle… 1.57e6 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb4-8"><a href="#cb4-8"></a><span class="co">#> 4 juli… 1.25e7 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb4-9"><a href="#cb4-9"></a><span class="co">#> 5 leep… 3.51e6 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb4-10"><a href="#cb4-10"></a><span class="co">#> 6 masa… 8.36e6 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb4-11"><a href="#cb4-11"></a><span class="co">#> # … with 22 more variables: gists_url <chr>, starred_url <chr>,</span></span> <span id="cb4-12"><a href="#cb4-12"></a><span class="co">#> # subscriptions_url <chr>, organizations_url <chr>, repos_url <chr>,</span></span> <span id="cb4-13"><a href="#cb4-13"></a><span class="co">#> # events_url <chr>, received_events_url <chr>, type <chr>, site_admin <lgl>,</span></span> <span id="cb4-14"><a 
href="#cb4-14"></a><span class="co">#> # name <chr>, company <chr>, blog <chr>, location <chr>, email <chr>,</span></span> <span id="cb4-15"><a href="#cb4-15"></a><span class="co">#> # public_repos <int>, public_gists <int>, followers <int>, following <int>,</span></span> <span id="cb4-16"><a href="#cb4-16"></a><span class="co">#> # created_at <chr>, updated_at <chr>, bio <chr>, hireable <lgl></span></span></code></pre></div> <p>But in this case, there are many components and we don’t need most of them so we can instead use <code>hoist()</code>. <code>hoist()</code> allows us to pull out selected components using the same syntax as <code>purrr::pluck()</code>:</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>users <span class="op">%>%</span><span class="st"> </span><span class="kw">hoist</span>(user, </span> <span id="cb5-2"><a href="#cb5-2"></a> <span class="dt">followers =</span> <span class="st">"followers"</span>, </span> <span id="cb5-3"><a href="#cb5-3"></a> <span class="dt">login =</span> <span class="st">"login"</span>, </span> <span id="cb5-4"><a href="#cb5-4"></a> <span class="dt">url =</span> <span class="st">"html_url"</span></span> <span id="cb5-5"><a href="#cb5-5"></a>)</span> <span id="cb5-6"><a href="#cb5-6"></a><span class="co">#> # A tibble: 6 x 4</span></span> <span id="cb5-7"><a href="#cb5-7"></a><span class="co">#> followers login url user </span></span> <span id="cb5-8"><a href="#cb5-8"></a><span class="co">#> <int> <chr> <chr> <list> </span></span> <span id="cb5-9"><a href="#cb5-9"></a><span class="co">#> 1 303 gaborcsardi https://github.com/gaborcsardi <named list [27]></span></span> <span id="cb5-10"><a href="#cb5-10"></a><span class="co">#> 2 780 jennybc https://github.com/jennybc <named list [27]></span></span> <span id="cb5-11"><a href="#cb5-11"></a><span class="co">#> 3 3958 jtleek https://github.com/jtleek <named list [27]></span></span> <span id="cb5-12"><a 
href="#cb5-12"></a><span class="co">#> 4 115 juliasilge https://github.com/juliasilge <named list [27]></span></span> <span id="cb5-13"><a href="#cb5-13"></a><span class="co">#> 5 213 leeper https://github.com/leeper <named list [27]></span></span> <span id="cb5-14"><a href="#cb5-14"></a><span class="co">#> 6 34 masalmon https://github.com/masalmon <named list [27]></span></span></code></pre></div> <p><code>hoist()</code> removes the named components from the <code>user</code> list-column, so you can think of it as moving components out of the inner list into the top-level data frame.</p> </div> <div id="github-repos" class="section level2"> <h2>GitHub repos</h2> <p>We start off <code>gh_repos</code> similarly, by putting it in a tibble:</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>repos <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">repo =</span> gh_repos)</span> <span id="cb6-2"><a href="#cb6-2"></a>repos</span> <span id="cb6-3"><a href="#cb6-3"></a><span class="co">#> # A tibble: 6 x 1</span></span> <span id="cb6-4"><a href="#cb6-4"></a><span class="co">#> repo </span></span> <span id="cb6-5"><a href="#cb6-5"></a><span class="co">#> <list> </span></span> <span id="cb6-6"><a href="#cb6-6"></a><span class="co">#> 1 <list [30]></span></span> <span id="cb6-7"><a href="#cb6-7"></a><span class="co">#> 2 <list [30]></span></span> <span id="cb6-8"><a href="#cb6-8"></a><span class="co">#> 3 <list [30]></span></span> <span id="cb6-9"><a href="#cb6-9"></a><span class="co">#> 4 <list [26]></span></span> <span id="cb6-10"><a href="#cb6-10"></a><span class="co">#> 5 <list [30]></span></span> <span id="cb6-11"><a href="#cb6-11"></a><span class="co">#> 6 <list [30]></span></span></code></pre></div> <p>This time the elements of <code>repo</code> are a list of repositories that belong to that user. 
These are observations, so should become new rows, so we use <code>unnest_longer()</code> rather than <code>unnest_wider()</code>:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>repos <-<span class="st"> </span>repos <span class="op">%>%</span><span class="st"> </span><span class="kw">unnest_longer</span>(repo)</span> <span id="cb7-2"><a href="#cb7-2"></a>repos</span> <span id="cb7-3"><a href="#cb7-3"></a><span class="co">#> # A tibble: 176 x 1</span></span> <span id="cb7-4"><a href="#cb7-4"></a><span class="co">#> repo </span></span> <span id="cb7-5"><a href="#cb7-5"></a><span class="co">#> <list> </span></span> <span id="cb7-6"><a href="#cb7-6"></a><span class="co">#> 1 <named list [68]></span></span> <span id="cb7-7"><a href="#cb7-7"></a><span class="co">#> 2 <named list [68]></span></span> <span id="cb7-8"><a href="#cb7-8"></a><span class="co">#> 3 <named list [68]></span></span> <span id="cb7-9"><a href="#cb7-9"></a><span class="co">#> 4 <named list [68]></span></span> <span id="cb7-10"><a href="#cb7-10"></a><span class="co">#> 5 <named list [68]></span></span> <span id="cb7-11"><a href="#cb7-11"></a><span class="co">#> 6 <named list [68]></span></span> <span id="cb7-12"><a href="#cb7-12"></a><span class="co">#> # … with 170 more rows</span></span></code></pre></div> <p>Then we can use <code>unnest_wider()</code> or <code>hoist()</code>:</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>repos <span class="op">%>%</span><span class="st"> </span><span class="kw">hoist</span>(repo, </span> <span id="cb8-2"><a href="#cb8-2"></a> <span class="dt">login =</span> <span class="kw">c</span>(<span class="st">"owner"</span>, <span class="st">"login"</span>), </span> <span id="cb8-3"><a href="#cb8-3"></a> <span class="dt">name =</span> <span class="st">"name"</span>,</span> <span id="cb8-4"><a href="#cb8-4"></a> 
<span class="dt">homepage =</span> <span class="st">"homepage"</span>,</span> <span id="cb8-5"><a href="#cb8-5"></a> <span class="dt">watchers =</span> <span class="st">"watchers_count"</span></span> <span id="cb8-6"><a href="#cb8-6"></a>)</span> <span id="cb8-7"><a href="#cb8-7"></a><span class="co">#> # A tibble: 176 x 5</span></span> <span id="cb8-8"><a href="#cb8-8"></a><span class="co">#> login name homepage watchers repo </span></span> <span id="cb8-9"><a href="#cb8-9"></a><span class="co">#> <chr> <chr> <chr> <int> <list> </span></span> <span id="cb8-10"><a href="#cb8-10"></a><span class="co">#> 1 gaborcsardi after <NA> 5 <named list [65]></span></span> <span id="cb8-11"><a href="#cb8-11"></a><span class="co">#> 2 gaborcsardi argufy <NA> 19 <named list [65]></span></span> <span id="cb8-12"><a href="#cb8-12"></a><span class="co">#> 3 gaborcsardi ask <NA> 5 <named list [65]></span></span> <span id="cb8-13"><a href="#cb8-13"></a><span class="co">#> 4 gaborcsardi baseimports <NA> 0 <named list [65]></span></span> <span id="cb8-14"><a href="#cb8-14"></a><span class="co">#> 5 gaborcsardi citest <NA> 0 <named list [65]></span></span> <span id="cb8-15"><a href="#cb8-15"></a><span class="co">#> 6 gaborcsardi clisymbols "" 18 <named list [65]></span></span> <span id="cb8-16"><a href="#cb8-16"></a><span class="co">#> # … with 170 more rows</span></span></code></pre></div> <p>Note the use of <code>c("owner", "login")</code>: this allows us to reach two levels deep inside of a list. 
An alternative approach would be to pull out just <code>owner</code> and then put each element of it in a column:</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a>repos <span class="op">%>%</span><span class="st"> </span></span> <span id="cb9-2"><a href="#cb9-2"></a><span class="st"> </span><span class="kw">hoist</span>(repo, <span class="dt">owner =</span> <span class="st">"owner"</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb9-3"><a href="#cb9-3"></a><span class="st"> </span><span class="kw">unnest_wider</span>(owner)</span> <span id="cb9-4"><a href="#cb9-4"></a><span class="co">#> # A tibble: 176 x 18</span></span> <span id="cb9-5"><a href="#cb9-5"></a><span class="co">#> login id avatar_url gravatar_id url html_url followers_url following_url</span></span> <span id="cb9-6"><a href="#cb9-6"></a><span class="co">#> <chr> <int> <chr> <chr> <chr> <chr> <chr> <chr> </span></span> <span id="cb9-7"><a href="#cb9-7"></a><span class="co">#> 1 gabo… 660288 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb9-8"><a href="#cb9-8"></a><span class="co">#> 2 gabo… 660288 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb9-9"><a href="#cb9-9"></a><span class="co">#> 3 gabo… 660288 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb9-10"><a href="#cb9-10"></a><span class="co">#> 4 gabo… 660288 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb9-11"><a href="#cb9-11"></a><span class="co">#> 5 gabo… 660288 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb9-12"><a href="#cb9-12"></a><span class="co">#> 6 gabo… 660288 https://a… "" http… https:/… https://api.… https://api.…</span></span> <span id="cb9-13"><a href="#cb9-13"></a><span class="co">#> # … with 170 more rows, and 10 more variables: gists_url 
<chr>,</span></span> <span id="cb9-14"><a href="#cb9-14"></a><span class="co">#> # starred_url <chr>, subscriptions_url <chr>, organizations_url <chr>,</span></span> <span id="cb9-15"><a href="#cb9-15"></a><span class="co">#> # repos_url <chr>, events_url <chr>, received_events_url <chr>, type <chr>,</span></span> <span id="cb9-16"><a href="#cb9-16"></a><span class="co">#> # site_admin <lgl>, repo <list></span></span></code></pre></div> <p>Instead of looking at the list and carefully thinking about whether it needs to become rows or columns, you can use <code>unnest_auto()</code>. It uses a handful of heuristics to figure out whether <code>unnest_longer()</code> or <code>unnest_wider()</code> is appropriate, and tells you about its reasoning.</p> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a><span class="kw">tibble</span>(<span class="dt">repo =</span> gh_repos) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb10-2"><a href="#cb10-2"></a><span class="st"> </span><span class="kw">unnest_auto</span>(repo) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb10-3"><a href="#cb10-3"></a><span class="st"> </span><span class="kw">unnest_auto</span>(repo)</span> <span id="cb10-4"><a href="#cb10-4"></a><span class="co">#> Using `unnest_longer(repo)`; no element has names</span></span> <span id="cb10-5"><a href="#cb10-5"></a><span class="co">#> Using `unnest_wider(repo)`; elements have 68 names in common</span></span> <span id="cb10-6"><a href="#cb10-6"></a><span class="co">#> # A tibble: 176 x 67</span></span> <span id="cb10-7"><a href="#cb10-7"></a><span class="co">#> id name full_name owner private html_url description fork url </span></span> <span id="cb10-8"><a href="#cb10-8"></a><span class="co">#> <int> <chr> <chr> <lis> <lgl> <chr> <chr> <lgl> <chr></span></span> <span id="cb10-9"><a href="#cb10-9"></a><span class="co">#> 1 6.12e7 after 
gaborcsa… <nam… FALSE https:/… Run Code i… FALSE http…</span></span> <span id="cb10-10"><a href="#cb10-10"></a><span class="co">#> 2 4.05e7 argu… gaborcsa… <nam… FALSE https:/… Declarativ… FALSE http…</span></span> <span id="cb10-11"><a href="#cb10-11"></a><span class="co">#> 3 3.64e7 ask gaborcsa… <nam… FALSE https:/… Friendly C… FALSE http…</span></span> <span id="cb10-12"><a href="#cb10-12"></a><span class="co">#> 4 3.49e7 base… gaborcsa… <nam… FALSE https:/… Do we get … FALSE http…</span></span> <span id="cb10-13"><a href="#cb10-13"></a><span class="co">#> 5 6.16e7 cite… gaborcsa… <nam… FALSE https:/… Test R pac… TRUE http…</span></span> <span id="cb10-14"><a href="#cb10-14"></a><span class="co">#> 6 3.39e7 clis… gaborcsa… <nam… FALSE https:/… Unicode sy… FALSE http…</span></span> <span id="cb10-15"><a href="#cb10-15"></a><span class="co">#> # … with 170 more rows, and 58 more variables: forks_url <chr>, keys_url <chr>,</span></span> <span id="cb10-16"><a href="#cb10-16"></a><span class="co">#> # collaborators_url <chr>, teams_url <chr>, hooks_url <chr>,</span></span> <span id="cb10-17"><a href="#cb10-17"></a><span class="co">#> # issue_events_url <chr>, events_url <chr>, assignees_url <chr>,</span></span> <span id="cb10-18"><a href="#cb10-18"></a><span class="co">#> # branches_url <chr>, tags_url <chr>, blobs_url <chr>, git_tags_url <chr>,</span></span> <span id="cb10-19"><a href="#cb10-19"></a><span class="co">#> # git_refs_url <chr>, trees_url <chr>, statuses_url <chr>,</span></span> <span id="cb10-20"><a href="#cb10-20"></a><span class="co">#> # languages_url <chr>, stargazers_url <chr>, contributors_url <chr>,</span></span> <span id="cb10-21"><a href="#cb10-21"></a><span class="co">#> # subscribers_url <chr>, subscription_url <chr>, commits_url <chr>,</span></span> <span id="cb10-22"><a href="#cb10-22"></a><span class="co">#> # git_commits_url <chr>, comments_url <chr>, issue_comment_url <chr>,</span></span> <span id="cb10-23"><a href="#cb10-23"></a><span 
class="co">#> # contents_url <chr>, compare_url <chr>, merges_url <chr>, archive_url <chr>,</span></span> <span id="cb10-24"><a href="#cb10-24"></a><span class="co">#> # downloads_url <chr>, issues_url <chr>, pulls_url <chr>,</span></span> <span id="cb10-25"><a href="#cb10-25"></a><span class="co">#> # milestones_url <chr>, notifications_url <chr>, labels_url <chr>,</span></span> <span id="cb10-26"><a href="#cb10-26"></a><span class="co">#> # releases_url <chr>, deployments_url <chr>, created_at <chr>,</span></span> <span id="cb10-27"><a href="#cb10-27"></a><span class="co">#> # updated_at <chr>, pushed_at <chr>, git_url <chr>, ssh_url <chr>,</span></span> <span id="cb10-28"><a href="#cb10-28"></a><span class="co">#> # clone_url <chr>, svn_url <chr>, size <int>, stargazers_count <int>,</span></span> <span id="cb10-29"><a href="#cb10-29"></a><span class="co">#> # watchers_count <int>, language <chr>, has_issues <lgl>,</span></span> <span id="cb10-30"><a href="#cb10-30"></a><span class="co">#> # has_downloads <lgl>, has_wiki <lgl>, has_pages <lgl>, forks_count <int>,</span></span> <span id="cb10-31"><a href="#cb10-31"></a><span class="co">#> # open_issues_count <int>, forks <int>, open_issues <int>, watchers <int>,</span></span> <span id="cb10-32"><a href="#cb10-32"></a><span class="co">#> # default_branch <chr>, homepage <chr></span></span></code></pre></div> </div> <div id="game-of-thrones-characters" class="section level2"> <h2>Game of Thrones characters</h2> <p><code>got_chars</code> has a similar structure to <code>gh_users</code>: it’s a list of named lists, where each element of the inner list describes some attribute of a GoT character. 
We start in the same way, first by creating a data frame and then by unnesting each component into a column:</p> <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>chars <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">char =</span> got_chars)</span> <span id="cb11-2"><a href="#cb11-2"></a>chars</span> <span id="cb11-3"><a href="#cb11-3"></a><span class="co">#> # A tibble: 30 x 1</span></span> <span id="cb11-4"><a href="#cb11-4"></a><span class="co">#> char </span></span> <span id="cb11-5"><a href="#cb11-5"></a><span class="co">#> <list> </span></span> <span id="cb11-6"><a href="#cb11-6"></a><span class="co">#> 1 <named list [18]></span></span> <span id="cb11-7"><a href="#cb11-7"></a><span class="co">#> 2 <named list [18]></span></span> <span id="cb11-8"><a href="#cb11-8"></a><span class="co">#> 3 <named list [18]></span></span> <span id="cb11-9"><a href="#cb11-9"></a><span class="co">#> 4 <named list [18]></span></span> <span id="cb11-10"><a href="#cb11-10"></a><span class="co">#> 5 <named list [18]></span></span> <span id="cb11-11"><a href="#cb11-11"></a><span class="co">#> 6 <named list [18]></span></span> <span id="cb11-12"><a href="#cb11-12"></a><span class="co">#> # … with 24 more rows</span></span> <span id="cb11-13"><a href="#cb11-13"></a></span> <span id="cb11-14"><a href="#cb11-14"></a>chars2 <-<span class="st"> </span>chars <span class="op">%>%</span><span class="st"> </span><span class="kw">unnest_wider</span>(char)</span> <span id="cb11-15"><a href="#cb11-15"></a>chars2</span> <span id="cb11-16"><a href="#cb11-16"></a><span class="co">#> # A tibble: 30 x 18</span></span> <span id="cb11-17"><a href="#cb11-17"></a><span class="co">#> url id name gender culture born died alive titles aliases father</span></span> <span id="cb11-18"><a href="#cb11-18"></a><span class="co">#> <chr> <int> <chr> <chr> <chr> <chr> <chr> <lgl> <list> <list> <chr> </span></span> 
<span id="cb11-19"><a href="#cb11-19"></a><span class="co">#> 1 http… 1022 Theo… Male "Ironb… "In … "" TRUE <chr … <chr [… "" </span></span> <span id="cb11-20"><a href="#cb11-20"></a><span class="co">#> 2 http… 1052 Tyri… Male "" "In … "" TRUE <chr … <chr [… "" </span></span> <span id="cb11-21"><a href="#cb11-21"></a><span class="co">#> 3 http… 1074 Vict… Male "Ironb… "In … "" TRUE <chr … <chr [… "" </span></span> <span id="cb11-22"><a href="#cb11-22"></a><span class="co">#> 4 http… 1109 Will Male "" "" "In … FALSE <chr … <chr [… "" </span></span> <span id="cb11-23"><a href="#cb11-23"></a><span class="co">#> 5 http… 1166 Areo… Male "Norvo… "In … "" TRUE <chr … <chr [… "" </span></span> <span id="cb11-24"><a href="#cb11-24"></a><span class="co">#> 6 http… 1267 Chett Male "" "At … "In … FALSE <chr … <chr [… "" </span></span> <span id="cb11-25"><a href="#cb11-25"></a><span class="co">#> # … with 24 more rows, and 7 more variables: mother <chr>, spouse <chr>,</span></span> <span id="cb11-26"><a href="#cb11-26"></a><span class="co">#> # allegiances <list>, books <list>, povBooks <list>, tvSeries <list>,</span></span> <span id="cb11-27"><a href="#cb11-27"></a><span class="co">#> # playedBy <list></span></span></code></pre></div> <p>This is more complex than <code>gh_users</code> because some components of <code>char</code> are themselves lists, giving us a collection of list-columns:</p> <div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>chars2 <span class="op">%>%</span><span class="st"> </span><span class="kw">select_if</span>(is.list)</span> <span id="cb12-2"><a href="#cb12-2"></a><span class="co">#> # A tibble: 30 x 7</span></span> <span id="cb12-3"><a href="#cb12-3"></a><span class="co">#> titles aliases allegiances books povBooks tvSeries playedBy </span></span> <span id="cb12-4"><a href="#cb12-4"></a><span class="co">#> <list> <list> <list> <list> <list> <list> <list> </span></span> <span 
id="cb12-5"><a href="#cb12-5"></a><span class="co">#> 1 <chr [3]> <chr [4]> <chr [1]> <chr [3]> <chr [2]> <chr [6]> <chr [1]></span></span> <span id="cb12-6"><a href="#cb12-6"></a><span class="co">#> 2 <chr [2]> <chr [11]> <chr [1]> <chr [2]> <chr [4]> <chr [6]> <chr [1]></span></span> <span id="cb12-7"><a href="#cb12-7"></a><span class="co">#> 3 <chr [2]> <chr [1]> <chr [1]> <chr [3]> <chr [2]> <chr [1]> <chr [1]></span></span> <span id="cb12-8"><a href="#cb12-8"></a><span class="co">#> 4 <chr [1]> <chr [1]> <NULL> <chr [1]> <chr [1]> <chr [1]> <chr [1]></span></span> <span id="cb12-9"><a href="#cb12-9"></a><span class="co">#> 5 <chr [1]> <chr [1]> <chr [1]> <chr [3]> <chr [2]> <chr [2]> <chr [1]></span></span> <span id="cb12-10"><a href="#cb12-10"></a><span class="co">#> 6 <chr [1]> <chr [1]> <NULL> <chr [2]> <chr [1]> <chr [1]> <chr [1]></span></span> <span id="cb12-11"><a href="#cb12-11"></a><span class="co">#> # … with 24 more rows</span></span></code></pre></div> <p>What you do next will depend on the purposes of the analysis. 
Maybe you want a row for every book and TV series that the character appears in:</p> <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1"></a>chars2 <span class="op">%>%</span><span class="st"> </span></span> <span id="cb13-2"><a href="#cb13-2"></a><span class="st"> </span><span class="kw">select</span>(name, books, tvSeries) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb13-3"><a href="#cb13-3"></a><span class="st"> </span><span class="kw">pivot_longer</span>(<span class="kw">c</span>(books, tvSeries), <span class="dt">names_to =</span> <span class="st">"media"</span>, <span class="dt">values_to =</span> <span class="st">"value"</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb13-4"><a href="#cb13-4"></a><span class="st"> </span><span class="kw">unnest_longer</span>(value)</span> <span id="cb13-5"><a href="#cb13-5"></a><span class="co">#> # A tibble: 180 x 3</span></span> <span id="cb13-6"><a href="#cb13-6"></a><span class="co">#> name media value </span></span> <span id="cb13-7"><a href="#cb13-7"></a><span class="co">#> <chr> <chr> <chr> </span></span> <span id="cb13-8"><a href="#cb13-8"></a><span class="co">#> 1 Theon Greyjoy books A Game of Thrones</span></span> <span id="cb13-9"><a href="#cb13-9"></a><span class="co">#> 2 Theon Greyjoy books A Storm of Swords</span></span> <span id="cb13-10"><a href="#cb13-10"></a><span class="co">#> 3 Theon Greyjoy books A Feast for Crows</span></span> <span id="cb13-11"><a href="#cb13-11"></a><span class="co">#> 4 Theon Greyjoy tvSeries Season 1 </span></span> <span id="cb13-12"><a href="#cb13-12"></a><span class="co">#> 5 Theon Greyjoy tvSeries Season 2 </span></span> <span id="cb13-13"><a href="#cb13-13"></a><span class="co">#> 6 Theon Greyjoy tvSeries Season 3 </span></span> <span id="cb13-14"><a href="#cb13-14"></a><span class="co">#> # … with 174 more rows</span></span></code></pre></div> <p>Or 
maybe you want to build a table that lets you match title to name:</p> <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>chars2 <span class="op">%>%</span><span class="st"> </span></span> <span id="cb14-2"><a href="#cb14-2"></a><span class="st"> </span><span class="kw">select</span>(name, <span class="dt">title =</span> titles) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb14-3"><a href="#cb14-3"></a><span class="st"> </span><span class="kw">unnest_longer</span>(title)</span> <span id="cb14-4"><a href="#cb14-4"></a><span class="co">#> # A tibble: 60 x 2</span></span> <span id="cb14-5"><a href="#cb14-5"></a><span class="co">#> name title </span></span> <span id="cb14-6"><a href="#cb14-6"></a><span class="co">#> <chr> <chr> </span></span> <span id="cb14-7"><a href="#cb14-7"></a><span class="co">#> 1 Theon Greyjoy Prince of Winterfell </span></span> <span id="cb14-8"><a href="#cb14-8"></a><span class="co">#> 2 Theon Greyjoy Captain of Sea Bitch </span></span> <span id="cb14-9"><a href="#cb14-9"></a><span class="co">#> 3 Theon Greyjoy Lord of the Iron Islands (by law of the green lands)</span></span> <span id="cb14-10"><a href="#cb14-10"></a><span class="co">#> 4 Tyrion Lannister Acting Hand of the King (former) </span></span> <span id="cb14-11"><a href="#cb14-11"></a><span class="co">#> 5 Tyrion Lannister Master of Coin (former) </span></span> <span id="cb14-12"><a href="#cb14-12"></a><span class="co">#> 6 Victarion Greyjoy Lord Captain of the Iron Fleet </span></span> <span id="cb14-13"><a href="#cb14-13"></a><span class="co">#> # … with 54 more rows</span></span></code></pre></div> <p>(Note that the empty titles (<code>""</code>) are due to an infelicity in the input <code>got_chars</code>: ideally people without titles would have a title vector of length 0, not a title vector of length 1 containing an empty string.)</p> <p>Again, we could rewrite using 
<code>unnest_auto()</code>. This is convenient for exploration, but I wouldn’t rely on it in the long term - <code>unnest_auto()</code> has the undesirable property that it will always succeed. That means if your data structure changes, <code>unnest_auto()</code> will continue to work, but might give very different output that causes cryptic failures from downstream functions.</p> <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1"></a><span class="kw">tibble</span>(<span class="dt">char =</span> got_chars) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-2"><a href="#cb15-2"></a><span class="st"> </span><span class="kw">unnest_auto</span>(char) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-3"><a href="#cb15-3"></a><span class="st"> </span><span class="kw">select</span>(name, <span class="dt">title =</span> titles) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-4"><a href="#cb15-4"></a><span class="st"> </span><span class="kw">unnest_auto</span>(title)</span> <span id="cb15-5"><a href="#cb15-5"></a><span class="co">#> Using `unnest_wider(char)`; elements have 18 names in common</span></span> <span id="cb15-6"><a href="#cb15-6"></a><span class="co">#> Using `unnest_longer(title)`; no element has names</span></span> <span id="cb15-7"><a href="#cb15-7"></a><span class="co">#> # A tibble: 60 x 2</span></span> <span id="cb15-8"><a href="#cb15-8"></a><span class="co">#> name title </span></span> <span id="cb15-9"><a href="#cb15-9"></a><span class="co">#> <chr> <chr> </span></span> <span id="cb15-10"><a href="#cb15-10"></a><span class="co">#> 1 Theon Greyjoy Prince of Winterfell </span></span> <span id="cb15-11"><a href="#cb15-11"></a><span class="co">#> 2 Theon Greyjoy Captain of Sea Bitch </span></span> <span id="cb15-12"><a href="#cb15-12"></a><span class="co">#> 3 Theon Greyjoy Lord of the Iron Islands (by law of the 
green lands)</span></span> <span id="cb15-13"><a href="#cb15-13"></a><span class="co">#> 4 Tyrion Lannister Acting Hand of the King (former) </span></span> <span id="cb15-14"><a href="#cb15-14"></a><span class="co">#> 5 Tyrion Lannister Master of Coin (former) </span></span> <span id="cb15-15"><a href="#cb15-15"></a><span class="co">#> 6 Victarion Greyjoy Lord Captain of the Iron Fleet </span></span> <span id="cb15-16"><a href="#cb15-16"></a><span class="co">#> # … with 54 more rows</span></span></code></pre></div> </div> <div id="geocoding-with-google" class="section level2"> <h2>Geocoding with Google</h2> <p>Next we’ll tackle a more complex form of data that comes from Google’s geocoding service. It’s against the terms of service to cache this data, so I first write a very simple wrapper around the API. This relies on having a Google Maps API key stored in an environment variable; if that’s not available these code chunks won’t be run.</p> <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1"></a>has_key <-<span class="st"> </span><span class="op">!</span><span class="kw">identical</span>(<span class="kw">Sys.getenv</span>(<span class="st">"GOOGLE_MAPS_API_KEY"</span>), <span class="st">""</span>)</span> <span id="cb16-2"><a href="#cb16-2"></a><span class="cf">if</span> (<span class="op">!</span>has_key) {</span> <span id="cb16-3"><a href="#cb16-3"></a> <span class="kw">message</span>(<span class="st">"No Google Maps API key found; code chunks will not be run"</span>)</span> <span id="cb16-4"><a href="#cb16-4"></a>}</span> <span id="cb16-5"><a href="#cb16-5"></a></span> <span id="cb16-6"><a href="#cb16-6"></a><span class="co"># https://developers.google.com/maps/documentation/geocoding</span></span> <span id="cb16-7"><a href="#cb16-7"></a>geocode <-<span class="st"> </span><span class="cf">function</span>(address, <span class="dt">api_key =</span> <span class="kw">Sys.getenv</span>(<span 
class="st">"GOOGLE_MAPS_API_KEY"</span>)) {</span> <span id="cb16-8"><a href="#cb16-8"></a> url <-<span class="st"> "https://maps.googleapis.com/maps/api/geocode/json"</span></span> <span id="cb16-9"><a href="#cb16-9"></a> url <-<span class="st"> </span><span class="kw">paste0</span>(url, <span class="st">"?address="</span>, <span class="kw">URLencode</span>(address), <span class="st">"&key="</span>, api_key)</span> <span id="cb16-10"><a href="#cb16-10"></a></span> <span id="cb16-11"><a href="#cb16-11"></a> jsonlite<span class="op">::</span><span class="kw">read_json</span>(url)</span> <span id="cb16-12"><a href="#cb16-12"></a>}</span></code></pre></div> <p>The list that this function returns is quite complex:</p> <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1"></a>houston <-<span class="st"> </span><span class="kw">geocode</span>(<span class="st">"Houston TX"</span>)</span> <span id="cb17-2"><a href="#cb17-2"></a><span class="kw">str</span>(houston)</span> <span id="cb17-3"><a href="#cb17-3"></a><span class="co">#> List of 2</span></span> <span id="cb17-4"><a href="#cb17-4"></a><span class="co">#> $ results:List of 1</span></span> <span id="cb17-5"><a href="#cb17-5"></a><span class="co">#> ..$ :List of 5</span></span> <span id="cb17-6"><a href="#cb17-6"></a><span class="co">#> .. ..$ address_components:List of 4</span></span> <span id="cb17-7"><a href="#cb17-7"></a><span class="co">#> .. .. ..$ :List of 3</span></span> <span id="cb17-8"><a href="#cb17-8"></a><span class="co">#> .. .. .. ..$ long_name : chr "Houston"</span></span> <span id="cb17-9"><a href="#cb17-9"></a><span class="co">#> .. .. .. ..$ short_name: chr "Houston"</span></span> <span id="cb17-10"><a href="#cb17-10"></a><span class="co">#> .. .. .. ..$ types :List of 2</span></span> <span id="cb17-11"><a href="#cb17-11"></a><span class="co">#> .. .. .. .. 
..$ : chr "locality"</span></span> <span id="cb17-12"><a href="#cb17-12"></a><span class="co">#> .. .. .. .. ..$ : chr "political"</span></span> <span id="cb17-13"><a href="#cb17-13"></a><span class="co">#> .. .. ..$ :List of 3</span></span> <span id="cb17-14"><a href="#cb17-14"></a><span class="co">#> .. .. .. ..$ long_name : chr "Harris County"</span></span> <span id="cb17-15"><a href="#cb17-15"></a><span class="co">#> .. .. .. ..$ short_name: chr "Harris County"</span></span> <span id="cb17-16"><a href="#cb17-16"></a><span class="co">#> .. .. .. ..$ types :List of 2</span></span> <span id="cb17-17"><a href="#cb17-17"></a><span class="co">#> .. .. .. .. ..$ : chr "administrative_area_level_2"</span></span> <span id="cb17-18"><a href="#cb17-18"></a><span class="co">#> .. .. .. .. ..$ : chr "political"</span></span> <span id="cb17-19"><a href="#cb17-19"></a><span class="co">#> .. .. ..$ :List of 3</span></span> <span id="cb17-20"><a href="#cb17-20"></a><span class="co">#> .. .. .. ..$ long_name : chr "Texas"</span></span> <span id="cb17-21"><a href="#cb17-21"></a><span class="co">#> .. .. .. ..$ short_name: chr "TX"</span></span> <span id="cb17-22"><a href="#cb17-22"></a><span class="co">#> .. .. .. ..$ types :List of 2</span></span> <span id="cb17-23"><a href="#cb17-23"></a><span class="co">#> .. .. .. .. ..$ : chr "administrative_area_level_1"</span></span> <span id="cb17-24"><a href="#cb17-24"></a><span class="co">#> .. .. .. .. ..$ : chr "political"</span></span> <span id="cb17-25"><a href="#cb17-25"></a><span class="co">#> .. .. ..$ :List of 3</span></span> <span id="cb17-26"><a href="#cb17-26"></a><span class="co">#> .. .. .. ..$ long_name : chr "United States"</span></span> <span id="cb17-27"><a href="#cb17-27"></a><span class="co">#> .. .. .. ..$ short_name: chr "US"</span></span> <span id="cb17-28"><a href="#cb17-28"></a><span class="co">#> .. .. .. ..$ types :List of 2</span></span> <span id="cb17-29"><a href="#cb17-29"></a><span class="co">#> .. .. .. .. 
..$ : chr "country"</span></span> <span id="cb17-30"><a href="#cb17-30"></a><span class="co">#> .. .. .. .. ..$ : chr "political"</span></span> <span id="cb17-31"><a href="#cb17-31"></a><span class="co">#> .. ..$ formatted_address : chr "Houston, TX, USA"</span></span> <span id="cb17-32"><a href="#cb17-32"></a><span class="co">#> .. ..$ geometry :List of 4</span></span> <span id="cb17-33"><a href="#cb17-33"></a><span class="co">#> .. .. ..$ bounds :List of 2</span></span> <span id="cb17-34"><a href="#cb17-34"></a><span class="co">#> .. .. .. ..$ northeast:List of 2</span></span> <span id="cb17-35"><a href="#cb17-35"></a><span class="co">#> .. .. .. .. ..$ lat: num 30.1</span></span> <span id="cb17-36"><a href="#cb17-36"></a><span class="co">#> .. .. .. .. ..$ lng: num -95</span></span> <span id="cb17-37"><a href="#cb17-37"></a><span class="co">#> .. .. .. ..$ southwest:List of 2</span></span> <span id="cb17-38"><a href="#cb17-38"></a><span class="co">#> .. .. .. .. ..$ lat: num 29.5</span></span> <span id="cb17-39"><a href="#cb17-39"></a><span class="co">#> .. .. .. .. ..$ lng: num -95.8</span></span> <span id="cb17-40"><a href="#cb17-40"></a><span class="co">#> .. .. ..$ location :List of 2</span></span> <span id="cb17-41"><a href="#cb17-41"></a><span class="co">#> .. .. .. ..$ lat: num 29.8</span></span> <span id="cb17-42"><a href="#cb17-42"></a><span class="co">#> .. .. .. ..$ lng: num -95.4</span></span> <span id="cb17-43"><a href="#cb17-43"></a><span class="co">#> .. .. ..$ location_type: chr "APPROXIMATE"</span></span> <span id="cb17-44"><a href="#cb17-44"></a><span class="co">#> .. .. ..$ viewport :List of 2</span></span> <span id="cb17-45"><a href="#cb17-45"></a><span class="co">#> .. .. .. ..$ northeast:List of 2</span></span> <span id="cb17-46"><a href="#cb17-46"></a><span class="co">#> .. .. .. .. ..$ lat: num 30.1</span></span> <span id="cb17-47"><a href="#cb17-47"></a><span class="co">#> .. .. .. .. 
..$ lng: num -95</span></span> <span id="cb17-48"><a href="#cb17-48"></a><span class="co">#> .. .. .. ..$ southwest:List of 2</span></span> <span id="cb17-49"><a href="#cb17-49"></a><span class="co">#> .. .. .. .. ..$ lat: num 29.5</span></span> <span id="cb17-50"><a href="#cb17-50"></a><span class="co">#> .. .. .. .. ..$ lng: num -95.8</span></span> <span id="cb17-51"><a href="#cb17-51"></a><span class="co">#> .. ..$ place_id : chr "ChIJAYWNSLS4QIYROwVl894CDco"</span></span> <span id="cb17-52"><a href="#cb17-52"></a><span class="co">#> .. ..$ types :List of 2</span></span> <span id="cb17-53"><a href="#cb17-53"></a><span class="co">#> .. .. ..$ : chr "locality"</span></span> <span id="cb17-54"><a href="#cb17-54"></a><span class="co">#> .. .. ..$ : chr "political"</span></span> <span id="cb17-55"><a href="#cb17-55"></a><span class="co">#> $ status : chr "OK"</span></span></code></pre></div> <p>Fortunately, we can attack the problem step by step with tidyr functions. To make the problem a bit harder (!) 
and more realistic, I’ll start by geocoding a few cities:</p> <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a>city <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"Houston"</span>, <span class="st">"LA"</span>, <span class="st">"New York"</span>, <span class="st">"Chicago"</span>, <span class="st">"Springfield"</span>)</span> <span id="cb18-2"><a href="#cb18-2"></a>city_geo <-<span class="st"> </span>purrr<span class="op">::</span><span class="kw">map</span>(city, geocode)</span></code></pre></div> <p>I’ll put these results in a tibble, next to the original city name:</p> <div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1"></a>loc <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">city =</span> city, <span class="dt">json =</span> city_geo)</span> <span id="cb19-2"><a href="#cb19-2"></a>loc</span> <span id="cb19-3"><a href="#cb19-3"></a><span class="co">#> # A tibble: 5 x 2</span></span> <span id="cb19-4"><a href="#cb19-4"></a><span class="co">#> city json </span></span> <span id="cb19-5"><a href="#cb19-5"></a><span class="co">#> <chr> <list> </span></span> <span id="cb19-6"><a href="#cb19-6"></a><span class="co">#> 1 Houston <named list [2]></span></span> <span id="cb19-7"><a href="#cb19-7"></a><span class="co">#> 2 LA <named list [2]></span></span> <span id="cb19-8"><a href="#cb19-8"></a><span class="co">#> 3 New York <named list [2]></span></span> <span id="cb19-9"><a href="#cb19-9"></a><span class="co">#> 4 Chicago <named list [2]></span></span> <span id="cb19-10"><a href="#cb19-10"></a><span class="co">#> 5 Springfield <named list [2]></span></span></code></pre></div> <p>The first level contains components <code>status</code> and <code>results</code>, which we can reveal with <code>unnest_wider()</code>:</p> <div class="sourceCode" id="cb20"><pre class="sourceCode 
r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1"></a>loc <span class="op">%>%</span></span> <span id="cb20-2"><a href="#cb20-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(json)</span> <span id="cb20-3"><a href="#cb20-3"></a><span class="co">#> # A tibble: 5 x 3</span></span> <span id="cb20-4"><a href="#cb20-4"></a><span class="co">#> city results status</span></span> <span id="cb20-5"><a href="#cb20-5"></a><span class="co">#> <chr> <list> <chr> </span></span> <span id="cb20-6"><a href="#cb20-6"></a><span class="co">#> 1 Houston <list [1]> OK </span></span> <span id="cb20-7"><a href="#cb20-7"></a><span class="co">#> 2 LA <list [1]> OK </span></span> <span id="cb20-8"><a href="#cb20-8"></a><span class="co">#> 3 New York <list [1]> OK </span></span> <span id="cb20-9"><a href="#cb20-9"></a><span class="co">#> 4 Chicago <list [1]> OK </span></span> <span id="cb20-10"><a href="#cb20-10"></a><span class="co">#> 5 Springfield <list [1]> OK</span></span></code></pre></div> <p>Notice that <code>results</code> is a list of lists. In the output shown here every city has 1 element (representing a unique match from the geocoding API), but an ambiguous name such as Springfield may return more than one. 
We can pull these out into separate rows with <code>unnest_longer()</code>:</p> <div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>loc <span class="op">%>%</span></span> <span id="cb21-2"><a href="#cb21-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(json) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb21-3"><a href="#cb21-3"></a><span class="st"> </span><span class="kw">unnest_longer</span>(results)</span> <span id="cb21-4"><a href="#cb21-4"></a><span class="co">#> # A tibble: 5 x 3</span></span> <span id="cb21-5"><a href="#cb21-5"></a><span class="co">#> city results status</span></span> <span id="cb21-6"><a href="#cb21-6"></a><span class="co">#> <chr> <list> <chr> </span></span> <span id="cb21-7"><a href="#cb21-7"></a><span class="co">#> 1 Houston <named list [5]> OK </span></span> <span id="cb21-8"><a href="#cb21-8"></a><span class="co">#> 2 LA <named list [5]> OK </span></span> <span id="cb21-9"><a href="#cb21-9"></a><span class="co">#> 3 New York <named list [5]> OK </span></span> <span id="cb21-10"><a href="#cb21-10"></a><span class="co">#> 4 Chicago <named list [5]> OK </span></span> <span id="cb21-11"><a href="#cb21-11"></a><span class="co">#> 5 Springfield <named list [5]> OK</span></span></code></pre></div> <p>Now these all have the same components, as revealed by <code>unnest_wider()</code>:</p> <div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1"></a>loc <span class="op">%>%</span></span> <span id="cb22-2"><a href="#cb22-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(json) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb22-3"><a href="#cb22-3"></a><span class="st"> </span><span class="kw">unnest_longer</span>(results) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb22-4"><a 
href="#cb22-4"></a><span class="st"> </span><span class="kw">unnest_wider</span>(results)</span> <span id="cb22-5"><a href="#cb22-5"></a><span class="co">#> # A tibble: 5 x 7</span></span> <span id="cb22-6"><a href="#cb22-6"></a><span class="co">#> city address_componen… formatted_address geometry place_id types status</span></span> <span id="cb22-7"><a href="#cb22-7"></a><span class="co">#> <chr> <list> <chr> <list> <chr> <lis> <chr> </span></span> <span id="cb22-8"><a href="#cb22-8"></a><span class="co">#> 1 Houston <list [4]> Houston, TX, USA <named l… ChIJAYWNSL… <lis… OK </span></span> <span id="cb22-9"><a href="#cb22-9"></a><span class="co">#> 2 LA <list [4]> Los Angeles, CA,… <named l… ChIJE9on3F… <lis… OK </span></span> <span id="cb22-10"><a href="#cb22-10"></a><span class="co">#> 3 New Yo… <list [3]> New York, NY, USA <named l… ChIJOwg_06… <lis… OK </span></span> <span id="cb22-11"><a href="#cb22-11"></a><span class="co">#> 4 Chicago <list [4]> Chicago, IL, USA <named l… ChIJ7cv00D… <lis… OK </span></span> <span id="cb22-12"><a href="#cb22-12"></a><span class="co">#> 5 Spring… <list [5]> Springfield, MO,… <named l… ChIJP5jIRf… <lis… OK</span></span></code></pre></div> <p>We can find the lat and lon coordinates by unnesting <code>geometry</code>:</p> <div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>loc <span class="op">%>%</span></span> <span id="cb23-2"><a href="#cb23-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(json) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb23-3"><a href="#cb23-3"></a><span class="st"> </span><span class="kw">unnest_longer</span>(results) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb23-4"><a href="#cb23-4"></a><span class="st"> </span><span class="kw">unnest_wider</span>(results) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb23-5"><a 
href="#cb23-5"></a><span class="st"> </span><span class="kw">unnest_wider</span>(geometry)</span> <span id="cb23-6"><a href="#cb23-6"></a><span class="co">#> # A tibble: 5 x 10</span></span> <span id="cb23-7"><a href="#cb23-7"></a><span class="co">#> city address_compone… formatted_addre… bounds location location_type viewport</span></span> <span id="cb23-8"><a href="#cb23-8"></a><span class="co">#> <chr> <list> <chr> <list> <list> <chr> <list> </span></span> <span id="cb23-9"><a href="#cb23-9"></a><span class="co">#> 1 Hous… <list [4]> Houston, TX, USA <name… <named … APPROXIMATE <named …</span></span> <span id="cb23-10"><a href="#cb23-10"></a><span class="co">#> 2 LA <list [4]> Los Angeles, CA… <name… <named … APPROXIMATE <named …</span></span> <span id="cb23-11"><a href="#cb23-11"></a><span class="co">#> 3 New … <list [3]> New York, NY, U… <name… <named … APPROXIMATE <named …</span></span> <span id="cb23-12"><a href="#cb23-12"></a><span class="co">#> 4 Chic… <list [4]> Chicago, IL, USA <name… <named … APPROXIMATE <named …</span></span> <span id="cb23-13"><a href="#cb23-13"></a><span class="co">#> 5 Spri… <list [5]> Springfield, MO… <name… <named … APPROXIMATE <named …</span></span> <span id="cb23-14"><a href="#cb23-14"></a><span class="co">#> # … with 3 more variables: place_id <chr>, types <list>, status <chr></span></span></code></pre></div> <p>And then location:</p> <div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1"></a>loc <span class="op">%>%</span></span> <span id="cb24-2"><a href="#cb24-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(json) <span class="op">%>%</span></span> <span id="cb24-3"><a href="#cb24-3"></a><span class="st"> </span><span class="kw">unnest_longer</span>(results) <span class="op">%>%</span></span> <span id="cb24-4"><a href="#cb24-4"></a><span class="st"> </span><span class="kw">unnest_wider</span>(results) <span class="op">%>%</span></span> 
<span id="cb24-5"><a href="#cb24-5"></a><span class="st"> </span><span class="kw">unnest_wider</span>(geometry) <span class="op">%>%</span></span> <span id="cb24-6"><a href="#cb24-6"></a><span class="st"> </span><span class="kw">unnest_wider</span>(location)</span> <span id="cb24-7"><a href="#cb24-7"></a><span class="co">#> # A tibble: 5 x 11</span></span> <span id="cb24-8"><a href="#cb24-8"></a><span class="co">#> city address_compone… formatted_addre… bounds lat lng location_type</span></span> <span id="cb24-9"><a href="#cb24-9"></a><span class="co">#> <chr> <list> <chr> <list> <dbl> <dbl> <chr> </span></span> <span id="cb24-10"><a href="#cb24-10"></a><span class="co">#> 1 Hous… <list [4]> Houston, TX, USA <name… 29.8 -95.4 APPROXIMATE </span></span> <span id="cb24-11"><a href="#cb24-11"></a><span class="co">#> 2 LA <list [4]> Los Angeles, CA… <name… 34.1 -118. APPROXIMATE </span></span> <span id="cb24-12"><a href="#cb24-12"></a><span class="co">#> 3 New … <list [3]> New York, NY, U… <name… 40.7 -74.0 APPROXIMATE </span></span> <span id="cb24-13"><a href="#cb24-13"></a><span class="co">#> 4 Chic… <list [4]> Chicago, IL, USA <name… 41.9 -87.6 APPROXIMATE </span></span> <span id="cb24-14"><a href="#cb24-14"></a><span class="co">#> 5 Spri… <list [5]> Springfield, MO… <name… 37.2 -93.3 APPROXIMATE </span></span> <span id="cb24-15"><a href="#cb24-15"></a><span class="co">#> # … with 4 more variables: viewport <list>, place_id <chr>, types <list>,</span></span> <span id="cb24-16"><a href="#cb24-16"></a><span class="co">#> # status <chr></span></span></code></pre></div> <p>Again, <code>unnest_auto()</code> makes this simpler with the small risk of failing in unexpected ways if the input structure changes:</p> <div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1"></a>loc <span class="op">%>%</span></span> <span id="cb25-2"><a href="#cb25-2"></a><span class="st"> </span><span 
class="kw">unnest_auto</span>(json) <span class="op">%>%</span></span> <span id="cb25-3"><a href="#cb25-3"></a><span class="st"> </span><span class="kw">unnest_auto</span>(results) <span class="op">%>%</span></span> <span id="cb25-4"><a href="#cb25-4"></a><span class="st"> </span><span class="kw">unnest_auto</span>(results) <span class="op">%>%</span></span> <span id="cb25-5"><a href="#cb25-5"></a><span class="st"> </span><span class="kw">unnest_auto</span>(geometry) <span class="op">%>%</span></span> <span id="cb25-6"><a href="#cb25-6"></a><span class="st"> </span><span class="kw">unnest_auto</span>(location)</span> <span id="cb25-7"><a href="#cb25-7"></a><span class="co">#> Using `unnest_wider(json)`; elements have 2 names in common</span></span> <span id="cb25-8"><a href="#cb25-8"></a><span class="co">#> Using `unnest_longer(results)`; no element has names</span></span> <span id="cb25-9"><a href="#cb25-9"></a><span class="co">#> Using `unnest_wider(results)`; elements have 5 names in common</span></span> <span id="cb25-10"><a href="#cb25-10"></a><span class="co">#> Using `unnest_wider(geometry)`; elements have 4 names in common</span></span> <span id="cb25-11"><a href="#cb25-11"></a><span class="co">#> Using `unnest_wider(location)`; elements have 2 names in common</span></span> <span id="cb25-12"><a href="#cb25-12"></a><span class="co">#> # A tibble: 5 x 11</span></span> <span id="cb25-13"><a href="#cb25-13"></a><span class="co">#> city address_compone… formatted_addre… bounds lat lng location_type</span></span> <span id="cb25-14"><a href="#cb25-14"></a><span class="co">#> <chr> <list> <chr> <list> <dbl> <dbl> <chr> </span></span> <span id="cb25-15"><a href="#cb25-15"></a><span class="co">#> 1 Hous… <list [4]> Houston, TX, USA <name… 29.8 -95.4 APPROXIMATE </span></span> <span id="cb25-16"><a href="#cb25-16"></a><span class="co">#> 2 LA <list [4]> Los Angeles, CA… <name… 34.1 -118. 
APPROXIMATE </span></span> <span id="cb25-17"><a href="#cb25-17"></a><span class="co">#> 3 New … <list [3]> New York, NY, U… <name… 40.7 -74.0 APPROXIMATE </span></span> <span id="cb25-18"><a href="#cb25-18"></a><span class="co">#> 4 Chic… <list [4]> Chicago, IL, USA <name… 41.9 -87.6 APPROXIMATE </span></span> <span id="cb25-19"><a href="#cb25-19"></a><span class="co">#> 5 Spri… <list [5]> Springfield, MO… <name… 37.2 -93.3 APPROXIMATE </span></span> <span id="cb25-20"><a href="#cb25-20"></a><span class="co">#> # … with 4 more variables: viewport <list>, place_id <chr>, types <list>,</span></span> <span id="cb25-21"><a href="#cb25-21"></a><span class="co">#> # status <chr></span></span></code></pre></div> <p>We could also just look at the first address for each city:</p> <div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1"></a>loc <span class="op">%>%</span></span> <span id="cb26-2"><a href="#cb26-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(json) <span class="op">%>%</span></span> <span id="cb26-3"><a href="#cb26-3"></a><span class="st"> </span><span class="kw">hoist</span>(results, <span class="dt">first_result =</span> <span class="dv">1</span>) <span class="op">%>%</span></span> <span id="cb26-4"><a href="#cb26-4"></a><span class="st"> </span><span class="kw">unnest_wider</span>(first_result) <span class="op">%>%</span></span> <span id="cb26-5"><a href="#cb26-5"></a><span class="st"> </span><span class="kw">unnest_wider</span>(geometry) <span class="op">%>%</span></span> <span id="cb26-6"><a href="#cb26-6"></a><span class="st"> </span><span class="kw">unnest_wider</span>(location)</span> <span id="cb26-7"><a href="#cb26-7"></a><span class="co">#> # A tibble: 5 x 11</span></span> <span id="cb26-8"><a href="#cb26-8"></a><span class="co">#> city address_compone… formatted_addre… bounds lat lng location_type</span></span> <span id="cb26-9"><a href="#cb26-9"></a><span 
class="co">#> <chr> <list> <chr> <list> <dbl> <dbl> <chr> </span></span> <span id="cb26-10"><a href="#cb26-10"></a><span class="co">#> 1 Hous… <list [4]> Houston, TX, USA <name… 29.8 -95.4 APPROXIMATE </span></span> <span id="cb26-11"><a href="#cb26-11"></a><span class="co">#> 2 LA <list [4]> Los Angeles, CA… <name… 34.1 -118. APPROXIMATE </span></span> <span id="cb26-12"><a href="#cb26-12"></a><span class="co">#> 3 New … <list [3]> New York, NY, U… <name… 40.7 -74.0 APPROXIMATE </span></span> <span id="cb26-13"><a href="#cb26-13"></a><span class="co">#> 4 Chic… <list [4]> Chicago, IL, USA <name… 41.9 -87.6 APPROXIMATE </span></span> <span id="cb26-14"><a href="#cb26-14"></a><span class="co">#> 5 Spri… <list [5]> Springfield, MO… <name… 37.2 -93.3 APPROXIMATE </span></span> <span id="cb26-15"><a href="#cb26-15"></a><span class="co">#> # … with 4 more variables: viewport <list>, place_id <chr>, types <list>,</span></span> <span id="cb26-16"><a href="#cb26-16"></a><span class="co">#> # status <chr></span></span></code></pre></div> <p>Or use <code>hoist()</code> to dive deeply to get directly to <code>lat</code> and <code>lng</code>:</p> <div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1"></a>loc <span class="op">%>%</span></span> <span id="cb27-2"><a href="#cb27-2"></a><span class="st"> </span><span class="kw">hoist</span>(json,</span> <span id="cb27-3"><a href="#cb27-3"></a> <span class="dt">lat =</span> <span class="kw">list</span>(<span class="st">"results"</span>, <span class="dv">1</span>, <span class="st">"geometry"</span>, <span class="st">"location"</span>, <span class="st">"lat"</span>),</span> <span id="cb27-4"><a href="#cb27-4"></a> <span class="dt">lng =</span> <span class="kw">list</span>(<span class="st">"results"</span>, <span class="dv">1</span>, <span class="st">"geometry"</span>, <span class="st">"location"</span>, <span class="st">"lng"</span>)</span> <span id="cb27-5"><a 
href="#cb27-5"></a> )</span> <span id="cb27-6"><a href="#cb27-6"></a><span class="co">#> # A tibble: 5 x 4</span></span> <span id="cb27-7"><a href="#cb27-7"></a><span class="co">#> city lat lng json </span></span> <span id="cb27-8"><a href="#cb27-8"></a><span class="co">#> <chr> <dbl> <dbl> <list> </span></span> <span id="cb27-9"><a href="#cb27-9"></a><span class="co">#> 1 Houston 29.8 -95.4 <named list [2]></span></span> <span id="cb27-10"><a href="#cb27-10"></a><span class="co">#> 2 LA 34.1 -118. <named list [2]></span></span> <span id="cb27-11"><a href="#cb27-11"></a><span class="co">#> 3 New York 40.7 -74.0 <named list [2]></span></span> <span id="cb27-12"><a href="#cb27-12"></a><span class="co">#> 4 Chicago 41.9 -87.6 <named list [2]></span></span> <span id="cb27-13"><a href="#cb27-13"></a><span class="co">#> 5 Springfield 37.2 -93.3 <named list [2]></span></span></code></pre></div> </div> <div id="sharla-gelfands-discography" class="section level2"> <h2>Sharla Gelfand’s discography</h2> <p>We’ll finish off with the most complex list, from <a href="https://sharla.party/post/discog-purrr/">Sharla Gelfand’s</a> discography. We’ll start the usual way: putting the list into a single column data frame, and then widening so each component is a column. 
I also parse the <code>date_added</code> column into a real date-time<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>.</p> <div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1"></a>discs <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">disc =</span> discog) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb28-2"><a href="#cb28-2"></a><span class="st"> </span><span class="kw">unnest_wider</span>(disc) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb28-3"><a href="#cb28-3"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">date_added =</span> <span class="kw">as.POSIXct</span>(<span class="kw">strptime</span>(date_added, <span class="st">"%Y-%m-%dT%H:%M:%S"</span>))) </span> <span id="cb28-4"><a href="#cb28-4"></a>discs</span> <span id="cb28-5"><a href="#cb28-5"></a><span class="co">#> # A tibble: 155 x 5</span></span> <span id="cb28-6"><a href="#cb28-6"></a><span class="co">#> instance_id date_added basic_information id rating</span></span> <span id="cb28-7"><a href="#cb28-7"></a><span class="co">#> <int> <dttm> <list> <int> <int></span></span> <span id="cb28-8"><a href="#cb28-8"></a><span class="co">#> 1 354823933 2019-02-16 17:48:59 <named list [11]> 7496378 0</span></span> <span id="cb28-9"><a href="#cb28-9"></a><span class="co">#> 2 354092601 2019-02-13 14:13:11 <named list [11]> 4490852 0</span></span> <span id="cb28-10"><a href="#cb28-10"></a><span class="co">#> 3 354091476 2019-02-13 14:07:23 <named list [11]> 9827276 0</span></span> <span id="cb28-11"><a href="#cb28-11"></a><span class="co">#> 4 351244906 2019-02-02 11:39:58 <named list [11]> 9769203 0</span></span> <span id="cb28-12"><a href="#cb28-12"></a><span class="co">#> 5 351244801 2019-02-02 11:39:37 <named list [11]> 7237138 0</span></span> <span id="cb28-13"><a href="#cb28-13"></a><span class="co">#> 6 351052065 
2019-02-01 20:40:53 &lt;named list [11]&gt; 13117042 0</span></span> <span id="cb28-14"><a href="#cb28-14"></a><span class="co">#&gt; # … with 149 more rows</span></span></code></pre></div> <p>At this level, we see information about when each disc was added to Sharla’s discography, not any information about the disc itself. To see that, we need to widen the <code>basic_information</code> column:</p> <div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1"></a>discs <span class="op">%&gt;%</span><span class="st"> </span><span class="kw">unnest_wider</span>(basic_information)</span> <span id="cb29-2"><a href="#cb29-2"></a><span class="co">#&gt; Error: Names must be unique.</span></span> <span id="cb29-3"><a href="#cb29-3"></a><span class="co">#&gt; x These names are duplicated:</span></span> <span id="cb29-4"><a href="#cb29-4"></a><span class="co">#&gt; * "id" at locations 6 and 14.</span></span> <span id="cb29-5"><a href="#cb29-5"></a><span class="co">#&gt; ℹ Use argument `names_repair` to specify repair strategy.</span></span></code></pre></div> <p>Unfortunately that fails because there’s an <code>id</code> column inside <code>basic_information</code>.
We can quickly see what’s going on by setting <code>names_repair = "unique"</code>:</p> <div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1"></a>discs <span class="op">%>%</span><span class="st"> </span><span class="kw">unnest_wider</span>(basic_information, <span class="dt">names_repair =</span> <span class="st">"unique"</span>)</span> <span id="cb30-2"><a href="#cb30-2"></a><span class="co">#> New names:</span></span> <span id="cb30-3"><a href="#cb30-3"></a><span class="co">#> * id -> id...6</span></span> <span id="cb30-4"><a href="#cb30-4"></a><span class="co">#> * id -> id...14</span></span> <span id="cb30-5"><a href="#cb30-5"></a><span class="co">#> # A tibble: 155 x 15</span></span> <span id="cb30-6"><a href="#cb30-6"></a><span class="co">#> instance_id date_added labels year artists id...6 thumb title</span></span> <span id="cb30-7"><a href="#cb30-7"></a><span class="co">#> <int> <dttm> <list> <int> <list> <int> <chr> <chr></span></span> <span id="cb30-8"><a href="#cb30-8"></a><span class="co">#> 1 354823933 2019-02-16 17:48:59 <list… 2015 <list … 7.50e6 http… Demo </span></span> <span id="cb30-9"><a href="#cb30-9"></a><span class="co">#> 2 354092601 2019-02-13 14:13:11 <list… 2013 <list … 4.49e6 http… Obse…</span></span> <span id="cb30-10"><a href="#cb30-10"></a><span class="co">#> 3 354091476 2019-02-13 14:07:23 <list… 2017 <list … 9.83e6 http… I </span></span> <span id="cb30-11"><a href="#cb30-11"></a><span class="co">#> 4 351244906 2019-02-02 11:39:58 <list… 2017 <list … 9.77e6 http… Oído…</span></span> <span id="cb30-12"><a href="#cb30-12"></a><span class="co">#> 5 351244801 2019-02-02 11:39:37 <list… 2015 <list … 7.24e6 http… A Ca…</span></span> <span id="cb30-13"><a href="#cb30-13"></a><span class="co">#> 6 351052065 2019-02-01 20:40:53 <list… 2019 <list … 1.31e7 http… Tash…</span></span> <span id="cb30-14"><a href="#cb30-14"></a><span class="co">#> # … with 149 more rows, and 7 
more variables: formats <list>,</span></span> <span id="cb30-15"><a href="#cb30-15"></a><span class="co">#> # cover_image <chr>, resource_url <chr>, master_id <int>, master_url <chr>,</span></span> <span id="cb30-16"><a href="#cb30-16"></a><span class="co">#> # id...14 <int>, rating <int></span></span></code></pre></div> <p>The problem is that <code>basic_information</code> repeats the <code>id</code> column that’s also stored at the top-level, so we can just drop that:</p> <div class="sourceCode" id="cb31"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1"></a>discs <span class="op">%>%</span><span class="st"> </span></span> <span id="cb31-2"><a href="#cb31-2"></a><span class="st"> </span><span class="kw">select</span>(<span class="op">!</span>id) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb31-3"><a href="#cb31-3"></a><span class="st"> </span><span class="kw">unnest_wider</span>(basic_information)</span> <span id="cb31-4"><a href="#cb31-4"></a><span class="co">#> # A tibble: 155 x 14</span></span> <span id="cb31-5"><a href="#cb31-5"></a><span class="co">#> instance_id date_added labels year artists id thumb title</span></span> <span id="cb31-6"><a href="#cb31-6"></a><span class="co">#> <int> <dttm> <list> <int> <list> <int> <chr> <chr></span></span> <span id="cb31-7"><a href="#cb31-7"></a><span class="co">#> 1 354823933 2019-02-16 17:48:59 <list… 2015 <list … 7.50e6 http… Demo </span></span> <span id="cb31-8"><a href="#cb31-8"></a><span class="co">#> 2 354092601 2019-02-13 14:13:11 <list… 2013 <list … 4.49e6 http… Obse…</span></span> <span id="cb31-9"><a href="#cb31-9"></a><span class="co">#> 3 354091476 2019-02-13 14:07:23 <list… 2017 <list … 9.83e6 http… I </span></span> <span id="cb31-10"><a href="#cb31-10"></a><span class="co">#> 4 351244906 2019-02-02 11:39:58 <list… 2017 <list … 9.77e6 http… Oído…</span></span> <span id="cb31-11"><a href="#cb31-11"></a><span class="co">#> 5 351244801 
2019-02-02 11:39:37 <list… 2015 <list … 7.24e6 http… A Ca…</span></span> <span id="cb31-12"><a href="#cb31-12"></a><span class="co">#> 6 351052065 2019-02-01 20:40:53 <list… 2019 <list … 1.31e7 http… Tash…</span></span> <span id="cb31-13"><a href="#cb31-13"></a><span class="co">#> # … with 149 more rows, and 6 more variables: formats <list>,</span></span> <span id="cb31-14"><a href="#cb31-14"></a><span class="co">#> # cover_image <chr>, resource_url <chr>, master_id <int>, master_url <chr>,</span></span> <span id="cb31-15"><a href="#cb31-15"></a><span class="co">#> # rating <int></span></span></code></pre></div> <p>Alternatively, we could use <code>hoist()</code>:</p> <div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1"></a>discs <span class="op">%>%</span><span class="st"> </span></span> <span id="cb32-2"><a href="#cb32-2"></a><span class="st"> </span><span class="kw">hoist</span>(basic_information,</span> <span id="cb32-3"><a href="#cb32-3"></a> <span class="dt">title =</span> <span class="st">"title"</span>,</span> <span id="cb32-4"><a href="#cb32-4"></a> <span class="dt">year =</span> <span class="st">"year"</span>,</span> <span id="cb32-5"><a href="#cb32-5"></a> <span class="dt">label =</span> <span class="kw">list</span>(<span class="st">"labels"</span>, <span class="dv">1</span>, <span class="st">"name"</span>),</span> <span id="cb32-6"><a href="#cb32-6"></a> <span class="dt">artist =</span> <span class="kw">list</span>(<span class="st">"artists"</span>, <span class="dv">1</span>, <span class="st">"name"</span>)</span> <span id="cb32-7"><a href="#cb32-7"></a> )</span> <span id="cb32-8"><a href="#cb32-8"></a><span class="co">#> # A tibble: 155 x 9</span></span> <span id="cb32-9"><a href="#cb32-9"></a><span class="co">#> instance_id date_added title year label artist basic_informati…</span></span> <span id="cb32-10"><a href="#cb32-10"></a><span class="co">#> <int> <dttm> <chr> <int> <chr> 
&lt;chr&gt; &lt;list&gt; </span></span> <span id="cb32-11"><a href="#cb32-11"></a><span class="co">#&gt; 1 354823933 2019-02-16 17:48:59 Demo 2015 Tobi… Mollot &lt;named list [9]&gt;</span></span> <span id="cb32-12"><a href="#cb32-12"></a><span class="co">#&gt; 2 354092601 2019-02-13 14:13:11 Obse… 2013 La V… Una B… &lt;named list [9]&gt;</span></span> <span id="cb32-13"><a href="#cb32-13"></a><span class="co">#&gt; 3 354091476 2019-02-13 14:07:23 I 2017 La V… S.H.I… &lt;named list [9]&gt;</span></span> <span id="cb32-14"><a href="#cb32-14"></a><span class="co">#&gt; 4 351244906 2019-02-02 11:39:58 Oído… 2017 La V… Rata … &lt;named list [9]&gt;</span></span> <span id="cb32-15"><a href="#cb32-15"></a><span class="co">#&gt; 5 351244801 2019-02-02 11:39:37 A Ca… 2015 Kato… Ivy (… &lt;named list [9]&gt;</span></span> <span id="cb32-16"><a href="#cb32-16"></a><span class="co">#&gt; 6 351052065 2019-02-01 20:40:53 Tash… 2019 High… Tashme &lt;named list [9]&gt;</span></span> <span id="cb32-17"><a href="#cb32-17"></a><span class="co">#&gt; # … with 149 more rows, and 2 more variables: id &lt;int&gt;, rating &lt;int&gt;</span></span></code></pre></div> <p>Here I quickly extract the name of the first label and artist by indexing deeply into the nested list.</p> <p>A more systematic approach would be to create separate tables for artist and format:</p> <div class="sourceCode" id="cb33"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1"></a>discs <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb33-2"><a href="#cb33-2"></a><span class="st"> </span><span class="kw">hoist</span>(basic_information, <span class="dt">artist =</span> <span class="st">"artists"</span>) <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb33-3"><a href="#cb33-3"></a><span class="st"> </span><span class="kw">select</span>(<span class="dt">disc_id =</span> id, artist) <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb33-4"><a href="#cb33-4"></a><span class="st"> </span><span
class="kw">unnest_longer</span>(artist) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb33-5"><a href="#cb33-5"></a><span class="st"> </span><span class="kw">unnest_wider</span>(artist)</span> <span id="cb33-6"><a href="#cb33-6"></a><span class="co">#> # A tibble: 167 x 8</span></span> <span id="cb33-7"><a href="#cb33-7"></a><span class="co">#> disc_id join name anv tracks role resource_url id</span></span> <span id="cb33-8"><a href="#cb33-8"></a><span class="co">#> <int> <chr> <chr> <chr> <chr> <chr> <chr> <int></span></span> <span id="cb33-9"><a href="#cb33-9"></a><span class="co">#> 1 7496378 "" Mollot "" "" "" https://api.discogs.co… 4.62e6</span></span> <span id="cb33-10"><a href="#cb33-10"></a><span class="co">#> 2 4490852 "" Una Bèstia I… "" "" "" https://api.discogs.co… 3.19e6</span></span> <span id="cb33-11"><a href="#cb33-11"></a><span class="co">#> 3 9827276 "" S.H.I.T. (3) "" "" "" https://api.discogs.co… 2.77e6</span></span> <span id="cb33-12"><a href="#cb33-12"></a><span class="co">#> 4 9769203 "" Rata Negra "" "" "" https://api.discogs.co… 4.28e6</span></span> <span id="cb33-13"><a href="#cb33-13"></a><span class="co">#> 5 7237138 "" Ivy (18) "" "" "" https://api.discogs.co… 3.60e6</span></span> <span id="cb33-14"><a href="#cb33-14"></a><span class="co">#> 6 13117042 "" Tashme "" "" "" https://api.discogs.co… 5.21e6</span></span> <span id="cb33-15"><a href="#cb33-15"></a><span class="co">#> # … with 161 more rows</span></span> <span id="cb33-16"><a href="#cb33-16"></a></span> <span id="cb33-17"><a href="#cb33-17"></a>discs <span class="op">%>%</span><span class="st"> </span></span> <span id="cb33-18"><a href="#cb33-18"></a><span class="st"> </span><span class="kw">hoist</span>(basic_information, <span class="dt">format =</span> <span class="st">"formats"</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb33-19"><a href="#cb33-19"></a><span class="st"> </span><span class="kw">select</span>(<span 
class="dt">disc_id =</span> id, format) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb33-20"><a href="#cb33-20"></a><span class="st"> </span><span class="kw">unnest_longer</span>(format) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb33-21"><a href="#cb33-21"></a><span class="st"> </span><span class="kw">unnest_wider</span>(format) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb33-22"><a href="#cb33-22"></a><span class="st"> </span><span class="kw">unnest_longer</span>(descriptions)</span> <span id="cb33-23"><a href="#cb33-23"></a><span class="co">#> # A tibble: 280 x 5</span></span> <span id="cb33-24"><a href="#cb33-24"></a><span class="co">#> disc_id descriptions text name qty </span></span> <span id="cb33-25"><a href="#cb33-25"></a><span class="co">#> <int> <chr> <chr> <chr> <chr></span></span> <span id="cb33-26"><a href="#cb33-26"></a><span class="co">#> 1 7496378 "Numbered" Black Cassette 1 </span></span> <span id="cb33-27"><a href="#cb33-27"></a><span class="co">#> 2 4490852 "LP" <NA> Vinyl 1 </span></span> <span id="cb33-28"><a href="#cb33-28"></a><span class="co">#> 3 9827276 "7\"" <NA> Vinyl 1 </span></span> <span id="cb33-29"><a href="#cb33-29"></a><span class="co">#> 4 9827276 "45 RPM" <NA> Vinyl 1 </span></span> <span id="cb33-30"><a href="#cb33-30"></a><span class="co">#> 5 9827276 "EP" <NA> Vinyl 1 </span></span> <span id="cb33-31"><a href="#cb33-31"></a><span class="co">#> 6 9769203 "LP" <NA> Vinyl 1 </span></span> <span id="cb33-32"><a href="#cb33-32"></a><span class="co">#> # … with 274 more rows</span></span></code></pre></div> <p>Then you could join these back on to the original dataset as needed.</p> </div> <div class="footnotes"> <hr /> <ol> <li id="fn1"><p>I’d normally use <code>readr::parse_datetime()</code> or <code>lubridate::ymd_hms()</code>, but I can’t here because it’s a vignette and I don’t want to add a dependency to tidyr just to simplify one example.<a 
href="#fnref1" class="footnote-back">↩︎</a></p></li>
</ol>
</div>

<!-- code folding -->

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Build a script element pointing at the hosted MathJax bundle and attach
  // it to the document head at runtime, instead of declaring it statically.
  (function () {
    var headNode = document.getElementsByTagName("head")[0];
    var loader = document.createElement("script");

    loader.type = "text/javascript";
    loader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

    // Appending the element is what triggers the browser to fetch the file.
    headNode.appendChild(loader);
  })();
</script>

</body>
</html>