<!-- EVOLUTION-MANAGER
Edit File: grouping.html -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Grouped data</title>
<script>
/*
 * Hide empty <a> tag within highlighted CodeBlock for screen reader
 * accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
 * v0.0.1
 * Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
 *
 * Block comments are used on purpose: a "//" comment runs to the end of the
 * physical line, so if this file's newlines are ever collapsed a line comment
 * would swallow the code that follows it.
 */
document.addEventListener('DOMContentLoaded', function() {
  /* every element carrying the "sourceCode" class (div, pre and code wrappers) */
  const codeList = document.getElementsByClassName("sourceCode");
  for (var i = 0; i < codeList.length; i++) {
    var linkList = codeList[i].getElementsByTagName('a');
    for (var j = 0; j < linkList.length; j++) {
      /* only the empty per-line anchors are hidden; links with content are left alone */
      if (linkList[j].innerHTML === "") {
        linkList[j].setAttribute('aria-hidden', 'true');
        /* aria-hidden must not sit on a focusable element, so also take the
           empty anchor out of the keyboard tab order */
        linkList[j].setAttribute('tabindex', '-1');
      }
    }
  }
});
</script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css" data-origin="pandoc">
code.sourceCode > span { display: inline-block; line-height: 1.25; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen { div.sourceCode { overflow: auto; } }
@media print { code.sourceCode { white-space: pre-wrap; } code.sourceCode > span { text-indent: -5em; padding-left: 5em; } }
pre.numberSource code { counter-reset: source-line 0; }
pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before { content: counter(source-line); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none; -khtml-user-select: none; -moz-user-select: none; -ms-user-select: none; 
user-select: none; padding: 0 4px; width: 4em; color: #aaaaaa; }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode { }
@media screen { code.sourceCode > span > a:first-child::before { text-decoration: underline; } }
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
/*
 * Apply pandoc div.sourceCode style to pre.sourceCode instead.
 *
 * Walks every stylesheet whose owner element is marked data-origin="pandoc"
 * and, for each rule whose selector is exactly "div.sourceCode" and which sets
 * a color or background-color, replaces it in place with a "pre.sourceCode"
 * rule carrying the same declarations.
 *
 * Block comments are used on purpose: a "//" comment runs to the end of the
 * physical line, so if this file's newlines are ever collapsed a line comment
 * would swallow the code that follows it.
 */
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    var owner = sheets[i].ownerNode;
    /* ownerNode can be null (e.g. for @import-ed sheets); skip those and
       anything that was not emitted by pandoc */
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    var rules;
    /* reading cssRules may throw; such sheets are skipped rather than aborting */
    try { rules = sheets[i].cssRules; } catch (e) { continue; }
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j];
      /* check if there is a div.sourceCode rule */
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
      var style = rule.style.cssText;
      /* check if color or background-color is set */
      if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
      /* replace div.sourceCode by a pre.sourceCode rule at the same index,
         so the rule order (and therefore the cascade) is unchanged */
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; }
#TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; }
#TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; }
#TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; }
#TOC ul ul { margin-left: -2em; }
#TOC li { line-height: 16px; }
table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; }
table th { border-width: 2px; padding: 5px; border-style: inset; }
table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; }
table, table th, table td { border-left-style: none; border-right-style: none; }
table thead, table tr.even { background-color: #f7f7f7; }
p { margin: 0.5em 0; }
blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; }
hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; }
dl { margin-left: 0; }
dl dd { 
margin-bottom: 13px; margin-left: 13px; } dl dt { font-weight: bold; } ul { margin-top: 0; } ul li { list-style: circle outside; } ul ul { margin-bottom: 0; } pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; } pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; } pre:not([class]) { background-color: #f7f7f7; } code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; } p > code, li > code { padding: 2px 0px; } div.figure { text-align: center; } img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; } h1 { margin-top: 0; font-size: 35px; line-height: 40px; } h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; } h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; } h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; } h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; } a { color: #0033dd; text-decoration: none; } a:hover { color: #6666ff; } a:visited { color: #800080; } a:visited:hover { color: #BB00BB; } a[href^="http:"] { text-decoration: underline; } a[href^="https:"] { text-decoration: underline; } code > span.kw { color: #555; font-weight: bold; } code > span.dt { color: #902000; } code > span.dv { color: #40a070; } code > span.bn { color: #d14; } code > span.fl { color: #d14; } code > span.ch { color: #d14; } code > span.st { color: #d14; } code > span.co { color: #888888; font-style: italic; } code > span.ot { color: #007020; } code > span.al { color: #ff0000; font-weight: bold; } code > span.fu { color: #900; font-weight: bold; } code > span.er { color: #a61717; background-color: #e3d2d2; } </style> </head> <body> <h1 class="title toc-ignore">Grouped data</h1> <p>dplyr verbs are particularly powerful when you apply them to grouped data frames (<code>grouped_df</code> objects). 
This vignette shows you:</p> <ul> <li><p>How to group, inspect, and ungroup with <code>group_by()</code> and friends.</p></li> <li><p>How individual dplyr verbs change their behaviour when applied to a grouped data frame.</p></li> <li><p>How to access data about the “current” group from within a verb.</p></li> </ul> <p>We’ll start by loading dplyr:</p> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(dplyr)</span></code></pre></div> <div id="group_by" class="section level2"> <h2><code>group_by()</code></h2> <p>The most important grouping verb is <code>group_by()</code>: it takes a data frame and one or more variables to group by:</p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>by_species &lt;-<span class="st"> </span>starwars <span class="op">%>%</span><span class="st"> </span><span class="kw">group_by</span>(species)</span> <span id="cb2-2"><a href="#cb2-2"></a>by_sex_gender &lt;-<span class="st"> </span>starwars <span class="op">%>%</span><span class="st"> </span><span class="kw">group_by</span>(sex, gender)</span></code></pre></div> <p>You can see the grouping when you print the data:</p> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a>by_species</span> <span id="cb3-2"><a href="#cb3-2"></a><span class="co">#> # A tibble: 87 x 14</span></span> <span id="cb3-3"><a href="#cb3-3"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb3-4"><a href="#cb3-4"></a><span class="co">#> name height mass hair_color skin_color eye_color birth_year sex gender</span></span> <span id="cb3-5"><a href="#cb3-5"></a><span class="co">#> &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; </span></span> <span id="cb3-6"><a href="#cb3-6"></a><span class="co">#> 1 Luke… 172 77 blond fair blue 19 male 
mascu…</span></span> <span id="cb3-7"><a href="#cb3-7"></a><span class="co">#> 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…</span></span> <span id="cb3-8"><a href="#cb3-8"></a><span class="co">#> 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…</span></span> <span id="cb3-9"><a href="#cb3-9"></a><span class="co">#> 4 Dart… 202 136 none white yellow 41.9 male mascu…</span></span> <span id="cb3-10"><a href="#cb3-10"></a><span class="co">#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,</span></span> <span id="cb3-11"><a href="#cb3-11"></a><span class="co">#> # films <list>, vehicles <list>, starships <list></span></span> <span id="cb3-12"><a href="#cb3-12"></a>by_sex_gender</span> <span id="cb3-13"><a href="#cb3-13"></a><span class="co">#> # A tibble: 87 x 14</span></span> <span id="cb3-14"><a href="#cb3-14"></a><span class="co">#> # Groups: sex, gender [6]</span></span> <span id="cb3-15"><a href="#cb3-15"></a><span class="co">#> name height mass hair_color skin_color eye_color birth_year sex gender</span></span> <span id="cb3-16"><a href="#cb3-16"></a><span class="co">#> <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> </span></span> <span id="cb3-17"><a href="#cb3-17"></a><span class="co">#> 1 Luke… 172 77 blond fair blue 19 male mascu…</span></span> <span id="cb3-18"><a href="#cb3-18"></a><span class="co">#> 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…</span></span> <span id="cb3-19"><a href="#cb3-19"></a><span class="co">#> 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…</span></span> <span id="cb3-20"><a href="#cb3-20"></a><span class="co">#> 4 Dart… 202 136 none white yellow 41.9 male mascu…</span></span> <span id="cb3-21"><a href="#cb3-21"></a><span class="co">#> # … with 83 more rows, and 5 more variables: homeworld <chr>, species <chr>,</span></span> <span id="cb3-22"><a href="#cb3-22"></a><span class="co">#> # films <list>, vehicles <list>, starships <list></span></span></code></pre></div> <p>Or use 
<code>tally()</code> to count the number of rows in each group. The <code>sort</code> argument is useful if you want to see the largest groups up front.</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>by_species <span class="op">%>%</span><span class="st"> </span><span class="kw">tally</span>()</span> <span id="cb4-2"><a href="#cb4-2"></a><span class="co">#> # A tibble: 38 x 2</span></span> <span id="cb4-3"><a href="#cb4-3"></a><span class="co">#> species n</span></span> <span id="cb4-4"><a href="#cb4-4"></a><span class="co">#> <chr> <int></span></span> <span id="cb4-5"><a href="#cb4-5"></a><span class="co">#> 1 Aleena 1</span></span> <span id="cb4-6"><a href="#cb4-6"></a><span class="co">#> 2 Besalisk 1</span></span> <span id="cb4-7"><a href="#cb4-7"></a><span class="co">#> 3 Cerean 1</span></span> <span id="cb4-8"><a href="#cb4-8"></a><span class="co">#> 4 Chagrian 1</span></span> <span id="cb4-9"><a href="#cb4-9"></a><span class="co">#> # … with 34 more rows</span></span> <span id="cb4-10"><a href="#cb4-10"></a></span> <span id="cb4-11"><a href="#cb4-11"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span><span class="kw">tally</span>(<span class="dt">sort =</span> <span class="ot">TRUE</span>)</span> <span id="cb4-12"><a href="#cb4-12"></a><span class="co">#> # A tibble: 6 x 3</span></span> <span id="cb4-13"><a href="#cb4-13"></a><span class="co">#> # Groups: sex [5]</span></span> <span id="cb4-14"><a href="#cb4-14"></a><span class="co">#> sex gender n</span></span> <span id="cb4-15"><a href="#cb4-15"></a><span class="co">#> <chr> <chr> <int></span></span> <span id="cb4-16"><a href="#cb4-16"></a><span class="co">#> 1 male masculine 60</span></span> <span id="cb4-17"><a href="#cb4-17"></a><span class="co">#> 2 female feminine 16</span></span> <span id="cb4-18"><a href="#cb4-18"></a><span class="co">#> 3 none masculine 5</span></span> <span id="cb4-19"><a 
href="#cb4-19"></a><span class="co">#> 4 <NA> <NA> 4</span></span> <span id="cb4-20"><a href="#cb4-20"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <p>As well as grouping by existing variables, you can group by any function of existing variables. This is equivalent to performing a <code>mutate()</code> <strong>before</strong> the <code>group_by()</code>:</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>bmi_breaks <-<span class="st"> </span><span class="kw">c</span>(<span class="dv">0</span>, <span class="fl">18.5</span>, <span class="dv">25</span>, <span class="dv">30</span>, <span class="ot">Inf</span>)</span> <span id="cb5-2"><a href="#cb5-2"></a></span> <span id="cb5-3"><a href="#cb5-3"></a>starwars <span class="op">%>%</span></span> <span id="cb5-4"><a href="#cb5-4"></a><span class="st"> </span><span class="kw">group_by</span>(<span class="dt">bmi_cat =</span> <span class="kw">cut</span>(mass<span class="op">/</span>(height<span class="op">/</span><span class="dv">100</span>)<span class="op">^</span><span class="dv">2</span>, <span class="dt">breaks=</span>bmi_breaks)) <span class="op">%>%</span></span> <span id="cb5-5"><a href="#cb5-5"></a><span class="st"> </span><span class="kw">tally</span>()</span> <span id="cb5-6"><a href="#cb5-6"></a><span class="co">#> # A tibble: 5 x 2</span></span> <span id="cb5-7"><a href="#cb5-7"></a><span class="co">#> bmi_cat n</span></span> <span id="cb5-8"><a href="#cb5-8"></a><span class="co">#> <fct> <int></span></span> <span id="cb5-9"><a href="#cb5-9"></a><span class="co">#> 1 (0,18.5] 10</span></span> <span id="cb5-10"><a href="#cb5-10"></a><span class="co">#> 2 (18.5,25] 24</span></span> <span id="cb5-11"><a href="#cb5-11"></a><span class="co">#> 3 (25,30] 13</span></span> <span id="cb5-12"><a href="#cb5-12"></a><span class="co">#> 4 (30,Inf] 12</span></span> <span id="cb5-13"><a href="#cb5-13"></a><span 
class="co">#> # … with 1 more row</span></span></code></pre></div> </div> <div id="group-metadata" class="section level2"> <h2>Group metadata</h2> <p>You can see underlying group data with <code>group_keys()</code>. It has one row for each group and one column for each grouping variable:</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>by_species <span class="op">%>%</span><span class="st"> </span><span class="kw">group_keys</span>()</span> <span id="cb6-2"><a href="#cb6-2"></a><span class="co">#> # A tibble: 38 x 1</span></span> <span id="cb6-3"><a href="#cb6-3"></a><span class="co">#> species </span></span> <span id="cb6-4"><a href="#cb6-4"></a><span class="co">#> * <chr> </span></span> <span id="cb6-5"><a href="#cb6-5"></a><span class="co">#> 1 Aleena </span></span> <span id="cb6-6"><a href="#cb6-6"></a><span class="co">#> 2 Besalisk</span></span> <span id="cb6-7"><a href="#cb6-7"></a><span class="co">#> 3 Cerean </span></span> <span id="cb6-8"><a href="#cb6-8"></a><span class="co">#> 4 Chagrian</span></span> <span id="cb6-9"><a href="#cb6-9"></a><span class="co">#> # … with 34 more rows</span></span> <span id="cb6-10"><a href="#cb6-10"></a></span> <span id="cb6-11"><a href="#cb6-11"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span><span class="kw">group_keys</span>()</span> <span id="cb6-12"><a href="#cb6-12"></a><span class="co">#> # A tibble: 6 x 2</span></span> <span id="cb6-13"><a href="#cb6-13"></a><span class="co">#> sex gender </span></span> <span id="cb6-14"><a href="#cb6-14"></a><span class="co">#> * <chr> <chr> </span></span> <span id="cb6-15"><a href="#cb6-15"></a><span class="co">#> 1 female feminine </span></span> <span id="cb6-16"><a href="#cb6-16"></a><span class="co">#> 2 hermaphroditic masculine</span></span> <span id="cb6-17"><a href="#cb6-17"></a><span class="co">#> 3 male masculine</span></span> <span id="cb6-18"><a href="#cb6-18"></a><span 
class="co">#> 4 none feminine </span></span> <span id="cb6-19"><a href="#cb6-19"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <p>You can see which group each row belongs to with <code>group_indices()</code>:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>by_species <span class="op">%>%</span><span class="st"> </span><span class="kw">group_indices</span>()</span> <span id="cb7-2"><a href="#cb7-2"></a><span class="co">#> [1] 11 6 6 11 11 11 11 6 11 11 11 11 34 11 24 12 11 11 36 11 11 6 31 11 11</span></span> <span id="cb7-3"><a href="#cb7-3"></a><span class="co">#> [26] 18 11 11 8 26 11 21 11 10 10 10 38 30 7 38 11 37 32 32 33 35 29 11 3 20</span></span> <span id="cb7-4"><a href="#cb7-4"></a><span class="co">#> [51] 37 27 13 23 16 4 11 11 11 9 17 17 11 11 11 11 5 2 15 15 11 1 6 25 19</span></span> <span id="cb7-5"><a href="#cb7-5"></a><span class="co">#> [76] 28 14 34 11 38 22 11 11 11 6 38 11</span></span></code></pre></div> <p>And which rows each group contains with <code>group_rows()</code>:</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>by_species <span class="op">%>%</span><span class="st"> </span><span class="kw">group_rows</span>() <span class="op">%>%</span><span class="st"> </span><span class="kw">head</span>()</span> <span id="cb8-2"><a href="#cb8-2"></a><span class="co">#> <list_of<integer>[6]></span></span> <span id="cb8-3"><a href="#cb8-3"></a><span class="co">#> [[1]]</span></span> <span id="cb8-4"><a href="#cb8-4"></a><span class="co">#> [1] 72</span></span> <span id="cb8-5"><a href="#cb8-5"></a><span class="co">#> </span></span> <span id="cb8-6"><a href="#cb8-6"></a><span class="co">#> [[2]]</span></span> <span id="cb8-7"><a href="#cb8-7"></a><span class="co">#> [1] 68</span></span> <span id="cb8-8"><a href="#cb8-8"></a><span class="co">#> </span></span> <span 
id="cb8-9"><a href="#cb8-9"></a><span class="co">#> [[3]]</span></span> <span id="cb8-10"><a href="#cb8-10"></a><span class="co">#> [1] 49</span></span> <span id="cb8-11"><a href="#cb8-11"></a><span class="co">#> </span></span> <span id="cb8-12"><a href="#cb8-12"></a><span class="co">#> [[4]]</span></span> <span id="cb8-13"><a href="#cb8-13"></a><span class="co">#> [1] 56</span></span> <span id="cb8-14"><a href="#cb8-14"></a><span class="co">#> </span></span> <span id="cb8-15"><a href="#cb8-15"></a><span class="co">#> [[5]]</span></span> <span id="cb8-16"><a href="#cb8-16"></a><span class="co">#> [1] 67</span></span> <span id="cb8-17"><a href="#cb8-17"></a><span class="co">#> </span></span> <span id="cb8-18"><a href="#cb8-18"></a><span class="co">#> [[6]]</span></span> <span id="cb8-19"><a href="#cb8-19"></a><span class="co">#> [1] 2 3 8 22 73 85</span></span></code></pre></div> <p>Use <code>group_vars()</code> if you just want the names of the grouping variables:</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a>by_species <span class="op">%>%</span><span class="st"> </span><span class="kw">group_vars</span>()</span> <span id="cb9-2"><a href="#cb9-2"></a><span class="co">#> [1] "species"</span></span> <span id="cb9-3"><a href="#cb9-3"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span><span class="kw">group_vars</span>()</span> <span id="cb9-4"><a href="#cb9-4"></a><span class="co">#> [1] "sex" "gender"</span></span></code></pre></div> <div id="changing-and-adding-to-grouping-variables" class="section level3"> <h3>Changing and adding to grouping variables</h3> <p>If you apply <code>group_by()</code> to an already grouped dataset, it will overwrite the existing grouping variables. 
For example, the following code groups by <code>homeworld</code> instead of <code>species</code>:</p> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a>by_species <span class="op">%>%</span></span> <span id="cb10-2"><a href="#cb10-2"></a><span class="st"> </span><span class="kw">group_by</span>(homeworld) <span class="op">%>%</span></span> <span id="cb10-3"><a href="#cb10-3"></a><span class="st"> </span><span class="kw">tally</span>()</span> <span id="cb10-4"><a href="#cb10-4"></a><span class="co">#> # A tibble: 49 x 2</span></span> <span id="cb10-5"><a href="#cb10-5"></a><span class="co">#> homeworld n</span></span> <span id="cb10-6"><a href="#cb10-6"></a><span class="co">#> &lt;chr&gt; &lt;int&gt;</span></span> <span id="cb10-7"><a href="#cb10-7"></a><span class="co">#> 1 Alderaan 3</span></span> <span id="cb10-8"><a href="#cb10-8"></a><span class="co">#> 2 Aleen Minor 1</span></span> <span id="cb10-9"><a href="#cb10-9"></a><span class="co">#> 3 Bespin 1</span></span> <span id="cb10-10"><a href="#cb10-10"></a><span class="co">#> 4 Bestine IV 1</span></span> <span id="cb10-11"><a href="#cb10-11"></a><span class="co">#> # … with 45 more rows</span></span></code></pre></div> <p>To <strong>augment</strong> the grouping, use <code>.add = TRUE</code><a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>. 
For example, the following code groups by species and homeworld:</p> <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>by_species <span class="op">%>%</span></span> <span id="cb11-2"><a href="#cb11-2"></a><span class="st"> </span><span class="kw">group_by</span>(homeworld, <span class="dt">.add =</span> <span class="ot">TRUE</span>) <span class="op">%>%</span></span> <span id="cb11-3"><a href="#cb11-3"></a><span class="st"> </span><span class="kw">tally</span>()</span> <span id="cb11-4"><a href="#cb11-4"></a><span class="co">#> # A tibble: 58 x 3</span></span> <span id="cb11-5"><a href="#cb11-5"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb11-6"><a href="#cb11-6"></a><span class="co">#> species homeworld n</span></span> <span id="cb11-7"><a href="#cb11-7"></a><span class="co">#> <chr> <chr> <int></span></span> <span id="cb11-8"><a href="#cb11-8"></a><span class="co">#> 1 Aleena Aleen Minor 1</span></span> <span id="cb11-9"><a href="#cb11-9"></a><span class="co">#> 2 Besalisk Ojom 1</span></span> <span id="cb11-10"><a href="#cb11-10"></a><span class="co">#> 3 Cerean Cerea 1</span></span> <span id="cb11-11"><a href="#cb11-11"></a><span class="co">#> 4 Chagrian Champala 1</span></span> <span id="cb11-12"><a href="#cb11-12"></a><span class="co">#> # … with 54 more rows</span></span></code></pre></div> </div> <div id="removing-grouping-variables" class="section level3"> <h3>Removing grouping variables</h3> <p>To remove all grouping variables, use <code>ungroup()</code>:</p> <div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>by_species <span class="op">%>%</span></span> <span id="cb12-2"><a href="#cb12-2"></a><span class="st"> </span><span class="kw">ungroup</span>() <span class="op">%>%</span></span> <span id="cb12-3"><a href="#cb12-3"></a><span class="st"> </span><span 
class="kw">tally</span>()</span> <span id="cb12-4"><a href="#cb12-4"></a><span class="co">#> # A tibble: 1 x 1</span></span> <span id="cb12-5"><a href="#cb12-5"></a><span class="co">#> n</span></span> <span id="cb12-6"><a href="#cb12-6"></a><span class="co">#> <int></span></span> <span id="cb12-7"><a href="#cb12-7"></a><span class="co">#> 1 87</span></span></code></pre></div> <p>You can also choose to selectively ungroup by listing the variables you want to remove:</p> <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span></span> <span id="cb13-2"><a href="#cb13-2"></a><span class="st"> </span><span class="kw">ungroup</span>(sex) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb13-3"><a href="#cb13-3"></a><span class="st"> </span><span class="kw">tally</span>()</span> <span id="cb13-4"><a href="#cb13-4"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb13-5"><a href="#cb13-5"></a><span class="co">#> gender n</span></span> <span id="cb13-6"><a href="#cb13-6"></a><span class="co">#> <chr> <int></span></span> <span id="cb13-7"><a href="#cb13-7"></a><span class="co">#> 1 feminine 17</span></span> <span id="cb13-8"><a href="#cb13-8"></a><span class="co">#> 2 masculine 66</span></span> <span id="cb13-9"><a href="#cb13-9"></a><span class="co">#> 3 <NA> 4</span></span></code></pre></div> </div> </div> <div id="verbs" class="section level2"> <h2>Verbs</h2> <p>The following sections describe how grouping affects the main dplyr verbs.</p> <div id="summarise" class="section level3"> <h3><code>summarise()</code></h3> <p><code>summarise()</code> computes a summary for each group. 
This means that it starts from <code>group_keys()</code>, adding summary variables to the right hand side:</p> <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>by_species <span class="op">%>%</span></span> <span id="cb14-2"><a href="#cb14-2"></a><span class="st"> </span><span class="kw">summarise</span>(</span> <span id="cb14-3"><a href="#cb14-3"></a> <span class="dt">n =</span> <span class="kw">n</span>(),</span> <span id="cb14-4"><a href="#cb14-4"></a> <span class="dt">height =</span> <span class="kw">mean</span>(height, <span class="dt">na.rm =</span> <span class="ot">TRUE</span>)</span> <span id="cb14-5"><a href="#cb14-5"></a> )</span> <span id="cb14-6"><a href="#cb14-6"></a><span class="co">#> `summarise()` ungrouping output (override with `.groups` argument)</span></span> <span id="cb14-7"><a href="#cb14-7"></a><span class="co">#> # A tibble: 38 x 3</span></span> <span id="cb14-8"><a href="#cb14-8"></a><span class="co">#> species n height</span></span> <span id="cb14-9"><a href="#cb14-9"></a><span class="co">#> <chr> <int> <dbl></span></span> <span id="cb14-10"><a href="#cb14-10"></a><span class="co">#> 1 Aleena 1 79</span></span> <span id="cb14-11"><a href="#cb14-11"></a><span class="co">#> 2 Besalisk 1 198</span></span> <span id="cb14-12"><a href="#cb14-12"></a><span class="co">#> 3 Cerean 1 198</span></span> <span id="cb14-13"><a href="#cb14-13"></a><span class="co">#> 4 Chagrian 1 196</span></span> <span id="cb14-14"><a href="#cb14-14"></a><span class="co">#> # … with 34 more rows</span></span></code></pre></div> <p>The <code>.groups=</code> argument controls the grouping structure of the output. 
The historical behaviour of removing the right hand side grouping variable corresponds to <code>.groups = "drop_last"</code> without a message or <code>.groups = NULL</code> with a message (the default).</p> <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-2"><a href="#cb15-2"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">n =</span> <span class="kw">n</span>()) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-3"><a href="#cb15-3"></a><span class="st"> </span><span class="kw">group_vars</span>()</span> <span id="cb15-4"><a href="#cb15-4"></a><span class="co">#> `summarise()` regrouping output by 'sex' (override with `.groups` argument)</span></span> <span id="cb15-5"><a href="#cb15-5"></a><span class="co">#> [1] "sex"</span></span> <span id="cb15-6"><a href="#cb15-6"></a></span> <span id="cb15-7"><a href="#cb15-7"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-8"><a href="#cb15-8"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">n =</span> <span class="kw">n</span>(), <span class="dt">.groups =</span> <span class="st">"drop_last"</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-9"><a href="#cb15-9"></a><span class="st"> </span><span class="kw">group_vars</span>()</span> <span id="cb15-10"><a href="#cb15-10"></a><span class="co">#> [1] "sex"</span></span></code></pre></div> <p>Since version 1.0.0 the groups may also be kept (<code>.groups = "keep"</code>) or dropped (<code>.groups = "drop"</code>).</p> <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span></span> <span id="cb16-2"><a 
href="#cb16-2"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">n =</span> <span class="kw">n</span>(), <span class="dt">.groups =</span> <span class="st">"keep"</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb16-3"><a href="#cb16-3"></a><span class="st"> </span><span class="kw">group_vars</span>()</span> <span id="cb16-4"><a href="#cb16-4"></a><span class="co">#> [1] "sex" "gender"</span></span> <span id="cb16-5"><a href="#cb16-5"></a></span> <span id="cb16-6"><a href="#cb16-6"></a>by_sex_gender <span class="op">%>%</span><span class="st"> </span></span> <span id="cb16-7"><a href="#cb16-7"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">n =</span> <span class="kw">n</span>(), <span class="dt">.groups =</span> <span class="st">"drop"</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb16-8"><a href="#cb16-8"></a><span class="st"> </span><span class="kw">group_vars</span>()</span> <span id="cb16-9"><a href="#cb16-9"></a><span class="co">#> character(0)</span></span></code></pre></div> <p>When the output no longer has grouping variables, it becomes ungrouped (i.e. a regular tibble).</p> </div> <div id="select-rename-and-relocate" class="section level3"> <h3><code>select()</code>, <code>rename()</code>, and <code>relocate()</code></h3> <p><code>rename()</code> and <code>relocate()</code> behave identically with grouped and ungrouped data because they only affect the name or position of existing columns. 
Grouped <code>select()</code> is almost identical to ungrouped select, except that it always includes the grouping variables:</p> <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1"></a>by_species <span class="op">%>%</span><span class="st"> </span><span class="kw">select</span>(mass)</span> <span id="cb17-2"><a href="#cb17-2"></a><span class="co">#> Adding missing grouping variables: `species`</span></span> <span id="cb17-3"><a href="#cb17-3"></a><span class="co">#> # A tibble: 87 x 2</span></span> <span id="cb17-4"><a href="#cb17-4"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb17-5"><a href="#cb17-5"></a><span class="co">#> species mass</span></span> <span id="cb17-6"><a href="#cb17-6"></a><span class="co">#> <chr> <dbl></span></span> <span id="cb17-7"><a href="#cb17-7"></a><span class="co">#> 1 Human 77</span></span> <span id="cb17-8"><a href="#cb17-8"></a><span class="co">#> 2 Droid 75</span></span> <span id="cb17-9"><a href="#cb17-9"></a><span class="co">#> 3 Droid 32</span></span> <span id="cb17-10"><a href="#cb17-10"></a><span class="co">#> 4 Human 136</span></span> <span id="cb17-11"><a href="#cb17-11"></a><span class="co">#> # … with 83 more rows</span></span></code></pre></div> <p>If you don’t want the grouping variables, you’ll have to first <code>ungroup()</code>. 
(This design is possibly a mistake, but we’re stuck with it for now.)</p> </div> <div id="arrange" class="section level3"> <h3><code>arrange()</code></h3> <p>Grouped <code>arrange()</code> is the same as ungrouped <code>arrange()</code>, unless you set <code>.by_group = TRUE</code>, in which case it will order first by the grouping variables.</p> <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a>by_species <span class="op">%>%</span></span> <span id="cb18-2"><a href="#cb18-2"></a><span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(mass)) <span class="op">%>%</span></span> <span id="cb18-3"><a href="#cb18-3"></a><span class="st"> </span><span class="kw">relocate</span>(species, mass)</span> <span id="cb18-4"><a href="#cb18-4"></a><span class="co">#> # A tibble: 87 x 14</span></span> <span id="cb18-5"><a href="#cb18-5"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb18-6"><a href="#cb18-6"></a><span class="co">#> species mass name height hair_color skin_color eye_color birth_year sex </span></span> <span id="cb18-7"><a href="#cb18-7"></a><span class="co">#> <chr> <dbl> <chr> <int> <chr> <chr> <chr> <dbl> <chr></span></span> <span id="cb18-8"><a href="#cb18-8"></a><span class="co">#> 1 Hutt 1358 Jabb… 175 <NA> green-tan… orange 600 herm…</span></span> <span id="cb18-9"><a href="#cb18-9"></a><span class="co">#> 2 Kaleesh 159 Grie… 216 none brown, wh… green, y… NA male </span></span> <span id="cb18-10"><a href="#cb18-10"></a><span class="co">#> 3 Droid 140 IG-88 200 none metal red 15 none </span></span> <span id="cb18-11"><a href="#cb18-11"></a><span class="co">#> 4 Human 136 Dart… 202 none white yellow 41.9 male </span></span> <span id="cb18-12"><a href="#cb18-12"></a><span class="co">#> # … with 83 more rows, and 5 more variables: gender <chr>, homeworld <chr>,</span></span> <span id="cb18-13"><a href="#cb18-13"></a><span 
class="co">#> # films <list>, vehicles <list>, starships <list></span></span> <span id="cb18-14"><a href="#cb18-14"></a></span> <span id="cb18-15"><a href="#cb18-15"></a>by_species <span class="op">%>%</span></span> <span id="cb18-16"><a href="#cb18-16"></a><span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(mass), <span class="dt">.by_group =</span> <span class="ot">TRUE</span>) <span class="op">%>%</span></span> <span id="cb18-17"><a href="#cb18-17"></a><span class="st"> </span><span class="kw">relocate</span>(species, mass)</span> <span id="cb18-18"><a href="#cb18-18"></a><span class="co">#> # A tibble: 87 x 14</span></span> <span id="cb18-19"><a href="#cb18-19"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb18-20"><a href="#cb18-20"></a><span class="co">#> species mass name height hair_color skin_color eye_color birth_year sex </span></span> <span id="cb18-21"><a href="#cb18-21"></a><span class="co">#> <chr> <dbl> <chr> <int> <chr> <chr> <chr> <dbl> <chr></span></span> <span id="cb18-22"><a href="#cb18-22"></a><span class="co">#> 1 Aleena 15 Ratt… 79 none grey, blue unknown NA male </span></span> <span id="cb18-23"><a href="#cb18-23"></a><span class="co">#> 2 Besali… 102 Dext… 198 none brown yellow NA male </span></span> <span id="cb18-24"><a href="#cb18-24"></a><span class="co">#> 3 Cerean 82 Ki-A… 198 white pale yellow 92 male </span></span> <span id="cb18-25"><a href="#cb18-25"></a><span class="co">#> 4 Chagri… NA Mas … 196 none blue blue NA male </span></span> <span id="cb18-26"><a href="#cb18-26"></a><span class="co">#> # … with 83 more rows, and 5 more variables: gender <chr>, homeworld <chr>,</span></span> <span id="cb18-27"><a href="#cb18-27"></a><span class="co">#> # films <list>, vehicles <list>, starships <list></span></span></code></pre></div> <p>Note that the second example is sorted by <code>species</code> (from the <code>group_by()</code> statement) and then by <code>mass</code> (within 
species).</p> </div> <div id="mutate-and-transmute" class="section level3"> <h3><code>mutate()</code> and <code>transmute()</code></h3> <p>In simple cases with vectorised functions, grouped and ungrouped <code>mutate()</code> give the same results. They differ when used with summary functions:</p> <div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1"></a><span class="co"># Subtract off global mean</span></span> <span id="cb19-2"><a href="#cb19-2"></a>starwars <span class="op">%>%</span><span class="st"> </span></span> <span id="cb19-3"><a href="#cb19-3"></a><span class="st"> </span><span class="kw">select</span>(name, homeworld, mass) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb19-4"><a href="#cb19-4"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">standard_mass =</span> mass <span class="op">-</span><span class="st"> </span><span class="kw">mean</span>(mass, <span class="dt">na.rm =</span> <span class="ot">TRUE</span>))</span> <span id="cb19-5"><a href="#cb19-5"></a><span class="co">#> # A tibble: 87 x 4</span></span> <span id="cb19-6"><a href="#cb19-6"></a><span class="co">#> name homeworld mass standard_mass</span></span> <span id="cb19-7"><a href="#cb19-7"></a><span class="co">#> <chr> <chr> <dbl> <dbl></span></span> <span id="cb19-8"><a href="#cb19-8"></a><span class="co">#> 1 Luke Skywalker Tatooine 77 -20.3</span></span> <span id="cb19-9"><a href="#cb19-9"></a><span class="co">#> 2 C-3PO Tatooine 75 -22.3</span></span> <span id="cb19-10"><a href="#cb19-10"></a><span class="co">#> 3 R2-D2 Naboo 32 -65.3</span></span> <span id="cb19-11"><a href="#cb19-11"></a><span class="co">#> 4 Darth Vader Tatooine 136 38.7</span></span> <span id="cb19-12"><a href="#cb19-12"></a><span class="co">#> # … with 83 more rows</span></span> <span id="cb19-13"><a href="#cb19-13"></a></span> <span id="cb19-14"><a href="#cb19-14"></a><span class="co"># 
Subtract off homeworld mean</span></span> <span id="cb19-15"><a href="#cb19-15"></a>starwars <span class="op">%>%</span><span class="st"> </span></span> <span id="cb19-16"><a href="#cb19-16"></a><span class="st"> </span><span class="kw">select</span>(name, homeworld, mass) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb19-17"><a href="#cb19-17"></a><span class="st"> </span><span class="kw">group_by</span>(homeworld) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb19-18"><a href="#cb19-18"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">standard_mass =</span> mass <span class="op">-</span><span class="st"> </span><span class="kw">mean</span>(mass, <span class="dt">na.rm =</span> <span class="ot">TRUE</span>))</span> <span id="cb19-19"><a href="#cb19-19"></a><span class="co">#> # A tibble: 87 x 4</span></span> <span id="cb19-20"><a href="#cb19-20"></a><span class="co">#> # Groups: homeworld [49]</span></span> <span id="cb19-21"><a href="#cb19-21"></a><span class="co">#> name homeworld mass standard_mass</span></span> <span id="cb19-22"><a href="#cb19-22"></a><span class="co">#> <chr> <chr> <dbl> <dbl></span></span> <span id="cb19-23"><a href="#cb19-23"></a><span class="co">#> 1 Luke Skywalker Tatooine 77 -8.38</span></span> <span id="cb19-24"><a href="#cb19-24"></a><span class="co">#> 2 C-3PO Tatooine 75 -10.4 </span></span> <span id="cb19-25"><a href="#cb19-25"></a><span class="co">#> 3 R2-D2 Naboo 32 -32.2 </span></span> <span id="cb19-26"><a href="#cb19-26"></a><span class="co">#> 4 Darth Vader Tatooine 136 50.6 </span></span> <span id="cb19-27"><a href="#cb19-27"></a><span class="co">#> # … with 83 more rows</span></span></code></pre></div> <p>Or with window functions like <code>min_rank()</code>:</p> <div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1"></a><span class="co"># Overall rank</span></span> <span 
id="cb20-2"><a href="#cb20-2"></a>starwars <span class="op">%>%</span><span class="st"> </span></span> <span id="cb20-3"><a href="#cb20-3"></a><span class="st"> </span><span class="kw">select</span>(name, homeworld, height) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb20-4"><a href="#cb20-4"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">rank =</span> <span class="kw">min_rank</span>(height))</span> <span id="cb20-5"><a href="#cb20-5"></a><span class="co">#> # A tibble: 87 x 4</span></span> <span id="cb20-6"><a href="#cb20-6"></a><span class="co">#> name homeworld height rank</span></span> <span id="cb20-7"><a href="#cb20-7"></a><span class="co">#> <chr> <chr> <int> <int></span></span> <span id="cb20-8"><a href="#cb20-8"></a><span class="co">#> 1 Luke Skywalker Tatooine 172 29</span></span> <span id="cb20-9"><a href="#cb20-9"></a><span class="co">#> 2 C-3PO Tatooine 167 21</span></span> <span id="cb20-10"><a href="#cb20-10"></a><span class="co">#> 3 R2-D2 Naboo 96 5</span></span> <span id="cb20-11"><a href="#cb20-11"></a><span class="co">#> 4 Darth Vader Tatooine 202 72</span></span> <span id="cb20-12"><a href="#cb20-12"></a><span class="co">#> # … with 83 more rows</span></span> <span id="cb20-13"><a href="#cb20-13"></a></span> <span id="cb20-14"><a href="#cb20-14"></a><span class="co"># Rank per homeworld</span></span> <span id="cb20-15"><a href="#cb20-15"></a>starwars <span class="op">%>%</span><span class="st"> </span></span> <span id="cb20-16"><a href="#cb20-16"></a><span class="st"> </span><span class="kw">select</span>(name, homeworld, height) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb20-17"><a href="#cb20-17"></a><span class="st"> </span><span class="kw">group_by</span>(homeworld) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb20-18"><a href="#cb20-18"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">rank =</span> <span 
class="kw">min_rank</span>(height))</span> <span id="cb20-19"><a href="#cb20-19"></a><span class="co">#> # A tibble: 87 x 4</span></span> <span id="cb20-20"><a href="#cb20-20"></a><span class="co">#> # Groups: homeworld [49]</span></span> <span id="cb20-21"><a href="#cb20-21"></a><span class="co">#> name homeworld height rank</span></span> <span id="cb20-22"><a href="#cb20-22"></a><span class="co">#> <chr> <chr> <int> <int></span></span> <span id="cb20-23"><a href="#cb20-23"></a><span class="co">#> 1 Luke Skywalker Tatooine 172 5</span></span> <span id="cb20-24"><a href="#cb20-24"></a><span class="co">#> 2 C-3PO Tatooine 167 4</span></span> <span id="cb20-25"><a href="#cb20-25"></a><span class="co">#> 3 R2-D2 Naboo 96 1</span></span> <span id="cb20-26"><a href="#cb20-26"></a><span class="co">#> 4 Darth Vader Tatooine 202 10</span></span> <span id="cb20-27"><a href="#cb20-27"></a><span class="co">#> # … with 83 more rows</span></span></code></pre></div> </div> <div id="filter" class="section level3"> <h3><code>filter()</code></h3> <p>A grouped <code>filter()</code> effectively does a <code>mutate()</code> to generate a logical variable, and then only keeps the rows where the variable is <code>TRUE</code>. This means that grouped filters can be used with summary functions. 
For example, we can find the tallest character of each species:</p> <div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>by_species <span class="op">%>%</span></span> <span id="cb21-2"><a href="#cb21-2"></a><span class="st"> </span><span class="kw">select</span>(name, species, height) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb21-3"><a href="#cb21-3"></a><span class="st"> </span><span class="kw">filter</span>(height <span class="op">==</span><span class="st"> </span><span class="kw">max</span>(height))</span> <span id="cb21-4"><a href="#cb21-4"></a><span class="co">#> # A tibble: 35 x 3</span></span> <span id="cb21-5"><a href="#cb21-5"></a><span class="co">#> # Groups: species [35]</span></span> <span id="cb21-6"><a href="#cb21-6"></a><span class="co">#> name species height</span></span> <span id="cb21-7"><a href="#cb21-7"></a><span class="co">#> <chr> <chr> <int></span></span> <span id="cb21-8"><a href="#cb21-8"></a><span class="co">#> 1 Greedo Rodian 173</span></span> <span id="cb21-9"><a href="#cb21-9"></a><span class="co">#> 2 Jabba Desilijic Tiure Hutt 175</span></span> <span id="cb21-10"><a href="#cb21-10"></a><span class="co">#> 3 Yoda Yoda's species 66</span></span> <span id="cb21-11"><a href="#cb21-11"></a><span class="co">#> 4 Bossk Trandoshan 190</span></span> <span id="cb21-12"><a href="#cb21-12"></a><span class="co">#> # … with 31 more rows</span></span></code></pre></div> <p>You can also use <code>filter()</code> to remove entire groups. 
For example, the following code eliminates all groups that only have a single member:</p> <div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1"></a>by_species <span class="op">%>%</span></span> <span id="cb22-2"><a href="#cb22-2"></a><span class="st"> </span><span class="kw">filter</span>(<span class="kw">n</span>() <span class="op">!=</span><span class="st"> </span><span class="dv">1</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb22-3"><a href="#cb22-3"></a><span class="st"> </span><span class="kw">tally</span>()</span> <span id="cb22-4"><a href="#cb22-4"></a><span class="co">#> # A tibble: 9 x 2</span></span> <span id="cb22-5"><a href="#cb22-5"></a><span class="co">#> species n</span></span> <span id="cb22-6"><a href="#cb22-6"></a><span class="co">#> <chr> <int></span></span> <span id="cb22-7"><a href="#cb22-7"></a><span class="co">#> 1 Droid 6</span></span> <span id="cb22-8"><a href="#cb22-8"></a><span class="co">#> 2 Gungan 3</span></span> <span id="cb22-9"><a href="#cb22-9"></a><span class="co">#> 3 Human 35</span></span> <span id="cb22-10"><a href="#cb22-10"></a><span class="co">#> 4 Kaminoan 2</span></span> <span id="cb22-11"><a href="#cb22-11"></a><span class="co">#> # … with 5 more rows</span></span></code></pre></div> </div> <div id="slice-and-friends" class="section level3"> <h3><code>slice()</code> and friends</h3> <p><code>slice()</code> and friends (<code>slice_head()</code>, <code>slice_tail()</code>, <code>slice_sample()</code>, <code>slice_min()</code> and <code>slice_max()</code>) select rows within a group. 
For example, we can select the first observation within each species:</p> <div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>by_species <span class="op">%>%</span></span> <span id="cb23-2"><a href="#cb23-2"></a><span class="st"> </span><span class="kw">relocate</span>(species) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb23-3"><a href="#cb23-3"></a><span class="st"> </span><span class="kw">slice</span>(<span class="dv">1</span>)</span> <span id="cb23-4"><a href="#cb23-4"></a><span class="co">#> # A tibble: 38 x 14</span></span> <span id="cb23-5"><a href="#cb23-5"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb23-6"><a href="#cb23-6"></a><span class="co">#> species name height mass hair_color skin_color eye_color birth_year sex </span></span> <span id="cb23-7"><a href="#cb23-7"></a><span class="co">#> <chr> <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr></span></span> <span id="cb23-8"><a href="#cb23-8"></a><span class="co">#> 1 Aleena Ratt… 79 15 none grey, blue unknown NA male </span></span> <span id="cb23-9"><a href="#cb23-9"></a><span class="co">#> 2 Besali… Dext… 198 102 none brown yellow NA male </span></span> <span id="cb23-10"><a href="#cb23-10"></a><span class="co">#> 3 Cerean Ki-A… 198 82 white pale yellow 92 male </span></span> <span id="cb23-11"><a href="#cb23-11"></a><span class="co">#> 4 Chagri… Mas … 196 NA none blue blue NA male </span></span> <span id="cb23-12"><a href="#cb23-12"></a><span class="co">#> # … with 34 more rows, and 5 more variables: gender <chr>, homeworld <chr>,</span></span> <span id="cb23-13"><a href="#cb23-13"></a><span class="co">#> # films <list>, vehicles <list>, starships <list></span></span></code></pre></div> <p>Similarly, we can use <code>slice_min()</code> to select the smallest <code>n</code> values of a variable:</p> <div class="sourceCode" id="cb24"><pre class="sourceCode r"><code 
class="sourceCode r"><span id="cb24-1"><a href="#cb24-1"></a>by_species <span class="op">%>%</span></span> <span id="cb24-2"><a href="#cb24-2"></a><span class="st"> </span><span class="kw">filter</span>(<span class="op">!</span><span class="kw">is.na</span>(height)) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb24-3"><a href="#cb24-3"></a><span class="st"> </span><span class="kw">slice_min</span>(height, <span class="dt">n =</span> <span class="dv">2</span>)</span> <span id="cb24-4"><a href="#cb24-4"></a><span class="co">#> # A tibble: 48 x 14</span></span> <span id="cb24-5"><a href="#cb24-5"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb24-6"><a href="#cb24-6"></a><span class="co">#> name height mass hair_color skin_color eye_color birth_year sex gender</span></span> <span id="cb24-7"><a href="#cb24-7"></a><span class="co">#> <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> </span></span> <span id="cb24-8"><a href="#cb24-8"></a><span class="co">#> 1 Ratt… 79 15 none grey, blue unknown NA male mascu…</span></span> <span id="cb24-9"><a href="#cb24-9"></a><span class="co">#> 2 Dext… 198 102 none brown yellow NA male mascu…</span></span> <span id="cb24-10"><a href="#cb24-10"></a><span class="co">#> 3 Ki-A… 198 82 white pale yellow 92 male mascu…</span></span> <span id="cb24-11"><a href="#cb24-11"></a><span class="co">#> 4 Mas … 196 NA none blue blue NA male mascu…</span></span> <span id="cb24-12"><a href="#cb24-12"></a><span class="co">#> # … with 44 more rows, and 5 more variables: homeworld <chr>, species <chr>,</span></span> <span id="cb24-13"><a href="#cb24-13"></a><span class="co">#> # films <list>, vehicles <list>, starships <list></span></span></code></pre></div> </div> </div> <div id="computing-on-grouping-information" class="section level2"> <h2>Computing on grouping information</h2> <p>Inside dplyr verbs, you can access various properties of the “current” group using a family of functions with the 
<code>cur_</code> prefix. These functions are typically not needed for everyday usage of dplyr, but can be useful because they allow you to break free from some of the typical constraints of dplyr verbs.</p> <div id="cur_data" class="section level3"> <h3><code>cur_data()</code></h3> <p><code>cur_data()</code> returns the current group, excluding grouping variables. It’s useful to feed to functions that take a whole data frame. For example, the following code fits a linear model of <code>mass ~ height</code> to each species:</p> <div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1"></a>by_species <span class="op">%>%</span></span> <span id="cb25-2"><a href="#cb25-2"></a><span class="st"> </span><span class="kw">filter</span>(<span class="kw">n</span>() <span class="op">></span><span class="st"> </span><span class="dv">1</span>) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb25-3"><a href="#cb25-3"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">mod =</span> <span class="kw">list</span>(<span class="kw">lm</span>(mass <span class="op">~</span><span class="st"> </span>height, <span class="dt">data =</span> <span class="kw">cur_data</span>())))</span> <span id="cb25-4"><a href="#cb25-4"></a><span class="co">#> # A tibble: 58 x 15</span></span> <span id="cb25-5"><a href="#cb25-5"></a><span class="co">#> # Groups: species [9]</span></span> <span id="cb25-6"><a href="#cb25-6"></a><span class="co">#> name height mass hair_color skin_color eye_color birth_year sex gender</span></span> <span id="cb25-7"><a href="#cb25-7"></a><span class="co">#> <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> </span></span> <span id="cb25-8"><a href="#cb25-8"></a><span class="co">#> 1 Luke… 172 77 blond fair blue 19 male mascu…</span></span> <span id="cb25-9"><a href="#cb25-9"></a><span class="co">#> 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…</span></span> <span 
id="cb25-10"><a href="#cb25-10"></a><span class="co">#> 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…</span></span> <span id="cb25-11"><a href="#cb25-11"></a><span class="co">#> 4 Dart… 202 136 none white yellow 41.9 male mascu…</span></span> <span id="cb25-12"><a href="#cb25-12"></a><span class="co">#> # … with 54 more rows, and 6 more variables: homeworld <chr>, species <chr>,</span></span> <span id="cb25-13"><a href="#cb25-13"></a><span class="co">#> # films <list>, vehicles <list>, starships <list>, mod <list></span></span></code></pre></div> </div> <div id="cur_group-and-cur_group_id" class="section level3"> <h3><code>cur_group()</code> and <code>cur_group_id()</code></h3> <p><code>cur_group_id()</code> gives a unique numeric identifier for the current group. This is sometimes useful if you want to index into an external data structure.</p> <div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1"></a>by_species <span class="op">%>%</span></span> <span id="cb26-2"><a href="#cb26-2"></a><span class="st"> </span><span class="kw">arrange</span>(species) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb26-3"><a href="#cb26-3"></a><span class="st"> </span><span class="kw">select</span>(name, species, homeworld) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb26-4"><a href="#cb26-4"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">id =</span> <span class="kw">cur_group_id</span>())</span> <span id="cb26-5"><a href="#cb26-5"></a><span class="co">#> # A tibble: 87 x 4</span></span> <span id="cb26-6"><a href="#cb26-6"></a><span class="co">#> # Groups: species [38]</span></span> <span id="cb26-7"><a href="#cb26-7"></a><span class="co">#> name species homeworld id</span></span> <span id="cb26-8"><a href="#cb26-8"></a><span class="co">#> <chr> <chr> <chr> <int></span></span> <span id="cb26-9"><a href="#cb26-9"></a><span 
class="co">#> 1 Ratts Tyerell Aleena Aleen Minor 1</span></span> <span id="cb26-10"><a href="#cb26-10"></a><span class="co">#> 2 Dexter Jettster Besalisk Ojom 2</span></span> <span id="cb26-11"><a href="#cb26-11"></a><span class="co">#> 3 Ki-Adi-Mundi Cerean Cerea 3</span></span> <span id="cb26-12"><a href="#cb26-12"></a><span class="co">#> 4 Mas Amedda Chagrian Champala 4</span></span> <span id="cb26-13"><a href="#cb26-13"></a><span class="co">#> # … with 83 more rows</span></span></code></pre></div> </div> </div> <div class="footnotes"> <hr /> <ol> <li id="fn1"><p>Note that the argument changed from <code>add = TRUE</code> to <code>.add = TRUE</code> in dplyr 1.0.0.<a href="#fnref1" class="footnote-back">↩︎</a></p></li> </ol> </div> <!-- code folding --> <!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Build a <script> element at runtime, point it at the hosted MathJax
  // bundle, and attach it to the first <head> element of the document.
  (function () {
    var mathjaxTag = document.createElement("script");
    mathjaxTag.setAttribute("type", "text/javascript");
    mathjaxTag.setAttribute("src", "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML");
    document.querySelector("head").appendChild(mathjaxTag);
  })();
</script>
</body>
</html>