<!-- EVOLUTION-MANAGER
Edit File: rowwise.html -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Row-wise operations</title>
<script>
// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility
// (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
//
// NOTE: every `//` comment in this script must stay on its own line. A line
// comment runs to the end of the physical line, so code that shares a line
// with one is silently commented out.
document.addEventListener('DOMContentLoaded', function() {
  // Every element that pandoc tagged as highlighted source code.
  const codeList = document.getElementsByClassName("sourceCode");
  for (var i = 0; i < codeList.length; i++) {
    var linkList = codeList[i].getElementsByTagName('a');
    for (var j = 0; j < linkList.length; j++) {
      // Only the empty per-line anchors are hidden; links with content are left alone.
      if (linkList[j].innerHTML === "") {
        linkList[j].setAttribute('aria-hidden', 'true');
        // These anchors carry an href and are therefore focusable. An element
        // hidden from assistive technology must not remain in the tab order.
        linkList[j].setAttribute('tabindex', '-1');
      }
    }
  }
});
</script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css" data-origin="pandoc">
/* Pandoc syntax-highlighting rules. The data-origin="pandoc" attribute is what the script after this stylesheet looks for. */
code.sourceCode > span { display: inline-block; line-height: 1.25; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
  div.sourceCode { overflow: auto; }
}
@media print {
  code.sourceCode { white-space: pre-wrap; }
  code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code { counter-reset: source-line 0; }
pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before { content: counter(source-line); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; -khtml-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; padding: 0 4px; width: 4em; color: #aaaaaa; }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode { }
@media screen {
  code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
// (comments are kept on their own lines so they cannot swallow the code that follows)
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    // Only the stylesheet marked data-origin="pandoc" is rewritten.
    if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules may throw; a sheet whose rules cannot be read is skipped.
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
      // replace div.sourceCode by a pre.sourceCode rule (same index, so rule order is preserved)
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">
/* Page-level vignette theme: layout, tables, headings, links and code colours. */
body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; }
#TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; }
#TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; }
#TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; }
#TOC ul ul { margin-left: -2em; }
#TOC li { line-height: 16px; }
table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; }
table th { border-width: 2px; padding: 5px; border-style: inset; }
table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; }
table, table th, table td { border-left-style: none; border-right-style: none; }
table thead, table tr.even { background-color: #f7f7f7; }
p { margin: 0.5em 0; }
blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; }
hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; }
dl { margin-left: 0; }
dl dd { margin-bottom: 13px; margin-left: 13px; }
dl dt { font-weight: bold; }
ul { margin-top: 0; }
ul li { list-style: circle outside; }
ul ul { margin-bottom: 0; }
pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; }
pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; }
pre:not([class]) { background-color: #f7f7f7; }
code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; }
p > code, li > code { padding: 2px 0px; }
div.figure { text-align: center; }
img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; }
h1 { margin-top: 0; font-size: 35px; line-height: 40px; }
h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; }
h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; }
h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; }
h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; }
a { color: #0033dd; text-decoration: none; }
a:hover { color: #6666ff; }
a:visited { color: #800080; }
a:visited:hover { color: #BB00BB; }
a[href^="http:"] { text-decoration: underline; }
a[href^="https:"] { text-decoration: underline; }
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Row-wise operations</h1>
<p>dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much
harder. In this vignette, you’ll learn dplyr’s approach centred around the row-wise data frame created by <code>rowwise()</code>.</p> <p>There are three common use cases that we discuss in this vignette:</p> <ul> <li>Row-wise aggregates (e.g. compute the mean of x, y, z).</li> <li>Calling a function multiple times with varying arguments.</li> <li>Working with list-columns.</li> </ul> <p>These types of problems are often easily solved with a for loop, but it’s nice to have a solution that fits naturally into a pipeline.</p> <blockquote> <p>Of course, someone has to write loops. It doesn’t have to be you. — Jenny Bryan</p> </blockquote> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(dplyr, <span class="dt">warn.conflicts =</span> <span class="ot">FALSE</span>)</span></code></pre></div> <div id="creating" class="section level2"> <h2>Creating</h2> <p>Row-wise operations require a special type of grouping where each group consists of a single row. 
You create this with <code>rowwise()</code>:</p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>df <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">x =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">2</span>, <span class="dt">y =</span> <span class="dv">3</span><span class="op">:</span><span class="dv">4</span>, <span class="dt">z =</span> <span class="dv">5</span><span class="op">:</span><span class="dv">6</span>)</span> <span id="cb2-2"><a href="#cb2-2"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">rowwise</span>()</span> <span id="cb2-3"><a href="#cb2-3"></a><span class="co">#> # A tibble: 2 x 3</span></span> <span id="cb2-4"><a href="#cb2-4"></a><span class="co">#> # Rowwise: </span></span> <span id="cb2-5"><a href="#cb2-5"></a><span class="co">#> x y z</span></span> <span id="cb2-6"><a href="#cb2-6"></a><span class="co">#> <int> <int> <int></span></span> <span id="cb2-7"><a href="#cb2-7"></a><span class="co">#> 1 1 3 5</span></span> <span id="cb2-8"><a href="#cb2-8"></a><span class="co">#> 2 2 4 6</span></span></code></pre></div> <p>Like <code>group_by()</code>, <code>rowwise()</code> doesn’t really do anything itself; it just changes how the other verbs work. 
For example, compare the results of <code>mutate()</code> in the following code:</p> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">m =</span> <span class="kw">mean</span>(<span class="kw">c</span>(x, y, z)))</span> <span id="cb3-2"><a href="#cb3-2"></a><span class="co">#> # A tibble: 2 x 4</span></span> <span id="cb3-3"><a href="#cb3-3"></a><span class="co">#> x y z m</span></span> <span id="cb3-4"><a href="#cb3-4"></a><span class="co">#> <int> <int> <int> <dbl></span></span> <span id="cb3-5"><a href="#cb3-5"></a><span class="co">#> 1 1 3 5 3.5</span></span> <span id="cb3-6"><a href="#cb3-6"></a><span class="co">#> 2 2 4 6 3.5</span></span> <span id="cb3-7"><a href="#cb3-7"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">rowwise</span>() <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">m =</span> <span class="kw">mean</span>(<span class="kw">c</span>(x, y, z)))</span> <span id="cb3-8"><a href="#cb3-8"></a><span class="co">#> # A tibble: 2 x 4</span></span> <span id="cb3-9"><a href="#cb3-9"></a><span class="co">#> # Rowwise: </span></span> <span id="cb3-10"><a href="#cb3-10"></a><span class="co">#> x y z m</span></span> <span id="cb3-11"><a href="#cb3-11"></a><span class="co">#> <int> <int> <int> <dbl></span></span> <span id="cb3-12"><a href="#cb3-12"></a><span class="co">#> 1 1 3 5 3</span></span> <span id="cb3-13"><a href="#cb3-13"></a><span class="co">#> 2 2 4 6 4</span></span></code></pre></div> <p>If you use <code>mutate()</code> with a regular data frame, it computes the mean of <code>x</code>, <code>y</code>, and <code>z</code> across all rows. 
If you apply it to a row-wise data frame, it computes the mean for each row.</p> <p>You can optionally supply “identifier” variables in your call to <code>rowwise()</code>. These variables are preserved when you call <code>summarise()</code>, so they behave somewhat similarly to the grouping variables passed to <code>group_by()</code>:</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>df <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">name =</span> <span class="kw">c</span>(<span class="st">"Mara"</span>, <span class="st">"Hadley"</span>), <span class="dt">x =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">2</span>, <span class="dt">y =</span> <span class="dv">3</span><span class="op">:</span><span class="dv">4</span>, <span class="dt">z =</span> <span class="dv">5</span><span class="op">:</span><span class="dv">6</span>)</span> <span id="cb4-2"><a href="#cb4-2"></a></span> <span id="cb4-3"><a href="#cb4-3"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb4-4"><a href="#cb4-4"></a><span class="st"> </span><span class="kw">rowwise</span>() <span class="op">%>%</span><span class="st"> </span></span> <span id="cb4-5"><a href="#cb4-5"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">m =</span> <span class="kw">mean</span>(<span class="kw">c</span>(x, y, z)))</span> <span id="cb4-6"><a href="#cb4-6"></a><span class="co">#> `summarise()` ungrouping output (override with `.groups` argument)</span></span> <span id="cb4-7"><a href="#cb4-7"></a><span class="co">#> # A tibble: 2 x 1</span></span> <span id="cb4-8"><a href="#cb4-8"></a><span class="co">#> m</span></span> <span id="cb4-9"><a href="#cb4-9"></a><span class="co">#> <dbl></span></span> <span id="cb4-10"><a href="#cb4-10"></a><span class="co">#> 1 3</span></span> <span id="cb4-11"><a href="#cb4-11"></a><span class="co">#> 2 
4</span></span> <span id="cb4-12"><a href="#cb4-12"></a></span> <span id="cb4-13"><a href="#cb4-13"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb4-14"><a href="#cb4-14"></a><span class="st"> </span><span class="kw">rowwise</span>(name) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb4-15"><a href="#cb4-15"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">m =</span> <span class="kw">mean</span>(<span class="kw">c</span>(x, y, z)))</span> <span id="cb4-16"><a href="#cb4-16"></a><span class="co">#> `summarise()` regrouping output by 'name' (override with `.groups` argument)</span></span> <span id="cb4-17"><a href="#cb4-17"></a><span class="co">#> # A tibble: 2 x 2</span></span> <span id="cb4-18"><a href="#cb4-18"></a><span class="co">#> # Groups: name [2]</span></span> <span id="cb4-19"><a href="#cb4-19"></a><span class="co">#> name m</span></span> <span id="cb4-20"><a href="#cb4-20"></a><span class="co">#> <chr> <dbl></span></span> <span id="cb4-21"><a href="#cb4-21"></a><span class="co">#> 1 Mara 3</span></span> <span id="cb4-22"><a href="#cb4-22"></a><span class="co">#> 2 Hadley 4</span></span></code></pre></div> <p><code>rowwise()</code> is just a special form of grouping, so if you want to remove it from a data frame, just call <code>ungroup()</code>.</p> </div> <div id="per-row-summary-statistics" class="section level2"> <h2>Per row summary statistics</h2> <p><code>dplyr::summarise()</code> makes it really easy to summarise values across rows within one column. When combined with <code>rowwise()</code> it also makes it easy to summarise values across columns within one row. 
To see how, we’ll start by making a little dataset:</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>df <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">id =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">6</span>, <span class="dt">w =</span> <span class="dv">10</span><span class="op">:</span><span class="dv">15</span>, <span class="dt">x =</span> <span class="dv">20</span><span class="op">:</span><span class="dv">25</span>, <span class="dt">y =</span> <span class="dv">30</span><span class="op">:</span><span class="dv">35</span>, <span class="dt">z =</span> <span class="dv">40</span><span class="op">:</span><span class="dv">45</span>)</span> <span id="cb5-2"><a href="#cb5-2"></a>df</span> <span id="cb5-3"><a href="#cb5-3"></a><span class="co">#> # A tibble: 6 x 5</span></span> <span id="cb5-4"><a href="#cb5-4"></a><span class="co">#> id w x y z</span></span> <span id="cb5-5"><a href="#cb5-5"></a><span class="co">#> <int> <int> <int> <int> <int></span></span> <span id="cb5-6"><a href="#cb5-6"></a><span class="co">#> 1 1 10 20 30 40</span></span> <span id="cb5-7"><a href="#cb5-7"></a><span class="co">#> 2 2 11 21 31 41</span></span> <span id="cb5-8"><a href="#cb5-8"></a><span class="co">#> 3 3 12 22 32 42</span></span> <span id="cb5-9"><a href="#cb5-9"></a><span class="co">#> 4 4 13 23 33 43</span></span> <span id="cb5-10"><a href="#cb5-10"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <p>Let’s say we want to compute the sum of <code>w</code>, <code>x</code>, <code>y</code>, and <code>z</code> for each row. 
We start by making a row-wise data frame:</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>rf <-<span class="st"> </span>df <span class="op">%>%</span><span class="st"> </span><span class="kw">rowwise</span>(id)</span></code></pre></div> <p>We can then use <code>mutate()</code> to add a new column to each row, or <code>summarise()</code> to return just that one summary:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">total =</span> <span class="kw">sum</span>(<span class="kw">c</span>(w, x, y, z)))</span> <span id="cb7-2"><a href="#cb7-2"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb7-3"><a href="#cb7-3"></a><span class="co">#> # Rowwise: id</span></span> <span id="cb7-4"><a href="#cb7-4"></a><span class="co">#> id w x y z total</span></span> <span id="cb7-5"><a href="#cb7-5"></a><span class="co">#> <int> <int> <int> <int> <int> <int></span></span> <span id="cb7-6"><a href="#cb7-6"></a><span class="co">#> 1 1 10 20 30 40 100</span></span> <span id="cb7-7"><a href="#cb7-7"></a><span class="co">#> 2 2 11 21 31 41 104</span></span> <span id="cb7-8"><a href="#cb7-8"></a><span class="co">#> 3 3 12 22 32 42 108</span></span> <span id="cb7-9"><a href="#cb7-9"></a><span class="co">#> 4 4 13 23 33 43 112</span></span> <span id="cb7-10"><a href="#cb7-10"></a><span class="co">#> # … with 2 more rows</span></span> <span id="cb7-11"><a href="#cb7-11"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">total =</span> <span class="kw">sum</span>(<span class="kw">c</span>(w, x, y, z)))</span> <span id="cb7-12"><a href="#cb7-12"></a><span class="co">#> `summarise()` regrouping output by 'id' (override with `.groups` argument)</span></span> <span 
id="cb7-13"><a href="#cb7-13"></a><span class="co">#> # A tibble: 6 x 2</span></span> <span id="cb7-14"><a href="#cb7-14"></a><span class="co">#> # Groups: id [6]</span></span> <span id="cb7-15"><a href="#cb7-15"></a><span class="co">#> id total</span></span> <span id="cb7-16"><a href="#cb7-16"></a><span class="co">#> <int> <int></span></span> <span id="cb7-17"><a href="#cb7-17"></a><span class="co">#> 1 1 100</span></span> <span id="cb7-18"><a href="#cb7-18"></a><span class="co">#> 2 2 104</span></span> <span id="cb7-19"><a href="#cb7-19"></a><span class="co">#> 3 3 108</span></span> <span id="cb7-20"><a href="#cb7-20"></a><span class="co">#> 4 4 112</span></span> <span id="cb7-21"><a href="#cb7-21"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <p>Of course, if you have a lot of variables, it’s going to be tedious to type in every variable name. Instead, you can use <code>c_across()</code> which uses tidy selection syntax so you can succinctly select many variables:</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">total =</span> <span class="kw">sum</span>(<span class="kw">c_across</span>(w<span class="op">:</span>z)))</span> <span id="cb8-2"><a href="#cb8-2"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb8-3"><a href="#cb8-3"></a><span class="co">#> # Rowwise: id</span></span> <span id="cb8-4"><a href="#cb8-4"></a><span class="co">#> id w x y z total</span></span> <span id="cb8-5"><a href="#cb8-5"></a><span class="co">#> <int> <int> <int> <int> <int> <int></span></span> <span id="cb8-6"><a href="#cb8-6"></a><span class="co">#> 1 1 10 20 30 40 100</span></span> <span id="cb8-7"><a href="#cb8-7"></a><span class="co">#> 2 2 11 21 31 41 104</span></span> <span id="cb8-8"><a href="#cb8-8"></a><span class="co">#> 3 3 12 22 32 42 
108</span></span> <span id="cb8-9"><a href="#cb8-9"></a><span class="co">#> 4 4 13 23 33 43 112</span></span> <span id="cb8-10"><a href="#cb8-10"></a><span class="co">#> # … with 2 more rows</span></span> <span id="cb8-11"><a href="#cb8-11"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">total =</span> <span class="kw">sum</span>(<span class="kw">c_across</span>(<span class="kw">where</span>(is.numeric))))</span> <span id="cb8-12"><a href="#cb8-12"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb8-13"><a href="#cb8-13"></a><span class="co">#> # Rowwise: id</span></span> <span id="cb8-14"><a href="#cb8-14"></a><span class="co">#> id w x y z total</span></span> <span id="cb8-15"><a href="#cb8-15"></a><span class="co">#> <int> <int> <int> <int> <int> <int></span></span> <span id="cb8-16"><a href="#cb8-16"></a><span class="co">#> 1 1 10 20 30 40 100</span></span> <span id="cb8-17"><a href="#cb8-17"></a><span class="co">#> 2 2 11 21 31 41 104</span></span> <span id="cb8-18"><a href="#cb8-18"></a><span class="co">#> 3 3 12 22 32 42 108</span></span> <span id="cb8-19"><a href="#cb8-19"></a><span class="co">#> 4 4 13 23 33 43 112</span></span> <span id="cb8-20"><a href="#cb8-20"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <p>You could combine this with column-wise operations (see <code>vignette("colwise")</code> for more details) to compute the proportion of the total for each column:</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a>rf <span class="op">%>%</span><span class="st"> </span></span> <span id="cb9-2"><a href="#cb9-2"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">total =</span> <span class="kw">sum</span>(<span class="kw">c_across</span>(w<span class="op">:</span>z))) <span class="op">%>%</span><span class="st"> </span></span> <span 
id="cb9-3"><a href="#cb9-3"></a><span class="st"> </span><span class="kw">ungroup</span>() <span class="op">%>%</span><span class="st"> </span></span> <span id="cb9-4"><a href="#cb9-4"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="kw">across</span>(w<span class="op">:</span>z, <span class="op">~</span><span class="st"> </span>. <span class="op">/</span><span class="st"> </span>total))</span> <span id="cb9-5"><a href="#cb9-5"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb9-6"><a href="#cb9-6"></a><span class="co">#> id w x y z total</span></span> <span id="cb9-7"><a href="#cb9-7"></a><span class="co">#> <int> <dbl> <dbl> <dbl> <dbl> <int></span></span> <span id="cb9-8"><a href="#cb9-8"></a><span class="co">#> 1 1 0.1 0.2 0.3 0.4 100</span></span> <span id="cb9-9"><a href="#cb9-9"></a><span class="co">#> 2 2 0.106 0.202 0.298 0.394 104</span></span> <span id="cb9-10"><a href="#cb9-10"></a><span class="co">#> 3 3 0.111 0.204 0.296 0.389 108</span></span> <span id="cb9-11"><a href="#cb9-11"></a><span class="co">#> 4 4 0.116 0.205 0.295 0.384 112</span></span> <span id="cb9-12"><a href="#cb9-12"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <div id="row-wise-summary-functions" class="section level3"> <h3>Row-wise summary functions</h3> <p>The <code>rowwise()</code> approach will work for any summary function. But if you need greater speed, it’s worth looking for a built-in row-wise variant of your summary function. 
These are more efficient because they operate on the data frame as a whole; they don’t split it into rows, compute the summary, and then join the results back together again.</p> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">total =</span> <span class="kw">rowSums</span>(<span class="kw">across</span>(<span class="kw">where</span>(is.numeric))))</span> <span id="cb10-2"><a href="#cb10-2"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb10-3"><a href="#cb10-3"></a><span class="co">#> id w x y z total</span></span> <span id="cb10-4"><a href="#cb10-4"></a><span class="co">#> <int> <int> <int> <int> <int> <dbl></span></span> <span id="cb10-5"><a href="#cb10-5"></a><span class="co">#> 1 1 10 20 30 40 101</span></span> <span id="cb10-6"><a href="#cb10-6"></a><span class="co">#> 2 2 11 21 31 41 106</span></span> <span id="cb10-7"><a href="#cb10-7"></a><span class="co">#> 3 3 12 22 32 42 111</span></span> <span id="cb10-8"><a href="#cb10-8"></a><span class="co">#> 4 4 13 23 33 43 116</span></span> <span id="cb10-9"><a href="#cb10-9"></a><span class="co">#> # … with 2 more rows</span></span> <span id="cb10-10"><a href="#cb10-10"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">mean =</span> <span class="kw">rowMeans</span>(<span class="kw">across</span>(<span class="kw">where</span>(is.numeric))))</span> <span id="cb10-11"><a href="#cb10-11"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb10-12"><a href="#cb10-12"></a><span class="co">#> id w x y z mean</span></span> <span id="cb10-13"><a href="#cb10-13"></a><span class="co">#> <int> <int> <int> <int> <int> <dbl></span></span> <span id="cb10-14"><a href="#cb10-14"></a><span class="co">#> 1 1 10 20 30 40 20.2</span></span> <span id="cb10-15"><a 
href="#cb10-15"></a><span class="co">#> 2 2 11 21 31 41 21.2</span></span> <span id="cb10-16"><a href="#cb10-16"></a><span class="co">#> 3 3 12 22 32 42 22.2</span></span> <span id="cb10-17"><a href="#cb10-17"></a><span class="co">#> 4 4 13 23 33 43 23.2</span></span> <span id="cb10-18"><a href="#cb10-18"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> <p><strong>NB</strong>: I use <code>df</code> (not <code>rf</code>) and <code>across()</code> (not <code>c_across()</code>) here because <code>rowMeans()</code> and <code>rowSums()</code> take a multi-row data frame as input.</p> </div> </div> <div id="list-columns" class="section level2"> <h2>List-columns</h2> <p><code>rowwise()</code> operations are a natural pairing when you have list-columns. They allow you to avoid explicit loops and/or functions from the <code>apply()</code> or <code>purrr::map()</code> families.</p> <div id="motivation" class="section level3"> <h3>Motivation</h3> <p>Imagine you have this data frame, and you want to count the lengths of each element:</p> <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>df <-<span class="st"> </span><span class="kw">tibble</span>(</span> <span id="cb11-2"><a href="#cb11-2"></a> <span class="dt">x =</span> <span class="kw">list</span>(<span class="dv">1</span>, <span class="dv">2</span><span class="op">:</span><span class="dv">3</span>, <span class="dv">4</span><span class="op">:</span><span class="dv">6</span>)</span> <span id="cb11-3"><a href="#cb11-3"></a>)</span></code></pre></div> <p>You might try calling <code>length()</code>:</p> <div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">l =</span> <span class="kw">length</span>(x))</span> <span id="cb12-2"><a 
href="#cb12-2"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb12-3"><a href="#cb12-3"></a><span class="co">#> x l</span></span> <span id="cb12-4"><a href="#cb12-4"></a><span class="co">#> <list> <int></span></span> <span id="cb12-5"><a href="#cb12-5"></a><span class="co">#> 1 <dbl [1]> 3</span></span> <span id="cb12-6"><a href="#cb12-6"></a><span class="co">#> 2 <int [2]> 3</span></span> <span id="cb12-7"><a href="#cb12-7"></a><span class="co">#> 3 <int [3]> 3</span></span></code></pre></div> <p>But that returns the length of the column, not the length of the individual values. If you’re an R documentation aficionado, you might know there’s already a base R function just for this purpose:</p> <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">l =</span> <span class="kw">lengths</span>(x))</span> <span id="cb13-2"><a href="#cb13-2"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb13-3"><a href="#cb13-3"></a><span class="co">#> x l</span></span> <span id="cb13-4"><a href="#cb13-4"></a><span class="co">#> <list> <int></span></span> <span id="cb13-5"><a href="#cb13-5"></a><span class="co">#> 1 <dbl [1]> 1</span></span> <span id="cb13-6"><a href="#cb13-6"></a><span class="co">#> 2 <int [2]> 2</span></span> <span id="cb13-7"><a href="#cb13-7"></a><span class="co">#> 3 <int [3]> 3</span></span></code></pre></div> <p>Or if you’re an experienced R programmer, you might know how to apply a function to each element of a list using <code>sapply()</code>, <code>vapply()</code>, or one of the purrr <code>map()</code> functions:</p> <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">l 
=</span> <span class="kw">sapply</span>(x, length))</span> <span id="cb14-2"><a href="#cb14-2"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb14-3"><a href="#cb14-3"></a><span class="co">#> x l</span></span> <span id="cb14-4"><a href="#cb14-4"></a><span class="co">#> <list> <int></span></span> <span id="cb14-5"><a href="#cb14-5"></a><span class="co">#> 1 <dbl [1]> 1</span></span> <span id="cb14-6"><a href="#cb14-6"></a><span class="co">#> 2 <int [2]> 2</span></span> <span id="cb14-7"><a href="#cb14-7"></a><span class="co">#> 3 <int [3]> 3</span></span> <span id="cb14-8"><a href="#cb14-8"></a>df <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">l =</span> purrr<span class="op">::</span><span class="kw">map_int</span>(x, length))</span> <span id="cb14-9"><a href="#cb14-9"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb14-10"><a href="#cb14-10"></a><span class="co">#> x l</span></span> <span id="cb14-11"><a href="#cb14-11"></a><span class="co">#> <list> <int></span></span> <span id="cb14-12"><a href="#cb14-12"></a><span class="co">#> 1 <dbl [1]> 1</span></span> <span id="cb14-13"><a href="#cb14-13"></a><span class="co">#> 2 <int [2]> 2</span></span> <span id="cb14-14"><a href="#cb14-14"></a><span class="co">#> 3 <int [3]> 3</span></span></code></pre></div> <p>But wouldn’t it be nice if you could just write <code>length(x)</code> and dplyr would figure out that you wanted to compute the length of the element inside of <code>x</code>? 
Since you’re here, you might already be guessing at the answer: this is just another application of the row-wise pattern.</p> <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-2"><a href="#cb15-2"></a><span class="st"> </span><span class="kw">rowwise</span>() <span class="op">%>%</span><span class="st"> </span></span> <span id="cb15-3"><a href="#cb15-3"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">l =</span> <span class="kw">length</span>(x))</span> <span id="cb15-4"><a href="#cb15-4"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb15-5"><a href="#cb15-5"></a><span class="co">#> # Rowwise: </span></span> <span id="cb15-6"><a href="#cb15-6"></a><span class="co">#> x l</span></span> <span id="cb15-7"><a href="#cb15-7"></a><span class="co">#> <list> <int></span></span> <span id="cb15-8"><a href="#cb15-8"></a><span class="co">#> 1 <dbl [1]> 1</span></span> <span id="cb15-9"><a href="#cb15-9"></a><span class="co">#> 2 <int [2]> 2</span></span> <span id="cb15-10"><a href="#cb15-10"></a><span class="co">#> 3 <int [3]> 3</span></span></code></pre></div> </div> <div id="subsetting" class="section level3"> <h3>Subsetting</h3> <p>Before we continue on, I wanted to briefly mention the magic that makes this work. This isn’t something you’ll generally need to think about (it’ll just work), but it’s useful to know about when something goes wrong.</p> <p>There’s an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. 
Take these two data frames:</p> <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1"></a>df <-<span class="st"> </span><span class="kw">tibble</span>(<span class="dt">g =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">2</span>, <span class="dt">y =</span> <span class="kw">list</span>(<span class="dv">1</span><span class="op">:</span><span class="dv">3</span>, <span class="st">"a"</span>))</span> <span id="cb16-2"><a href="#cb16-2"></a>gf <-<span class="st"> </span>df <span class="op">%>%</span><span class="st"> </span><span class="kw">group_by</span>(g)</span> <span id="cb16-3"><a href="#cb16-3"></a>rf <-<span class="st"> </span>df <span class="op">%>%</span><span class="st"> </span><span class="kw">rowwise</span>(g)</span></code></pre></div> <p>If we compute some properties of <code>y</code>, you’ll notice the results look different:</p> <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1"></a>gf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">type =</span> <span class="kw">typeof</span>(y), <span class="dt">length =</span> <span class="kw">length</span>(y))</span> <span id="cb17-2"><a href="#cb17-2"></a><span class="co">#> # A tibble: 2 x 4</span></span> <span id="cb17-3"><a href="#cb17-3"></a><span class="co">#> # Groups: g [2]</span></span> <span id="cb17-4"><a href="#cb17-4"></a><span class="co">#> g y type length</span></span> <span id="cb17-5"><a href="#cb17-5"></a><span class="co">#> <int> <list> <chr> <int></span></span> <span id="cb17-6"><a href="#cb17-6"></a><span class="co">#> 1 1 <int [3]> list 1</span></span> <span id="cb17-7"><a href="#cb17-7"></a><span class="co">#> 2 2 <chr [1]> list 1</span></span> <span id="cb17-8"><a href="#cb17-8"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span 
class="dt">type =</span> <span class="kw">typeof</span>(y), <span class="dt">length =</span> <span class="kw">length</span>(y))</span> <span id="cb17-9"><a href="#cb17-9"></a><span class="co">#> # A tibble: 2 x 4</span></span> <span id="cb17-10"><a href="#cb17-10"></a><span class="co">#> # Rowwise: g</span></span> <span id="cb17-11"><a href="#cb17-11"></a><span class="co">#> g y type length</span></span> <span id="cb17-12"><a href="#cb17-12"></a><span class="co">#> <int> <list> <chr> <int></span></span> <span id="cb17-13"><a href="#cb17-13"></a><span class="co">#> 1 1 <int [3]> integer 3</span></span> <span id="cb17-14"><a href="#cb17-14"></a><span class="co">#> 2 2 <chr [1]> character 1</span></span></code></pre></div> <p>The key difference is that when <code>mutate()</code> slices up the columns to pass to <code>length(y)</code>, the grouped mutate uses <code>[</code> and the row-wise mutate uses <code>[[</code>. The following code gives a flavour of the differences if you used a for loop:</p> <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a><span class="co"># grouped</span></span> <span id="cb18-2"><a href="#cb18-2"></a>out1 <-<span class="st"> </span><span class="kw">integer</span>(<span class="dv">2</span>)</span> <span id="cb18-3"><a href="#cb18-3"></a><span class="cf">for</span> (i <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span><span class="dv">2</span>) {</span> <span id="cb18-4"><a href="#cb18-4"></a> out1[[i]] <-<span class="st"> </span><span class="kw">length</span>(df<span class="op">$</span>y[i])</span> <span id="cb18-5"><a href="#cb18-5"></a>}</span> <span id="cb18-6"><a href="#cb18-6"></a>out1</span> <span id="cb18-7"><a href="#cb18-7"></a><span class="co">#> [1] 1 1</span></span> <span id="cb18-8"><a href="#cb18-8"></a></span> <span id="cb18-9"><a href="#cb18-9"></a><span class="co"># rowwise</span></span> <span id="cb18-10"><a
href="#cb18-10"></a>out2 <-<span class="st"> </span><span class="kw">integer</span>(<span class="dv">2</span>)</span> <span id="cb18-11"><a href="#cb18-11"></a><span class="cf">for</span> (i <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span><span class="dv">2</span>) {</span> <span id="cb18-12"><a href="#cb18-12"></a> out2[[i]] <-<span class="st"> </span><span class="kw">length</span>(df<span class="op">$</span>y[[i]])</span> <span id="cb18-13"><a href="#cb18-13"></a>}</span> <span id="cb18-14"><a href="#cb18-14"></a>out2</span> <span id="cb18-15"><a href="#cb18-15"></a><span class="co">#> [1] 3 1</span></span></code></pre></div> <p>Note that this magic only applies when you’re referring to existing columns, not when you’re creating new rows. This is potentially confusing, but we’re fairly confident it’s the least worst solution, particularly given the hint in the error message.</p> <div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1"></a>gf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">y2 =</span> y)</span> <span id="cb19-2"><a href="#cb19-2"></a><span class="co">#> # A tibble: 2 x 3</span></span> <span id="cb19-3"><a href="#cb19-3"></a><span class="co">#> # Groups: g [2]</span></span> <span id="cb19-4"><a href="#cb19-4"></a><span class="co">#> g y y2 </span></span> <span id="cb19-5"><a href="#cb19-5"></a><span class="co">#> <int> <list> <list> </span></span> <span id="cb19-6"><a href="#cb19-6"></a><span class="co">#> 1 1 <int [3]> <int [3]></span></span> <span id="cb19-7"><a href="#cb19-7"></a><span class="co">#> 2 2 <chr [1]> <chr [1]></span></span> <span id="cb19-8"><a href="#cb19-8"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">y2 =</span> y)</span> <span id="cb19-9"><a href="#cb19-9"></a><span class="co">#> Error: Problem with `mutate()` input 
`y2`.</span></span> <span id="cb19-10"><a href="#cb19-10"></a><span class="co">#> x Input `y2` can't be recycled to size 1.</span></span> <span id="cb19-11"><a href="#cb19-11"></a><span class="co">#> ℹ Input `y2` is `y`.</span></span> <span id="cb19-12"><a href="#cb19-12"></a><span class="co">#> ℹ Input `y2` must be size 1, not 3.</span></span> <span id="cb19-13"><a href="#cb19-13"></a><span class="co">#> ℹ Did you mean: `y2 = list(y)` ?</span></span> <span id="cb19-14"><a href="#cb19-14"></a><span class="co">#> ℹ The error occurred in row 1.</span></span> <span id="cb19-15"><a href="#cb19-15"></a>rf <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">y2 =</span> <span class="kw">list</span>(y))</span> <span id="cb19-16"><a href="#cb19-16"></a><span class="co">#> # A tibble: 2 x 3</span></span> <span id="cb19-17"><a href="#cb19-17"></a><span class="co">#> # Rowwise: g</span></span> <span id="cb19-18"><a href="#cb19-18"></a><span class="co">#> g y y2 </span></span> <span id="cb19-19"><a href="#cb19-19"></a><span class="co">#> <int> <list> <list> </span></span> <span id="cb19-20"><a href="#cb19-20"></a><span class="co">#> 1 1 <int [3]> <int [3]></span></span> <span id="cb19-21"><a href="#cb19-21"></a><span class="co">#> 2 2 <chr [1]> <chr [1]></span></span></code></pre></div> </div> <div id="modelling" class="section level3"> <h3>Modelling</h3> <p><code>rowwise()</code> data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. 
We’ll start by creating a nested data frame:</p> <div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1"></a>by_cyl <-<span class="st"> </span>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">nest_by</span>(cyl)</span> <span id="cb20-2"><a href="#cb20-2"></a><span class="co">#> `summarise()` ungrouping output (override with `.groups` argument)</span></span> <span id="cb20-3"><a href="#cb20-3"></a>by_cyl</span> <span id="cb20-4"><a href="#cb20-4"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb20-5"><a href="#cb20-5"></a><span class="co">#> # Rowwise: cyl</span></span> <span id="cb20-6"><a href="#cb20-6"></a><span class="co">#> cyl data </span></span> <span id="cb20-7"><a href="#cb20-7"></a><span class="co">#> <dbl> <list> </span></span> <span id="cb20-8"><a href="#cb20-8"></a><span class="co">#> 1 4 <tibble [11 × 12]></span></span> <span id="cb20-9"><a href="#cb20-9"></a><span class="co">#> 2 6 <tibble [7 × 12]> </span></span> <span id="cb20-10"><a href="#cb20-10"></a><span class="co">#> 3 8 <tibble [14 × 12]></span></span></code></pre></div> <p>This is a little different to the usual <code>group_by()</code> output: we have visibly changed the structure of the data. Now we have three rows (one for each group), and we have a list-col, <code>data</code>, that stores the data for that group. 
Also note that the output is <code>rowwise()</code>; this is important because it’s going to make working with that list of data frames much easier.</p> <p>Once we have one data frame per row, it’s straightforward to make one model per row:</p> <div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>mods <-<span class="st"> </span>by_cyl <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">mod =</span> <span class="kw">list</span>(<span class="kw">lm</span>(mpg <span class="op">~</span><span class="st"> </span>wt, <span class="dt">data =</span> data)))</span> <span id="cb21-2"><a href="#cb21-2"></a>mods</span> <span id="cb21-3"><a href="#cb21-3"></a><span class="co">#> # A tibble: 3 x 3</span></span> <span id="cb21-4"><a href="#cb21-4"></a><span class="co">#> # Rowwise: cyl</span></span> <span id="cb21-5"><a href="#cb21-5"></a><span class="co">#> cyl data mod </span></span> <span id="cb21-6"><a href="#cb21-6"></a><span class="co">#> <dbl> <list> <list></span></span> <span id="cb21-7"><a href="#cb21-7"></a><span class="co">#> 1 4 <tibble [11 × 12]> <lm> </span></span> <span id="cb21-8"><a href="#cb21-8"></a><span class="co">#> 2 6 <tibble [7 × 12]> <lm> </span></span> <span id="cb21-9"><a href="#cb21-9"></a><span class="co">#> 3 8 <tibble [14 × 12]> <lm></span></span></code></pre></div> <p>And supplement that with one set of predictions per row:</p> <div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1"></a>mods <-<span class="st"> </span>mods <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">pred =</span> <span class="kw">list</span>(<span class="kw">predict</span>(mod, data)))</span> <span id="cb22-2"><a href="#cb22-2"></a>mods</span> <span id="cb22-3"><a href="#cb22-3"></a><span class="co">#> # A tibble: 3 x 4</span></span> <span 
id="cb22-4"><a href="#cb22-4"></a><span class="co">#> # Rowwise: cyl</span></span> <span id="cb22-5"><a href="#cb22-5"></a><span class="co">#> cyl data mod pred </span></span> <span id="cb22-6"><a href="#cb22-6"></a><span class="co">#> <dbl> <list> <list> <list> </span></span> <span id="cb22-7"><a href="#cb22-7"></a><span class="co">#> 1 4 <tibble [11 × 12]> <lm> <dbl [11]></span></span> <span id="cb22-8"><a href="#cb22-8"></a><span class="co">#> 2 6 <tibble [7 × 12]> <lm> <dbl [7]> </span></span> <span id="cb22-9"><a href="#cb22-9"></a><span class="co">#> 3 8 <tibble [14 × 12]> <lm> <dbl [14]></span></span></code></pre></div> <p>You could then summarise the model in a variety of ways:</p> <div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>mods <span class="op">%>%</span><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">rmse =</span> <span class="kw">sqrt</span>(<span class="kw">mean</span>((pred <span class="op">-</span><span class="st"> </span>data<span class="op">$</span>mpg) <span class="op">^</span><span class="st"> </span><span class="dv">2</span>)))</span> <span id="cb23-2"><a href="#cb23-2"></a><span class="co">#> `summarise()` regrouping output by 'cyl' (override with `.groups` argument)</span></span> <span id="cb23-3"><a href="#cb23-3"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb23-4"><a href="#cb23-4"></a><span class="co">#> # Groups: cyl [3]</span></span> <span id="cb23-5"><a href="#cb23-5"></a><span class="co">#> cyl rmse</span></span> <span id="cb23-6"><a href="#cb23-6"></a><span class="co">#> <dbl> <dbl></span></span> <span id="cb23-7"><a href="#cb23-7"></a><span class="co">#> 1 4 3.01 </span></span> <span id="cb23-8"><a href="#cb23-8"></a><span class="co">#> 2 6 0.985</span></span> <span id="cb23-9"><a href="#cb23-9"></a><span class="co">#> 3 8 1.87</span></span> <span id="cb23-10"><a href="#cb23-10"></a>mods <span 
class="op">%>%</span><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">rsq =</span> <span class="kw">summary</span>(mod)<span class="op">$</span>r.squared)</span> <span id="cb23-11"><a href="#cb23-11"></a><span class="co">#> `summarise()` regrouping output by 'cyl' (override with `.groups` argument)</span></span> <span id="cb23-12"><a href="#cb23-12"></a><span class="co">#> # A tibble: 3 x 2</span></span> <span id="cb23-13"><a href="#cb23-13"></a><span class="co">#> # Groups: cyl [3]</span></span> <span id="cb23-14"><a href="#cb23-14"></a><span class="co">#> cyl rsq</span></span> <span id="cb23-15"><a href="#cb23-15"></a><span class="co">#> <dbl> <dbl></span></span> <span id="cb23-16"><a href="#cb23-16"></a><span class="co">#> 1 4 0.509</span></span> <span id="cb23-17"><a href="#cb23-17"></a><span class="co">#> 2 6 0.465</span></span> <span id="cb23-18"><a href="#cb23-18"></a><span class="co">#> 3 8 0.423</span></span> <span id="cb23-19"><a href="#cb23-19"></a>mods <span class="op">%>%</span><span class="st"> </span><span class="kw">summarise</span>(broom<span class="op">::</span><span class="kw">glance</span>(mod))</span> <span id="cb23-20"><a href="#cb23-20"></a><span class="co">#> `summarise()` regrouping output by 'cyl' (override with `.groups` argument)</span></span> <span id="cb23-21"><a href="#cb23-21"></a><span class="co">#> # A tibble: 3 x 13</span></span> <span id="cb23-22"><a href="#cb23-22"></a><span class="co">#> # Groups: cyl [3]</span></span> <span id="cb23-23"><a href="#cb23-23"></a><span class="co">#> cyl r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC</span></span> <span id="cb23-24"><a href="#cb23-24"></a><span class="co">#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> <span id="cb23-25"><a href="#cb23-25"></a><span class="co">#> 1 4 0.509 0.454 3.33 9.32 0.0137 1 -27.7 61.5 62.7</span></span> <span id="cb23-26"><a href="#cb23-26"></a><span class="co">#> 2 6 0.465 0.357 1.17 
4.34 0.0918 1 -9.83 25.7 25.5</span></span> <span id="cb23-27"><a href="#cb23-27"></a><span class="co">#> 3 8 0.423 0.375 2.02 8.80 0.0118 1 -28.7 63.3 65.2</span></span> <span id="cb23-28"><a href="#cb23-28"></a><span class="co">#> # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int></span></span></code></pre></div> <p>Or easily access the parameters of each model:</p> <div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1"></a>mods <span class="op">%>%</span><span class="st"> </span><span class="kw">summarise</span>(broom<span class="op">::</span><span class="kw">tidy</span>(mod))</span> <span id="cb24-2"><a href="#cb24-2"></a><span class="co">#> `summarise()` regrouping output by 'cyl' (override with `.groups` argument)</span></span> <span id="cb24-3"><a href="#cb24-3"></a><span class="co">#> # A tibble: 6 x 6</span></span> <span id="cb24-4"><a href="#cb24-4"></a><span class="co">#> # Groups: cyl [3]</span></span> <span id="cb24-5"><a href="#cb24-5"></a><span class="co">#> cyl term estimate std.error statistic p.value</span></span> <span id="cb24-6"><a href="#cb24-6"></a><span class="co">#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl></span></span> <span id="cb24-7"><a href="#cb24-7"></a><span class="co">#> 1 4 (Intercept) 39.6 4.35 9.10 0.00000777</span></span> <span id="cb24-8"><a href="#cb24-8"></a><span class="co">#> 2 4 wt -5.65 1.85 -3.05 0.0137 </span></span> <span id="cb24-9"><a href="#cb24-9"></a><span class="co">#> 3 6 (Intercept) 28.4 4.18 6.79 0.00105 </span></span> <span id="cb24-10"><a href="#cb24-10"></a><span class="co">#> 4 6 wt -2.78 1.33 -2.08 0.0918 </span></span> <span id="cb24-11"><a href="#cb24-11"></a><span class="co">#> # … with 2 more rows</span></span></code></pre></div> </div> </div> <div id="repeated-function-calls" class="section level2"> <h2>Repeated function calls</h2> <p><code>rowwise()</code> doesn’t just work with functions that return a length-1 
vector (aka summary functions); it can work with any function if the result is a list. This means that <code>rowwise()</code> and <code>mutate()</code> provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs.</p> <div id="simulations" class="section level3"> <h3>Simulations</h3> <p>I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution:</p> <div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1"></a>df <-<span class="st"> </span><span class="kw">tribble</span>(</span> <span id="cb25-2"><a href="#cb25-2"></a> <span class="op">~</span><span class="st"> </span>n, <span class="op">~</span><span class="st"> </span>min, <span class="op">~</span><span class="st"> </span>max,</span> <span id="cb25-3"><a href="#cb25-3"></a> <span class="dv">1</span>, <span class="dv">0</span>, <span class="dv">1</span>,</span> <span id="cb25-4"><a href="#cb25-4"></a> <span class="dv">2</span>, <span class="dv">10</span>, <span class="dv">100</span>,</span> <span id="cb25-5"><a href="#cb25-5"></a> <span class="dv">3</span>, <span class="dv">100</span>, <span class="dv">1000</span>,</span> <span id="cb25-6"><a href="#cb25-6"></a>)</span></code></pre></div> <p>You can supply these parameters to <code>runif()</code> by using <code>rowwise()</code> and <code>mutate()</code>:</p> <div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb26-2"><a href="#cb26-2"></a><span class="st"> </span><span class="kw">rowwise</span>() <span class="op">%>%</span><span class="st"> </span></span> <span id="cb26-3"><a 
href="#cb26-3"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">data =</span> <span class="kw">list</span>(<span class="kw">runif</span>(n, min, max)))</span> <span id="cb26-4"><a href="#cb26-4"></a><span class="co">#> # A tibble: 3 x 4</span></span> <span id="cb26-5"><a href="#cb26-5"></a><span class="co">#> # Rowwise: </span></span> <span id="cb26-6"><a href="#cb26-6"></a><span class="co">#> n min max data </span></span> <span id="cb26-7"><a href="#cb26-7"></a><span class="co">#> <dbl> <dbl> <dbl> <list> </span></span> <span id="cb26-8"><a href="#cb26-8"></a><span class="co">#> 1 1 0 1 <dbl [1]></span></span> <span id="cb26-9"><a href="#cb26-9"></a><span class="co">#> 2 2 10 100 <dbl [2]></span></span> <span id="cb26-10"><a href="#cb26-10"></a><span class="co">#> 3 3 100 1000 <dbl [3]></span></span></code></pre></div> <p>Note the use of <code>list()</code> here - <code>runif()</code> returns multiple values and a <code>mutate()</code> expression has to return something of length 1. <code>list()</code> means that we’ll get a list column where each row is a list containing multiple values. 
If you forget to use <code>list()</code>, dplyr will give you a hint:</p> <div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb27-2"><a href="#cb27-2"></a><span class="st"> </span><span class="kw">rowwise</span>() <span class="op">%>%</span><span class="st"> </span></span> <span id="cb27-3"><a href="#cb27-3"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">data =</span> <span class="kw">runif</span>(n, min, max))</span> <span id="cb27-4"><a href="#cb27-4"></a><span class="co">#> Error: Problem with `mutate()` input `data`.</span></span> <span id="cb27-5"><a href="#cb27-5"></a><span class="co">#> x Input `data` can't be recycled to size 1.</span></span> <span id="cb27-6"><a href="#cb27-6"></a><span class="co">#> ℹ Input `data` is `runif(n, min, max)`.</span></span> <span id="cb27-7"><a href="#cb27-7"></a><span class="co">#> ℹ Input `data` must be size 1, not 2.</span></span> <span id="cb27-8"><a href="#cb27-8"></a><span class="co">#> ℹ Did you mean: `data = list(runif(n, min, max))` ?</span></span> <span id="cb27-9"><a href="#cb27-9"></a><span class="co">#> ℹ The error occurred in row 2.</span></span></code></pre></div> </div> <div id="multiple-combinations" class="section level3"> <h3>Multiple combinations</h3> <p>What if you want to call a function for every combination of inputs? 
You can use <code>expand.grid()</code> (or <code>tidyr::expand_grid()</code>) to generate the data frame and then repeat the same pattern as above:</p> <div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1"></a>df <-<span class="st"> </span><span class="kw">expand.grid</span>(<span class="dt">mean =</span> <span class="kw">c</span>(<span class="op">-</span><span class="dv">1</span>, <span class="dv">0</span>, <span class="dv">1</span>), <span class="dt">sd =</span> <span class="kw">c</span>(<span class="dv">1</span>, <span class="dv">10</span>, <span class="dv">100</span>))</span> <span id="cb28-2"><a href="#cb28-2"></a></span> <span id="cb28-3"><a href="#cb28-3"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb28-4"><a href="#cb28-4"></a><span class="st"> </span><span class="kw">rowwise</span>() <span class="op">%>%</span><span class="st"> </span></span> <span id="cb28-5"><a href="#cb28-5"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">data =</span> <span class="kw">list</span>(<span class="kw">rnorm</span>(<span class="dv">10</span>, mean, sd)))</span> <span id="cb28-6"><a href="#cb28-6"></a><span class="co">#> # A tibble: 9 x 3</span></span> <span id="cb28-7"><a href="#cb28-7"></a><span class="co">#> # Rowwise: </span></span> <span id="cb28-8"><a href="#cb28-8"></a><span class="co">#> mean sd data </span></span> <span id="cb28-9"><a href="#cb28-9"></a><span class="co">#> <dbl> <dbl> <list> </span></span> <span id="cb28-10"><a href="#cb28-10"></a><span class="co">#> 1 -1 1 <dbl [10]></span></span> <span id="cb28-11"><a href="#cb28-11"></a><span class="co">#> 2 0 1 <dbl [10]></span></span> <span id="cb28-12"><a href="#cb28-12"></a><span class="co">#> 3 1 1 <dbl [10]></span></span> <span id="cb28-13"><a href="#cb28-13"></a><span class="co">#> 4 -1 10 <dbl [10]></span></span> <span id="cb28-14"><a href="#cb28-14"></a><span class="co">#> # … 
with 5 more rows</span></span></code></pre></div> </div> <div id="varying-functions" class="section level3"> <h3>Varying functions</h3> <p>In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. But it’s still possible, and it’s a natural place to use <code>do.call()</code>:</p> <div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1"></a>df <-<span class="st"> </span><span class="kw">tribble</span>(</span> <span id="cb29-2"><a href="#cb29-2"></a> <span class="op">~</span>rng, <span class="op">~</span>params,</span> <span id="cb29-3"><a href="#cb29-3"></a> <span class="st">"runif"</span>, <span class="kw">list</span>(<span class="dt">n =</span> <span class="dv">10</span>), </span> <span id="cb29-4"><a href="#cb29-4"></a> <span class="st">"rnorm"</span>, <span class="kw">list</span>(<span class="dt">n =</span> <span class="dv">20</span>),</span> <span id="cb29-5"><a href="#cb29-5"></a> <span class="st">"rpois"</span>, <span class="kw">list</span>(<span class="dt">n =</span> <span class="dv">10</span>, <span class="dt">lambda =</span> <span class="dv">5</span>),</span> <span id="cb29-6"><a href="#cb29-6"></a>) <span class="op">%>%</span></span> <span id="cb29-7"><a href="#cb29-7"></a><span class="st"> </span><span class="kw">rowwise</span>()</span> <span id="cb29-8"><a href="#cb29-8"></a></span> <span id="cb29-9"><a href="#cb29-9"></a>df <span class="op">%>%</span><span class="st"> </span></span> <span id="cb29-10"><a href="#cb29-10"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">data =</span> <span class="kw">list</span>(<span class="kw">do.call</span>(rng, params)))</span> <span id="cb29-11"><a href="#cb29-11"></a><span class="co">#> # A tibble: 3 x 3</span></span> <span id="cb29-12"><a href="#cb29-12"></a><span 
class="co">#> # Rowwise: </span></span> <span id="cb29-13"><a href="#cb29-13"></a><span class="co">#> rng params data </span></span> <span id="cb29-14"><a href="#cb29-14"></a><span class="co">#> <chr> <list> <list> </span></span> <span id="cb29-15"><a href="#cb29-15"></a><span class="co">#> 1 runif <named list [1]> <dbl [10]></span></span> <span id="cb29-16"><a href="#cb29-16"></a><span class="co">#> 2 rnorm <named list [1]> <dbl [20]></span></span> <span id="cb29-17"><a href="#cb29-17"></a><span class="co">#> 3 rpois <named list [2]> <int [10]></span></span></code></pre></div> </div> </div> <div id="previously" class="section level2"> <h2>Previously</h2> <div id="rowwise" class="section level3"> <h3><code>rowwise()</code></h3> <p><code>rowwise()</code> was also questioning for quite some time, partly because I didn’t appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr <code>map()</code> functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions.</p> <p>I was also resistant to <code>rowwise()</code> because I felt like automatically switching from <code>[</code> to <code>[[</code> was too magical in the same way that automatically <code>list()</code>-ing results made <code>do()</code> too magical.
I’ve now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between <code>[</code> and <code>[[</code> mystifying and <code>rowwise()</code> means that you don’t need to think about it.</p> <p>Since <code>rowwise()</code> clearly is useful, it is no longer questioning, and we expect it to be around for the long term.</p> </div> <div id="do" class="section level3"> <h3><code>do()</code></h3> <p>We’ve questioned the need for <code>do()</code> for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation:</p> <ul> <li><p>Without argument names: you could call functions that input and output data frames using <code>.</code> to refer to the “current” group. For example, the following code gets the first row of each group:</p> <div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span></span> <span id="cb30-2"><a href="#cb30-2"></a><span class="st"> </span><span class="kw">group_by</span>(cyl) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb30-3"><a href="#cb30-3"></a><span class="st"> </span><span class="kw">do</span>(<span class="kw">head</span>(., <span class="dv">1</span>))</span> <span id="cb30-4"><a href="#cb30-4"></a><span class="co">#> # A tibble: 3 x 13</span></span> <span id="cb30-5"><a href="#cb30-5"></a><span class="co">#> # Groups: cyl [3]</span></span> <span id="cb30-6"><a href="#cb30-6"></a><span class="co">#> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 cyl4</span></span> <span id="cb30-7"><a href="#cb30-7"></a><span class="co">#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> <span id="cb30-8"><a href="#cb30-8"></a><span class="co">#> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 8 16</span></span> <span id="cb30-9"><a href="#cb30-9"></a><span class="co">#> 2
21 6 160 110 3.9 2.62 16.5 0 1 4 4 12 24</span></span> <span id="cb30-10"><a href="#cb30-10"></a><span class="co">#> 3 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 16 32</span></span></code></pre></div> <p>This has been superseded by <code>cur_data()</code> plus the more permissive <code>summarise()</code> which can now create multiple columns and multiple rows.</p> <div class="sourceCode" id="cb31"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span></span> <span id="cb31-2"><a href="#cb31-2"></a><span class="st"> </span><span class="kw">group_by</span>(cyl) <span class="op">%>%</span><span class="st"> </span></span> <span id="cb31-3"><a href="#cb31-3"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="kw">head</span>(<span class="kw">cur_data</span>(), <span class="dv">1</span>))</span> <span id="cb31-4"><a href="#cb31-4"></a><span class="co">#> `summarise()` ungrouping output (override with `.groups` argument)</span></span> <span id="cb31-5"><a href="#cb31-5"></a><span class="co">#> # A tibble: 3 x 13</span></span> <span id="cb31-6"><a href="#cb31-6"></a><span class="co">#> cyl mpg disp hp drat wt qsec vs am gear carb cyl2 cyl4</span></span> <span id="cb31-7"><a href="#cb31-7"></a><span class="co">#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> <span id="cb31-8"><a href="#cb31-8"></a><span class="co">#> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 8 16</span></span> <span id="cb31-9"><a href="#cb31-9"></a><span class="co">#> 2 6 21 160 110 3.9 2.62 16.5 0 1 4 4 12 24</span></span> <span id="cb31-10"><a href="#cb31-10"></a><span class="co">#> 3 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 16 32</span></span></code></pre></div></li> <li><p>With arguments: it worked like <code>mutate()</code> but automatically wrapped every element in a list:</p> <div class="sourceCode" id="cb32"><pre class="sourceCode r"><code
class="sourceCode r"><span id="cb32-1"><a href="#cb32-1"></a>mtcars <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb32-2"><a href="#cb32-2"></a><span class="st"> </span><span class="kw">group_by</span>(cyl) <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb32-3"><a href="#cb32-3"></a><span class="st"> </span><span class="kw">do</span>(<span class="dt">nrows =</span> <span class="kw">nrow</span>(.))</span> <span id="cb32-4"><a href="#cb32-4"></a><span class="co">#&gt; # A tibble: 3 x 2</span></span> <span id="cb32-5"><a href="#cb32-5"></a><span class="co">#&gt; # Rowwise: </span></span> <span id="cb32-6"><a href="#cb32-6"></a><span class="co">#&gt; cyl nrows </span></span> <span id="cb32-7"><a href="#cb32-7"></a><span class="co">#&gt; &lt;dbl&gt; &lt;list&gt; </span></span> <span id="cb32-8"><a href="#cb32-8"></a><span class="co">#&gt; 1 4 &lt;int [1]&gt;</span></span> <span id="cb32-9"><a href="#cb32-9"></a><span class="co">#&gt; 2 6 &lt;int [1]&gt;</span></span> <span id="cb32-10"><a href="#cb32-10"></a><span class="co">#&gt; 3 8 &lt;int [1]&gt;</span></span></code></pre></div> <p>I now believe that behaviour is both too magical and not very useful, and it can be replaced by <code>summarise()</code> and <code>cur_data()</code>.</p> <div class="sourceCode" id="cb33"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1"></a>mtcars <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb33-2"><a href="#cb33-2"></a><span class="st"> </span><span class="kw">group_by</span>(cyl) <span class="op">%&gt;%</span><span class="st"> </span></span> <span id="cb33-3"><a href="#cb33-3"></a><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">nrows =</span> <span class="kw">nrow</span>(<span class="kw">cur_data</span>()))</span> <span id="cb33-4"><a href="#cb33-4"></a><span class="co">#&gt; `summarise()` ungrouping output (override with `.groups` argument)</span></span> <span id="cb33-5"><a href="#cb33-5"></a><span 
class="co">#&gt; # A tibble: 3 x 2</span></span> <span id="cb33-6"><a href="#cb33-6"></a><span class="co">#&gt; cyl nrows</span></span> <span id="cb33-7"><a href="#cb33-7"></a><span class="co">#&gt; &lt;dbl&gt; &lt;int&gt;</span></span> <span id="cb33-8"><a href="#cb33-8"></a><span class="co">#&gt; 1 4 11</span></span> <span id="cb33-9"><a href="#cb33-9"></a><span class="co">#&gt; 2 6 7</span></span> <span id="cb33-10"><a href="#cb33-10"></a><span class="co">#&gt; 3 8 14</span></span></code></pre></div> <p>If needed (unlike here), you can wrap the results in a list yourself.</p></li> </ul> <p>The addition of <code>cur_data()</code>/<code>across()</code> and the increased scope of <code>summarise()</code> mean that <code>do()</code> is no longer needed, so it is now superseded.</p> </div> </div> <!-- code folding --> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>