EVOLUTION-MANAGER
Edit File: syntax.html
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="generator" content="pandoc" />
  <meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Technical description of tidyselect</title>

  <!--
    NOTE: the inline scripts below use `//` line comments, so every statement
    must stay on its own physical line. Joining these lines turns the code
    that follows a comment into part of that comment.
  -->
  <script>
    // Pandoc 2.9 adds attributes on both header and div. We remove the former
    // (to be compatible with the behavior of Pandoc < 2.8).
    document.addEventListener('DOMContentLoaded', function(e) {
      // First child of every div.section whose class mentions a "level".
      var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
      var i, h, a;
      for (i = 0; i < hs.length; i++) {
        h = hs[i];
        if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
        a = h.attributes;
        // `a` is a live collection: each removal shifts the next attribute
        // into slot 0, so the loop ends once the header has none left.
        while (a.length > 0) h.removeAttribute(a[0].name);
      }
    });
  </script>

  <style type="text/css">code{white-space: pre;}</style>

  <style type="text/css" data-origin="pandoc">
    pre > code.sourceCode { white-space: pre; position: relative; }
    pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
    pre > code.sourceCode > span:empty { height: 1.2em; }
    code.sourceCode > span { color: inherit; text-decoration: inherit; }
    div.sourceCode { margin: 1em 0; }
    pre.sourceCode { margin: 0; }
    @media screen {
      div.sourceCode { overflow: auto; }
    }
    @media print {
      pre > code.sourceCode { white-space: pre-wrap; }
      pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
    }
    pre.numberSource code { counter-reset: source-line 0; }
    pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; }
    pre.numberSource code > span > a:first-child::before {
      content: counter(source-line);
      position: relative;
      left: -1em;
      text-align: right;
      vertical-align: baseline;
      border: none;
      display: inline-block;
      -webkit-touch-callout: none;
      -webkit-user-select: none;
      -khtml-user-select: none;
      -moz-user-select: none;
      -ms-user-select: none;
      user-select: none;
      padding: 0 4px;
      width: 4em;
      color: #aaaaaa;
    }
    pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
    div.sourceCode { }
    @media screen {
      pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
    }
    code span.al { color: #ff0000; font-weight: bold; } /* Alert */
    code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
    code span.at { color: #7d9029; } /* Attribute */
    code span.bn { color: #40a070; } /* BaseN */
    code span.bu { } /* BuiltIn */
    code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
    code span.ch { color: #4070a0; } /* Char */
    code span.cn { color: #880000; } /* Constant */
    code span.co { color: #60a0b0; font-style: italic; } /* Comment */
    code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
    code span.do { color: #ba2121; font-style: italic; } /* Documentation */
    code span.dt { color: #902000; } /* DataType */
    code span.dv { color: #40a070; } /* DecVal */
    code span.er { color: #ff0000; font-weight: bold; } /* Error */
    code span.ex { } /* Extension */
    code span.fl { color: #40a070; } /* Float */
    code span.fu { color: #06287e; } /* Function */
    code span.im { } /* Import */
    code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
    code span.kw { color: #007020; font-weight: bold; } /* Keyword */
    code span.op { color: #666666; } /* Operator */
    code span.ot { color: #007020; } /* Other */
    code span.pp { color: #bc7a00; } /* Preprocessor */
    code span.sc { color: #4070a0; } /* SpecialChar */
    code span.ss { color: #bb6688; } /* SpecialString */
    code span.st { color: #4070a0; } /* String */
    code span.va { color: #19177c; } /* Variable */
    code span.vs { color: #4070a0; } /* VerbatimString */
    code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
  </style>

  <script>
    // apply pandoc div.sourceCode style to pre.sourceCode instead
    (function() {
      var sheets = document.styleSheets;
      for (var i = 0; i < sheets.length; i++) {
        // ownerNode can be null (e.g. for @import-ed sheets); skip those
        // instead of throwing on `.dataset`.
        var owner = sheets[i].ownerNode;
        if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
        // Reading cssRules may throw; such sheets are skipped.
        try { var rules = sheets[i].cssRules; } catch (e) { continue; }
        for (var j = 0; j < rules.length; j++) {
          var rule = rules[j];
          // check if there is a div.sourceCode rule
          if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
          var style = rule.style.cssText;
          // check if color or background-color is set
          if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
          // replace div.sourceCode by a pre.sourceCode rule (same index, so
          // the position of the rule in the sheet is preserved)
          sheets[i].deleteRule(j);
          sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
        }
      }
    })();
  </script>

  <style type="text/css">
    body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; }
    #TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; }
    #TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; }
    #TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; }
    #TOC ul ul { margin-left: -2em; }
    #TOC li { line-height: 16px; }
    table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; }
    table th { border-width: 2px; padding: 5px; border-style: inset; }
    table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; }
    table, table th, table td { border-left-style: none; border-right-style: none; }
    table thead, table tr.even { background-color: #f7f7f7; }
    p { margin: 0.5em 0; }
    blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; }
    hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; }
    dl { margin-left: 0; }
    dl dd { margin-bottom: 13px; margin-left: 13px; }
    dl dt { font-weight: bold; }
    ul { margin-top: 0; }
    ul li { list-style: circle outside; }
    ul ul { margin-bottom: 0; }
    pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; }
    pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; }
    pre:not([class]) { background-color: #f7f7f7; }
    code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; }
    p > code, li > code { padding: 2px 0px; }
    div.figure { text-align: center; }
    img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; }
    h1 { margin-top: 0; font-size: 35px; line-height: 40px; }
    h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; }
    h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; }
    h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; }
    h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; }
    a { color: #0033dd; text-decoration: none; }
    a:hover { color: #6666ff; }
    a:visited { color: #800080; }
    a:visited:hover { color: #BB00BB; }
    a[href^="http:"] { text-decoration: underline; }
    a[href^="https:"] { text-decoration: underline; }
    code > span.kw { color: #555; font-weight: bold; }
    code > span.dt { color: #902000; }
    code > span.dv { color: #40a070; }
    code > span.bn { color: #d14; }
    code > span.fl { color: #d14; }
    code > span.ch { color: #d14; }
    code > span.st { color: #d14; }
    code > span.co { color: #888888; font-style: italic; }
    code > span.ot { color: #007020; }
    code > span.al { color: #ff0000; font-weight: bold; }
    code > span.fu { color: #900; font-weight: bold; }
    code > span.er { color: #a61717; background-color: #e3d2d2; }
  </style>
</head>
<body>
<h1 class="title toc-ignore">Technical description of tidyselect</h1>
<p>This is a technical description of the tidyselect syntax.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(tidyselect)</span>
<span id="cb1-2"><a href="#cb1-2"></a><span class="kw">library</span>(magrittr)</span> <span id="cb1-3"><a href="#cb1-3"></a></span> <span id="cb1-4"><a href="#cb1-4"></a><span class="co"># For better printing</span></span> <span id="cb1-5"><a href="#cb1-5"></a>mtcars <-<span class="st"> </span>tibble<span class="op">::</span><span class="kw">as_tibble</span>(mtcars)</span> <span id="cb1-6"><a href="#cb1-6"></a>iris <-<span class="st"> </span>tibble<span class="op">::</span><span class="kw">as_tibble</span>(iris)</span></code></pre></div> <p>To illustrate the semantics of tidyselect, we’ll use variants of <code>dplyr::select()</code> and <code>dplyr::rename()</code> that return the named vector of locations for the selected or renamed elements:</p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>select_loc <-<span class="st"> </span><span class="cf">function</span>(data, ...) {</span> <span id="cb2-2"><a href="#cb2-2"></a> <span class="kw">eval_select</span>(rlang<span class="op">::</span><span class="kw">expr</span>(<span class="kw">c</span>(...)), data)</span> <span id="cb2-3"><a href="#cb2-3"></a>}</span> <span id="cb2-4"><a href="#cb2-4"></a></span> <span id="cb2-5"><a href="#cb2-5"></a>rename_loc <-<span class="st"> </span><span class="cf">function</span>(data, ...) {</span> <span id="cb2-6"><a href="#cb2-6"></a> <span class="kw">eval_rename</span>(rlang<span class="op">::</span><span class="kw">expr</span>(<span class="kw">c</span>(...)), data)</span> <span id="cb2-7"><a href="#cb2-7"></a>}</span></code></pre></div> <div id="sets-of-variables" class="section level2"> <h2>Sets of variables</h2> <p>The tidyselect syntax is all about <strong>sets</strong> of variables, internally represented by integer vectors of <strong>locations</strong>. 
For example, <code>c(1L, 2L)</code> represents the set of the first and second variables, as does <code>c(1L, 2L, 1L)</code>.</p> <p>If a vector of locations contains duplicates, they are normally treated as the same element, since they represent sets. An exception to this occurs with named elements whose names differ. If the names don’t match, they are treated as different elements in order to allow renaming a variable to multiple names (see section on Renaming variables).</p> <p>The syntax of tidyselect is generally designed for set combination. For instance, <code>c(foo(), bar())</code> represents the union of the variables in <code>foo()</code> and those in <code>bar()</code>.</p> <div id="bare-names" class="section level3"> <h3>Bare names</h3> <p>Within <em>data-expressions</em> (see Evaluation section), bare names represent their own locations, i.e. a set of size 1. The following expressions are equivalent:</p> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(mpg<span class="op">:</span>hp, <span class="op">-</span>cyl, vs)</span> <span id="cb3-2"><a href="#cb3-2"></a><span class="co">#> mpg disp hp vs </span></span> <span id="cb3-3"><a href="#cb3-3"></a><span class="co">#> 1 3 4 8</span></span> <span id="cb3-4"><a href="#cb3-4"></a></span> <span id="cb3-5"><a href="#cb3-5"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dv">1</span><span class="op">:</span><span class="dv">4</span>, <span class="dv">-2</span>, <span class="dv">8</span>)</span> <span id="cb3-6"><a href="#cb3-6"></a><span class="co">#> mpg disp hp vs </span></span> <span id="cb3-7"><a href="#cb3-7"></a><span class="co">#> 1 3 4 8</span></span></code></pre></div> </div> <div id="the-operator" class="section level3"> <h3>The <code>:</code> operator</h3> 
<p><code>:</code> can be used to select consecutive variables between two locations. It returns the corresponding sequence of locations.</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dv">2</span><span class="op">:</span><span class="dv">4</span>)</span> <span id="cb4-2"><a href="#cb4-2"></a><span class="co">#> cyl disp hp </span></span> <span id="cb4-3"><a href="#cb4-3"></a><span class="co">#> 2 3 4</span></span></code></pre></div> <p>Because bare names represent their own locations, it is easy to select a range of variables:</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(cyl<span class="op">:</span>hp)</span> <span id="cb5-2"><a href="#cb5-2"></a><span class="co">#> cyl disp hp </span></span> <span id="cb5-3"><a href="#cb5-3"></a><span class="co">#> 2 3 4</span></span></code></pre></div> </div> <div id="boolean-operators" class="section level3"> <h3>Boolean operators</h3> <p>Boolean operators provide a more intuitive approach to set combination. Though sets are internally represented with vectors of locations, they could equally be represented with a full logical vector of inclusion indicators (taking the <code>which()</code> of this vector would then recover the locations). 
The boolean operators should be considered in terms of the logical representation of sets.</p> <p>The <code>|</code> operator takes the <strong>union</strong> of two sets:</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>) <span class="op">|</span><span class="st"> </span><span class="kw">ends_with</span>(<span class="st">"Width"</span>))</span> <span id="cb6-2"><a href="#cb6-2"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Width </span></span> <span id="cb6-3"><a href="#cb6-3"></a><span class="co">#> 1 2 4</span></span></code></pre></div> <p>The <code>&</code> operator takes the <strong>intersection</strong> of two sets:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>) <span class="op">&</span><span class="st"> </span><span class="kw">ends_with</span>(<span class="st">"Width"</span>))</span> <span id="cb7-2"><a href="#cb7-2"></a><span class="co">#> Sepal.Width </span></span> <span id="cb7-3"><a href="#cb7-3"></a><span class="co">#> 2</span></span></code></pre></div> <p>The <code>!</code> operator takes the <strong>complement</strong> of a set:</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="op">!</span><span class="kw">ends_with</span>(<span class="st">"Width"</span>))</span> <span id="cb8-2"><a href="#cb8-2"></a><span class="co">#> Sepal.Length Petal.Length Species </span></span> <span id="cb8-3"><a 
href="#cb8-3"></a><span class="co">#> 1 3 5</span></span></code></pre></div> <p>Taking the intersection with a complement produces a set <strong>difference</strong>:</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>) <span class="op">&</span><span class="st"> </span><span class="op">!</span><span class="kw">ends_with</span>(<span class="st">"Width"</span>))</span> <span id="cb9-2"><a href="#cb9-2"></a><span class="co">#> Sepal.Length </span></span> <span id="cb9-3"><a href="#cb9-3"></a><span class="co">#> 1</span></span></code></pre></div> </div> <div id="dots-c-and-unary--" class="section level3"> <h3>Dots, <code>c()</code>, and unary <code>-</code></h3> <p>tidyselect functions can take dots like <code>dplyr::select()</code>, or a named argument like <code>tidyr::pivot_longer()</code>. In the latter case, the dots syntax is accessible via <code>c()</code>. 
In fact <code>...</code> syntax is implemented through <code>c(...)</code> and is thus completely equivalent.</p> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(mpg, disp<span class="op">:</span>hp)</span> <span id="cb10-2"><a href="#cb10-2"></a><span class="co">#> mpg disp hp </span></span> <span id="cb10-3"><a href="#cb10-3"></a><span class="co">#> 1 3 4</span></span> <span id="cb10-4"><a href="#cb10-4"></a></span> <span id="cb10-5"><a href="#cb10-5"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">c</span>(mpg, disp<span class="op">:</span>hp))</span> <span id="cb10-6"><a href="#cb10-6"></a><span class="co">#> mpg disp hp </span></span> <span id="cb10-7"><a href="#cb10-7"></a><span class="co">#> 1 3 4</span></span></code></pre></div> <p>Dots and <code>c()</code> are syntax for:</p> <ul> <li>Set union or set difference</li> <li>Renaming variables</li> </ul> <p>Non-negative inputs are recursively joined with <code>union()</code>. The precedence is left-associative, just like with boolean operators. 
These expressions are all syntax for <em>set union</em>:</p> <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>), <span class="kw">ends_with</span>(<span class="st">"Width"</span>), Species)</span> <span id="cb11-2"><a href="#cb11-2"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Width Species </span></span> <span id="cb11-3"><a href="#cb11-3"></a><span class="co">#> 1 2 4 5</span></span> <span id="cb11-4"><a href="#cb11-4"></a></span> <span id="cb11-5"><a href="#cb11-5"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>) <span class="op">|</span><span class="st"> </span><span class="kw">ends_with</span>(<span class="st">"Width"</span>) <span class="op">|</span><span class="st"> </span>Species)</span> <span id="cb11-6"><a href="#cb11-6"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Width Species </span></span> <span id="cb11-7"><a href="#cb11-7"></a><span class="co">#> 1 2 4 5</span></span> <span id="cb11-8"><a href="#cb11-8"></a></span> <span id="cb11-9"><a href="#cb11-9"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">union</span>(<span class="kw">union</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>), <span class="kw">ends_with</span>(<span class="st">"Width"</span>)), 5L))</span> <span id="cb11-10"><a href="#cb11-10"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Width Species </span></span> <span id="cb11-11"><a href="#cb11-11"></a><span class="co">#> 1 2 4 5</span></span></code></pre></div> <p>Unary <code>-</code> is normally syntax for <em>set difference</em>:</p> <div class="sourceCode" 
id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>), <span class="op">-</span><span class="kw">ends_with</span>(<span class="st">"Width"</span>), <span class="op">-</span>Sepal.Length)</span> <span id="cb12-2"><a href="#cb12-2"></a><span class="co">#> named integer(0)</span></span> <span id="cb12-3"><a href="#cb12-3"></a></span> <span id="cb12-4"><a href="#cb12-4"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">setdiff</span>(<span class="kw">setdiff</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>), <span class="kw">ends_with</span>(<span class="st">"Width"</span>)), 1L))</span> <span id="cb12-5"><a href="#cb12-5"></a><span class="co">#> named integer(0)</span></span></code></pre></div> <p>If the first <code>...</code> or <code>c()</code> input is negative, an implicit <code>everything()</code> is appended.</p> <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="op">-</span><span class="kw">starts_with</span>(<span class="st">"Sepal"</span>))</span> <span id="cb13-2"><a href="#cb13-2"></a><span class="co">#> Petal.Length Petal.Width Species </span></span> <span id="cb13-3"><a href="#cb13-3"></a><span class="co">#> 3 4 5</span></span> <span id="cb13-4"><a href="#cb13-4"></a></span> <span id="cb13-5"><a href="#cb13-5"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">everything</span>(), <span class="op">-</span><span class="kw">starts_with</span>(<span class="st">"Sepal"</span>))</span> <span id="cb13-6"><a 
href="#cb13-6"></a><span class="co">#> Petal.Length Petal.Width Species </span></span> <span id="cb13-7"><a href="#cb13-7"></a><span class="co">#> 3 4 5</span></span> <span id="cb13-8"><a href="#cb13-8"></a></span> <span id="cb13-9"><a href="#cb13-9"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">setdiff</span>(<span class="kw">everything</span>(), <span class="kw">starts_with</span>(<span class="st">"Sepal"</span>)))</span> <span id="cb13-10"><a href="#cb13-10"></a><span class="co">#> Petal.Length Petal.Width Species </span></span> <span id="cb13-11"><a href="#cb13-11"></a><span class="co">#> 3 4 5</span></span></code></pre></div> <p>In this case, unary <code>-</code> is syntax for <em>set complement</em>. Unary <code>-</code> and <code>!</code> are equivalent:</p> <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="op">-</span><span class="kw">starts_with</span>(<span class="st">"Sepal"</span>))</span> <span id="cb14-2"><a href="#cb14-2"></a><span class="co">#> Petal.Length Petal.Width Species </span></span> <span id="cb14-3"><a href="#cb14-3"></a><span class="co">#> 3 4 5</span></span> <span id="cb14-4"><a href="#cb14-4"></a></span> <span id="cb14-5"><a href="#cb14-5"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="op">!</span><span class="kw">starts_with</span>(<span class="st">"Sepal"</span>))</span> <span id="cb14-6"><a href="#cb14-6"></a><span class="co">#> Petal.Length Petal.Width Species </span></span> <span id="cb14-7"><a href="#cb14-7"></a><span class="co">#> 3 4 5</span></span></code></pre></div> <p>Each level of <code>c()</code> is independent. 
In particular, a nested <code>c()</code> starting with <code>-</code> always stands for set complement:</p> <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">c</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>), <span class="op">-</span>Sepal.Length))</span> <span id="cb15-2"><a href="#cb15-2"></a><span class="co">#> Sepal.Width </span></span> <span id="cb15-3"><a href="#cb15-3"></a><span class="co">#> 2</span></span> <span id="cb15-4"><a href="#cb15-4"></a></span> <span id="cb15-5"><a href="#cb15-5"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">c</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>), <span class="kw">c</span>(<span class="op">-</span>Sepal.Length)))</span> <span id="cb15-6"><a href="#cb15-6"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species </span></span> <span id="cb15-7"><a href="#cb15-7"></a><span class="co">#> 1 2 3 4 5</span></span></code></pre></div> <p>In boolean terms, these expressions are equivalent to:</p> <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>) <span class="op">&</span><span class="st"> </span><span class="op">!</span>Sepal.Length)</span> <span id="cb16-2"><a href="#cb16-2"></a><span class="co">#> Sepal.Width </span></span> <span id="cb16-3"><a href="#cb16-3"></a><span class="co">#> 2</span></span> <span id="cb16-4"><a href="#cb16-4"></a></span> <span id="cb16-5"><a href="#cb16-5"></a>iris <span class="op">%>%</span><span class="st"> </span><span 
class="kw">select_loc</span>(<span class="kw">starts_with</span>(<span class="st">"Sepal"</span>) <span class="op">|</span><span class="st"> </span><span class="op">!</span>Sepal.Length)</span> <span id="cb16-6"><a href="#cb16-6"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species </span></span> <span id="cb16-7"><a href="#cb16-7"></a><span class="co">#> 1 2 3 4 5</span></span></code></pre></div> <p>In general, when unary <code>-</code> is used alone outside <code>...</code> or <code>c()</code>, it stands for set complement.</p> </div> <div id="renaming-variables" class="section level3"> <h3>Renaming variables</h3> <div id="name-combination-and-propagation" class="section level4"> <h4>Name combination and propagation</h4> <p>When named inputs are provided in <code>...</code> or <code>c()</code>, the selection is renamed. If the inputs are already named, the outer and inner names are <strong>combined</strong> with a <code>...</code> separator:</p> <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dt">foo =</span> <span class="kw">c</span>(<span class="dt">bar =</span> mpg, <span class="dt">baz =</span> cyl))</span> <span id="cb17-2"><a href="#cb17-2"></a><span class="co">#> foo...bar foo...baz </span></span> <span id="cb17-3"><a href="#cb17-3"></a><span class="co">#> 1 2</span></span></code></pre></div> <p>Otherwise the outer name is <strong>propagated</strong> to the selected elements according to the following rules:</p> <ul> <li><p>With data frames, a numeric suffix is appended because columns must be uniquely named.</p> <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span 
class="dt">foo =</span> <span class="kw">c</span>(mpg, cyl))</span> <span id="cb18-2"><a href="#cb18-2"></a><span class="co">#> foo1 foo2 </span></span> <span id="cb18-3"><a href="#cb18-3"></a><span class="co">#> 1 2</span></span></code></pre></div></li> <li><p>With normal vectors, the name is simply assigned to all selected inputs.</p> <div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1"></a><span class="kw">as.list</span>(mtcars) <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dt">foo =</span> <span class="kw">c</span>(mpg, cyl))</span> <span id="cb19-2"><a href="#cb19-2"></a><span class="co">#> foo foo </span></span> <span id="cb19-3"><a href="#cb19-3"></a><span class="co">#> 1 2</span></span></code></pre></div></li> </ul> <p>Combination and propagation can be composed by using nested <code>c()</code>:</p> <div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dt">foo =</span> <span class="kw">c</span>(<span class="dt">bar =</span> <span class="kw">c</span>(mpg, cyl)))</span> <span id="cb20-2"><a href="#cb20-2"></a><span class="co">#> foo...bar1 foo...bar2 </span></span> <span id="cb20-3"><a href="#cb20-3"></a><span class="co">#> 1 2</span></span></code></pre></div> </div> <div id="set-combination-with-named-variables" class="section level4"> <h4>Set combination with named variables</h4> <p>Named elements have special rules to determine their identities in a set. 
Unnamed elements match any names:</p> <ul> <li><code>a | c(foo = a)</code> is equivalent to <code>c(foo = a)</code>.</li> <li><code>a & c(foo = a)</code> is equivalent to <code>c(foo = a)</code>.</li> </ul> <p>Named elements with different names are distinct:</p> <ul> <li><code>c(foo = a) & c(bar = a)</code> is equivalent to <code>c()</code>.</li> <li><code>c(foo = a) | c(bar = a)</code> is equivalent to <code>c(foo = a, bar = a)</code>.</li> </ul> <p>Because unnamed elements match any named ones, it is possible to select multiple elements and rename one of them:</p> <div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="op">!</span>Species, <span class="dt">foo =</span> Sepal.Width)</span> <span id="cb21-2"><a href="#cb21-2"></a><span class="co">#> Sepal.Length foo Petal.Length Petal.Width </span></span> <span id="cb21-3"><a href="#cb21-3"></a><span class="co">#> 1 2 3 4</span></span></code></pre></div> </div> </div> <div id="predicate-functions" class="section level3"> <h3>Predicate functions</h3> <p>Predicate function objects can be supplied as input in an env-expression, typically with the selection helper <code>where()</code>. They are applied to all elements of the data, and should return <code>TRUE</code> or <code>FALSE</code> to indicate inclusion. 
Predicates in env-expressions are effectively expanded to the set of variables that they represent:</p> <div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">where</span>(is.numeric))</span> <span id="cb22-2"><a href="#cb22-2"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Length Petal.Width </span></span> <span id="cb22-3"><a href="#cb22-3"></a><span class="co">#> 1 2 3 4</span></span> <span id="cb22-4"><a href="#cb22-4"></a></span> <span id="cb22-5"><a href="#cb22-5"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">where</span>(is.factor))</span> <span id="cb22-6"><a href="#cb22-6"></a><span class="co">#> Species </span></span> <span id="cb22-7"><a href="#cb22-7"></a><span class="co">#> 5</span></span> <span id="cb22-8"><a href="#cb22-8"></a></span> <span id="cb22-9"><a href="#cb22-9"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">where</span>(is.numeric) <span class="op">|</span><span class="st"> </span><span class="kw">where</span>(is.factor))</span> <span id="cb22-10"><a href="#cb22-10"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species </span></span> <span id="cb22-11"><a href="#cb22-11"></a><span class="co">#> 1 2 3 4 5</span></span> <span id="cb22-12"><a href="#cb22-12"></a></span> <span id="cb22-13"><a href="#cb22-13"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">where</span>(is.numeric) <span class="op">&</span><span class="st"> </span><span class="kw">where</span>(is.factor))</span> <span id="cb22-14"><a href="#cb22-14"></a><span class="co">#> named integer(0)</span></span></code></pre></div> </div> </div> <div id="selection-helpers" 
class="section level2"> <h2>Selection helpers</h2> <p>We call <em>selection helpers</em> any function that inspects the currently active variables with <code>peek_vars()</code> and returns a selection.</p> <ul> <li><code>peek_vars()</code> returns a character vector of names.</li> <li>The returned selection can be any output conforming to the types described in the Data types section.</li> </ul> <p>Examples of selection helpers are <code>all_of()</code>, <code>contains()</code>, or <code>last_col()</code>. These selection helpers are evaluated as env-expressions (see Evaluation section).</p> </div> <div id="supported-data-types" class="section level2"> <h2>Supported data types</h2> <p>The following data types can be returned from selection helpers or forced via <code>!!</code> or <code>force()</code> (the latter works in tidyselect because it is treated as an env-expression, see Evaluation section):</p> <ul> <li><p>Vectors of locations:</p> <div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">force</span>(<span class="kw">c</span>(<span class="dv">1</span>, <span class="dv">3</span>)))</span> <span id="cb23-2"><a href="#cb23-2"></a><span class="co">#> Sepal.Length Petal.Length </span></span> <span id="cb23-3"><a href="#cb23-3"></a><span class="co">#> 1 3</span></span></code></pre></div></li> <li><p>Vectors of names. 
These are matched and transformed to locations.</p> <div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">force</span>(<span class="kw">c</span>(<span class="st">"Sepal.Length"</span>, <span class="st">"Petal.Length"</span>)))</span> <span id="cb24-2"><a href="#cb24-2"></a><span class="co">#> Sepal.Length Petal.Length </span></span> <span id="cb24-3"><a href="#cb24-3"></a><span class="co">#> 1 3</span></span></code></pre></div></li> <li><p>Predicate functions. These are applied to all elements to determine inclusion.</p> <div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1"></a>iris <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">force</span>(is.numeric))</span> <span id="cb25-2"><a href="#cb25-2"></a><span class="co">#> Sepal.Length Sepal.Width Petal.Length Petal.Width </span></span> <span id="cb25-3"><a href="#cb25-3"></a><span class="co">#> 1 2 3 4</span></span></code></pre></div></li> </ul> </div> <div id="evaluation" class="section level2"> <h2>Evaluation</h2> <div id="data-expressions-and-env-expressions" class="section level3"> <h3>Data-expressions and env-expressions</h3> <p>tidyselect is not a typical tidy evaluation UI. The main difference is that there is no data masking. 
In a typical tidy eval function, expressions are evaluated with data-vars first in scope, followed by env-vars:</p> <div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1"></a>mask <-<span class="st"> </span><span class="cf">function</span>(data, expr) {</span> <span id="cb26-2"><a href="#cb26-2"></a> rlang<span class="op">::</span><span class="kw">eval_tidy</span>(rlang<span class="op">::</span><span class="kw">enquo</span>(expr), data)</span> <span id="cb26-3"><a href="#cb26-3"></a>}</span> <span id="cb26-4"><a href="#cb26-4"></a></span> <span id="cb26-5"><a href="#cb26-5"></a>foo <-<span class="st"> </span><span class="dv">10</span></span> <span id="cb26-6"><a href="#cb26-6"></a>cyl <-<span class="st"> </span><span class="dv">200</span></span> <span id="cb26-7"><a href="#cb26-7"></a></span> <span id="cb26-8"><a href="#cb26-8"></a><span class="co"># `cyl` represents the data frame column here:</span></span> <span id="cb26-9"><a href="#cb26-9"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">mask</span>(cyl <span class="op">*</span><span class="st"> </span>foo)</span> <span id="cb26-10"><a href="#cb26-10"></a><span class="co">#> [1] 60 60 40 60 80 60 80 40 40 60 60 80 80 80 80 80 80 40 40 40 40 80 80 80 80</span></span> <span id="cb26-11"><a href="#cb26-11"></a><span class="co">#> [26] 40 40 40 80 60 80 40</span></span></code></pre></div> <p>It is possible to bypass the data frame variables by forcing symbols to be looked up in the environment with <code>!!</code> or <code>.env</code>:</p> <div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">mask</span>(<span class="op">!!</span>cyl <span class="op">*</span><span class="st"> </span>foo)</span> <span id="cb27-2"><a href="#cb27-2"></a><span class="co">#> [1] 
2000</span></span> <span id="cb27-3"><a href="#cb27-3"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">mask</span>(.env<span class="op">$</span>cyl <span class="op">*</span><span class="st"> </span>foo)</span> <span id="cb27-4"><a href="#cb27-4"></a><span class="co">#> [1] 2000</span></span></code></pre></div> <p>With tidyselect, there is no such hierarchical data masking. Instead, expressions are evaluated either in the context of the data frame or in the user environment, without overlap. The scope of lookup depends on the kind of expression:</p> <ol style="list-style-type: decimal"> <li><p><strong>data-expressions</strong> are evaluated in the data frame only. This includes bare symbols, the boolean operators, <code>-</code>, <code>:</code>, and <code>c()</code>. You can’t refer to environment-variables in a data-expression:</p> <div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1"></a>cyl_pos <-<span class="st"> </span><span class="dv">2</span></span> <span id="cb28-2"><a href="#cb28-2"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(mpg <span class="op">|</span><span class="st"> </span>cyl_pos)</span> <span id="cb28-3"><a href="#cb28-3"></a><span class="co">#> Error: Can't subset columns that don't exist.</span></span> <span id="cb28-4"><a href="#cb28-4"></a><span class="co">#> ✖ Column `cyl_pos` doesn't exist.</span></span></code></pre></div></li> <li><p><strong>env-expressions</strong> are evaluated in the environment. This includes all calls other than those mentioned above, as well as symbols that are part of those calls. 
You can’t refer to data-variables in an env-expression:</p> <div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">all_of</span>(mpg))</span> <span id="cb29-2"><a href="#cb29-2"></a><span class="co">#> Error: object 'mpg' not found</span></span></code></pre></div></li> </ol> <p>Because the scoping is unambiguous, you can safely refer to env-vars in an env-expression, without having to worry about potential naming clashes with data-vars:</p> <div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1"></a>x <-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">x =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">3</span>, <span class="dt">y =</span> <span class="dv">4</span><span class="op">:</span><span class="dv">6</span>, <span class="dt">z =</span> <span class="dv">7</span><span class="op">:</span><span class="dv">9</span>)</span> <span id="cb30-2"><a href="#cb30-2"></a></span> <span id="cb30-3"><a href="#cb30-3"></a><span class="co"># `ncol(x)` is an env-expression, so `x` represents the data frame in</span></span> <span id="cb30-4"><a href="#cb30-4"></a><span class="co"># the environment rather than the column in the data frame</span></span> <span id="cb30-5"><a href="#cb30-5"></a>x <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dv">2</span><span class="op">:</span><span class="kw">ncol</span>(x))</span> <span id="cb30-6"><a href="#cb30-6"></a><span class="co">#> y z </span></span> <span id="cb30-7"><a href="#cb30-7"></a><span class="co">#> 2 3</span></span></code></pre></div> <p>If you have variable names in a character vector, it is safe to refer to the env-var containing the names with <code>all_of()</code> because 
it is an env-expression:</p> <div class="sourceCode" id="cb31"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1"></a>y <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"y"</span>, <span class="st">"z"</span>)</span> <span id="cb31-2"><a href="#cb31-2"></a>x <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="kw">all_of</span>(y))</span> <span id="cb31-3"><a href="#cb31-3"></a><span class="co">#> y z </span></span> <span id="cb31-4"><a href="#cb31-4"></a><span class="co">#> 2 3</span></span></code></pre></div> <p>Note that currently, env-vars are still allowed in some data-expressions, for compatibility. However this is in the process of being deprecated and you should see a note recommending to use <code>all_of()</code> instead. This note will become a deprecation warning in the future, and then an error.</p> <div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(cyl_pos)</span> <span id="cb32-2"><a href="#cb32-2"></a><span class="co">#> Note: Using an external vector in selections is ambiguous.</span></span> <span id="cb32-3"><a href="#cb32-3"></a><span class="co">#> ℹ Use `all_of(cyl_pos)` instead of `cyl_pos` to silence this message.</span></span> <span id="cb32-4"><a href="#cb32-4"></a><span class="co">#> ℹ See &lt;https://tidyselect.r-lib.org/reference/faq-external-vector.html&gt;.</span></span> <span id="cb32-5"><a href="#cb32-5"></a><span class="co">#> This message is displayed once per session.</span></span> <span id="cb32-6"><a href="#cb32-6"></a><span class="co">#> cyl </span></span> <span id="cb32-7"><a href="#cb32-7"></a><span class="co">#> 2</span></span></code></pre></div> </div> <div id="arithmetic-operators" class="section level3"> <h3>Arithmetic operators</h3> <p>Within 
data-expressions (see Evaluation section), <code>+</code>, <code>*</code>, <code>/</code> and <code>^</code> are overridden to cause an error. This is to prevent confusion stemming from normal data masking usage where variables can be transformed on the fly:</p> <div class="sourceCode" id="cb33"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(cyl<span class="op">^</span><span class="dv">2</span>)</span> <span id="cb33-2"><a href="#cb33-2"></a><span class="co">#> Error: Can't use arithmetic operator `^` in selection context.</span></span> <span id="cb33-3"><a href="#cb33-3"></a></span> <span id="cb33-4"><a href="#cb33-4"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(mpg <span class="op">*</span><span class="st"> </span>wt)</span> <span id="cb33-5"><a href="#cb33-5"></a><span class="co">#> Error: Can't use arithmetic operator `*` in selection context.</span></span></code></pre></div> </div> </div> <div id="selecting-versus-renaming" class="section level2"> <h2>Selecting versus renaming</h2> <p>The select and rename variants take the same types of inputs and have the same type of return value. They have a few important differences.</p> <div id="all-renaming-inputs-must-be-named" class="section level3"> <h3>All renaming inputs must be named</h3> <p>Unlike <code>eval_select()</code> which can select without renaming, <code>eval_rename()</code> expects a fully named selection. 
If one or several names are missing, an error is thrown.</p> <div class="sourceCode" id="cb34"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(mpg)</span> <span id="cb34-2"><a href="#cb34-2"></a><span class="co">#> mpg </span></span> <span id="cb34-3"><a href="#cb34-3"></a><span class="co">#> 1</span></span> <span id="cb34-4"><a href="#cb34-4"></a></span> <span id="cb34-5"><a href="#cb34-5"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">rename_loc</span>(mpg)</span> <span id="cb34-6"><a href="#cb34-6"></a><span class="co">#> Error: All renaming inputs must be named.</span></span></code></pre></div> </div> <div id="renaming-to-an-existing-variable-name" class="section level3"> <h3>Renaming to an existing variable name</h3> <p>If the input data is a data frame, tidyselect generally throws an error when duplicate column names are selected, in order to respect the invariant of unique column names.</p> <div class="sourceCode" id="cb35"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1"></a><span class="co"># Lists can have duplicates</span></span> <span id="cb35-2"><a href="#cb35-2"></a><span class="kw">as.list</span>(mtcars) <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dt">foo =</span> mpg, <span class="dt">foo =</span> cyl)</span> <span id="cb35-3"><a href="#cb35-3"></a><span class="co">#> foo foo </span></span> <span id="cb35-4"><a href="#cb35-4"></a><span class="co">#> 1 2</span></span> <span id="cb35-5"><a href="#cb35-5"></a></span> <span id="cb35-6"><a href="#cb35-6"></a><span class="co"># Data frames cannot</span></span> <span id="cb35-7"><a href="#cb35-7"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dt">foo =</span> mpg, <span 
class="dt">foo =</span> cyl)</span> <span id="cb35-8"><a href="#cb35-8"></a><span class="co">#> Error: Names must be unique.</span></span> <span id="cb35-9"><a href="#cb35-9"></a><span class="co">#> ✖ These names are duplicated:</span></span> <span id="cb35-10"><a href="#cb35-10"></a><span class="co">#> * "foo" at locations 1 and 2.</span></span></code></pre></div> <p>A selection can rename a variable to an existing name if the latter is not part of the selection:</p> <div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(cyl, <span class="dt">cyl =</span> mpg)</span> <span id="cb36-2"><a href="#cb36-2"></a><span class="co">#> Error: Names must be unique.</span></span> <span id="cb36-3"><a href="#cb36-3"></a><span class="co">#> ✖ These names are duplicated:</span></span> <span id="cb36-4"><a href="#cb36-4"></a><span class="co">#> * "cyl" at locations 1 and 2.</span></span> <span id="cb36-5"><a href="#cb36-5"></a></span> <span id="cb36-6"><a href="#cb36-6"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(disp, <span class="dt">cyl =</span> mpg)</span> <span id="cb36-7"><a href="#cb36-7"></a><span class="co">#> disp cyl </span></span> <span id="cb36-8"><a href="#cb36-8"></a><span class="co">#> 3 1</span></span></code></pre></div> <p>This is not possible when renaming.</p> <div class="sourceCode" id="cb37"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">rename_loc</span>(cyl, <span class="dt">cyl =</span> mpg)</span> <span id="cb37-2"><a href="#cb37-2"></a><span class="co">#> Error: All renaming inputs must be named.</span></span> <span id="cb37-3"><a href="#cb37-3"></a></span> <span id="cb37-4"><a href="#cb37-4"></a>mtcars <span 
class="op">%>%</span><span class="st"> </span><span class="kw">rename_loc</span>(disp, <span class="dt">cyl =</span> mpg)</span> <span id="cb37-5"><a href="#cb37-5"></a><span class="co">#> Error: All renaming inputs must be named.</span></span></code></pre></div> <p>However, the name conflict can be solved by renaming the existing variable to another name:</p> <div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(<span class="dt">foo =</span> cyl, <span class="dt">cyl =</span> mpg)</span> <span id="cb38-2"><a href="#cb38-2"></a><span class="co">#> foo cyl </span></span> <span id="cb38-3"><a href="#cb38-3"></a><span class="co">#> 2 1</span></span> <span id="cb38-4"><a href="#cb38-4"></a></span> <span id="cb38-5"><a href="#cb38-5"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">rename_loc</span>(<span class="dt">foo =</span> cyl, <span class="dt">cyl =</span> mpg)</span> <span id="cb38-6"><a href="#cb38-6"></a><span class="co">#> foo cyl </span></span> <span id="cb38-7"><a href="#cb38-7"></a><span class="co">#> 2 1</span></span></code></pre></div> </div> </div> <div id="duplicate-columns-in-data-frames" class="section level2"> <h2>Duplicate columns in data frames</h2> <p>Normally a data frame shouldn’t have duplicate names. However, the real world is messy and duplicates do happen in the wild. 
tidyselect tries to be as permissive as it can with these duplicates so that users can restore unique names with <code>select()</code> or <code>rename()</code>.</p> <p>First let’s create a data frame with duplicate names:</p> <div class="sourceCode" id="cb39"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1"></a>dups <-<span class="st"> </span>vctrs<span class="op">::</span><span class="kw">new_data_frame</span>(<span class="kw">list</span>(<span class="dt">x =</span> <span class="dv">1</span>, <span class="dt">y =</span> <span class="dv">2</span>, <span class="dt">x =</span> <span class="dv">3</span>))</span></code></pre></div> <p>If the duplicates are not part of the selection, they are simply ignored:</p> <div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1"></a>dups <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(y)</span> <span id="cb40-2"><a href="#cb40-2"></a><span class="co">#> y </span></span> <span id="cb40-3"><a href="#cb40-3"></a><span class="co">#> 2</span></span></code></pre></div> <p>If the duplicates are selected, this is an error:</p> <div class="sourceCode" id="cb41"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1"></a>dups <span class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(x)</span> <span id="cb41-2"><a href="#cb41-2"></a><span class="co">#> Error: Names must be unique.</span></span> <span id="cb41-3"><a href="#cb41-3"></a><span class="co">#> ✖ These names are duplicated:</span></span> <span id="cb41-4"><a href="#cb41-4"></a><span class="co">#> * "x" at locations 1 and 2.</span></span></code></pre></div> <p>The duplicate names can be repaired by renaming chosen locations:</p> <div class="sourceCode" id="cb42"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1"></a>dups <span 
class="op">%>%</span><span class="st"> </span><span class="kw">select_loc</span>(x, <span class="dt">foo =</span> <span class="dv">3</span>)</span> <span id="cb42-2"><a href="#cb42-2"></a><span class="co">#> x foo </span></span> <span id="cb42-3"><a href="#cb42-3"></a><span class="co">#> 1 3</span></span> <span id="cb42-4"><a href="#cb42-4"></a></span> <span id="cb42-5"><a href="#cb42-5"></a>dups <span class="op">%>%</span><span class="st"> </span><span class="kw">rename_loc</span>(<span class="dt">foo =</span> <span class="dv">3</span>)</span> <span id="cb42-6"><a href="#cb42-6"></a><span class="co">#> foo </span></span> <span id="cb42-7"><a href="#cb42-7"></a><span class="co">#> 3</span></span></code></pre></div> </div> <div id="acknowledgements" class="section level2"> <h2>Acknowledgements</h2> <p>The tidyselect syntax was inspired by the <code>base::subset()</code> function written by Peter Dalgaard. The <code>select</code> parameter of <code>subset.data.frame()</code> is evaluated in a data mask where the column names are bound to their locations in the data frame. This allows <code>:</code> to create sequences of variable locations. The locations can be combined with <code>c()</code>. 
This selection interface set the tone for the development of the tidyselect syntax.</p> <div class="sourceCode" id="cb43"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1"></a>mtcars <span class="op">%>%</span><span class="st"> </span><span class="kw">subset</span>(<span class="dt">select =</span> <span class="kw">c</span>(cyl, hp<span class="op">:</span>wt))</span> <span id="cb43-2"><a href="#cb43-2"></a><span class="co">#> # A tibble: 32 x 4</span></span> <span id="cb43-3"><a href="#cb43-3"></a><span class="co">#> cyl hp drat wt</span></span> <span id="cb43-4"><a href="#cb43-4"></a><span class="co">#> &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb43-5"><a href="#cb43-5"></a><span class="co">#> 1 6 110 3.9 2.62</span></span> <span id="cb43-6"><a href="#cb43-6"></a><span class="co">#> 2 6 110 3.9 2.88</span></span> <span id="cb43-7"><a href="#cb43-7"></a><span class="co">#> 3 4 93 3.85 2.32</span></span> <span id="cb43-8"><a href="#cb43-8"></a><span class="co">#> 4 6 110 3.08 3.22</span></span> <span id="cb43-9"><a href="#cb43-9"></a><span class="co">#> # … with 28 more rows</span></span></code></pre></div> </div> <!-- code folding --> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>