<!-- EVOLUTION-MANAGER: Edit File: sql.html -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Writing SQL with dbplyr</title>
<script>
/*
 * Pandoc 2.9 adds attributes on both header and div. We remove the former
 * (to be compatible with the behavior of Pandoc < 2.8).
 *
 * Once the DOM is parsed, look at the first child of every
 * div.section whose class contains "level"; when that child is an h1-h6
 * element, strip every attribute from it. The wrapping div is left alone.
 *
 * Only block comments are used in this script so that it keeps working
 * even if the file's line breaks are collapsed again.
 */
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue; /* it should be a header h1-h6 */
    a = h.attributes;
    /* h.attributes is live: removing index 0 shrinks it until it is empty */
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
</script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code { counter-reset: source-line 0; }
pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before { content: counter(source-line); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; -khtml-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; padding: 0 4px; width: 4em; color: #aaaaaa; }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;
padding-left: 4px; }
div.sourceCode { }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
/*
 * Apply pandoc div.sourceCode style to pre.sourceCode instead.
 *
 * Walks every stylesheet whose owner element carries data-origin="pandoc".
 * Each style rule whose selector is exactly "div.sourceCode" and that sets
 * color or background-color is replaced, at the same index, by a
 * "pre.sourceCode" rule with the same declarations. All other rules are
 * left untouched.
 *
 * Only block comments are used in this script so that it keeps working
 * even if the file's line breaks are collapsed again.
 */
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    /* ownerNode can be null (e.g. for @import-ed sheets); skip those
       instead of throwing and aborting the whole pass */
    var owner = sheets[i].ownerNode;
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    /* reading cssRules may throw; such sheets are skipped */
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j];
      /* check if there is a div.sourceCode rule */
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
      var style = rule.style.cssText;
      /* check if color or background-color is set */
      if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
      /* replace div.sourceCode by a pre.sourceCode rule */
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">
body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; }
#TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; }
#TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; }
#TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; }
#TOC ul ul { margin-left: -2em; }
#TOC li { line-height: 16px; }
table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; }
table th { border-width: 2px; padding: 5px; border-style: inset; }
table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; }
table, table th, table td { border-left-style: none; border-right-style: none; }
table thead, table tr.even { background-color: #f7f7f7; }
p { margin: 0.5em 0; }
blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; }
hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; }
dl { margin-left: 0; }
dl dd { margin-bottom: 13px; margin-left: 13px; }
dl dt { font-weight: bold; }
ul { margin-top: 0; }
ul li { list-style: circle outside; }
ul
ul { margin-bottom: 0; } pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; } pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; } pre:not([class]) { background-color: #f7f7f7; } code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; } p > code, li > code { padding: 2px 0px; } div.figure { text-align: center; } img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; } h1 { margin-top: 0; font-size: 35px; line-height: 40px; } h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; } h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; } h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; } h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; } a { color: #0033dd; text-decoration: none; } a:hover { color: #6666ff; } a:visited { color: #800080; } a:visited:hover { color: #BB00BB; } a[href^="http:"] { text-decoration: underline; } a[href^="https:"] { text-decoration: underline; } code > span.kw { color: #555; font-weight: bold; } code > span.dt { color: #902000; } code > span.dv { color: #40a070; } code > span.bn { color: #d14; } code > span.fl { color: #d14; } code > span.ch { color: #d14; } code > span.st { color: #d14; } code > span.co { color: #888888; font-style: italic; } code > span.ot { color: #007020; } code > span.al { color: #ff0000; font-weight: bold; } code > span.fu { color: #900; font-weight: bold; } code > span.er { color: #a61717; background-color: #e3d2d2; } </style> </head> <body> <h1 class="title toc-ignore">Writing SQL with dbplyr</h1> <p>This vignette discusses why you might use dbplyr instead of writing SQL yourself, and what to do when dbplyr’s built-in translations can’t create the SQL that you need.</p> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span 
id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(dplyr)</span>
<span id="cb1-2"><a href="#cb1-2"></a><span class="kw">library</span>(dbplyr)</span>
<span id="cb1-3"><a href="#cb1-3"></a></span>
<span id="cb1-4"><a href="#cb1-4"></a>mf &lt;-<span class="st"> </span><span class="kw">memdb_frame</span>(<span class="dt">x =</span> <span class="dv">1</span>, <span class="dt">y =</span> <span class="dv">2</span>)</span></code></pre></div>
<div id="why-use-dbplyr" class="section level2">
<h2>Why use dbplyr?</h2>
<p>One simple nicety of dplyr is that it will automatically generate subqueries if you want to use a freshly created variable in <code>mutate()</code>:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb2-2"><a href="#cb2-2"></a><span class="st"> </span><span class="kw">mutate</span>(</span>
<span id="cb2-3"><a href="#cb2-3"></a> <span class="dt">a =</span> y <span class="op">*</span><span class="st"> </span>x, </span>
<span id="cb2-4"><a href="#cb2-4"></a> <span class="dt">b =</span> a <span class="op">^</span><span class="st"> </span><span class="dv">2</span>,</span>
<span id="cb2-5"><a href="#cb2-5"></a> ) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb2-6"><a href="#cb2-6"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb2-7"><a href="#cb2-7"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb2-8"><a href="#cb2-8"></a><span class="co">#&gt; SELECT `x`, `y`, `a`, POWER(`a`, 2.0) AS `b`</span></span>
<span id="cb2-9"><a href="#cb2-9"></a><span class="co">#&gt; FROM (SELECT `x`, `y`, `y` * `x` AS `a`</span></span>
<span id="cb2-10"><a href="#cb2-10"></a><span class="co">#&gt; FROM `dbplyr_002`)</span></span></code></pre></div>
<p>In general, it’s much easier to work iteratively in dbplyr.
You can easily give intermediate queries names, and reuse them in multiple places. Or if you have a common operation that you want to do to many queries, you can easily wrap it up in a function. It’s also easy to chain <code>count()</code> to the end of any query to check the results are about what you expect.</p>
</div>
<div id="what-happens-when-dbplyr-fails" class="section level2">
<h2>What happens when dbplyr fails?</h2>
<p>dbplyr aims to translate the most common R functions to their SQL equivalents, allowing you to ignore the vagaries of the SQL dialect that you’re working with, so you can focus on the data analysis problem at hand. But different backends have different capabilities, and sometimes there are SQL functions that don’t have exact equivalents in R. In those cases, you’ll need to write SQL code directly. This section shows you how you can do so.</p>
<div id="prefix-functions" class="section level3">
<h3>Prefix functions</h3>
<p>Any function that dbplyr doesn’t know about will be left as is:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb3-2"><a href="#cb3-2"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">z =</span> <span class="kw">foofify</span>(x, y)) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb3-3"><a href="#cb3-3"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb3-4"><a href="#cb3-4"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb3-5"><a href="#cb3-5"></a><span class="co">#&gt; SELECT `x`, `y`, foofify(`x`, `y`) AS `z`</span></span>
<span id="cb3-6"><a href="#cb3-6"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span></code></pre></div>
<p>Because SQL functions are generally case insensitive, I recommend using upper case when you’re using SQL functions in R code.
That makes it easier to spot that you’re doing something unusual:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb4-2"><a href="#cb4-2"></a><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">z =</span> <span class="kw">FOOFIFY</span>(x, y)) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb4-3"><a href="#cb4-3"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb4-4"><a href="#cb4-4"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb4-5"><a href="#cb4-5"></a><span class="co">#&gt; SELECT `x`, `y`, FOOFIFY(`x`, `y`) AS `z`</span></span>
<span id="cb4-6"><a href="#cb4-6"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span></code></pre></div>
</div>
<div id="infix-functions" class="section level3">
<h3>Infix functions</h3>
<p>As well as prefix functions (where the name of the function comes before the arguments), dbplyr also translates infix functions.
That allows you to use expressions like <code>LIKE</code>, which does a limited form of pattern matching:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb5-2"><a href="#cb5-2"></a><span class="st"> </span><span class="kw">filter</span>(x <span class="op">%LIKE%</span><span class="st"> "%foo%"</span>) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb5-3"><a href="#cb5-3"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb5-4"><a href="#cb5-4"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb5-5"><a href="#cb5-5"></a><span class="co">#&gt; SELECT *</span></span>
<span id="cb5-6"><a href="#cb5-6"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span>
<span id="cb5-7"><a href="#cb5-7"></a><span class="co">#&gt; WHERE (`x` LIKE '%foo%')</span></span></code></pre></div>
<p>Or use <code>||</code> for string concatenation (note that backends should translate <code>paste()</code> and <code>paste0()</code> for you):</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb6-2"><a href="#cb6-2"></a><span class="st"> </span><span class="kw">transmute</span>(<span class="dt">z =</span> x <span class="op">%||%</span><span class="st"> </span>y) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb6-3"><a href="#cb6-3"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb6-4"><a href="#cb6-4"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb6-5"><a href="#cb6-5"></a><span class="co">#&gt; SELECT `x` || `y` AS `z`</span></span>
<span id="cb6-6"><a href="#cb6-6"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span></code></pre></div>
</div>
<div id="special-forms"
class="section level3">
<h3>Special forms</h3>
<p>SQL functions tend to have a greater variety of syntax than R. That means there are a number of expressions that can’t be translated directly from R code. To insert these in your own queries, you can use literal SQL inside <code>sql()</code>:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb7-2"><a href="#cb7-2"></a><span class="st"> </span><span class="kw">transmute</span>(<span class="dt">factorial =</span> <span class="kw">sql</span>(<span class="st">"x!"</span>)) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb7-3"><a href="#cb7-3"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb7-4"><a href="#cb7-4"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb7-5"><a href="#cb7-5"></a><span class="co">#&gt; SELECT x! AS `factorial`</span></span>
<span id="cb7-6"><a href="#cb7-6"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span>
<span id="cb7-7"><a href="#cb7-7"></a></span>
<span id="cb7-8"><a href="#cb7-8"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb7-9"><a href="#cb7-9"></a><span class="st"> </span><span class="kw">transmute</span>(<span class="dt">factorial =</span> <span class="kw">sql</span>(<span class="st">"CAST(x AS FLOAT)"</span>)) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb7-10"><a href="#cb7-10"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb7-11"><a href="#cb7-11"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb7-12"><a href="#cb7-12"></a><span class="co">#&gt; SELECT CAST(x AS FLOAT) AS `factorial`</span></span>
<span id="cb7-13"><a href="#cb7-13"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span></code></pre></div>
<p>Note that you can use <code>sql()</code> at any depth inside
the expression:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>mf <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb8-2"><a href="#cb8-2"></a><span class="st"> </span><span class="kw">filter</span>(x <span class="op">==</span><span class="st"> </span><span class="kw">sql</span>(<span class="st">"ANY VALUES(1, 2, 3)"</span>)) <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb8-3"><a href="#cb8-3"></a><span class="st"> </span><span class="kw">show_query</span>()</span>
<span id="cb8-4"><a href="#cb8-4"></a><span class="co">#&gt; &lt;SQL&gt;</span></span>
<span id="cb8-5"><a href="#cb8-5"></a><span class="co">#&gt; SELECT *</span></span>
<span id="cb8-6"><a href="#cb8-6"></a><span class="co">#&gt; FROM `dbplyr_002`</span></span>
<span id="cb8-7"><a href="#cb8-7"></a><span class="co">#&gt; WHERE (`x` = ANY VALUES(1, 2, 3))</span></span></code></pre></div>
</div>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  /*
   * Inject the MathJax loader at runtime: create a <script> element,
   * point it at the hosted MathJax build, and append it to the first
   * <head> element of the document.
   */
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>
</body>
</html>