EVOLUTION-MANAGER
Edit File: tabyls.html
<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8"> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="pandoc" /> <meta name="viewport" content="width=device-width, initial-scale=1"> <style type="text/css"> @font-face { font-family: octicons-link; src: url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAZwABAAAAAACFQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEU0lHAAAGaAAAAAgAAAAIAAAAAUdTVUIAAAZcAAAACgAAAAoAAQAAT1MvMgAAAyQAAABJAAAAYFYEU3RjbWFwAAADcAAAAEUAAACAAJThvmN2dCAAAATkAAAABAAAAAQAAAAAZnBnbQAAA7gAAACyAAABCUM+8IhnYXNwAAAGTAAAABAAAAAQABoAI2dseWYAAAFsAAABPAAAAZwcEq9taGVhZAAAAsgAAAA0AAAANgh4a91oaGVhAAADCAAAABoAAAAkCA8DRGhtdHgAAAL8AAAADAAAAAwGAACfbG9jYQAAAsAAAAAIAAAACABiATBtYXhwAAACqAAAABgAAAAgAA8ASm5hbWUAAAToAAABQgAAAlXu73sOcG9zdAAABiwAAAAeAAAAME3QpOBwcmVwAAAEbAAAAHYAAAB/aFGpk3jaTY6xa8JAGMW/O62BDi0tJLYQincXEypYIiGJjSgHniQ6umTsUEyLm5BV6NDBP8Tpts6F0v+k/0an2i+itHDw3v2+9+DBKTzsJNnWJNTgHEy4BgG3EMI9DCEDOGEXzDADU5hBKMIgNPZqoD3SilVaXZCER3/I7AtxEJLtzzuZfI+VVkprxTlXShWKb3TBecG11rwoNlmmn1P2WYcJczl32etSpKnziC7lQyWe1smVPy/Lt7Kc+0vWY/gAgIIEqAN9we0pwKXreiMasxvabDQMM4riO+qxM2ogwDGOZTXxwxDiycQIcoYFBLj5K3EIaSctAq2kTYiw+ymhce7vwM9jSqO8JyVd5RH9gyTt2+J/yUmYlIR0s04n6+7Vm1ozezUeLEaUjhaDSuXHwVRgvLJn1tQ7xiuVv/ocTRF42mNgZGBgYGbwZOBiAAFGJBIMAAizAFoAAABiAGIAznjaY2BkYGAA4in8zwXi+W2+MjCzMIDApSwvXzC97Z4Ig8N/BxYGZgcgl52BCSQKAA3jCV8CAABfAAAAAAQAAEB42mNgZGBg4f3vACQZQABIMjKgAmYAKEgBXgAAeNpjYGY6wTiBgZWBg2kmUxoDA4MPhGZMYzBi1AHygVLYQUCaawqDA4PChxhmh/8ODDEsvAwHgMKMIDnGL0x7gJQCAwMAJd4MFwAAAHjaY2BgYGaA4DAGRgYQkAHyGMF8NgYrIM3JIAGVYYDT+AEjAwuDFpBmA9KMDEwMCh9i/v8H8sH0/4dQc1iAmAkALaUKLgAAAHjaTY9LDsIgEIbtgqHUPpDi3gPoBVyRTmTddOmqTXThEXqrob2gQ1FjwpDvfwCBdmdXC5AVKFu3e5MfNFJ29KTQT48Ob9/lqYwOGZxeUelN2U2R6+cArgtCJpauW7UQBqnFkUsjAY/kOU1cP+DAgvxwn1chZDwUbd6CFimGXwzwF6tPbFIcjEl+vvmM/byA48e6tWrKArm4ZJlCbdsrxksL1AwWn/yBSJKpYbq8AXaaTb8AAHja28jAwOC00ZrBeQNDQOWO//sdBBgYGRiYWYAEELEwMTE4uzo5Zzo5b2BxdnFOcALxNjA6b2ByTswC8jYwg0VlNuoCTWAMqNzMzsoK1rEhNq
ByEyerg5PMJlYuVueETKcd/89uBpnpvIEVomeHLoMsAAe1Id4AAAAAAAB42oWQT07CQBTGv0JBhagk7HQzKxca2sJCE1hDt4QF+9JOS0nbaaYDCQfwCJ7Au3AHj+LO13FMmm6cl7785vven0kBjHCBhfpYuNa5Ph1c0e2Xu3jEvWG7UdPDLZ4N92nOm+EBXuAbHmIMSRMs+4aUEd4Nd3CHD8NdvOLTsA2GL8M9PODbcL+hD7C1xoaHeLJSEao0FEW14ckxC+TU8TxvsY6X0eLPmRhry2WVioLpkrbp84LLQPGI7c6sOiUzpWIWS5GzlSgUzzLBSikOPFTOXqly7rqx0Z1Q5BAIoZBSFihQYQOOBEdkCOgXTOHA07HAGjGWiIjaPZNW13/+lm6S9FT7rLHFJ6fQbkATOG1j2OFMucKJJsxIVfQORl+9Jyda6Sl1dUYhSCm1dyClfoeDve4qMYdLEbfqHf3O/AdDumsjAAB42mNgYoAAZQYjBmyAGYQZmdhL8zLdDEydARfoAqIAAAABAAMABwAKABMAB///AA8AAQAAAAAAAAAAAAAAAAABAAAAAA==) format('woff'); } body { -webkit-text-size-adjust: 100%; text-size-adjust: 100%; color: #333; font-family: "Helvetica Neue", Helvetica, "Segoe UI", Arial, freesans, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; font-size: 16px; line-height: 1.6; word-wrap: break-word; } a { background-color: transparent; } a:active, a:hover { outline: 0; } strong { font-weight: bold; } h1 { font-size: 2em; margin: 0.67em 0; } img { border: 0; } hr { box-sizing: content-box; height: 0; } pre { overflow: auto; } code, kbd, pre { font-family: monospace, monospace; font-size: 1em; } input { color: inherit; font: inherit; margin: 0; } html input[disabled] { cursor: default; } input { line-height: normal; } input[type="checkbox"] { box-sizing: border-box; padding: 0; } table { border-collapse: collapse; border-spacing: 0; } td, th { padding: 0; } * { box-sizing: border-box; } input { font: 13px / 1.4 Helvetica, arial, nimbussansl, liberationsans, freesans, clean, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; } a { color: #4078c0; text-decoration: none; } a:hover, a:active { text-decoration: underline; } hr { height: 0; margin: 15px 0; overflow: hidden; background: transparent; border: 0; border-bottom: 1px solid #ddd; } hr:before { display: table; content: ""; } hr:after { display: table; clear: both; content: ""; } h1, h2, h3, h4, h5, h6 { margin-top: 15px; 
margin-bottom: 15px; line-height: 1.1; } h1 { font-size: 30px; } h2 { font-size: 21px; } h3 { font-size: 16px; } h4 { font-size: 14px; } h5 { font-size: 12px; } h6 { font-size: 11px; } blockquote { margin: 0; } ul, ol { padding: 0; margin-top: 0; margin-bottom: 0; } ol ol, ul ol { list-style-type: lower-roman; } ul ul ol, ul ol ol, ol ul ol, ol ol ol { list-style-type: lower-alpha; } dd { margin-left: 0; } code { font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace; font-size: 12px; } pre { margin-top: 0; margin-bottom: 0; font: 12px Consolas, "Liberation Mono", Menlo, Courier, monospace; } .select::-ms-expand { opacity: 0; } .octicon { font: normal normal normal 16px/1 octicons-link; display: inline-block; text-decoration: none; text-rendering: auto; -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale; -webkit-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; } .octicon-link:before { content: '\f05c'; } .markdown-body:before { display: table; content: ""; } .markdown-body:after { display: table; clear: both; content: ""; } .markdown-body>*:first-child { margin-top: 0 !important; } .markdown-body>*:last-child { margin-bottom: 0 !important; } a:not([href]) { color: inherit; text-decoration: none; } .anchor { display: inline-block; padding-right: 2px; margin-left: -18px; } .anchor:focus { outline: none; } h1, h2, h3, h4, h5, h6 { margin-top: 1em; margin-bottom: 16px; font-weight: bold; line-height: 1.4; } h1 .octicon-link, h2 .octicon-link, h3 .octicon-link, h4 .octicon-link, h5 .octicon-link, h6 .octicon-link { color: #000; vertical-align: middle; visibility: hidden; } h1:hover .anchor, h2:hover .anchor, h3:hover .anchor, h4:hover .anchor, h5:hover .anchor, h6:hover .anchor { text-decoration: none; } h1:hover .anchor .octicon-link, h2:hover .anchor .octicon-link, h3:hover .anchor .octicon-link, h4:hover .anchor .octicon-link, h5:hover .anchor .octicon-link, h6:hover .anchor .octicon-link { 
visibility: visible; } h1 { padding-bottom: 0.3em; font-size: 2.25em; line-height: 1.2; border-bottom: 1px solid #eee; } h1 .anchor { line-height: 1; } h2 { padding-bottom: 0.3em; font-size: 1.75em; line-height: 1.225; border-bottom: 1px solid #eee; } h2 .anchor { line-height: 1; } h3 { font-size: 1.5em; line-height: 1.43; } h3 .anchor { line-height: 1.2; } h4 { font-size: 1.25em; } h4 .anchor { line-height: 1.2; } h5 { font-size: 1em; } h5 .anchor { line-height: 1.1; } h6 { font-size: 1em; color: #777; } h6 .anchor { line-height: 1.1; } p, blockquote, ul, ol, dl, table, pre { margin-top: 0; margin-bottom: 16px; } hr { height: 4px; padding: 0; margin: 16px 0; background-color: #e7e7e7; border: 0 none; } ul, ol { padding-left: 2em; } ul ul, ul ol, ol ol, ol ul { margin-top: 0; margin-bottom: 0; } li>p { margin-top: 16px; } dl { padding: 0; } dl dt { padding: 0; margin-top: 16px; font-size: 1em; font-style: italic; font-weight: bold; } dl dd { padding: 0 16px; margin-bottom: 16px; } blockquote { padding: 0 15px; color: #777; border-left: 4px solid #ddd; } blockquote>:first-child { margin-top: 0; } blockquote>:last-child { margin-bottom: 0; } table { display: block; width: 100%; overflow: auto; word-break: normal; word-break: keep-all; } table th { font-weight: bold; } table th, table td { padding: 6px 13px; border: 1px solid #ddd; } table tr { background-color: #fff; border-top: 1px solid #ccc; } table tr:nth-child(2n) { background-color: #f8f8f8; } img { max-width: 100%; box-sizing: content-box; background-color: #fff; } code { padding: 0; padding-top: 0.2em; padding-bottom: 0.2em; margin: 0; font-size: 85%; background-color: rgba(0,0,0,0.04); border-radius: 3px; } code:before, code:after { letter-spacing: -0.2em; content: "\00a0"; } pre>code { padding: 0; margin: 0; font-size: 100%; word-break: normal; white-space: pre; background: transparent; border: 0; } .highlight { margin-bottom: 16px; } .highlight pre, pre { padding: 16px; overflow: auto; font-size: 85%; 
line-height: 1.45; background-color: #f7f7f7; border-radius: 3px; } .highlight pre { margin-bottom: 0; word-break: normal; } pre { word-wrap: normal; } pre code { display: inline; max-width: initial; padding: 0; margin: 0; overflow: initial; line-height: inherit; word-wrap: normal; background-color: transparent; border: 0; } pre code:before, pre code:after { content: normal; } kbd { display: inline-block; padding: 3px 5px; font-size: 11px; line-height: 10px; color: #555; vertical-align: middle; background-color: #fcfcfc; border: solid 1px #ccc; border-bottom-color: #bbb; border-radius: 3px; box-shadow: inset 0 -1px 0 #bbb; } .pl-c { color: #969896; } .pl-c1, .pl-s .pl-v { color: #0086b3; } .pl-e, .pl-en { color: #795da3; } .pl-s .pl-s1, .pl-smi { color: #333; } .pl-ent { color: #63a35c; } .pl-k { color: #a71d5d; } .pl-pds, .pl-s, .pl-s .pl-pse .pl-s1, .pl-sr, .pl-sr .pl-cce, .pl-sr .pl-sra, .pl-sr .pl-sre { color: #183691; } .pl-v { color: #ed6a43; } .pl-id { color: #b52a1d; } .pl-ii { background-color: #b52a1d; color: #f8f8f8; } .pl-sr .pl-cce { color: #63a35c; font-weight: bold; } .pl-ml { color: #693a17; } .pl-mh, .pl-mh .pl-en, .pl-ms { color: #1d3e81; font-weight: bold; } .pl-mq { color: #008080; } .pl-mi { color: #333; font-style: italic; } .pl-mb { color: #333; font-weight: bold; } .pl-md { background-color: #ffecec; color: #bd2c00; } .pl-mi1 { background-color: #eaffea; color: #55a532; } .pl-mdr { color: #795da3; font-weight: bold; } .pl-mo { color: #1d3e81; } kbd { display: inline-block; padding: 3px 5px; font: 11px Consolas, "Liberation Mono", Menlo, Courier, monospace; line-height: 10px; color: #555; vertical-align: middle; background-color: #fcfcfc; border: solid 1px #ccc; border-bottom-color: #bbb; border-radius: 3px; box-shadow: inset 0 -1px 0 #bbb; } .task-list-item { list-style-type: none; } .task-list-item+.task-list-item { margin-top: 3px; } .task-list-item input { margin: 0 0.35em 0.25em -1.6em; vertical-align: middle; } :checked+.radio-label { 
z-index: 1; position: relative; border-color: #4078c0; } .sourceLine { display: inline-block; } code .kw { color: #000000; } code .dt { color: #ed6a43; } code .dv { color: #009999; } code .bn { color: #009999; } code .fl { color: #009999; } code .ch { color: #009999; } code .st { color: #183691; } code .co { color: #969896; } code .ot { color: #0086b3; } code .al { color: #a61717; } code .fu { color: #63a35c; } code .er { color: #a61717; background-color: #e3d2d2; } code .wa { color: #000000; } code .cn { color: #008080; } code .sc { color: #008080; } code .vs { color: #183691; } code .ss { color: #183691; } code .im { color: #000000; } code .va {color: #008080; } code .cf { color: #000000; } code .op { color: #000000; } code .bu { color: #000000; } code .ex { color: #000000; } code .pp { color: #999999; } code .at { color: #008080; } code .do { color: #969896; } code .an { color: #008080; } code .cv { color: #008080; } code .in { color: #008080; } </style> <style> body { box-sizing: border-box; min-width: 200px; max-width: 980px; margin: 0 auto; padding: 45px; padding-top: 0px; } </style> </head> <body> <h1 id="tabyls-a-tidy-fully-featured-approach-to-counting-things">tabyls: a tidy, fully-featured approach to counting things</h1> <p>2021-01-04</p> <h2 id="motivation-why-tabyl">Motivation: why tabyl?</h2> <p>Analysts do a lot of counting. Indeed, it’s been said that “data science is mostly counting things.” But the base R function for counting, <code>table()</code>, leaves much to be desired:</p> <ul> <li>It doesn’t accept data.frame inputs (and thus doesn’t play nicely with the <code>%>%</code> pipe)</li> <li>It doesn’t output data.frames</li> <li>Its results are hard to format. Compare the look and formatting choices of an R table to a Microsoft Excel PivotTable or even the table formatting provided by SPSS.</li> </ul> <p><code>tabyl()</code> is an approach to tabulating variables that addresses these shortcomings. 
It’s part of the janitor package because counting is such a fundamental part of data cleaning and exploration.</p> <p><code>tabyl()</code> is tidyverse-aligned and is primarily built upon the dplyr and tidyr packages.</p> <h2 id="how-it-works">How it works</h2> <p>On its surface, <code>tabyl()</code> produces frequency tables using 1, 2, or 3 variables. Under the hood, <code>tabyl()</code> also attaches a copy of these counts as an attribute of the resulting data.frame.</p> <p>The result looks like a basic data.frame of counts, but because it’s also a <code>tabyl</code> containing this metadata, you can use <code>adorn_</code> functions to add additional information and pretty formatting.</p> <p>The <code>adorn_</code> functions are built to work on <code>tabyls</code>, but have been adapted to work with similar, non-tabyl data.frames that need formatting.</p> <h1 id="examples">Examples</h1> <p>This vignette demonstrates <code>tabyl</code> in the context of studying humans in the <code>starwars</code> dataset from dplyr:</p> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb1-1" title="1"><span class="kw">library</span>(dplyr)</a> <a class="sourceLine" id="cb1-2" title="2">humans <-<span class="st"> </span>starwars <span class="op">%>%</span></a> <a class="sourceLine" id="cb1-3" title="3"><span class="st"> </span><span class="kw">filter</span>(species <span class="op">==</span><span class="st"> "Human"</span>)</a></code></pre></div> <h2 id="one-way-tabyl">One-way tabyl</h2> <p>Tabulating a single variable is the simplest kind of tabyl:</p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb2-1" title="1"><span class="kw">library</span>(janitor)</a> <a class="sourceLine" id="cb2-2" title="2"></a> <a class="sourceLine" id="cb2-3" title="3">t1 <-<span class="st"> </span>humans <span class="op">%>%</span></a> <a class="sourceLine" id="cb2-4" 
title="4"><span class="st"> </span><span class="kw">tabyl</span>(eye_color)</a> <a class="sourceLine" id="cb2-5" title="5"></a> <a class="sourceLine" id="cb2-6" title="6">t1</a> <a class="sourceLine" id="cb2-7" title="7"><span class="co">#> eye_color n percent</span></a> <a class="sourceLine" id="cb2-8" title="8"><span class="co">#> blue 12 0.34285714</span></a> <a class="sourceLine" id="cb2-9" title="9"><span class="co">#> blue-gray 1 0.02857143</span></a> <a class="sourceLine" id="cb2-10" title="10"><span class="co">#> brown 17 0.48571429</span></a> <a class="sourceLine" id="cb2-11" title="11"><span class="co">#> dark 1 0.02857143</span></a> <a class="sourceLine" id="cb2-12" title="12"><span class="co">#> hazel 2 0.05714286</span></a> <a class="sourceLine" id="cb2-13" title="13"><span class="co">#> yellow 2 0.05714286</span></a></code></pre></div> <p>When <code>NA</code> values are present, <code>tabyl()</code> also displays “valid” percentages, i.e., with missing values removed from the denominator. 
And while <code>tabyl()</code> is built to take a data.frame and column names, you can also produce a one-way tabyl by calling it directly on a vector:</p> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" title="1">x <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"big"</span>, <span class="st">"big"</span>, <span class="st">"small"</span>, <span class="st">"small"</span>, <span class="st">"small"</span>, <span class="ot">NA</span>)</a> <a class="sourceLine" id="cb3-2" title="2"><span class="kw">tabyl</span>(x)</a> <a class="sourceLine" id="cb3-3" title="3"><span class="co">#> x n percent valid_percent</span></a> <a class="sourceLine" id="cb3-4" title="4"><span class="co">#> big 2 0.3333333 0.4</span></a> <a class="sourceLine" id="cb3-5" title="5"><span class="co">#> small 3 0.5000000 0.6</span></a> <a class="sourceLine" id="cb3-6" title="6"><span class="co">#> &lt;NA&gt; 1 0.1666667 NA</span></a></code></pre></div> <p>Most <code>adorn_</code> helper functions are built for 2-way tabyls, but those that make sense for a 1-way tabyl do work:</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" title="1">t1 <span class="op">%>%</span></a> <a class="sourceLine" id="cb4-2" title="2"><span class="st"> </span><span class="kw">adorn_totals</span>(<span class="st">"row"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb4-3" title="3"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>()</a> <a class="sourceLine" id="cb4-4" title="4"><span class="co">#> eye_color n percent</span></a> <a class="sourceLine" id="cb4-5" title="5"><span class="co">#> blue 12 34.3%</span></a> <a class="sourceLine" id="cb4-6" title="6"><span class="co">#> blue-gray 1 2.9%</span></a> <a class="sourceLine" id="cb4-7" title="7"><span class="co">#> brown 17 48.6%</span></a> <a class="sourceLine" id="cb4-8" 
title="8"><span class="co">#> dark 1 2.9%</span></a> <a class="sourceLine" id="cb4-9" title="9"><span class="co">#> hazel 2 5.7%</span></a> <a class="sourceLine" id="cb4-10" title="10"><span class="co">#> yellow 2 5.7%</span></a> <a class="sourceLine" id="cb4-11" title="11"><span class="co">#> Total 35 100.0%</span></a></code></pre></div> <h2 id="two-way-tabyl">Two-way tabyl</h2> <p>This is often called a “crosstab” or “contingency” table. Calling <code>tabyl</code> on two columns of a data.frame produces the same result as the common combination of <code>dplyr::count()</code>, followed by <code>tidyr::pivot_wider()</code> to wide form:</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb5-1" title="1">t2 <-<span class="st"> </span>humans <span class="op">%>%</span></a> <a class="sourceLine" id="cb5-2" title="2"><span class="st"> </span><span class="kw">tabyl</span>(gender, eye_color)</a> <a class="sourceLine" id="cb5-3" title="3"></a> <a class="sourceLine" id="cb5-4" title="4">t2</a> <a class="sourceLine" id="cb5-5" title="5"><span class="co">#> gender blue blue-gray brown dark hazel yellow</span></a> <a class="sourceLine" id="cb5-6" title="6"><span class="co">#> feminine 3 0 5 0 1 0</span></a> <a class="sourceLine" id="cb5-7" title="7"><span class="co">#> masculine 9 1 12 1 1 2</span></a></code></pre></div> <p>Since it’s a <code>tabyl</code>, we can enhance it with <code>adorn_</code> helper functions. 
For instance:</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb6-1" title="1"></a> <a class="sourceLine" id="cb6-2" title="2">t2 <span class="op">%>%</span></a> <a class="sourceLine" id="cb6-3" title="3"><span class="st"> </span><span class="kw">adorn_percentages</span>(<span class="st">"row"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb6-4" title="4"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>(<span class="dt">digits =</span> <span class="dv">2</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb6-5" title="5"><span class="st"> </span><span class="kw">adorn_ns</span>()</a> <a class="sourceLine" id="cb6-6" title="6"><span class="co">#> gender blue blue-gray brown dark hazel yellow</span></a> <a class="sourceLine" id="cb6-7" title="7"><span class="co">#> feminine 33.33% (3) 0.00% (0) 55.56% (5) 0.00% (0) 11.11% (1) 0.00% (0)</span></a> <a class="sourceLine" id="cb6-8" title="8"><span class="co">#> masculine 34.62% (9) 3.85% (1) 46.15% (12) 3.85% (1) 3.85% (1) 7.69% (2)</span></a></code></pre></div> <p>Adornments have options to control axes, rounding, and other relevant formatting choices (more on that below).</p> <h2 id="three-way-tabyl">Three-way tabyl</h2> <p>Just as <code>table()</code> accepts three variables, so does <code>tabyl()</code>, producing a list of tabyls:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb7-1" title="1">t3 <-<span class="st"> </span>humans <span class="op">%>%</span></a> <a class="sourceLine" id="cb7-2" title="2"><span class="st"> </span><span class="kw">tabyl</span>(eye_color, skin_color, gender)</a> <a class="sourceLine" id="cb7-3" title="3"></a> <a class="sourceLine" id="cb7-4" title="4"><span class="co"># the result is a tabyl of eye color x skin color, split into a list by gender</span></a> <a class="sourceLine" id="cb7-5" 
title="5">t3 </a> <a class="sourceLine" id="cb7-6" title="6"><span class="co">#> $feminine</span></a> <a class="sourceLine" id="cb7-7" title="7"><span class="co">#> eye_color dark fair light pale tan white</span></a> <a class="sourceLine" id="cb7-8" title="8"><span class="co">#> blue 0 2 1 0 0 0</span></a> <a class="sourceLine" id="cb7-9" title="9"><span class="co">#> blue-gray 0 0 0 0 0 0</span></a> <a class="sourceLine" id="cb7-10" title="10"><span class="co">#> brown 0 1 4 0 0 0</span></a> <a class="sourceLine" id="cb7-11" title="11"><span class="co">#> dark 0 0 0 0 0 0</span></a> <a class="sourceLine" id="cb7-12" title="12"><span class="co">#> hazel 0 0 1 0 0 0</span></a> <a class="sourceLine" id="cb7-13" title="13"><span class="co">#> yellow 0 0 0 0 0 0</span></a> <a class="sourceLine" id="cb7-14" title="14"><span class="co">#> </span></a> <a class="sourceLine" id="cb7-15" title="15"><span class="co">#> $masculine</span></a> <a class="sourceLine" id="cb7-16" title="16"><span class="co">#> eye_color dark fair light pale tan white</span></a> <a class="sourceLine" id="cb7-17" title="17"><span class="co">#> blue 0 7 2 0 0 0</span></a> <a class="sourceLine" id="cb7-18" title="18"><span class="co">#> blue-gray 0 1 0 0 0 0</span></a> <a class="sourceLine" id="cb7-19" title="19"><span class="co">#> brown 3 4 3 0 2 0</span></a> <a class="sourceLine" id="cb7-20" title="20"><span class="co">#> dark 1 0 0 0 0 0</span></a> <a class="sourceLine" id="cb7-21" title="21"><span class="co">#> hazel 0 1 0 0 0 0</span></a> <a class="sourceLine" id="cb7-22" title="22"><span class="co">#> yellow 0 0 0 1 0 1</span></a></code></pre></div> <p>If the <code>adorn_</code> helper functions are called on a list of data.frames - like the output of a three-way <code>tabyl</code> call - they will call <code>purrr::map()</code> to apply themselves to each data.frame in the list:</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" 
id="cb8-1" title="1"><span class="kw">library</span>(purrr)</a> <a class="sourceLine" id="cb8-2" title="2">humans <span class="op">%>%</span></a> <a class="sourceLine" id="cb8-3" title="3"><span class="st"> </span><span class="kw">tabyl</span>(eye_color, skin_color, gender, <span class="dt">show_missing_levels =</span> <span class="ot">FALSE</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb8-4" title="4"><span class="st"> </span><span class="kw">adorn_totals</span>(<span class="st">"row"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb8-5" title="5"><span class="st"> </span><span class="kw">adorn_percentages</span>(<span class="st">"all"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb8-6" title="6"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>(<span class="dt">digits =</span> <span class="dv">1</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb8-7" title="7"><span class="st"> </span>adorn_ns <span class="op">%>%</span></a> <a class="sourceLine" id="cb8-8" title="8"><span class="st"> </span>adorn_title</a> <a class="sourceLine" id="cb8-9" title="9"><span class="co">#> $feminine</span></a> <a class="sourceLine" id="cb8-10" title="10"><span class="co">#> skin_color </span></a> <a class="sourceLine" id="cb8-11" title="11"><span class="co">#> eye_color fair light</span></a> <a class="sourceLine" id="cb8-12" title="12"><span class="co">#> blue 22.2% (2) 11.1% (1)</span></a> <a class="sourceLine" id="cb8-13" title="13"><span class="co">#> brown 11.1% (1) 44.4% (4)</span></a> <a class="sourceLine" id="cb8-14" title="14"><span class="co">#> hazel 0.0% (0) 11.1% (1)</span></a> <a class="sourceLine" id="cb8-15" title="15"><span class="co">#> Total 33.3% (3) 66.7% (6)</span></a> <a class="sourceLine" id="cb8-16" title="16"><span class="co">#> </span></a> <a class="sourceLine" id="cb8-17" title="17"><span class="co">#> $masculine</span></a> <a class="sourceLine" id="cb8-18" 
title="18"><span class="co">#> skin_color </span></a> <a class="sourceLine" id="cb8-19" title="19"><span class="co">#> eye_color dark fair light pale tan white</span></a> <a class="sourceLine" id="cb8-20" title="20"><span class="co">#> blue 0.0% (0) 26.9% (7) 7.7% (2) 0.0% (0) 0.0% (0) 0.0% (0)</span></a> <a class="sourceLine" id="cb8-21" title="21"><span class="co">#> blue-gray 0.0% (0) 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)</span></a> <a class="sourceLine" id="cb8-22" title="22"><span class="co">#> brown 11.5% (3) 15.4% (4) 11.5% (3) 0.0% (0) 7.7% (2) 0.0% (0)</span></a> <a class="sourceLine" id="cb8-23" title="23"><span class="co">#> dark 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)</span></a> <a class="sourceLine" id="cb8-24" title="24"><span class="co">#> hazel 0.0% (0) 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)</span></a> <a class="sourceLine" id="cb8-25" title="25"><span class="co">#> yellow 0.0% (0) 0.0% (0) 0.0% (0) 3.8% (1) 0.0% (0) 3.8% (1)</span></a> <a class="sourceLine" id="cb8-26" title="26"><span class="co">#> Total 15.4% (4) 50.0% (13) 19.2% (5) 3.8% (1) 7.7% (2) 3.8% (1)</span></a></code></pre></div> <p>This automatic mapping supports interactive data analysis that switches between combinations of 2 and 3 variables. That way, if a user starts with <code>humans %>% tabyl(eye_color, skin_color)</code>, adds some <code>adorn_</code> calls, then decides to split the tabulation by gender and modifies their first line to <code>humans %>% tabyl(eye_color, skin_color, gender)</code>, they don’t have to rewrite the subsequent adornment calls to use <code>map()</code>.</p> <p>However, if it feels more natural to call these with <code>map()</code> or <code>lapply()</code>, that is still supported. 
For instance, <code>t3 %>% lapply(adorn_percentages)</code> would produce the same result as <code>t3 %>% adorn_percentages</code>.</p> <h3 id="other-features-of-tabyls">Other features of tabyls</h3> <ul> <li>When called on a factor, <code>tabyl</code> will show missing levels (levels not present in the data) in the result <ul> <li>This can be suppressed if not desired</li> </ul></li> <li><code>NA</code> values can be displayed or suppressed</li> <li><code>tabyls</code> print without displaying row numbers</li> </ul> <p>You can call <code>chisq.test()</code> and <code>fisher.test()</code> on a two-way tabyl to perform those statistical tests, just like on a base R <code>table()</code> object.</p> <h2 id="the-adorn_-functions">The <code>adorn_*</code> functions</h2> <p>These modular functions build on a <code>tabyl</code> to approximate the functionality of a PivotTable in Microsoft Excel. They print elegant results for interactive analysis or for sharing in a report, e.g., with <code>knitr::kable()</code>. 
For example:</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb9-1" title="1">humans <span class="op">%>%</span></a> <a class="sourceLine" id="cb9-2" title="2"><span class="st"> </span><span class="kw">tabyl</span>(gender, eye_color) <span class="op">%>%</span></a> <a class="sourceLine" id="cb9-3" title="3"><span class="st"> </span><span class="kw">adorn_totals</span>(<span class="kw">c</span>(<span class="st">"row"</span>, <span class="st">"col"</span>)) <span class="op">%>%</span></a> <a class="sourceLine" id="cb9-4" title="4"><span class="st"> </span><span class="kw">adorn_percentages</span>(<span class="st">"row"</span>) <span class="op">%>%</span><span class="st"> </span></a> <a class="sourceLine" id="cb9-5" title="5"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>(<span class="dt">rounding =</span> <span class="st">"half up"</span>, <span class="dt">digits =</span> <span class="dv">0</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb9-6" title="6"><span class="st"> </span><span class="kw">adorn_ns</span>() <span class="op">%>%</span></a> <a class="sourceLine" id="cb9-7" title="7"><span class="st"> </span><span class="kw">adorn_title</span>(<span class="st">"combined"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb9-8" title="8"><span class="st"> </span>knitr<span class="op">::</span><span class="kw">kable</span>()</a></code></pre></div> <table> <thead> <tr class="header"> <th align="left">gender/eye_color</th> <th align="left">blue</th> <th align="left">blue-gray</th> <th align="left">brown</th> <th align="left">dark</th> <th align="left">hazel</th> <th align="left">yellow</th> <th align="left">Total</th> </tr> </thead> <tbody> <tr class="odd"> <td align="left">feminine</td> <td align="left">33% (3)</td> <td align="left">0% (0)</td> <td align="left">56% (5)</td> <td align="left">0% (0)</td> <td align="left">11% (1)</td> <td 
align="left">0% (0)</td> <td align="left">100% (9)</td> </tr> <tr class="even"> <td align="left">masculine</td> <td align="left">35% (9)</td> <td align="left">4% (1)</td> <td align="left">46% (12)</td> <td align="left">4% (1)</td> <td align="left">4% (1)</td> <td align="left">8% (2)</td> <td align="left">100% (26)</td> </tr> <tr class="odd"> <td align="left">Total</td> <td align="left">34% (12)</td> <td align="left">3% (1)</td> <td align="left">49% (17)</td> <td align="left">3% (1)</td> <td align="left">6% (2)</td> <td align="left">6% (2)</td> <td align="left">100% (35)</td> </tr> </tbody> </table> <h3 id="the-adorn-functions-are">The adorn functions are:</h3> <ul> <li><strong><code>adorn_totals()</code></strong>: Add totals row, column, or both.</li> <li><strong><code>adorn_percentages()</code></strong>: Calculate percentages along either axis or over the entire tabyl</li> <li><strong><code>adorn_pct_formatting()</code></strong>: Format percentage columns, controlling the number of digits to display and whether to append the <code>%</code> symbol</li> <li><strong><code>adorn_rounding()</code></strong>: Round a data.frame of numbers (usually the result of <code>adorn_percentages</code>), either using the base R <code>round()</code> function or using janitor’s <code>round_half_up()</code> to round all ties up (<a href="https://stackoverflow.com/a/12688836/4470365">thanks, StackOverflow</a>). <ul> <li>e.g., round 10.5 up to 11, consistent with Excel’s tie-breaking behavior. <ul> <li>This contrasts with rounding 10.5 down to 10 as in base R’s <code>round(10.5)</code>.</li> </ul></li> <li><code>adorn_rounding()</code> returns columns of class <code>numeric</code>, allowing for graphing, sorting, etc. It’s a less-aggressive substitute for <code>adorn_pct_formatting()</code>; these two functions should not be called together.</li> </ul></li> <li><strong><code>adorn_ns()</code></strong>: add Ns to a tabyl. 
These can be drawn from the tabyl’s underlying counts, which are attached to the tabyl as metadata, or they can be supplied by the user.</li> <li><strong><code>adorn_title()</code></strong>: add a title to a tabyl (or other data.frame). Options include putting the column title in a new row on top of the data.frame or combining the row and column titles in the data.frame’s first name slot.</li> </ul> <p>These adornments should be called in a logical order, e.g., you probably want to add totals before percentages are calculated. In general, call them in the order they appear above.</p> <h2 id="byot-bring-your-own-tabyl">BYOt (Bring Your Own tabyl)</h2> <p>You can also call <code>adorn_</code> functions on other data.frames, not only the results of calls to <code>tabyl()</code>. E.g., <code>mtcars %>% adorn_totals("col") %>% adorn_percentages("col")</code> performs as expected, despite <code>mtcars</code> not being a <code>tabyl</code>.</p> <p>This can be handy when you have a data.frame that is not a simple tabulation generated by <code>tabyl</code> but would still benefit from the <code>adorn_</code> formatting functions.</p> <p>A simple example: calculate the proportion of records meeting a certain condition, then format the results.</p> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb10-1" title="1">percent_above_<span class="dv">165</span>_cm <-<span class="st"> </span>humans <span class="op">%>%</span></a> <a class="sourceLine" id="cb10-2" title="2"><span class="st"> </span><span class="kw">group_by</span>(gender) <span class="op">%>%</span></a> <a class="sourceLine" id="cb10-3" title="3"><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">pct_above_165_cm =</span> <span class="kw">mean</span>(height <span class="op">></span><span class="st"> </span><span class="dv">165</span>, <span class="dt">na.rm =</span> <span class="ot">TRUE</span>), <span class="dt">.groups =</span> 
<span class="st">"drop"</span>)</a> <a class="sourceLine" id="cb10-4" title="4"></a> <a class="sourceLine" id="cb10-5" title="5">percent_above_<span class="dv">165</span>_cm <span class="op">%>%</span></a> <a class="sourceLine" id="cb10-6" title="6"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>()</a> <a class="sourceLine" id="cb10-7" title="7"><span class="co">#> # A tibble: 2 x 2</span></a> <a class="sourceLine" id="cb10-8" title="8"><span class="co">#> gender pct_above_165_cm</span></a> <a class="sourceLine" id="cb10-9" title="9"><span class="co">#> &lt;chr&gt; &lt;chr&gt; </span></a> <a class="sourceLine" id="cb10-10" title="10"><span class="co">#> 1 feminine 12.5% </span></a> <a class="sourceLine" id="cb10-11" title="11"><span class="co">#> 2 masculine 100.0%</span></a></code></pre></div> <p>You can control which columns are adorned by using the <code>...</code> argument. It accepts the <a href="https://r4ds.had.co.nz/transform.html#select">tidyselect helpers</a>. That is, you can specify columns the same way you would using <code>dplyr::select()</code>.</p> <p>For instance, say you have a numeric column that should not be included in percentage formatting and you wish to exempt it. 
Here, only the <code>proportion</code> column is adorned:</p> <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb11-1" title="1">mtcars <span class="op">%>%</span></a> <a class="sourceLine" id="cb11-2" title="2"><span class="st"> </span><span class="kw">count</span>(cyl, gear) <span class="op">%>%</span></a> <a class="sourceLine" id="cb11-3" title="3"><span class="st"> </span><span class="kw">rename</span>(<span class="dt">proportion =</span> n) <span class="op">%>%</span></a> <a class="sourceLine" id="cb11-4" title="4"><span class="st"> </span><span class="kw">adorn_percentages</span>(<span class="st">"col"</span>, <span class="dt">na.rm =</span> <span class="ot">TRUE</span>, proportion) <span class="op">%>%</span></a> <a class="sourceLine" id="cb11-5" title="5"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>(,,,proportion) <span class="co"># the commas say to use the default values of the other arguments</span></a> <a class="sourceLine" id="cb11-6" title="6"><span class="co">#> cyl gear proportion</span></a> <a class="sourceLine" id="cb11-7" title="7"><span class="co">#> 4 3 3.1%</span></a> <a class="sourceLine" id="cb11-8" title="8"><span class="co">#> 4 4 25.0%</span></a> <a class="sourceLine" id="cb11-9" title="9"><span class="co">#> 4 5 6.2%</span></a> <a class="sourceLine" id="cb11-10" title="10"><span class="co">#> 6 3 6.2%</span></a> <a class="sourceLine" id="cb11-11" title="11"><span class="co">#> 6 4 12.5%</span></a> <a class="sourceLine" id="cb11-12" title="12"><span class="co">#> 6 5 3.1%</span></a> <a class="sourceLine" id="cb11-13" title="13"><span class="co">#> 8 3 37.5%</span></a> <a class="sourceLine" id="cb11-14" title="14"><span class="co">#> 8 5 6.2%</span></a></code></pre></div> <p>Here we specify that only two consecutive numeric columns should be totaled (<code>year</code> is numeric but should not be included):</p> <div class="sourceCode" id="cb12"><pre 
class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb12-1" title="1">cases <-<span class="st"> </span><span class="kw">data.frame</span>(</a> <a class="sourceLine" id="cb12-2" title="2"> <span class="dt">region =</span> <span class="kw">c</span>(<span class="st">"East"</span>, <span class="st">"West"</span>),</a> <a class="sourceLine" id="cb12-3" title="3"> <span class="dt">year =</span> <span class="dv">2015</span>,</a> <a class="sourceLine" id="cb12-4" title="4"> <span class="dt">recovered =</span> <span class="kw">c</span>(<span class="dv">125</span>, <span class="dv">87</span>),</a> <a class="sourceLine" id="cb12-5" title="5"> <span class="dt">died =</span> <span class="kw">c</span>(<span class="dv">13</span>, <span class="dv">12</span>)</a> <a class="sourceLine" id="cb12-6" title="6">)</a> <a class="sourceLine" id="cb12-7" title="7"></a> <a class="sourceLine" id="cb12-8" title="8">cases <span class="op">%>%</span></a> <a class="sourceLine" id="cb12-9" title="9"><span class="st"> </span><span class="kw">adorn_totals</span>(<span class="kw">c</span>(<span class="st">"col"</span>, <span class="st">"row"</span>), <span class="dt">fill =</span> <span class="st">"-"</span>, <span class="dt">na.rm =</span> <span class="ot">TRUE</span>, <span class="dt">name =</span> <span class="st">"Total Cases"</span>, recovered<span class="op">:</span>died)</a> <a class="sourceLine" id="cb12-10" title="10"><span class="co">#> region year recovered died Total Cases</span></a> <a class="sourceLine" id="cb12-11" title="11"><span class="co">#> East 2015 125 13 138</span></a> <a class="sourceLine" id="cb12-12" title="12"><span class="co">#> West 2015 87 12 99</span></a> <a class="sourceLine" id="cb12-13" title="13"><span class="co">#> Total Cases - 212 25 237</span></a></code></pre></div> <p>Here’s a more complex example that uses a data.frame of means, not counts. 
We create a table containing the mean of a third variable when grouped by two other variables, then use <code>adorn_</code> functions to round the values and append Ns. The first part is pretty straightforward:</p> <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb13-1" title="1"><span class="kw">library</span>(tidyr) <span class="co"># for spread()</span></a> <a class="sourceLine" id="cb13-2" title="2">mpg_by_cyl_and_am &lt;-<span class="st"> </span>mtcars <span class="op">%>%</span></a> <a class="sourceLine" id="cb13-3" title="3"><span class="st"> </span><span class="kw">group_by</span>(cyl, am) <span class="op">%>%</span></a> <a class="sourceLine" id="cb13-4" title="4"><span class="st"> </span><span class="kw">summarise</span>(<span class="dt">mpg =</span> <span class="kw">mean</span>(mpg), <span class="dt">.groups =</span> <span class="st">"drop"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb13-5" title="5"><span class="st"> </span><span class="kw">spread</span>(am, mpg)</a> <a class="sourceLine" id="cb13-6" title="6"></a> <a class="sourceLine" id="cb13-7" title="7">mpg_by_cyl_and_am</a> <a class="sourceLine" id="cb13-8" title="8"><span class="co">#> # A tibble: 3 x 3</span></a> <a class="sourceLine" id="cb13-9" title="9"><span class="co">#> cyl `0` `1`</span></a> <a class="sourceLine" id="cb13-10" title="10"><span class="co">#> &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></a> <a class="sourceLine" id="cb13-11" title="11"><span class="co">#> 1 4 22.9 28.1</span></a> <a class="sourceLine" id="cb13-12" title="12"><span class="co">#> 2 6 19.1 20.6</span></a> <a class="sourceLine" id="cb13-13" title="13"><span class="co">#> 3 8 15.0 15.4</span></a></code></pre></div> <p>Now to <code>adorn_</code> it. 
Since this is not the result of a <code>tabyl()</code> call, it doesn’t have the underlying Ns stored in the <code>core</code> attribute, so we’ll have to supply them:</p> <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb14-1" title="1">mpg_by_cyl_and_am <span class="op">%>%</span></a> <a class="sourceLine" id="cb14-2" title="2"><span class="st"> </span><span class="kw">adorn_rounding</span>() <span class="op">%>%</span></a> <a class="sourceLine" id="cb14-3" title="3"><span class="st"> </span><span class="kw">adorn_ns</span>(</a> <a class="sourceLine" id="cb14-4" title="4"> <span class="dt">ns =</span> mtcars <span class="op">%>%</span><span class="st"> </span><span class="co"># calculate the Ns on the fly by calling tabyl on the original data</span></a> <a class="sourceLine" id="cb14-5" title="5"><span class="st"> </span><span class="kw">tabyl</span>(cyl, am)</a> <a class="sourceLine" id="cb14-6" title="6"> ) <span class="op">%>%</span></a> <a class="sourceLine" id="cb14-7" title="7"><span class="st"> </span><span class="kw">adorn_title</span>(<span class="st">"combined"</span>, <span class="dt">row_name =</span> <span class="st">"Cylinders"</span>, <span class="dt">col_name =</span> <span class="st">"Is Automatic"</span>)</a> <a class="sourceLine" id="cb14-8" title="8"><span class="co">#> Cylinders/Is Automatic 0 1</span></a> <a class="sourceLine" id="cb14-9" title="9"><span class="co">#> 1 4 22.9 (3) 28.1 (8)</span></a> <a class="sourceLine" id="cb14-10" title="10"><span class="co">#> 2 6 19.1 (4) 20.6 (3)</span></a> <a class="sourceLine" id="cb14-11" title="11"><span class="co">#> 3 8 15.1 (12) 15.4 (2)</span></a></code></pre></div> <p>If needed, Ns can be manipulated in their own data.frame before they are appended. 
Here a tabyl with values in the thousands has its Ns formatted to include the separating character <code>,</code> as typically seen in American numbers, e.g., <code>3,000</code>.</p> <p>First we create the tabyl to adorn:</p> <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb15-1" title="1"><span class="kw">set.seed</span>(<span class="dv">1</span>)</a> <a class="sourceLine" id="cb15-2" title="2">raw_data <-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">sex =</span> <span class="kw">rep</span>(<span class="kw">c</span>(<span class="st">"m"</span>, <span class="st">"f"</span>), <span class="dv">3000</span>),</a> <a class="sourceLine" id="cb15-3" title="3"> <span class="dt">age =</span> <span class="kw">round</span>(<span class="kw">runif</span>(<span class="dv">3000</span>, <span class="dv">1</span>, <span class="dv">102</span>), <span class="dv">0</span>))</a> <a class="sourceLine" id="cb15-4" title="4">raw_data<span class="op">$</span>agegroup =<span class="st"> </span><span class="kw">cut</span>(raw_data<span class="op">$</span>age, <span class="kw">quantile</span>(raw_data<span class="op">$</span>age, <span class="kw">c</span>(<span class="dv">0</span>, <span class="dv">1</span><span class="op">/</span><span class="dv">3</span>, <span class="dv">2</span><span class="op">/</span><span class="dv">3</span>, <span class="dv">1</span>)))</a> <a class="sourceLine" id="cb15-5" title="5"></a> <a class="sourceLine" id="cb15-6" title="6">comparison <-<span class="st"> </span>raw_data <span class="op">%>%</span></a> <a class="sourceLine" id="cb15-7" title="7"><span class="st"> </span><span class="kw">tabyl</span>(agegroup, sex, <span class="dt">show_missing_levels =</span> F) <span class="op">%>%</span></a> <a class="sourceLine" id="cb15-8" title="8"><span class="st"> </span><span class="kw">adorn_totals</span>(<span class="kw">c</span>(<span class="st">"row"</span>, <span 
class="st">"col"</span>)) <span class="op">%>%</span></a> <a class="sourceLine" id="cb15-9" title="9"><span class="st"> </span><span class="kw">adorn_percentages</span>(<span class="st">"col"</span>) <span class="op">%>%</span></a> <a class="sourceLine" id="cb15-10" title="10"><span class="st"> </span><span class="kw">adorn_pct_formatting</span>(<span class="dt">digits =</span> <span class="dv">1</span>)</a> <a class="sourceLine" id="cb15-11" title="11"></a> <a class="sourceLine" id="cb15-12" title="12">comparison</a> <a class="sourceLine" id="cb15-13" title="13"><span class="co">#> agegroup f m Total</span></a> <a class="sourceLine" id="cb15-14" title="14"><span class="co">#> (1,34] 33.9% 32.3% 33.1%</span></a> <a class="sourceLine" id="cb15-15" title="15"><span class="co">#> (34,68] 33.0% 33.7% 33.4%</span></a> <a class="sourceLine" id="cb15-16" title="16"><span class="co">#> (68,102] 32.7% 33.3% 33.0%</span></a> <a class="sourceLine" id="cb15-17" title="17"><span class="co">#> &lt;NA&gt; 0.4% 0.6% 0.5%</span></a> <a class="sourceLine" id="cb15-18" title="18"><span class="co">#> Total 100.0% 100.0% 100.0%</span></a></code></pre></div> <p>At this point, the Ns are unformatted:</p> <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb16-1" title="1">comparison <span class="op">%>%</span></a> <a class="sourceLine" id="cb16-2" title="2"><span class="st"> </span><span class="kw">adorn_ns</span>()</a> <a class="sourceLine" id="cb16-3" title="3"><span class="co">#> agegroup f m Total</span></a> <a class="sourceLine" id="cb16-4" title="4"><span class="co">#> (1,34] 33.9% (1018) 32.3% (970) 33.1% (1988)</span></a> <a class="sourceLine" id="cb16-5" title="5"><span class="co">#> (34,68] 33.0% (990) 33.7% (1012) 33.4% (2002)</span></a> <a class="sourceLine" id="cb16-6" title="6"><span class="co">#> (68,102] 32.7% (980) 33.3% (1000) 33.0% (1980)</span></a> <a class="sourceLine" id="cb16-7" title="7"><span class="co">#> 
&lt;NA&gt; 0.4% (12) 0.6% (18) 0.5% (30)</span></a> <a class="sourceLine" id="cb16-8" title="8"><span class="co">#> Total 100.0% (3000) 100.0% (3000) 100.0% (6000)</span></a></code></pre></div> <p>Now we format them to insert the thousands commas. A tabyl’s raw Ns are stored in its <code>"core"</code> attribute. Here we retrieve those with <code>attr()</code>, then apply the base R function <code>format()</code> to all numeric columns. Lastly, we append these Ns using <code>adorn_ns()</code>.</p> <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb17-1" title="1">formatted_ns &lt;-<span class="st"> </span><span class="kw">attr</span>(comparison, <span class="st">"core"</span>) <span class="op">%>%</span><span class="st"> </span><span class="co"># extract the tabyl's underlying Ns</span></a> <a class="sourceLine" id="cb17-2" title="2"><span class="st"> </span><span class="kw">adorn_totals</span>(<span class="kw">c</span>(<span class="st">"row"</span>, <span class="st">"col"</span>)) <span class="op">%>%</span><span class="st"> </span><span class="co"># to match the data.frame we're appending to</span></a> <a class="sourceLine" id="cb17-3" title="3"><span class="st"> </span>dplyr<span class="op">::</span><span class="kw">mutate_if</span>(is.numeric, format, <span class="dt">big.mark =</span> <span class="st">","</span>)</a> <a class="sourceLine" id="cb17-4" title="4"></a> <a class="sourceLine" id="cb17-5" title="5">comparison <span class="op">%>%</span></a> <a class="sourceLine" id="cb17-6" title="6"><span class="st"> </span><span class="kw">adorn_ns</span>(<span class="dt">position =</span> <span class="st">"rear"</span>, <span class="dt">ns =</span> formatted_ns)</a> <a class="sourceLine" id="cb17-7" title="7"><span class="co">#> agegroup f m Total</span></a> <a class="sourceLine" id="cb17-8" title="8"><span class="co">#> (1,34] 33.9% (1,018) 32.3% ( 970) 33.1% (1,988)</span></a> <a class="sourceLine" 
id="cb17-9" title="9"><span class="co">#> (34,68] 33.0% ( 990) 33.7% (1,012) 33.4% (2,002)</span></a> <a class="sourceLine" id="cb17-10" title="10"><span class="co">#> (68,102] 32.7% ( 980) 33.3% (1,000) 33.0% (1,980)</span></a> <a class="sourceLine" id="cb17-11" title="11"><span class="co">#> &lt;NA&gt; 0.4% ( 12) 0.6% ( 18) 0.5% ( 30)</span></a> <a class="sourceLine" id="cb17-12" title="12"><span class="co">#> Total 100.0% (3,000) 100.0% (3,000) 100.0% (6,000)</span></a></code></pre></div> <h3 id="questions-comments">Questions? Comments?</h3> <p>File <a href="https://github.com/sfirke/janitor/issues">an issue on GitHub</a> if you have suggestions related to <code>tabyl()</code> and its <code>adorn_</code> helpers or encounter problems while using them.</p> </body> </html>