EVOLUTION-MANAGER
Edit File: datatable-reshape.html
<!DOCTYPE html> <html> <head> <meta charset="utf-8" /> <meta name="generator" content="pandoc" /> <meta http-equiv="X-UA-Compatible" content="IE=EDGE" /> <meta name="viewport" content="width=device-width, initial-scale=1" /> <meta name="date" content="2022-10-09" /> <title>Efficient reshaping using data.tables</title> <script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to // be compatible with the behavior of Pandoc < 2.8). document.addEventListener('DOMContentLoaded', function(e) { var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); var i, h, a; for (i = 0; i < hs.length; i++) { h = hs[i]; if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 a = h.attributes; while (a.length > 0) h.removeAttribute(a[0].name); } }); </script> <script>// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> // v0.0.1 // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. document.addEventListener('DOMContentLoaded', function() { const codeList = document.getElementsByClassName("sourceCode"); for (var i = 0; i < codeList.length; i++) { var linkList = codeList[i].getElementsByTagName('a'); for (var j = 0; j < linkList.length; j++) { if (linkList[j].innerHTML === "") { linkList[j].setAttribute('aria-hidden', 'true'); } } } }); </script> <style type="text/css"> code{white-space: pre-wrap;} span.smallcaps{font-variant: small-caps;} span.underline{text-decoration: underline;} div.column{display: inline-block; vertical-align: top; width: 50%;} div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} ul.task-list{list-style: none;} </style> <style type="text/css"> code { white-space: pre; } .sourceCode { overflow: visible; } </style> <style type="text/css" data-origin="pandoc"> pre > code.sourceCode { white-space: pre; position: relative; } pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } pre > code.sourceCode > span:empty { height: 1.2em; } code.sourceCode > span { color: inherit; text-decoration: inherit; } div.sourceCode { margin: 1em 0; } pre.sourceCode { margin: 0; } @media screen { div.sourceCode { overflow: auto; } } @media print { pre > code.sourceCode { white-space: pre-wrap; } pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } } pre.numberSource code { counter-reset: source-line 0; } pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; } pre.numberSource code > span > a:first-child::before { content: counter(source-line); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; -khtml-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; padding: 0 4px; width: 4em; color: #aaaaaa; } pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } div.sourceCode { } @media screen { pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } } code span.al { color: #ff0000; font-weight: bold; } /* Alert */ code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ code span.at { color: #7d9029; } /* Attribute */ code span.bn { color: #40a070; } /* BaseN */ code span.bu { } /* BuiltIn */ code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ code span.ch { color: #4070a0; } /* Char */ code span.cn { color: #880000; } /* Constant */ code span.co { color: #60a0b0; font-style: italic; } /* Comment */ code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ code span.do { color: #ba2121; font-style: italic; } /* Documentation */ code span.dt { color: #902000; } /* DataType */ code span.dv { color: #40a070; } /* DecVal */ code span.er { color: #ff0000; font-weight: bold; } /* Error */ code span.ex { } /* Extension */ code span.fl { color: #40a070; } /* Float */ code span.fu { color: #06287e; } /* Function */ code span.im { } /* Import */ code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ code span.kw { color: #007020; font-weight: bold; } /* Keyword */ code span.op { color: #666666; } /* Operator */ code span.ot { color: #007020; } /* Other */ code span.pp { color: #bc7a00; } /* Preprocessor */ code span.sc { color: #4070a0; } /* SpecialChar */ code span.ss { color: #bb6688; } /* SpecialString */ code span.st { color: #4070a0; } /* String */ code span.va { color: #19177c; } /* Variable */ code span.vs { color: #4070a0; } /* VerbatimString */ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ </style> <script> // apply pandoc div.sourceCode style to pre.sourceCode instead (function() { var sheets = document.styleSheets; for (var i = 0; i < sheets.length; i++) { if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue; try { var rules = sheets[i].cssRules; } catch (e) { continue; } var j = 0; while (j < rules.length) { var rule = rules[j]; // check if there is a div.sourceCode rule if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") { j++; continue; } var style = rule.style.cssText; // check if color or background-color is set if (rule.style.color === '' && rule.style.backgroundColor === '') { j++; continue; } // replace div.sourceCode by a pre.sourceCode rule sheets[i].deleteRule(j); sheets[i].insertRule('pre.sourceCode{' + style + '}', j); } } })(); </script> <style type="text/css">body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; } #TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; } #TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; } #TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; } #TOC ul ul { margin-left: -2em; } #TOC li { line-height: 16px; } table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; } table th { border-width: 2px; padding: 5px; border-style: inset; } table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; } table, table th, table td { border-left-style: none; border-right-style: none; } table thead, table tr.even { background-color: #f7f7f7; } p { margin: 0.5em 0; } blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; } hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; } dl { margin-left: 0; } dl dd { margin-bottom: 13px; margin-left: 13px; } dl dt { font-weight: bold; } ul { margin-top: 0; } ul li { list-style: circle outside; } ul ul { margin-bottom: 0; } pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; } pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; } pre:not([class]) { background-color: #f7f7f7; } code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; } p > code, li > code { padding: 2px 0px; } div.figure { text-align: center; } img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; } h1 { margin-top: 0; font-size: 35px; line-height: 40px; } h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; } h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; } h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; } h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; } a { color: #0033dd; text-decoration: none; } a:hover { color: #6666ff; } a:visited { color: #800080; } a:visited:hover { color: #BB00BB; } a[href^="http:"] { text-decoration: underline; } a[href^="https:"] { text-decoration: underline; } code > span.kw { color: #555; font-weight: bold; } code > span.dt { color: #902000; } code > span.dv { color: #40a070; } code > span.bn { color: #d14; } code > span.fl { color: #d14; } code > span.ch { color: #d14; } code > span.st { color: #d14; } code > span.co { color: #888888; font-style: italic; } code > span.ot { color: #007020; } code > span.al { color: #ff0000; font-weight: bold; } code > span.fu { color: #900; font-weight: bold; } code > span.er { color: #a61717; background-color: #e3d2d2; } </style> </head> <body> <h1 class="title toc-ignore">Efficient reshaping using data.tables</h1> <h4 class="date">2022-10-09</h4> <p>This vignette discusses the default usage of reshaping functions <code>melt</code> (wide to long) and <code>dcast</code> (long to wide) for <em>data.tables</em> as well as the <strong>new extended functionalities</strong> of melting and casting on <em>multiple columns</em> available from <code>v1.9.6</code>.</p> <hr /> <div id="data" class="section level2"> <h2>Data</h2> <p>We will load the data sets directly within sections.</p> </div> <div id="introduction" class="section level2"> <h2>Introduction</h2> <p>The <code>melt</code> and <code>dcast</code> functions for <code>data.table</code>s are for reshaping wide-to-long and long-to-wide, respectively; the implementations are specifically designed with large in-memory data (e.g. 10Gb) in mind.</p> <p>In this vignette, we will</p> <ol style="list-style-type: decimal"> <li><p>First briefly look at the default <code>melt</code>ing and <code>dcast</code>ing of <code>data.table</code>s to convert them from <em>wide</em> to <em>long</em> format and <em>vice versa</em></p></li> <li><p>Look at scenarios where the current functionalities become cumbersome and inefficient</p></li> <li><p>Finally look at the new improvements to both <code>melt</code> and <code>dcast</code> methods for <code>data.table</code>s to handle multiple columns simultaneously.</p></li> </ol> <p>The extended functionalities are in line with <code>data.table</code>’s philosophy of performing operations efficiently and in a straightforward manner.</p> </div> <div id="default-functionality" class="section level2"> <h2>1. Default functionality</h2> <div id="a-melting-data.tables-wide-to-long" class="section level3"> <h3>a) <code>melt</code>ing <code>data.table</code>s (wide to long)</h3> <p>Suppose we have a <code>data.table</code> (artificial data) as shown below:</p> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true"></a>s1 <-<span class="st"> "family_id age_mother dob_child1 dob_child2 dob_child3</span></span> <span id="cb1-2"><a href="#cb1-2" aria-hidden="true"></a><span class="st">1 30 1998-11-26 2000-01-29 NA</span></span> <span id="cb1-3"><a href="#cb1-3" aria-hidden="true"></a><span class="st">2 27 1996-06-22 NA NA</span></span> <span id="cb1-4"><a href="#cb1-4" aria-hidden="true"></a><span class="st">3 26 2002-07-11 2004-04-05 2007-09-02</span></span> <span id="cb1-5"><a href="#cb1-5" aria-hidden="true"></a><span class="st">4 32 2004-10-10 2009-08-27 2012-07-21</span></span> <span id="cb1-6"><a href="#cb1-6" aria-hidden="true"></a><span class="st">5 29 2000-12-05 2005-02-28 NA"</span></span> <span id="cb1-7"><a href="#cb1-7" aria-hidden="true"></a>DT <-<span class="st"> </span><span class="kw">fread</span>(s1)</span> <span id="cb1-8"><a href="#cb1-8" aria-hidden="true"></a>DT</span> <span id="cb1-9"><a href="#cb1-9" aria-hidden="true"></a><span class="co"># family_id age_mother dob_child1 dob_child2 dob_child3</span></span> <span id="cb1-10"><a href="#cb1-10" aria-hidden="true"></a><span class="co"># 1: 1 30 1998-11-26 2000-01-29 <NA></span></span> <span id="cb1-11"><a href="#cb1-11" aria-hidden="true"></a><span class="co"># 2: 2 27 1996-06-22 <NA> <NA></span></span> <span id="cb1-12"><a href="#cb1-12" aria-hidden="true"></a><span class="co"># 3: 3 26 2002-07-11 2004-04-05 2007-09-02</span></span> <span id="cb1-13"><a href="#cb1-13" aria-hidden="true"></a><span class="co"># 4: 4 32 2004-10-10 2009-08-27 2012-07-21</span></span> <span id="cb1-14"><a href="#cb1-14" aria-hidden="true"></a><span class="co"># 5: 5 29 2000-12-05 2005-02-28 <NA></span></span> <span id="cb1-15"><a href="#cb1-15" aria-hidden="true"></a><span class="co">## dob stands for date of birth.</span></span> <span id="cb1-16"><a href="#cb1-16" aria-hidden="true"></a></span> <span id="cb1-17"><a href="#cb1-17" aria-hidden="true"></a><span class="kw">str</span>(DT)</span> <span id="cb1-18"><a href="#cb1-18" aria-hidden="true"></a><span class="co"># Classes 'data.table' and 'data.frame': 5 obs. of 5 variables:</span></span> <span id="cb1-19"><a href="#cb1-19" aria-hidden="true"></a><span class="co"># $ family_id : int 1 2 3 4 5</span></span> <span id="cb1-20"><a href="#cb1-20" aria-hidden="true"></a><span class="co"># $ age_mother: int 30 27 26 32 29</span></span> <span id="cb1-21"><a href="#cb1-21" aria-hidden="true"></a><span class="co"># $ dob_child1: IDate, format: "1998-11-26" "1996-06-22" "2002-07-11" ...</span></span> <span id="cb1-22"><a href="#cb1-22" aria-hidden="true"></a><span class="co"># $ dob_child2: IDate, format: "2000-01-29" NA "2004-04-05" ...</span></span> <span id="cb1-23"><a href="#cb1-23" aria-hidden="true"></a><span class="co"># $ dob_child3: IDate, format: NA NA "2007-09-02" ...</span></span> <span id="cb1-24"><a href="#cb1-24" aria-hidden="true"></a><span class="co"># - attr(*, ".internal.selfref")=<externalptr></span></span></code></pre></div> <div id="convert-dt-to-long-form-where-each-dob-is-a-separate-observation." class="section level4"> <h4>- Convert <code>DT</code> to <em>long</em> form where each <code>dob</code> is a separate observation.</h4> <p>We could accomplish this using <code>melt()</code> by specifying <code>id.vars</code> and <code>measure.vars</code> arguments as follows:</p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true"></a>DT.m1 =<span class="st"> </span><span class="kw">melt</span>(DT, <span class="dt">id.vars =</span> <span class="kw">c</span>(<span class="st">"family_id"</span>, <span class="st">"age_mother"</span>),</span> <span id="cb2-2"><a href="#cb2-2" aria-hidden="true"></a> <span class="dt">measure.vars =</span> <span class="kw">c</span>(<span class="st">"dob_child1"</span>, <span class="st">"dob_child2"</span>, <span class="st">"dob_child3"</span>))</span> <span id="cb2-3"><a href="#cb2-3" aria-hidden="true"></a>DT.m1</span> <span id="cb2-4"><a href="#cb2-4" aria-hidden="true"></a><span class="co"># family_id age_mother variable value</span></span> <span id="cb2-5"><a href="#cb2-5" aria-hidden="true"></a><span class="co"># 1: 1 30 dob_child1 1998-11-26</span></span> <span id="cb2-6"><a href="#cb2-6" aria-hidden="true"></a><span class="co"># 2: 2 27 dob_child1 1996-06-22</span></span> <span id="cb2-7"><a href="#cb2-7" aria-hidden="true"></a><span class="co"># 3: 3 26 dob_child1 2002-07-11</span></span> <span id="cb2-8"><a href="#cb2-8" aria-hidden="true"></a><span class="co"># 4: 4 32 dob_child1 2004-10-10</span></span> <span id="cb2-9"><a href="#cb2-9" aria-hidden="true"></a><span class="co"># 5: 5 29 dob_child1 2000-12-05</span></span> <span id="cb2-10"><a href="#cb2-10" aria-hidden="true"></a><span class="co"># 6: 1 30 dob_child2 2000-01-29</span></span> <span id="cb2-11"><a href="#cb2-11" aria-hidden="true"></a><span class="co"># 7: 2 27 dob_child2 <NA></span></span> <span id="cb2-12"><a href="#cb2-12" aria-hidden="true"></a><span class="co"># 8: 3 26 dob_child2 2004-04-05</span></span> <span id="cb2-13"><a href="#cb2-13" aria-hidden="true"></a><span class="co"># 9: 4 32 dob_child2 2009-08-27</span></span> <span id="cb2-14"><a href="#cb2-14" aria-hidden="true"></a><span class="co"># 10: 5 29 dob_child2 2005-02-28</span></span> <span id="cb2-15"><a href="#cb2-15" aria-hidden="true"></a><span class="co"># 11: 1 30 dob_child3 <NA></span></span> <span id="cb2-16"><a href="#cb2-16" aria-hidden="true"></a><span class="co"># 12: 2 27 dob_child3 <NA></span></span> <span id="cb2-17"><a href="#cb2-17" aria-hidden="true"></a><span class="co"># 13: 3 26 dob_child3 2007-09-02</span></span> <span id="cb2-18"><a href="#cb2-18" aria-hidden="true"></a><span class="co"># 14: 4 32 dob_child3 2012-07-21</span></span> <span id="cb2-19"><a href="#cb2-19" aria-hidden="true"></a><span class="co"># 15: 5 29 dob_child3 <NA></span></span> <span id="cb2-20"><a href="#cb2-20" aria-hidden="true"></a><span class="kw">str</span>(DT.m1)</span> <span id="cb2-21"><a href="#cb2-21" aria-hidden="true"></a><span class="co"># Classes 'data.table' and 'data.frame': 15 obs. of 4 variables:</span></span> <span id="cb2-22"><a href="#cb2-22" aria-hidden="true"></a><span class="co"># $ family_id : int 1 2 3 4 5 1 2 3 4 5 ...</span></span> <span id="cb2-23"><a href="#cb2-23" aria-hidden="true"></a><span class="co"># $ age_mother: int 30 27 26 32 29 30 27 26 32 29 ...</span></span> <span id="cb2-24"><a href="#cb2-24" aria-hidden="true"></a><span class="co"># $ variable : Factor w/ 3 levels "dob_child1","dob_child2",..: 1 1 1 1 1 2 2 2 2 2 ...</span></span> <span id="cb2-25"><a href="#cb2-25" aria-hidden="true"></a><span class="co"># $ value : IDate, format: "1998-11-26" "1996-06-22" "2002-07-11" ...</span></span> <span id="cb2-26"><a href="#cb2-26" aria-hidden="true"></a><span class="co"># - attr(*, ".internal.selfref")=<externalptr></span></span></code></pre></div> </div> <div id="section" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info"></h4> <ul> <li><p><code>measure.vars</code> specify the set of columns we would like to collapse (or combine) together.</p></li> <li><p>We can also specify column <em>indices</em> instead of <em>names</em>.</p></li> <li><p>By default, <code>variable</code> column is of type <code>factor</code>. Set <code>variable.factor</code> argument to <code>FALSE</code> if you’d like to return a <em><code>character</code></em> vector instead.</p></li> <li><p>By default, the molten columns are automatically named <code>variable</code> and <code>value</code>.</p></li> <li><p><code>melt</code> preserves column attributes in result.</p></li> </ul> </div> <div id="name-the-variable-and-value-columns-to-child-and-dob-respectively" class="section level4"> <h4>- Name the <code>variable</code> and <code>value</code> columns to <code>child</code> and <code>dob</code> respectively</h4> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true"></a>DT.m1 =<span class="st"> </span><span class="kw">melt</span>(DT, <span class="dt">measure.vars =</span> <span class="kw">c</span>(<span class="st">"dob_child1"</span>, <span class="st">"dob_child2"</span>, <span class="st">"dob_child3"</span>),</span> <span id="cb3-2"><a href="#cb3-2" aria-hidden="true"></a> <span class="dt">variable.name =</span> <span class="st">"child"</span>, <span class="dt">value.name =</span> <span class="st">"dob"</span>)</span> <span id="cb3-3"><a href="#cb3-3" aria-hidden="true"></a>DT.m1</span> <span id="cb3-4"><a href="#cb3-4" aria-hidden="true"></a><span class="co"># family_id age_mother child dob</span></span> <span id="cb3-5"><a href="#cb3-5" aria-hidden="true"></a><span class="co"># 1: 1 30 dob_child1 1998-11-26</span></span> <span id="cb3-6"><a href="#cb3-6" aria-hidden="true"></a><span class="co"># 2: 2 27 dob_child1 1996-06-22</span></span> <span id="cb3-7"><a href="#cb3-7" aria-hidden="true"></a><span class="co"># 3: 3 26 dob_child1 2002-07-11</span></span> <span id="cb3-8"><a href="#cb3-8" aria-hidden="true"></a><span class="co"># 4: 4 32 dob_child1 2004-10-10</span></span> <span id="cb3-9"><a href="#cb3-9" aria-hidden="true"></a><span class="co"># 5: 5 29 dob_child1 2000-12-05</span></span> <span id="cb3-10"><a href="#cb3-10" aria-hidden="true"></a><span class="co"># 6: 1 30 dob_child2 2000-01-29</span></span> <span id="cb3-11"><a href="#cb3-11" aria-hidden="true"></a><span class="co"># 7: 2 27 dob_child2 <NA></span></span> <span id="cb3-12"><a href="#cb3-12" aria-hidden="true"></a><span class="co"># 8: 3 26 dob_child2 2004-04-05</span></span> <span id="cb3-13"><a href="#cb3-13" aria-hidden="true"></a><span class="co"># 9: 4 32 dob_child2 2009-08-27</span></span> <span id="cb3-14"><a href="#cb3-14" aria-hidden="true"></a><span class="co"># 10: 5 29 dob_child2 2005-02-28</span></span> <span id="cb3-15"><a href="#cb3-15" aria-hidden="true"></a><span class="co"># 11: 1 30 dob_child3 <NA></span></span> <span id="cb3-16"><a href="#cb3-16" aria-hidden="true"></a><span class="co"># 12: 2 27 dob_child3 <NA></span></span> <span id="cb3-17"><a href="#cb3-17" aria-hidden="true"></a><span class="co"># 13: 3 26 dob_child3 2007-09-02</span></span> <span id="cb3-18"><a href="#cb3-18" aria-hidden="true"></a><span class="co"># 14: 4 32 dob_child3 2012-07-21</span></span> <span id="cb3-19"><a href="#cb3-19" aria-hidden="true"></a><span class="co"># 15: 5 29 dob_child3 <NA></span></span></code></pre></div> </div> <div id="section-1" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info"></h4> <ul> <li><p>By default, when one of <code>id.vars</code> or <code>measure.vars</code> is missing, the rest of the columns are <em>automatically assigned</em> to the missing argument.</p></li> <li><p>When neither <code>id.vars</code> nor <code>measure.vars</code> are specified, as mentioned under <code>?melt</code>, all <em>non</em>-<code>numeric</code>, <code>integer</code>, <code>logical</code> columns will be assigned to <code>id.vars</code>.</p> <p>In addition, a warning message is issued highlighting the columns that are automatically considered to be <code>id.vars</code>.</p></li> </ul> </div> </div> <div id="b-dcasting-data.tables-long-to-wide" class="section level3"> <h3>b) <code>dcast</code>ing <code>data.table</code>s (long to wide)</h3> <p>In the previous section, we saw how to get from wide form to long form. Let’s see the reverse operation in this section.</p> <div id="how-can-we-get-back-to-the-original-data-table-dt-from-dt.m" class="section level4"> <h4>- How can we get back to the original data table <code>DT</code> from <code>DT.m</code>?</h4> <p>That is, we’d like to collect all <em>child</em> observations corresponding to each <code>family_id, age_mother</code> together under the same row. We can accomplish it using <code>dcast</code> as follows:</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true"></a><span class="kw">dcast</span>(DT.m1, family_id <span class="op">+</span><span class="st"> </span>age_mother <span class="op">~</span><span class="st"> </span>child, <span class="dt">value.var =</span> <span class="st">"dob"</span>)</span> <span id="cb4-2"><a href="#cb4-2" aria-hidden="true"></a><span class="co"># family_id age_mother dob_child1 dob_child2 dob_child3</span></span> <span id="cb4-3"><a href="#cb4-3" aria-hidden="true"></a><span class="co"># 1: 1 30 1998-11-26 2000-01-29 <NA></span></span> <span id="cb4-4"><a href="#cb4-4" aria-hidden="true"></a><span class="co"># 2: 2 27 1996-06-22 <NA> <NA></span></span> <span id="cb4-5"><a href="#cb4-5" aria-hidden="true"></a><span class="co"># 3: 3 26 2002-07-11 2004-04-05 2007-09-02</span></span> <span id="cb4-6"><a href="#cb4-6" aria-hidden="true"></a><span class="co"># 4: 4 32 2004-10-10 2009-08-27 2012-07-21</span></span> <span id="cb4-7"><a href="#cb4-7" aria-hidden="true"></a><span class="co"># 5: 5 29 2000-12-05 2005-02-28 <NA></span></span></code></pre></div> </div> <div id="section-2" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info"></h4> <ul> <li><p><code>dcast</code> uses <em>formula</em> interface. The variables on the <em>LHS</em> of formula represents the <em>id</em> vars and <em>RHS</em> the <em>measure</em> vars.</p></li> <li><p><code>value.var</code> denotes the column to be filled in with while casting to wide format.</p></li> <li><p><code>dcast</code> also tries to preserve attributes in result wherever possible.</p></li> </ul> </div> <div id="starting-from-dt.m-how-can-we-get-the-number-of-children-in-each-family" class="section level4"> <h4>- Starting from <code>DT.m</code>, how can we get the number of children in each family?</h4> <p>You can also pass a function to aggregate by in <code>dcast</code> with the argument <code>fun.aggregate</code>. This is particularly essential when the formula provided does not identify single observation for each cell.</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true"></a><span class="kw">dcast</span>(DT.m1, family_id <span class="op">~</span><span class="st"> </span>., <span class="dt">fun.agg =</span> <span class="cf">function</span>(x) <span class="kw">sum</span>(<span class="op">!</span><span class="kw">is.na</span>(x)), <span class="dt">value.var =</span> <span class="st">"dob"</span>)</span> <span id="cb5-2"><a href="#cb5-2" aria-hidden="true"></a><span class="co"># family_id .</span></span> <span id="cb5-3"><a href="#cb5-3" aria-hidden="true"></a><span class="co"># 1: 1 2</span></span> <span id="cb5-4"><a href="#cb5-4" aria-hidden="true"></a><span class="co"># 2: 2 1</span></span> <span id="cb5-5"><a href="#cb5-5" aria-hidden="true"></a><span class="co"># 3: 3 3</span></span> <span id="cb5-6"><a href="#cb5-6" aria-hidden="true"></a><span class="co"># 4: 4 3</span></span> <span id="cb5-7"><a href="#cb5-7" aria-hidden="true"></a><span class="co"># 5: 5 2</span></span></code></pre></div> <p>Check <code>?dcast</code> for other useful arguments and additional examples.</p> </div> </div> </div> <div id="limitations-in-current-meltdcast-approaches" class="section level2"> <h2>2. Limitations in current <code>melt/dcast</code> approaches</h2> <p>So far we’ve seen features of <code>melt</code> and <code>dcast</code> that are implemented efficiently for <code>data.table</code>s, using internal <code>data.table</code> machinery (<em>fast radix ordering</em>, <em>binary search</em> etc..).</p> <p>However, there are situations we might run into where the desired operation is not expressed in a straightforward manner. For example, consider the <code>data.table</code> shown below:</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true"></a>s2 <-<span class="st"> "family_id age_mother dob_child1 dob_child2 dob_child3 gender_child1 gender_child2 gender_child3</span></span> <span id="cb6-2"><a href="#cb6-2" aria-hidden="true"></a><span class="st">1 30 1998-11-26 2000-01-29 NA 1 2 NA</span></span> <span id="cb6-3"><a href="#cb6-3" aria-hidden="true"></a><span class="st">2 27 1996-06-22 NA NA 2 NA NA</span></span> <span id="cb6-4"><a href="#cb6-4" aria-hidden="true"></a><span class="st">3 26 2002-07-11 2004-04-05 2007-09-02 2 2 1</span></span> <span id="cb6-5"><a href="#cb6-5" aria-hidden="true"></a><span class="st">4 32 2004-10-10 2009-08-27 2012-07-21 1 1 1</span></span> <span id="cb6-6"><a href="#cb6-6" aria-hidden="true"></a><span class="st">5 29 2000-12-05 2005-02-28 NA 2 1 NA"</span></span> <span id="cb6-7"><a href="#cb6-7" aria-hidden="true"></a>DT <-<span class="st"> </span><span class="kw">fread</span>(s2)</span> <span id="cb6-8"><a href="#cb6-8" aria-hidden="true"></a>DT</span> <span id="cb6-9"><a href="#cb6-9" aria-hidden="true"></a><span class="co"># family_id age_mother dob_child1 dob_child2 dob_child3 gender_child1 gender_child2 gender_child3</span></span> <span id="cb6-10"><a href="#cb6-10" aria-hidden="true"></a><span class="co"># 1: 1 30 1998-11-26 2000-01-29 <NA> 1 2 NA</span></span> <span id="cb6-11"><a href="#cb6-11" aria-hidden="true"></a><span class="co"># 2: 2 27 1996-06-22 <NA> <NA> 2 NA NA</span></span> <span id="cb6-12"><a href="#cb6-12" aria-hidden="true"></a><span class="co"># 3: 3 26 2002-07-11 2004-04-05 2007-09-02 2 2 1</span></span> <span id="cb6-13"><a href="#cb6-13" aria-hidden="true"></a><span class="co"># 4: 4 32 2004-10-10 2009-08-27 2012-07-21 1 1 1</span></span> <span id="cb6-14"><a href="#cb6-14" aria-hidden="true"></a><span class="co"># 5: 5 29 2000-12-05 2005-02-28 <NA> 2 1 NA</span></span> <span id="cb6-15"><a href="#cb6-15" aria-hidden="true"></a><span class="co">## 1 = female, 2 = male</span></span></code></pre></div> <p>And you’d like to combine (<code>melt</code>) all the <code>dob</code> columns together, and <code>gender</code> columns together. Using the current functionality, we can do something like this:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true"></a>DT.m1 =<span class="st"> </span><span class="kw">melt</span>(DT, <span class="dt">id =</span> <span class="kw">c</span>(<span class="st">"family_id"</span>, <span class="st">"age_mother"</span>))</span> <span id="cb7-2"><a href="#cb7-2" aria-hidden="true"></a>DT.m1[, <span class="kw">c</span>(<span class="st">"variable"</span>, <span class="st">"child"</span>) <span class="op">:</span><span class="er">=</span><span class="st"> </span><span class="kw">tstrsplit</span>(variable, <span class="st">"_"</span>, <span class="dt">fixed =</span> <span class="ot">TRUE</span>)]</span> <span id="cb7-3"><a href="#cb7-3" aria-hidden="true"></a>DT.c1 =<span class="st"> </span><span class="kw">dcast</span>(DT.m1, family_id <span class="op">+</span><span class="st"> </span>age_mother <span class="op">+</span><span class="st"> </span>child <span class="op">~</span><span class="st"> </span>variable, <span class="dt">value.var =</span> <span class="st">"value"</span>)</span> <span id="cb7-4"><a href="#cb7-4" aria-hidden="true"></a>DT.c1</span> <span id="cb7-5"><a href="#cb7-5" aria-hidden="true"></a><span class="co"># family_id age_mother child dob gender</span></span> <span id="cb7-6"><a href="#cb7-6" aria-hidden="true"></a><span class="co"># 1: 1 30 child1 1998-11-26 1970-01-02</span></span> <span id="cb7-7"><a href="#cb7-7" aria-hidden="true"></a><span class="co"># 2: 1 30 child2 2000-01-29 1970-01-03</span></span> <span id="cb7-8"><a href="#cb7-8" aria-hidden="true"></a><span class="co"># 3: 1 30 child3 <NA> <NA></span></span> <span id="cb7-9"><a href="#cb7-9" aria-hidden="true"></a><span class="co"># 4: 2 27 child1 1996-06-22 1970-01-03</span></span> <span id="cb7-10"><a href="#cb7-10" aria-hidden="true"></a><span class="co"># 5: 2 27 child2 <NA> <NA></span></span> <span id="cb7-11"><a href="#cb7-11" aria-hidden="true"></a><span class="co"># 6: 2 27 child3 <NA> <NA></span></span> <span id="cb7-12"><a href="#cb7-12" aria-hidden="true"></a><span class="co"># 7: 3 26 child1 2002-07-11 1970-01-03</span></span> <span id="cb7-13"><a href="#cb7-13" aria-hidden="true"></a><span class="co"># 8: 3 26 child2 2004-04-05 1970-01-03</span></span> <span id="cb7-14"><a href="#cb7-14" aria-hidden="true"></a><span class="co"># 9: 3 26 child3 2007-09-02 1970-01-02</span></span> <span id="cb7-15"><a href="#cb7-15" aria-hidden="true"></a><span class="co"># 10: 4 32 child1 2004-10-10 1970-01-02</span></span> <span id="cb7-16"><a href="#cb7-16" aria-hidden="true"></a><span class="co"># 11: 4 32 child2 2009-08-27 1970-01-02</span></span> <span id="cb7-17"><a href="#cb7-17" aria-hidden="true"></a><span class="co"># 12: 4 32 child3 2012-07-21 1970-01-02</span></span> <span id="cb7-18"><a href="#cb7-18" aria-hidden="true"></a><span class="co"># 13: 5 29 child1 2000-12-05 1970-01-03</span></span> <span id="cb7-19"><a href="#cb7-19" aria-hidden="true"></a><span class="co"># 14: 5 29 child2 2005-02-28 1970-01-02</span></span> <span id="cb7-20"><a href="#cb7-20" aria-hidden="true"></a><span class="co"># 15: 5 29 child3 <NA> <NA></span></span> <span id="cb7-21"><a href="#cb7-21" aria-hidden="true"></a></span> <span id="cb7-22"><a href="#cb7-22" aria-hidden="true"></a><span class="kw">str</span>(DT.c1) <span class="co">## gender column is character type now!</span></span> <span id="cb7-23"><a href="#cb7-23" aria-hidden="true"></a><span class="co"># Classes 'data.table' and 'data.frame': 15 obs. of 5 variables:</span></span> <span id="cb7-24"><a href="#cb7-24" aria-hidden="true"></a><span class="co"># $ family_id : int 1 1 1 2 2 2 3 3 3 4 ...</span></span> <span id="cb7-25"><a href="#cb7-25" aria-hidden="true"></a><span class="co"># $ age_mother: int 30 30 30 27 27 27 26 26 26 32 ...</span></span> <span id="cb7-26"><a href="#cb7-26" aria-hidden="true"></a><span class="co"># $ child : chr "child1" "child2" "child3" "child1" ...</span></span> <span id="cb7-27"><a href="#cb7-27" aria-hidden="true"></a><span class="co"># $ dob : IDate, format: "1998-11-26" "2000-01-29" NA ...</span></span> <span id="cb7-28"><a href="#cb7-28" aria-hidden="true"></a><span class="co"># $ gender : IDate, format: "1970-01-02" "1970-01-03" NA ...</span></span> <span id="cb7-29"><a href="#cb7-29" aria-hidden="true"></a><span class="co"># - attr(*, ".internal.selfref")=<externalptr> </span></span> <span id="cb7-30"><a href="#cb7-30" aria-hidden="true"></a><span class="co"># - attr(*, "sorted")= chr [1:3] "family_id" "age_mother" "child"</span></span></code></pre></div> <div id="issues" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info">Issues</h4> <ol style="list-style-type: decimal"> <li><p>What we wanted to do was to combine all the <code>dob</code> and <code>gender</code> type columns together respectively. Instead we are combining <em>everything</em> together, and then splitting them again. I think it’s easy to see that it’s quite roundabout (and inefficient).</p> <p>As an analogy, imagine you’ve a closet with four shelves of clothes and you’d like to put together the clothes from shelves 1 and 2 together (in 1), and 3 and 4 together (in 3). What we are doing is more or less to combine all the clothes together, and then split them back on to shelves 1 and 3!</p></li> <li><p>The columns to <code>melt</code> may be of different types, as in this case (<code>character</code> and <code>integer</code> types). By <code>melt</code>ing them all together, the columns will be coerced in result, as explained by the warning message above and shown from output of <code>str(DT.c1)</code>, where <code>gender</code> has been converted to <em><code>character</code></em> type.</p></li> <li><p>We are generating an additional column by splitting the <code>variable</code> column into two columns, whose purpose is quite cryptic. We do it because we need it for <em>casting</em> in the next step.</p></li> <li><p>Finally, we cast the data set. But the issue is it’s a much more computationally involved operation than <em>melt</em>. Specifically, it requires computing the order of the variables in formula, and that’s costly.</p></li> </ol> <p>In fact, <code>stats::reshape</code> is capable of performing this operation in a very straightforward manner. It is an extremely useful and often underrated function. You should definitely give it a try!</p> </div> </div> <div id="enhanced-new-functionality" class="section level2"> <h2>3. Enhanced (new) functionality</h2> <div id="a-enhanced-melt" class="section level3"> <h3>a) Enhanced <code>melt</code></h3> <p>Since we’d like for <code>data.table</code>s to perform this operation straightforward and efficient using the same interface, we went ahead and implemented an <em>additional functionality</em>, where we can <code>melt</code> to multiple columns <em>simultaneously</em>.</p> <div id="melt-multiple-columns-simultaneously" class="section level4"> <h4>- <code>melt</code> multiple columns simultaneously</h4> <p>The idea is quite simple. We pass a list of columns to <code>measure.vars</code>, where each element of the list contains the columns that should be combined together.</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true"></a>colA =<span class="st"> </span><span class="kw">paste</span>(<span class="st">"dob_child"</span>, <span class="dv">1</span><span class="op">:</span><span class="dv">3</span>, <span class="dt">sep =</span> <span class="st">""</span>)</span> <span id="cb8-2"><a href="#cb8-2" aria-hidden="true"></a>colB =<span class="st"> </span><span class="kw">paste</span>(<span class="st">"gender_child"</span>, <span class="dv">1</span><span class="op">:</span><span class="dv">3</span>, <span class="dt">sep =</span> <span class="st">""</span>)</span> <span id="cb8-3"><a href="#cb8-3" aria-hidden="true"></a>DT.m2 =<span class="st"> </span><span class="kw">melt</span>(DT, <span class="dt">measure =</span> <span class="kw">list</span>(colA, colB), <span class="dt">value.name =</span> <span class="kw">c</span>(<span class="st">"dob"</span>, <span class="st">"gender"</span>))</span> <span id="cb8-4"><a href="#cb8-4" aria-hidden="true"></a>DT.m2</span> <span id="cb8-5"><a href="#cb8-5" aria-hidden="true"></a><span class="co"># family_id age_mother variable dob gender</span></span> <span id="cb8-6"><a href="#cb8-6" aria-hidden="true"></a><span class="co"># 1: 1 30 1 1998-11-26 1</span></span> <span id="cb8-7"><a href="#cb8-7" aria-hidden="true"></a><span class="co"># 2: 2 27 1 1996-06-22 2</span></span> <span id="cb8-8"><a href="#cb8-8" aria-hidden="true"></a><span class="co"># 3: 3 26 1 2002-07-11 2</span></span> <span id="cb8-9"><a href="#cb8-9" aria-hidden="true"></a><span class="co"># 4: 4 32 1 2004-10-10 1</span></span> <span id="cb8-10"><a href="#cb8-10" aria-hidden="true"></a><span class="co"># 5: 5 29 1 2000-12-05 2</span></span> <span id="cb8-11"><a href="#cb8-11" aria-hidden="true"></a><span class="co"># 6: 1 30 2 2000-01-29 2</span></span> <span id="cb8-12"><a href="#cb8-12" aria-hidden="true"></a><span class="co"># 7: 2 27 2 <NA> NA</span></span> <span id="cb8-13"><a href="#cb8-13" aria-hidden="true"></a><span class="co"># 8: 3 26 2 2004-04-05 2</span></span> <span id="cb8-14"><a href="#cb8-14" aria-hidden="true"></a><span class="co"># 9: 4 32 2 2009-08-27 1</span></span> <span id="cb8-15"><a href="#cb8-15" aria-hidden="true"></a><span class="co"># 10: 5 29 2 2005-02-28 1</span></span> <span id="cb8-16"><a href="#cb8-16" aria-hidden="true"></a><span class="co"># 11: 1 30 3 <NA> NA</span></span> <span id="cb8-17"><a href="#cb8-17" aria-hidden="true"></a><span class="co"># 12: 2 27 3 <NA> NA</span></span> <span id="cb8-18"><a href="#cb8-18" aria-hidden="true"></a><span class="co"># 13: 3 26 3 2007-09-02 1</span></span> <span id="cb8-19"><a href="#cb8-19" aria-hidden="true"></a><span class="co"># 14: 4 32 3 2012-07-21 1</span></span> <span id="cb8-20"><a href="#cb8-20" aria-hidden="true"></a><span class="co"># 15: 5 29 3 <NA> NA</span></span> <span id="cb8-21"><a href="#cb8-21" aria-hidden="true"></a></span> <span id="cb8-22"><a href="#cb8-22" aria-hidden="true"></a><span class="kw">str</span>(DT.m2) <span class="co">## col type is preserved</span></span> <span id="cb8-23"><a href="#cb8-23" aria-hidden="true"></a><span class="co"># Classes 'data.table' and 'data.frame': 15 obs. of 5 variables:</span></span> <span id="cb8-24"><a href="#cb8-24" aria-hidden="true"></a><span class="co"># $ family_id : int 1 2 3 4 5 1 2 3 4 5 ...</span></span> <span id="cb8-25"><a href="#cb8-25" aria-hidden="true"></a><span class="co"># $ age_mother: int 30 27 26 32 29 30 27 26 32 29 ...</span></span> <span id="cb8-26"><a href="#cb8-26" aria-hidden="true"></a><span class="co"># $ variable : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 2 2 2 2 2 ...</span></span> <span id="cb8-27"><a href="#cb8-27" aria-hidden="true"></a><span class="co"># $ dob : IDate, format: "1998-11-26" "1996-06-22" "2002-07-11" ...</span></span> <span id="cb8-28"><a href="#cb8-28" aria-hidden="true"></a><span class="co"># $ gender : int 1 2 2 1 2 2 NA 2 1 1 ...</span></span> <span id="cb8-29"><a href="#cb8-29" aria-hidden="true"></a><span class="co"># - attr(*, ".internal.selfref")=<externalptr></span></span></code></pre></div> </div> <div id="using-patterns" class="section level4"> <h4>- Using <code>patterns()</code></h4> <p>Usually in these problems, the columns we’d like to melt can be distinguished by a common pattern. We can use the function <code>patterns()</code>, implemented for convenience, to provide regular expressions for the columns to be combined together. The above operation can be rewritten as:</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true"></a>DT.m2 =<span class="st"> </span><span class="kw">melt</span>(DT, <span class="dt">measure =</span> <span class="kw">patterns</span>(<span class="st">"^dob"</span>, <span class="st">"^gender"</span>), <span class="dt">value.name =</span> <span class="kw">c</span>(<span class="st">"dob"</span>, <span class="st">"gender"</span>))</span> <span id="cb9-2"><a href="#cb9-2" aria-hidden="true"></a>DT.m2</span> <span id="cb9-3"><a href="#cb9-3" aria-hidden="true"></a><span class="co"># family_id age_mother variable dob gender</span></span> <span id="cb9-4"><a href="#cb9-4" aria-hidden="true"></a><span class="co"># 1: 1 30 1 1998-11-26 1</span></span> <span id="cb9-5"><a href="#cb9-5" aria-hidden="true"></a><span class="co"># 2: 2 27 1 1996-06-22 2</span></span> <span id="cb9-6"><a href="#cb9-6" aria-hidden="true"></a><span class="co"># 3: 3 26 1 2002-07-11 2</span></span> <span id="cb9-7"><a href="#cb9-7" aria-hidden="true"></a><span class="co"># 4: 4 32 1 2004-10-10 1</span></span> <span id="cb9-8"><a href="#cb9-8" aria-hidden="true"></a><span class="co"># 5: 5 29 1 2000-12-05 2</span></span> <span id="cb9-9"><a href="#cb9-9" aria-hidden="true"></a><span class="co"># 6: 1 30 2 2000-01-29 2</span></span> <span id="cb9-10"><a href="#cb9-10" aria-hidden="true"></a><span class="co"># 7: 2 27 2 <NA> NA</span></span> <span id="cb9-11"><a href="#cb9-11" aria-hidden="true"></a><span class="co"># 8: 3 26 2 2004-04-05 2</span></span> <span id="cb9-12"><a href="#cb9-12" aria-hidden="true"></a><span class="co"># 9: 4 32 2 2009-08-27 1</span></span> <span id="cb9-13"><a href="#cb9-13" aria-hidden="true"></a><span class="co"># 10: 5 29 2 2005-02-28 1</span></span> <span id="cb9-14"><a href="#cb9-14" aria-hidden="true"></a><span class="co"># 11: 1 30 3 <NA> NA</span></span> <span id="cb9-15"><a href="#cb9-15" aria-hidden="true"></a><span class="co"># 12: 2 27 3 <NA> NA</span></span> <span id="cb9-16"><a href="#cb9-16" aria-hidden="true"></a><span class="co"># 13: 3 26 3 2007-09-02 1</span></span> <span id="cb9-17"><a href="#cb9-17" aria-hidden="true"></a><span class="co"># 14: 4 32 3 2012-07-21 1</span></span> <span id="cb9-18"><a href="#cb9-18" aria-hidden="true"></a><span class="co"># 15: 5 29 3 <NA> NA</span></span></code></pre></div> <p>That’s it!</p> </div> <div id="section-3" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info"></h4> <ul> <li><p>We can remove the <code>variable</code> column if necessary.</p></li> <li><p>The functionality is implemented entirely in C, and is therefore both <em>fast</em> and <em>memory efficient</em> in addition to being <em>straightforward</em>.</p></li> </ul> </div> </div> <div id="b-enhanced-dcast" class="section level3"> <h3>b) Enhanced <code>dcast</code></h3> <p>Okay great! We can now melt into multiple columns simultaneously. Now given the data set <code>DT.m2</code> as shown above, how can we get back to the same format as the original data we started with?</p> <p>If we use the current functionality of <code>dcast</code>, then we’d have to cast twice and bind the results together. But that’s once again verbose, not straightforward and is also inefficient.</p> <div id="casting-multiple-value.vars-simultaneously" class="section level4"> <h4>- Casting multiple <code>value.var</code>s simultaneously</h4> <p>We can now provide <strong>multiple <code>value.var</code> columns</strong> to <code>dcast</code> for <code>data.table</code>s directly so that the operations are taken care of internally and efficiently.</p> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true"></a><span class="co">## new 'cast' functionality - multiple value.vars</span></span> <span id="cb10-2"><a href="#cb10-2" aria-hidden="true"></a>DT.c2 =<span class="st"> </span><span class="kw">dcast</span>(DT.m2, family_id <span class="op">+</span><span class="st"> </span>age_mother <span class="op">~</span><span class="st"> </span>variable, <span class="dt">value.var =</span> <span class="kw">c</span>(<span class="st">"dob"</span>, <span class="st">"gender"</span>))</span> <span id="cb10-3"><a href="#cb10-3" aria-hidden="true"></a>DT.c2</span> <span id="cb10-4"><a href="#cb10-4" aria-hidden="true"></a><span class="co"># family_id age_mother dob_1 dob_2 dob_3 gender_1 gender_2 gender_3</span></span> <span id="cb10-5"><a href="#cb10-5" aria-hidden="true"></a><span class="co"># 1: 1 30 1998-11-26 2000-01-29 <NA> 1 2 NA</span></span> <span id="cb10-6"><a href="#cb10-6" aria-hidden="true"></a><span class="co"># 2: 2 27 1996-06-22 <NA> <NA> 2 NA NA</span></span> <span id="cb10-7"><a href="#cb10-7" aria-hidden="true"></a><span class="co"># 3: 3 26 2002-07-11 2004-04-05 2007-09-02 2 2 1</span></span> <span id="cb10-8"><a href="#cb10-8" aria-hidden="true"></a><span class="co"># 4: 4 32 2004-10-10 2009-08-27 2012-07-21 1 1 1</span></span> <span id="cb10-9"><a href="#cb10-9" aria-hidden="true"></a><span class="co"># 5: 5 29 2000-12-05 2005-02-28 <NA> 2 1 NA</span></span></code></pre></div> </div> <div id="section-4" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info"></h4> <ul> <li><p>Attributes are preserved in result wherever possible.</p></li> <li><p>Everything is taken care of internally, and efficiently. In addition to being fast, it is also very memory efficient.</p></li> </ul> </div> </div> </div> <div id="section-5" class="section level1"> <h1></h1> <div id="multiple-functions-to-fun.aggregate" class="section level4 bs-callout bs-callout-info"> <h4 class="bs-callout bs-callout-info">Multiple functions to <code>fun.aggregate</code>:</h4> <p>You can also provide <em>multiple functions</em> to <code>fun.aggregate</code> to <code>dcast</code> for <em>data.tables</em>. Check the examples in <code>?dcast</code> which illustrates this functionality.</p> </div> </div> <div id="section-6" class="section level1"> <h1></h1> <hr /> </div> <!-- code folding --> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>