190 lines
18 KiB
HTML
190 lines
18 KiB
HTML
<!DOCTYPE html>
|
||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="generator" content="pandoc" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
|
||
<title>1.5 Building Up Data with Comprehensions</title>
|
||
<style>
|
||
code{white-space: pre-wrap;}
|
||
span.smallcaps{font-variant: small-caps;}
|
||
span.underline{text-decoration: underline;}
|
||
div.column{display: inline-block; vertical-align: top; width: 50%;}
|
||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||
ul.task-list{list-style: none;}
|
||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
|
||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||
div.sourceCode { margin: 1em 0; }
|
||
pre.sourceCode { margin: 0; }
|
||
@media screen {
|
||
div.sourceCode { overflow: auto; }
|
||
}
|
||
@media print {
|
||
pre > code.sourceCode { white-space: pre-wrap; }
|
||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||
}
|
||
pre.numberSource code
|
||
{ counter-reset: source-line 0; }
|
||
pre.numberSource code > span
|
||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||
pre.numberSource code > span > a:first-child::before
|
||
{ content: counter(source-line);
|
||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||
border: none; display: inline-block;
|
||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||
-khtml-user-select: none; -moz-user-select: none;
|
||
-ms-user-select: none; user-select: none;
|
||
padding: 0 4px; width: 4em;
|
||
color: #aaaaaa;
|
||
}
|
||
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
|
||
div.sourceCode
|
||
{ }
|
||
@media screen {
|
||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||
}
|
||
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
|
||
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
|
||
code span.at { color: #7d9029; } /* Attribute */
|
||
code span.bn { color: #40a070; } /* BaseN */
|
||
code span.bu { } /* BuiltIn */
|
||
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
|
||
code span.ch { color: #4070a0; } /* Char */
|
||
code span.cn { color: #880000; } /* Constant */
|
||
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
|
||
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
|
||
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
|
||
code span.dt { color: #902000; } /* DataType */
|
||
code span.dv { color: #40a070; } /* DecVal */
|
||
code span.er { color: #ff0000; font-weight: bold; } /* Error */
|
||
code span.ex { } /* Extension */
|
||
code span.fl { color: #40a070; } /* Float */
|
||
code span.fu { color: #06287e; } /* Function */
|
||
code span.im { } /* Import */
|
||
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
|
||
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
|
||
code span.op { color: #666666; } /* Operator */
|
||
code span.ot { color: #007020; } /* Other */
|
||
code span.pp { color: #bc7a00; } /* Preprocessor */
|
||
code span.sc { color: #4070a0; } /* SpecialChar */
|
||
code span.ss { color: #bb6688; } /* SpecialString */
|
||
code span.st { color: #4070a0; } /* String */
|
||
code span.va { color: #19177c; } /* Variable */
|
||
code span.vs { color: #4070a0; } /* VerbatimString */
|
||
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
|
||
</style>
|
||
<link rel="stylesheet" href="../tufte.css" />
|
||
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" type="text/javascript"></script>
|
||
<!--[if lt IE 9]>
|
||
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
|
||
<![endif]-->
|
||
</head>
|
||
<body>
|
||
<div style="display:none">
|
||
\(
|
||
\newcommand{\NOT}{\neg}
|
||
\newcommand{\AND}{\wedge}
|
||
\newcommand{\OR}{\vee}
|
||
\newcommand{\XOR}{\oplus}
|
||
\newcommand{\IMP}{\Rightarrow}
|
||
\newcommand{\IFF}{\Leftrightarrow}
|
||
\newcommand{\TRUE}{\text{True}\xspace}
|
||
\newcommand{\FALSE}{\text{False}\xspace}
|
||
\newcommand{\IN}{\,{\in}\,}
|
||
\newcommand{\NOTIN}{\,{\notin}\,}
|
||
\newcommand{\TO}{\rightarrow}
|
||
\newcommand{\DIV}{\mid}
|
||
\newcommand{\NDIV}{\nmid}
|
||
\newcommand{\MOD}[1]{\pmod{#1}}
|
||
\newcommand{\MODS}[1]{\ (\text{mod}\ #1)}
|
||
\newcommand{\N}{\mathbb N}
|
||
\newcommand{\Z}{\mathbb Z}
|
||
\newcommand{\Q}{\mathbb Q}
|
||
\newcommand{\R}{\mathbb R}
|
||
\newcommand{\C}{\mathbb C}
|
||
\newcommand{\cA}{\mathcal A}
|
||
\newcommand{\cB}{\mathcal B}
|
||
\newcommand{\cC}{\mathcal C}
|
||
\newcommand{\cD}{\mathcal D}
|
||
\newcommand{\cE}{\mathcal E}
|
||
\newcommand{\cF}{\mathcal F}
|
||
\newcommand{\cG}{\mathcal G}
|
||
\newcommand{\cH}{\mathcal H}
|
||
\newcommand{\cI}{\mathcal I}
|
||
\newcommand{\cJ}{\mathcal J}
|
||
\newcommand{\cL}{\mathcal L}
|
||
\newcommand{\cK}{\mathcal K}
|
||
\newcommand{\cN}{\mathcal N}
|
||
\newcommand{\cO}{\mathcal O}
|
||
\newcommand{\cP}{\mathcal P}
|
||
\newcommand{\cQ}{\mathcal Q}
|
||
\newcommand{\cS}{\mathcal S}
|
||
\newcommand{\cT}{\mathcal T}
|
||
\newcommand{\cV}{\mathcal V}
|
||
\newcommand{\cW}{\mathcal W}
|
||
\newcommand{\cZ}{\mathcal Z}
|
||
\newcommand{\emp}{\emptyset}
|
||
\newcommand{\bs}{\backslash}
|
||
\newcommand{\floor}[1]{\left \lfloor #1 \right \rfloor}
|
||
\newcommand{\ceil}[1]{\left \lceil #1 \right \rceil}
|
||
\newcommand{\abs}[1]{\left | #1 \right |}
|
||
\newcommand{\xspace}{}
|
||
\newcommand{\proofheader}[1]{\underline{\textbf{#1}}}
|
||
\)
|
||
</div>
|
||
<header id="title-block-header">
|
||
<h1 class="title">1.5 Building Up Data with Comprehensions</h1>
|
||
</header>
|
||
<section>
|
||
<p>To wrap up our introduction to data in Python, we’re going to learn about one last kind of expression that allows us to build up and transform large collections of data in Python.</p>
|
||
<h2 id="from-set-builder-notation-to-set-comprehensions">From set builder notation to set comprehensions</h2>
|
||
<p>Recall <em>set builder notation</em>, which is a concise way of defining a mathematical set by specifying the values of the elements in terms of a larger domain. For example, suppose we have a set <span class="math inline">\(S = \{1, 2, 3, 4, 5\}\)</span>. We can express a set of squares of the elements of <span class="math inline">\(S\)</span>: <span class="math display">\[\{ x^2 \mid x \in S \}.\]</span></p>
|
||
<p>It turns out that this notation translates naturally to Python! To start, let’s go into the Python Console and create a variable that refers to a set of numbers:</p>
|
||
<div class="sourceCode" id="cb1"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1"></a><span class="op">>>></span> numbers <span class="op">=</span> {<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>, <span class="dv">4</span>, <span class="dv">5</span>}</span></code></pre></div>
|
||
<p>Now, we introduce a new kind of expression called a <strong>set comprehension</strong>, which has the following syntax:<label for="sn-0" class="margin-toggle sidenote-number"></label><input type="checkbox" id="sn-0" class="margin-toggle"/><span class="sidenote"> Careful with this: even though set comprehensions also use curly braces, they are <em>not</em> the same as set literals. We aren’t writing out the individual elements separated by commas.</span></p>
|
||
<div class="sourceCode" id="cb2"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1"></a>{ <span class="op">&lt;</span>expr<span class="op">&gt;</span> <span class="cf">for</span> <span class="op">&lt;</span>variable<span class="op">&gt;</span> <span class="kw">in</span> <span class="op">&lt;</span>collection<span class="op">&gt;</span> }</span></code></pre></div>
|
||
<p>Evaluating a set comprehension is done by taking the <code>&lt;expr&gt;</code> and evaluating it once for each value in <code>&lt;collection&gt;</code> assigned to the <code>&lt;variable&gt;</code>. This is exactly analogous to set builder notation, except using <code>for</code> instead of <span class="math inline">\(|\)</span> and <code>in</code> instead of <span class="math inline">\(\in\)</span>. Here’s how we can repeat our initial example in Python using a set comprehension:</p>
|
||
<div class="sourceCode" id="cb3"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1"></a><span class="op">>>></span> {x <span class="op">**</span> <span class="dv">2</span> <span class="cf">for</span> x <span class="kw">in</span> numbers}</span>
|
||
<span id="cb3-2"><a href="#cb3-2"></a>{<span class="dv">1</span>, <span class="dv">4</span>, <span class="dv">9</span>, <span class="dv">16</span>, <span class="dv">25</span>}</span></code></pre></div>
|
||
<p>Pretty cool, eh? If you aren’t sure exactly what happened here, it’s useful to write out the expanded form of the set comprehension:</p>
|
||
<div class="sourceCode" id="cb4"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1"></a> {x <span class="op">**</span> <span class="dv">2</span> <span class="cf">for</span> x <span class="kw">in</span> numbers}</span>
|
||
<span id="cb4-2"><a href="#cb4-2"></a><span class="op">==</span> {<span class="dv">1</span> <span class="op">**</span> <span class="dv">2</span>, <span class="dv">2</span> <span class="op">**</span> <span class="dv">2</span>, <span class="dv">3</span> <span class="op">**</span> <span class="dv">2</span>, <span class="dv">4</span> <span class="op">**</span> <span class="dv">2</span>, <span class="dv">5</span> <span class="op">**</span> <span class="dv">2</span>} <span class="co"># Replacing x with 1, 2, 3, 4, and 5.</span></span></code></pre></div>
|
||
<p>It goes even further—we can use set comprehensions with a Python list as well.</p>
|
||
<div class="sourceCode" id="cb5"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1"></a><span class="op">>>></span> {x <span class="op">**</span> <span class="dv">2</span> <span class="cf">for</span> x <span class="kw">in</span> [<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>, <span class="dv">4</span>, <span class="dv">5</span>]}</span>
|
||
<span id="cb5-2"><a href="#cb5-2"></a>{<span class="dv">1</span>, <span class="dv">4</span>, <span class="dv">9</span>, <span class="dv">16</span>, <span class="dv">25</span>}</span></code></pre></div>
|
||
<p>In fact, as we’ll see later in this course, set comprehensions can be used with any “collection” data type in Python, not just sets and lists.</p>
|
||
<h2 id="list-and-dictionary-comprehensions">List and dictionary comprehensions</h2>
|
||
<p>Even though set comprehensions draw their inspiration from set builder notation in mathematics, Python has extended them to other data types.</p>
|
||
<p>A <strong>list comprehension</strong> is very similar to a set comprehension, except its syntax uses square brackets instead of curly braces:</p>
|
||
<div class="sourceCode" id="cb6"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1"></a>[ <span class="op">&lt;</span>expr<span class="op">&gt;</span> <span class="cf">for</span> <span class="op">&lt;</span>variable<span class="op">&gt;</span> <span class="kw">in</span> <span class="op">&lt;</span>collection<span class="op">&gt;</span> ]</span></code></pre></div>
|
||
<p>Once again, <code>&lt;collection&gt;</code> can be a set or a list:</p>
|
||
<div class="sourceCode" id="cb7"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1"></a><span class="op">>>></span> [x <span class="op">+</span> <span class="dv">4</span> <span class="cf">for</span> x <span class="kw">in</span> {<span class="dv">10</span>, <span class="dv">20</span>, <span class="dv">30</span>}]</span>
|
||
<span id="cb7-2"><a href="#cb7-2"></a>[<span class="dv">14</span>, <span class="dv">24</span>, <span class="dv">34</span>]</span>
|
||
<span id="cb7-3"><a href="#cb7-3"></a><span class="op">>>></span> [x <span class="op">*</span> <span class="dv">3</span> <span class="cf">for</span> x <span class="kw">in</span> [<span class="dv">100</span>, <span class="dv">200</span>, <span class="dv">300</span>]]</span>
|
||
<span id="cb7-4"><a href="#cb7-4"></a>[<span class="dv">300</span>, <span class="dv">600</span>, <span class="dv">900</span>]</span></code></pre></div>
|
||
<p>One word of warning: because sets are unordered but lists are ordered, you should <em>not</em> assume a particular ordering of the elements when a list comprehension generates elements from a set—the results can be unexpected!</p>
|
||
<div class="sourceCode" id="cb8"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1"></a><span class="op">>>></span> [x <span class="cf">for</span> x <span class="kw">in</span> {<span class="dv">20</span>, <span class="dv">10</span>, <span class="dv">30</span>}]</span>
|
||
<span id="cb8-2"><a href="#cb8-2"></a>[<span class="dv">10</span>, <span class="dv">20</span>, <span class="dv">30</span>]</span></code></pre></div>
|
||
<p>A <strong>dictionary comprehension</strong> is again similar to a set comprehension, but specifies both an expression to generate keys and an expression to generate their associated values:</p>
|
||
<div class="sourceCode" id="cb9"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1"></a>{ <span class="op">&lt;</span>key_expr<span class="op">&gt;</span> : <span class="op">&lt;</span>value_expr<span class="op">&gt;</span> <span class="cf">for</span> <span class="op">&lt;</span>variable<span class="op">&gt;</span> <span class="kw">in</span> <span class="op">&lt;</span>collection<span class="op">&gt;</span> }</span></code></pre></div>
|
||
<p>Out of all three comprehension types, dictionary comprehensions are the most complex, because the left-hand side (before the <code>for</code>) consists of two expressions instead of one. Here is one example of a dictionary comprehension that creates a “table of values” for the function <span class="math inline">\(f(x) = x^2 + 1\)</span>.</p>
|
||
<div class="sourceCode" id="cb10"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1"></a><span class="op">>>></span> {x : x <span class="op">**</span> <span class="dv">2</span> <span class="op">+</span> <span class="dv">1</span> <span class="cf">for</span> x <span class="kw">in</span> {<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>, <span class="dv">4</span>, <span class="dv">5</span>}}</span>
|
||
<span id="cb10-2"><a href="#cb10-2"></a>{<span class="dv">1</span>: <span class="dv">2</span>, <span class="dv">2</span>: <span class="dv">5</span>, <span class="dv">3</span>: <span class="dv">10</span>, <span class="dv">4</span>: <span class="dv">17</span>, <span class="dv">5</span>: <span class="dv">26</span>}</span></code></pre></div>
|
||
<h2 id="comprehensions-with-multiple-variables">Comprehensions with multiple variables</h2>
|
||
<p>Our last example in this section illustrates how multiple variables are used within the same comprehension expression. First, recall how we defined the <em>Cartesian product</em> of two sets using set builder notation: <span class="math display">\[ A \times B = \{ (x, y) \mid x \in A \text{ and } y \in B \}.\]</span> In this definition, the expression <span class="math inline">\((x, y)\)</span> is evaluated once for every possible combination of elements <span class="math inline">\(x\)</span> of <span class="math inline">\(A\)</span> and elements <span class="math inline">\(y\)</span> of <span class="math inline">\(B\)</span>.</p>
|
||
<p>The same holds for set, list, and dictionary comprehensions. We can specify additional variables in a comprehension by adding extra <code>for &lt;variable&gt; in &lt;collection&gt;</code> clauses to the comprehension. For example, if we define the following sets:</p>
|
||
<div class="sourceCode" id="cb11"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1"></a><span class="op">>>></span> nums1 <span class="op">=</span> {<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>}</span>
|
||
<span id="cb11-2"><a href="#cb11-2"></a><span class="op">>>></span> nums2 <span class="op">=</span> {<span class="dv">10</span>, <span class="dv">20</span>, <span class="dv">30</span>}</span></code></pre></div>
|
||
<p>then we can calculate their Cartesian product using the following set comprehension:<label for="sn-1" class="margin-toggle sidenote-number"></label><input type="checkbox" id="sn-1" class="margin-toggle"/><span class="sidenote"> Remember, sets are unordered! Don’t get hung up on the unusual order in the output.</span></p>
|
||
<div class="sourceCode" id="cb12"><pre class="sourceCode python"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1"></a><span class="op">>>></span> {(x, y) <span class="cf">for</span> x <span class="kw">in</span> nums1 <span class="cf">for</span> y <span class="kw">in</span> nums2}</span>
|
||
<span id="cb12-2"><a href="#cb12-2"></a>{(<span class="dv">3</span>, <span class="dv">30</span>), (<span class="dv">2</span>, <span class="dv">20</span>), (<span class="dv">2</span>, <span class="dv">10</span>), (<span class="dv">1</span>, <span class="dv">30</span>), (<span class="dv">3</span>, <span class="dv">20</span>), (<span class="dv">1</span>, <span class="dv">20</span>), (<span class="dv">3</span>, <span class="dv">10</span>), (<span class="dv">1</span>, <span class="dv">10</span>), (<span class="dv">2</span>, <span class="dv">30</span>)}</span></code></pre></div>
|
||
<p>In general, if we have a comprehension with clauses <code>for v1 in collection1</code>, <code>for v2 in collection2</code>, etc., then the comprehension’s inner expression is evaluated <em>once for each combination of values for the variables</em>. This illustrates yet another pretty impressive power of Python: the ability to combine different collections of data together in a short amount of code.</p>
|
||
</section>
|
||
<footer>
|
||
<a href="https://www.teach.cs.toronto.edu/~csc110y/fall/notes/">CSC110 Course Notes Home</a>
|
||
</footer>
|
||
</body>
|
||
</html>
|