24756 lines
1.4 MiB
24756 lines
1.4 MiB
<!DOCTYPE HTML>
|
||
<html lang="en" class="sidebar-visible no-js light">
|
||
<head>
|
||
<!-- Book generated using mdBook -->
|
||
<meta charset="UTF-8">
|
||
<title>The Rust Programming Language</title>
|
||
|
||
<meta name="robots" content="noindex" />
|
||
|
||
|
||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||
<meta name="description" content="">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
<meta name="theme-color" content="#ffffff" />
|
||
|
||
<link rel="shortcut icon" href="favicon.png">
|
||
<link rel="stylesheet" href="css/variables.css">
|
||
<link rel="stylesheet" href="css/general.css">
|
||
<link rel="stylesheet" href="css/chrome.css">
|
||
<link rel="stylesheet" href="css/print.css" media="print">
|
||
|
||
<!-- Fonts -->
|
||
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
||
<link href="googleFonts/css.css" rel="stylesheet" type="text/css">
|
||
|
||
<!-- Highlight.js Stylesheets -->
|
||
<link rel="stylesheet" href="highlight.css">
|
||
<link rel="stylesheet" href="tomorrow-night.css">
|
||
<link rel="stylesheet" href="ayu-highlight.css">
|
||
|
||
<!-- Custom theme stylesheets -->
|
||
|
||
<link rel="stylesheet" href="ferris.css">
|
||
|
||
<link rel="stylesheet" href="theme/2018-edition.css">
|
||
|
||
|
||
|
||
</head>
|
||
<body>
|
||
<!-- Provide site root to javascript -->
|
||
<script type="text/javascript">
    // Globals consumed by the scripts further down the page.
    // path_to_root: relative prefix from this page to the book root (empty here).
    var path_to_root = "";
    // default_theme: theme used when nothing is stored in localStorage.
    // Both outcomes of the colour-scheme query currently resolve to "light",
    // so the query result does not affect the chosen theme.
    var default_theme = (function () {
        if (window.matchMedia("(prefers-color-scheme: dark)").matches) {
            return "light";
        }
        return "light";
    })();
</script>
|
||
|
||
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
||
<script type="text/javascript">
    // Strip a pair of surrounding double quotes from the stored theme and
    // sidebar settings, rewriting the cleaned value back to localStorage.
    //
    // getItem() yields null for a key that has never been stored. Each value is
    // therefore null-checked on its own: previously a missing theme made
    // theme.startsWith() throw, and the exception (silently caught below)
    // skipped the sidebar fix-up as well.
    try {
        var theme = localStorage.getItem('mdbook-theme');
        var sidebar = localStorage.getItem('mdbook-sidebar');

        if (theme !== null && theme.startsWith('"') && theme.endsWith('"')) {
            localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
        }

        if (sidebar !== null && sidebar.startsWith('"') && sidebar.endsWith('"')) {
            localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
        }
    } catch (e) {
        // Deliberately best-effort: if storage cannot be read or written the
        // page simply continues with whatever values are (or are not) stored.
    }
</script>
|
||
|
||
<!-- Set the theme before any content is loaded, prevents flash -->
|
||
<script type="text/javascript">
    // Pick the theme (stored value, else default_theme) and swap the classes on
    // the root element so the page is styled before any content is painted.
    var theme;
    try {
        theme = localStorage.getItem('mdbook-theme');
    } catch (e) { }
    // Loose comparison with null matches both null and undefined.
    if (theme == null) {
        theme = default_theme;
    }

    var html = document.documentElement;
    // Order matters for the resulting class list: drop the static markers
    // first, then append the active theme followed by the 'js' flag.
    html.classList.remove('no-js');
    html.classList.remove('light');
    html.classList.add(theme);
    html.classList.add('js');
</script>
|
||
|
||
<!-- Hide / unhide sidebar before it is displayed -->
|
||
<script type="text/javascript">
    // Decide whether the sidebar starts 'visible' or 'hidden' and reflect that
    // as a "sidebar-<state>" class on the root element.
    var html = document.documentElement;
    var sidebar = 'hidden';

    // Only viewports at least 1080px wide consult the stored preference;
    // narrower ones always start hidden.
    if (document.body.clientWidth >= 1080) {
        try {
            sidebar = localStorage.getItem('mdbook-sidebar');
        } catch (e) { }
        // A missing/empty stored value falls back to 'visible'. If reading
        // storage threw, sidebar still holds 'hidden' and is left unchanged.
        if (!sidebar) {
            sidebar = 'visible';
        }
    }

    html.classList.remove('sidebar-visible');
    html.classList.add("sidebar-" + sidebar);
</script>
|
||
|
||
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
||
<div id="sidebar-scrollbox" class="sidebar-scrollbox">
|
||
<ol class="chapter"><li class="expanded affix "><a href="title-page.html">The Rust Programming Language</a></li><li class="expanded affix "><a href="foreword.html">Foreword</a></li><li class="expanded affix "><a href="ch00-00-introduction.html">Introduction</a></li><li class="expanded "><a href="ch01-00-getting-started.html"><strong aria-hidden="true">1.</strong> Getting Started</a></li><li><ol class="section"><li class="expanded "><a href="ch01-01-installation.html"><strong aria-hidden="true">1.1.</strong> Installation</a></li><li class="expanded "><a href="ch01-02-hello-world.html"><strong aria-hidden="true">1.2.</strong> Hello, World!</a></li><li class="expanded "><a href="ch01-03-hello-cargo.html"><strong aria-hidden="true">1.3.</strong> Hello, Cargo!</a></li></ol></li><li class="expanded "><a href="ch02-00-guessing-game-tutorial.html"><strong aria-hidden="true">2.</strong> Programming a Guessing Game</a></li><li class="expanded "><a href="ch03-00-common-programming-concepts.html"><strong aria-hidden="true">3.</strong> Common Programming Concepts</a></li><li><ol class="section"><li class="expanded "><a href="ch03-01-variables-and-mutability.html"><strong aria-hidden="true">3.1.</strong> Variables and Mutability</a></li><li class="expanded "><a href="ch03-02-data-types.html"><strong aria-hidden="true">3.2.</strong> Data Types</a></li><li class="expanded "><a href="ch03-03-how-functions-work.html"><strong aria-hidden="true">3.3.</strong> Functions</a></li><li class="expanded "><a href="ch03-04-comments.html"><strong aria-hidden="true">3.4.</strong> Comments</a></li><li class="expanded "><a href="ch03-05-control-flow.html"><strong aria-hidden="true">3.5.</strong> Control Flow</a></li></ol></li><li class="expanded "><a href="ch04-00-understanding-ownership.html"><strong aria-hidden="true">4.</strong> Understanding Ownership</a></li><li><ol class="section"><li class="expanded "><a href="ch04-01-what-is-ownership.html"><strong aria-hidden="true">4.1.</strong> What is 
Ownership?</a></li><li class="expanded "><a href="ch04-02-references-and-borrowing.html"><strong aria-hidden="true">4.2.</strong> References and Borrowing</a></li><li class="expanded "><a href="ch04-03-slices.html"><strong aria-hidden="true">4.3.</strong> The Slice Type</a></li></ol></li><li class="expanded "><a href="ch05-00-structs.html"><strong aria-hidden="true">5.</strong> Using Structs to Structure Related Data</a></li><li><ol class="section"><li class="expanded "><a href="ch05-01-defining-structs.html"><strong aria-hidden="true">5.1.</strong> Defining and Instantiating Structs</a></li><li class="expanded "><a href="ch05-02-example-structs.html"><strong aria-hidden="true">5.2.</strong> An Example Program Using Structs</a></li><li class="expanded "><a href="ch05-03-method-syntax.html"><strong aria-hidden="true">5.3.</strong> Method Syntax</a></li></ol></li><li class="expanded "><a href="ch06-00-enums.html"><strong aria-hidden="true">6.</strong> Enums and Pattern Matching</a></li><li><ol class="section"><li class="expanded "><a href="ch06-01-defining-an-enum.html"><strong aria-hidden="true">6.1.</strong> Defining an Enum</a></li><li class="expanded "><a href="ch06-02-match.html"><strong aria-hidden="true">6.2.</strong> The match Control Flow Operator</a></li><li class="expanded "><a href="ch06-03-if-let.html"><strong aria-hidden="true">6.3.</strong> Concise Control Flow with if let</a></li></ol></li><li class="expanded "><a href="ch07-00-managing-growing-projects-with-packages-crates-and-modules.html"><strong aria-hidden="true">7.</strong> Managing Growing Projects with Packages, Crates, and Modules</a></li><li><ol class="section"><li class="expanded "><a href="ch07-01-packages-and-crates.html"><strong aria-hidden="true">7.1.</strong> Packages and Crates</a></li><li class="expanded "><a href="ch07-02-defining-modules-to-control-scope-and-privacy.html"><strong aria-hidden="true">7.2.</strong> Defining Modules to Control Scope and Privacy</a></li><li 
class="expanded "><a href="ch07-03-paths-for-referring-to-an-item-in-the-module-tree.html"><strong aria-hidden="true">7.3.</strong> Paths for Referring to an Item in the Module Tree</a></li><li class="expanded "><a href="ch07-04-bringing-paths-into-scope-with-the-use-keyword.html"><strong aria-hidden="true">7.4.</strong> Bringing Paths Into Scope with the use Keyword</a></li><li class="expanded "><a href="ch07-05-separating-modules-into-different-files.html"><strong aria-hidden="true">7.5.</strong> Separating Modules into Different Files</a></li></ol></li><li class="expanded "><a href="ch08-00-common-collections.html"><strong aria-hidden="true">8.</strong> Common Collections</a></li><li><ol class="section"><li class="expanded "><a href="ch08-01-vectors.html"><strong aria-hidden="true">8.1.</strong> Storing Lists of Values with Vectors</a></li><li class="expanded "><a href="ch08-02-strings.html"><strong aria-hidden="true">8.2.</strong> Storing UTF-8 Encoded Text with Strings</a></li><li class="expanded "><a href="ch08-03-hash-maps.html"><strong aria-hidden="true">8.3.</strong> Storing Keys with Associated Values in Hash Maps</a></li></ol></li><li class="expanded "><a href="ch09-00-error-handling.html"><strong aria-hidden="true">9.</strong> Error Handling</a></li><li><ol class="section"><li class="expanded "><a href="ch09-01-unrecoverable-errors-with-panic.html"><strong aria-hidden="true">9.1.</strong> Unrecoverable Errors with panic!</a></li><li class="expanded "><a href="ch09-02-recoverable-errors-with-result.html"><strong aria-hidden="true">9.2.</strong> Recoverable Errors with Result</a></li><li class="expanded "><a href="ch09-03-to-panic-or-not-to-panic.html"><strong aria-hidden="true">9.3.</strong> To panic! 
or Not To panic!</a></li></ol></li><li class="expanded "><a href="ch10-00-generics.html"><strong aria-hidden="true">10.</strong> Generic Types, Traits, and Lifetimes</a></li><li><ol class="section"><li class="expanded "><a href="ch10-01-syntax.html"><strong aria-hidden="true">10.1.</strong> Generic Data Types</a></li><li class="expanded "><a href="ch10-02-traits.html"><strong aria-hidden="true">10.2.</strong> Traits: Defining Shared Behavior</a></li><li class="expanded "><a href="ch10-03-lifetime-syntax.html"><strong aria-hidden="true">10.3.</strong> Validating References with Lifetimes</a></li></ol></li><li class="expanded "><a href="ch11-00-testing.html"><strong aria-hidden="true">11.</strong> Writing Automated Tests</a></li><li><ol class="section"><li class="expanded "><a href="ch11-01-writing-tests.html"><strong aria-hidden="true">11.1.</strong> How to Write Tests</a></li><li class="expanded "><a href="ch11-02-running-tests.html"><strong aria-hidden="true">11.2.</strong> Controlling How Tests Are Run</a></li><li class="expanded "><a href="ch11-03-test-organization.html"><strong aria-hidden="true">11.3.</strong> Test Organization</a></li></ol></li><li class="expanded "><a href="ch12-00-an-io-project.html"><strong aria-hidden="true">12.</strong> An I/O Project: Building a Command Line Program</a></li><li><ol class="section"><li class="expanded "><a href="ch12-01-accepting-command-line-arguments.html"><strong aria-hidden="true">12.1.</strong> Accepting Command Line Arguments</a></li><li class="expanded "><a href="ch12-02-reading-a-file.html"><strong aria-hidden="true">12.2.</strong> Reading a File</a></li><li class="expanded "><a href="ch12-03-improving-error-handling-and-modularity.html"><strong aria-hidden="true">12.3.</strong> Refactoring to Improve Modularity and Error Handling</a></li><li class="expanded "><a href="ch12-04-testing-the-librarys-functionality.html"><strong aria-hidden="true">12.4.</strong> Developing the Library’s Functionality with Test Driven 
Development</a></li><li class="expanded "><a href="ch12-05-working-with-environment-variables.html"><strong aria-hidden="true">12.5.</strong> Working with Environment Variables</a></li><li class="expanded "><a href="ch12-06-writing-to-stderr-instead-of-stdout.html"><strong aria-hidden="true">12.6.</strong> Writing Error Messages to Standard Error Instead of Standard Output</a></li></ol></li><li class="expanded "><a href="ch13-00-functional-features.html"><strong aria-hidden="true">13.</strong> Functional Language Features: Iterators and Closures</a></li><li><ol class="section"><li class="expanded "><a href="ch13-01-closures.html"><strong aria-hidden="true">13.1.</strong> Closures: Anonymous Functions that Can Capture Their Environment</a></li><li class="expanded "><a href="ch13-02-iterators.html"><strong aria-hidden="true">13.2.</strong> Processing a Series of Items with Iterators</a></li><li class="expanded "><a href="ch13-03-improving-our-io-project.html"><strong aria-hidden="true">13.3.</strong> Improving Our I/O Project</a></li><li class="expanded "><a href="ch13-04-performance.html"><strong aria-hidden="true">13.4.</strong> Comparing Performance: Loops vs. 
Iterators</a></li></ol></li><li class="expanded "><a href="ch14-00-more-about-cargo.html"><strong aria-hidden="true">14.</strong> More about Cargo and Crates.io</a></li><li><ol class="section"><li class="expanded "><a href="ch14-01-release-profiles.html"><strong aria-hidden="true">14.1.</strong> Customizing Builds with Release Profiles</a></li><li class="expanded "><a href="ch14-02-publishing-to-crates-io.html"><strong aria-hidden="true">14.2.</strong> Publishing a Crate to Crates.io</a></li><li class="expanded "><a href="ch14-03-cargo-workspaces.html"><strong aria-hidden="true">14.3.</strong> Cargo Workspaces</a></li><li class="expanded "><a href="ch14-04-installing-binaries.html"><strong aria-hidden="true">14.4.</strong> Installing Binaries from Crates.io with cargo install</a></li><li class="expanded "><a href="ch14-05-extending-cargo.html"><strong aria-hidden="true">14.5.</strong> Extending Cargo with Custom Commands</a></li></ol></li><li class="expanded "><a href="ch15-00-smart-pointers.html"><strong aria-hidden="true">15.</strong> Smart Pointers</a></li><li><ol class="section"><li class="expanded "><a href="ch15-01-box.html"><strong aria-hidden="true">15.1.</strong> Using Box&lt;T&gt; to Point to Data on the Heap</a></li><li class="expanded "><a href="ch15-02-deref.html"><strong aria-hidden="true">15.2.</strong> Treating Smart Pointers Like Regular References with the Deref Trait</a></li><li class="expanded "><a href="ch15-03-drop.html"><strong aria-hidden="true">15.3.</strong> Running Code on Cleanup with the Drop Trait</a></li><li class="expanded "><a href="ch15-04-rc.html"><strong aria-hidden="true">15.4.</strong> Rc&lt;T&gt;, the Reference Counted Smart Pointer</a></li><li class="expanded "><a href="ch15-05-interior-mutability.html"><strong aria-hidden="true">15.5.</strong> RefCell&lt;T&gt; and the Interior Mutability Pattern</a></li><li class="expanded "><a href="ch15-06-reference-cycles.html"><strong aria-hidden="true">15.6.</strong> Reference Cycles Can Leak 
Memory</a></li></ol></li><li class="expanded "><a href="ch16-00-concurrency.html"><strong aria-hidden="true">16.</strong> Fearless Concurrency</a></li><li><ol class="section"><li class="expanded "><a href="ch16-01-threads.html"><strong aria-hidden="true">16.1.</strong> Using Threads to Run Code Simultaneously</a></li><li class="expanded "><a href="ch16-02-message-passing.html"><strong aria-hidden="true">16.2.</strong> Using Message Passing to Transfer Data Between Threads</a></li><li class="expanded "><a href="ch16-03-shared-state.html"><strong aria-hidden="true">16.3.</strong> Shared-State Concurrency</a></li><li class="expanded "><a href="ch16-04-extensible-concurrency-sync-and-send.html"><strong aria-hidden="true">16.4.</strong> Extensible Concurrency with the Sync and Send Traits</a></li></ol></li><li class="expanded "><a href="ch17-00-oop.html"><strong aria-hidden="true">17.</strong> Object Oriented Programming Features of Rust</a></li><li><ol class="section"><li class="expanded "><a href="ch17-01-what-is-oo.html"><strong aria-hidden="true">17.1.</strong> Characteristics of Object-Oriented Languages</a></li><li class="expanded "><a href="ch17-02-trait-objects.html"><strong aria-hidden="true">17.2.</strong> Using Trait Objects That Allow for Values of Different Types</a></li><li class="expanded "><a href="ch17-03-oo-design-patterns.html"><strong aria-hidden="true">17.3.</strong> Implementing an Object-Oriented Design Pattern</a></li></ol></li><li class="expanded "><a href="ch18-00-patterns.html"><strong aria-hidden="true">18.</strong> Patterns and Matching</a></li><li><ol class="section"><li class="expanded "><a href="ch18-01-all-the-places-for-patterns.html"><strong aria-hidden="true">18.1.</strong> All the Places Patterns Can Be Used</a></li><li class="expanded "><a href="ch18-02-refutability.html"><strong aria-hidden="true">18.2.</strong> Refutability: Whether a Pattern Might Fail to Match</a></li><li class="expanded "><a 
href="ch18-03-pattern-syntax.html"><strong aria-hidden="true">18.3.</strong> Pattern Syntax</a></li></ol></li><li class="expanded "><a href="ch19-00-advanced-features.html"><strong aria-hidden="true">19.</strong> Advanced Features</a></li><li><ol class="section"><li class="expanded "><a href="ch19-01-unsafe-rust.html"><strong aria-hidden="true">19.1.</strong> Unsafe Rust</a></li><li class="expanded "><a href="ch19-03-advanced-traits.html"><strong aria-hidden="true">19.2.</strong> Advanced Traits</a></li><li class="expanded "><a href="ch19-04-advanced-types.html"><strong aria-hidden="true">19.3.</strong> Advanced Types</a></li><li class="expanded "><a href="ch19-05-advanced-functions-and-closures.html"><strong aria-hidden="true">19.4.</strong> Advanced Functions and Closures</a></li><li class="expanded "><a href="ch19-06-macros.html"><strong aria-hidden="true">19.5.</strong> Macros</a></li></ol></li><li class="expanded "><a href="ch20-00-final-project-a-web-server.html"><strong aria-hidden="true">20.</strong> Final Project: Building a Multithreaded Web Server</a></li><li><ol class="section"><li class="expanded "><a href="ch20-01-single-threaded.html"><strong aria-hidden="true">20.1.</strong> Building a Single-Threaded Web Server</a></li><li class="expanded "><a href="ch20-02-multithreaded.html"><strong aria-hidden="true">20.2.</strong> Turning Our Single-Threaded Server into a Multithreaded Server</a></li><li class="expanded "><a href="ch20-03-graceful-shutdown-and-cleanup.html"><strong aria-hidden="true">20.3.</strong> Graceful Shutdown and Cleanup</a></li></ol></li><li class="expanded "><a href="appendix-00.html"><strong aria-hidden="true">21.</strong> Appendix</a></li><li><ol class="section"><li class="expanded "><a href="appendix-01-keywords.html"><strong aria-hidden="true">21.1.</strong> A - Keywords</a></li><li class="expanded "><a href="appendix-02-operators.html"><strong aria-hidden="true">21.2.</strong> B - Operators and Symbols</a></li><li class="expanded 
"><a href="appendix-03-derivable-traits.html"><strong aria-hidden="true">21.3.</strong> C - Derivable Traits</a></li><li class="expanded "><a href="appendix-04-useful-development-tools.html"><strong aria-hidden="true">21.4.</strong> D - Useful Development Tools</a></li><li class="expanded "><a href="appendix-05-editions.html"><strong aria-hidden="true">21.5.</strong> E - Editions</a></li><li class="expanded "><a href="appendix-06-translation.html"><strong aria-hidden="true">21.6.</strong> F - Translations of the Book</a></li><li class="expanded "><a href="appendix-07-nightly-rust.html"><strong aria-hidden="true">21.7.</strong> G - How Rust is Made and “Nightly Rust”</a></li></ol></li></ol>
|
||
</div>
|
||
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
|
||
</nav>
|
||
|
||
<div id="page-wrapper" class="page-wrapper">
|
||
|
||
<div class="page">
|
||
|
||
<div id="menu-bar" class="menu-bar">
|
||
<div id="menu-bar-sticky-container">
|
||
<div class="left-buttons">
|
||
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
||
<i class="fa fa-bars"></i>
|
||
</button>
|
||
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
||
<i class="fa fa-paint-brush"></i>
|
||
</button>
|
||
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
||
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
||
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
||
</ul>
|
||
|
||
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
||
<i class="fa fa-search"></i>
|
||
</button>
|
||
|
||
</div>
|
||
|
||
<h1 class="menu-title">The Rust Programming Language</h1>
|
||
|
||
<div class="right-buttons">
|
||
<a href="print.html" title="Print this book" aria-label="Print this book">
|
||
<i id="print-button" class="fa fa-print"></i>
|
||
</a>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<div id="search-wrapper" class="hidden">
|
||
<form id="searchbar-outer" class="searchbar-outer">
|
||
<input type="search" name="search" id="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
||
</form>
|
||
<div id="searchresults-outer" class="searchresults-outer hidden">
|
||
<div id="searchresults-header" class="searchresults-header"></div>
|
||
<ul id="searchresults">
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
||
<script type="text/javascript">
    // Bring the ARIA state of the sidebar, its toggle button and its links in
    // line with the initial sidebar state computed earlier on the page.
    (function () {
        var isVisible = sidebar === 'visible';

        document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isVisible);
        document.getElementById('sidebar').setAttribute('aria-hidden', !isVisible);

        // Links inside a hidden sidebar are taken out of the tab order.
        var links = document.querySelectorAll('#sidebar a');
        for (var i = 0; i < links.length; i++) {
            links[i].setAttribute('tabIndex', isVisible ? 0 : -1);
        }
    })();
</script>
|
||
|
||
<div id="content" class="content">
|
||
<main>
|
||
<h1><a class="header" href="#the-rust-programming-language" id="the-rust-programming-language">The Rust Programming Language</a></h1>
|
||
<p><em>by Steve Klabnik and Carol Nichols, with contributions from the Rust Community</em></p>
|
||
<p>This version of the text assumes you’re using Rust 1.37.0 or later with
|
||
<code>edition="2018"</code> in <em>Cargo.toml</em> of all projects to use Rust 2018 Edition
|
||
idioms. See the <a href="ch01-01-installation.html">“Installation” section of Chapter 1</a><!-- ignore -->
|
||
to install or update Rust, and see the new <a href="appendix-05-editions.html">Appendix E</a><!-- ignore
|
||
--> for information on editions.</p>
|
||
<p>The 2018 Edition of the Rust language includes a number of improvements that
|
||
make Rust more ergonomic and easier to learn. This iteration of the book
|
||
contains a number of changes to reflect those improvements:</p>
|
||
<ul>
|
||
<li>Chapter 7, “Managing Growing Projects with Packages, Crates, and Modules,”
|
||
has been mostly rewritten. The module system and the way paths work in the
|
||
2018 Edition were made more consistent.</li>
|
||
<li>Chapter 10 has new sections titled “Traits as Parameters” and “Returning
|
||
Types that Implement Traits” that explain the new <code>impl Trait</code> syntax.</li>
|
||
<li>Chapter 11 has a new section titled “Using <code>Result&lt;T, E&gt;</code> in Tests” that
|
||
shows how to write tests that use the <code>?</code> operator.</li>
|
||
<li>The “Advanced Lifetimes” section in Chapter 19 was removed because compiler
|
||
improvements have made the constructs in that section even rarer.</li>
|
||
<li>The previous Appendix D, “Macros,” has been expanded to include procedural
|
||
macros and was moved to the “Macros” section in Chapter 19.</li>
|
||
<li>Appendix A, “Keywords,” also explains the new raw identifiers feature that
|
||
enables code written in the 2015 Edition and the 2018 Edition to interoperate.</li>
|
||
<li>Appendix D is now titled “Useful Development Tools” and covers recently
|
||
released tools that help you write Rust code.</li>
|
||
<li>We fixed a number of small errors and imprecise wording throughout the book.
|
||
Thank you to the readers who reported them!</li>
|
||
</ul>
|
||
<p>Note that any code in earlier iterations of <em>The Rust Programming Language</em>
|
||
that compiled will continue to compile without <code>edition="2018"</code> in the
|
||
project’s <em>Cargo.toml</em>, even as you update the Rust compiler version you’re
|
||
using. That’s Rust’s backward compatibility guarantees at work!</p>
|
||
<p>The HTML format is available online at
|
||
<a href="https://doc.rust-lang.org/stable/book/">https://doc.rust-lang.org/stable/book/</a>
|
||
and offline with installations of Rust made with <code>rustup</code>; run <code>rustup docs --book</code> to open.</p>
|
||
<p>This text is available in <a href="https://nostarch.com/rust">paperback and ebook format from No Starch
|
||
Press</a>.</p>
|
||
<h1><a class="header" href="#foreword" id="foreword">Foreword</a></h1>
|
||
<p>It wasn’t always so clear, but the Rust programming language is fundamentally
|
||
about <em>empowerment</em>: no matter what kind of code you are writing now, Rust
|
||
empowers you to reach farther, to program with confidence in a wider variety of
|
||
domains than you did before.</p>
|
||
<p>Take, for example, “systems-level” work that deals with low-level details of
|
||
memory management, data representation, and concurrency. Traditionally, this
|
||
realm of programming is seen as arcane, accessible only to a select few who
|
||
have devoted the necessary years learning to avoid its infamous pitfalls. And
|
||
even those who practice it do so with caution, lest their code be open to
|
||
exploits, crashes, or corruption.</p>
|
||
<p>Rust breaks down these barriers by eliminating the old pitfalls and providing a
|
||
friendly, polished set of tools to help you along the way. Programmers who need
|
||
to “dip down” into lower-level control can do so with Rust, without taking on
|
||
the customary risk of crashes or security holes, and without having to learn
|
||
the fine points of a fickle toolchain. Better yet, the language is designed to
|
||
guide you naturally towards reliable code that is efficient in terms of speed
|
||
and memory usage.</p>
|
||
<p>Programmers who are already working with low-level code can use Rust to raise
|
||
their ambitions. For example, introducing parallelism in Rust is a relatively
|
||
low-risk operation: the compiler will catch the classical mistakes for you. And
|
||
you can tackle more aggressive optimizations in your code with the confidence
|
||
that you won’t accidentally introduce crashes or vulnerabilities.</p>
|
||
<p>But Rust isn’t limited to low-level systems programming. It’s expressive and
|
||
ergonomic enough to make CLI apps, web servers, and many other kinds of code
|
||
quite pleasant to write — you’ll find simple examples of both later in the
|
||
book. Working with Rust allows you to build skills that transfer from one
|
||
domain to another; you can learn Rust by writing a web app, then apply those
|
||
same skills to target your Raspberry Pi.</p>
|
||
<p>This book fully embraces the potential of Rust to empower its users. It’s a
|
||
friendly and approachable text intended to help you level up not just your
|
||
knowledge of Rust, but also your reach and confidence as a programmer in
|
||
general. So dive in, get ready to learn—and welcome to the Rust community!</p>
|
||
<p>— Nicholas Matsakis and Aaron Turon</p>
|
||
<h1><a class="header" href="#introduction" id="introduction">Introduction</a></h1>
|
||
<blockquote>
|
||
<p>Note: This edition of the book is the same as <a href="https://nostarch.com/rust">The Rust Programming
|
||
Language</a> available in print and ebook format from <a href="https://nostarch.com/">No Starch
|
||
Press</a>.</p>
|
||
</blockquote>
|
||
<p>Welcome to <em>The Rust Programming Language</em>, an introductory book about Rust.
|
||
The Rust programming language helps you write faster, more reliable software.
|
||
High-level ergonomics and low-level control are often at odds in programming
|
||
language design; Rust challenges that conflict. Through balancing powerful
|
||
technical capacity and a great developer experience, Rust gives you the option
|
||
to control low-level details (such as memory usage) without all the hassle
|
||
traditionally associated with such control.</p>
|
||
<h2><a class="header" href="#who-rust-is-for" id="who-rust-is-for">Who Rust Is For</a></h2>
|
||
<p>Rust is ideal for many people for a variety of reasons. Let’s look at a few of
|
||
the most important groups.</p>
|
||
<h3><a class="header" href="#teams-of-developers" id="teams-of-developers">Teams of Developers</a></h3>
|
||
<p>Rust is proving to be a productive tool for collaborating among large teams of
|
||
developers with varying levels of systems programming knowledge. Low-level code
|
||
is prone to a variety of subtle bugs, which in most other languages can be
|
||
caught only through extensive testing and careful code review by experienced
|
||
developers. In Rust, the compiler plays a gatekeeper role by refusing to
|
||
compile code with these elusive bugs, including concurrency bugs. By working
|
||
alongside the compiler, the team can spend their time focusing on the program’s
|
||
logic rather than chasing down bugs.</p>
|
||
<p>Rust also brings contemporary developer tools to the systems programming world:</p>
|
||
<ul>
|
||
<li>Cargo, the included dependency manager and build tool, makes adding,
|
||
compiling, and managing dependencies painless and consistent across the Rust
|
||
ecosystem.</li>
|
||
<li>Rustfmt ensures a consistent coding style across developers.</li>
|
||
<li>The Rust Language Server powers Integrated Development Environment (IDE)
|
||
integration for code completion and inline error messages.</li>
|
||
</ul>
|
||
<p>By using these and other tools in the Rust ecosystem, developers can be
|
||
productive while writing systems-level code.</p>
|
||
<h3><a class="header" href="#students" id="students">Students</a></h3>
|
||
<p>Rust is for students and those who are interested in learning about systems
|
||
concepts. Using Rust, many people have learned about topics like operating
|
||
systems development. The community is very welcoming and happy to answer
|
||
student questions. Through efforts such as this book, the Rust teams want to
|
||
make systems concepts more accessible to more people, especially those new to
|
||
programming.</p>
|
||
<h3><a class="header" href="#companies" id="companies">Companies</a></h3>
|
||
<p>Hundreds of companies, large and small, use Rust in production for a variety of
|
||
tasks. Those tasks include command line tools, web services, DevOps tooling,
|
||
embedded devices, audio and video analysis and transcoding, cryptocurrencies,
|
||
bioinformatics, search engines, Internet of Things applications, machine
|
||
learning, and even major parts of the Firefox web browser.</p>
|
||
<h3><a class="header" href="#open-source-developers" id="open-source-developers">Open Source Developers</a></h3>
|
||
<p>Rust is for people who want to build the Rust programming language, community,
|
||
developer tools, and libraries. We’d love to have you contribute to the Rust
|
||
language.</p>
|
||
<h3><a class="header" href="#people-who-value-speed-and-stability" id="people-who-value-speed-and-stability">People Who Value Speed and Stability</a></h3>
|
||
<p>Rust is for people who crave speed and stability in a language. By speed, we
|
||
mean the speed of the programs that you can create with Rust and the speed at
|
||
which Rust lets you write them. The Rust compiler’s checks ensure stability
|
||
through feature additions and refactoring. This is in contrast to the brittle
|
||
legacy code in languages without these checks, which developers are often
|
||
afraid to modify. By striving for zero-cost abstractions, higher-level features
|
||
that compile to lower-level code as fast as code written manually, Rust
|
||
endeavors to make safe code be fast code as well.</p>
|
||
<p>The Rust language hopes to support many other users as well; those mentioned
|
||
here are merely some of the biggest stakeholders. Overall, Rust’s greatest
|
||
ambition is to eliminate the trade-offs that programmers have accepted for
|
||
decades by providing safety <em>and</em> productivity, speed <em>and</em> ergonomics. Give
|
||
Rust a try and see if its choices work for you.</p>
|
||
<h2><a class="header" href="#who-this-book-is-for" id="who-this-book-is-for">Who This Book Is For</a></h2>
|
||
<p>This book assumes that you’ve written code in another programming language but
|
||
doesn’t make any assumptions about which one. We’ve tried to make the material
|
||
broadly accessible to those from a wide variety of programming backgrounds. We
|
||
don’t spend a lot of time talking about what programming <em>is</em> or how to think
|
||
about it. If you’re entirely new to programming, you would be better served by
|
||
reading a book that specifically provides an introduction to programming.</p>
|
||
<h2><a class="header" href="#how-to-use-this-book" id="how-to-use-this-book">How to Use This Book</a></h2>
|
||
<p>In general, this book assumes that you’re reading it in sequence from front to
|
||
back. Later chapters build on concepts in earlier chapters, and earlier
|
||
chapters might not delve into details on a topic; we typically revisit the
|
||
topic in a later chapter.</p>
|
||
<p>You’ll find two kinds of chapters in this book: concept chapters and project
|
||
chapters. In concept chapters, you’ll learn about an aspect of Rust. In project
|
||
chapters, we’ll build small programs together, applying what you’ve learned so
|
||
far. Chapters 2, 12, and 20 are project chapters; the rest are concept chapters.</p>
|
||
<p>Chapter 1 explains how to install Rust, how to write a “Hello, world!” program,
|
||
and how to use Cargo, Rust’s package manager and build tool. Chapter 2 is a
|
||
hands-on introduction to the Rust language. Here we cover concepts at a high
|
||
level, and later chapters will provide additional detail. If you want to get
|
||
your hands dirty right away, Chapter 2 is the place for that. At first, you
|
||
might even want to skip Chapter 3, which covers Rust features similar to those
|
||
of other programming languages, and head straight to Chapter 4 to learn about
|
||
Rust’s ownership system. However, if you’re a particularly meticulous learner
|
||
who prefers to learn every detail before moving on to the next, you might want
|
||
to skip Chapter 2 and go straight to Chapter 3, returning to Chapter 2 when
|
||
you’d like to work on a project applying the details you’ve learned.</p>
|
||
<p>Chapter 5 discusses structs and methods, and Chapter 6 covers enums, <code>match</code>
|
||
expressions, and the <code>if let</code> control flow construct. You’ll use structs and
|
||
enums to make custom types in Rust.</p>
|
||
<p>In Chapter 7, you’ll learn about Rust’s module system and about privacy rules
|
||
for organizing your code and its public Application Programming Interface
|
||
(API). Chapter 8 discusses some common collection data structures that the
|
||
standard library provides, such as vectors, strings, and hash maps. Chapter 9
|
||
explores Rust’s error-handling philosophy and techniques.</p>
|
||
<p>Chapter 10 digs into generics, traits, and lifetimes, which give you the power
|
||
to define code that applies to multiple types. Chapter 11 is all about testing,
|
||
which even with Rust’s safety guarantees is necessary to ensure your program’s
|
||
logic is correct. In Chapter 12, we’ll build our own implementation of a subset
|
||
of functionality from the <code>grep</code> command line tool that searches for text
|
||
within files. For this, we’ll use many of the concepts we discussed in the
|
||
previous chapters.</p>
|
||
<p>Chapter 13 explores closures and iterators: features of Rust that come from
|
||
functional programming languages. In Chapter 14, we’ll examine Cargo in more
|
||
depth and talk about best practices for sharing your libraries with others.
|
||
Chapter 15 discusses smart pointers that the standard library provides and the
|
||
traits that enable their functionality.</p>
|
||
<p>In Chapter 16, we’ll walk through different models of concurrent programming
|
||
and talk about how Rust helps you to program in multiple threads fearlessly.
|
||
Chapter 17 looks at how Rust idioms compare to object-oriented programming
|
||
principles you might be familiar with.</p>
|
||
<p>Chapter 18 is a reference on patterns and pattern matching, which are powerful
|
||
ways of expressing ideas throughout Rust programs. Chapter 19 contains a
|
||
smorgasbord of advanced topics of interest, including unsafe Rust, macros, and
|
||
more about lifetimes, traits, types, functions, and closures.</p>
|
||
<p>In Chapter 20, we’ll complete a project in which we’ll implement a low-level
|
||
multithreaded web server!</p>
|
||
<p>Finally, some appendixes contain useful information about the language in a
|
||
more reference-like format. Appendix A covers Rust’s keywords, Appendix B
|
||
covers Rust’s operators and symbols, Appendix C covers derivable traits
|
||
provided by the standard library, Appendix D covers some useful development
|
||
tools, and Appendix E explains Rust editions.</p>
|
||
<p>There is no wrong way to read this book: if you want to skip ahead, go for it!
|
||
You might have to jump back to earlier chapters if you experience any
|
||
confusion. But do whatever works for you.</p>
|
||
<p><span id="ferris"></span></p>
|
||
<p>An important part of the process of learning Rust is learning how to read the
|
||
error messages the compiler displays: these will guide you toward working code.
|
||
As such, we’ll provide many examples that don’t compile along with the error
|
||
message the compiler will show you in each situation. Know that if you enter
|
||
and run a random example, it may not compile! Make sure you read the
|
||
surrounding text to see whether the example you’re trying to run is meant to
|
||
error. Ferris will also help you distinguish code that isn’t meant to work:</p>
|
||
<table><thead><tr><th>Ferris</th><th>Meaning</th></tr></thead><tbody>
|
||
<tr><td><img src="img/ferris/does_not_compile.svg" class="ferris-explain" alt="Ferris icon marking code that does not compile"/></td><td>This code does not compile!</td></tr>
|
||
<tr><td><img src="img/ferris/panics.svg" class="ferris-explain" alt="Ferris icon marking code that panics"/></td><td>This code panics!</td></tr>
|
||
<tr><td><img src="img/ferris/unsafe.svg" class="ferris-explain" alt="Ferris icon marking a code block that contains unsafe code"/></td><td>This code block contains unsafe code.</td></tr>
|
||
<tr><td><img src="img/ferris/not_desired_behavior.svg" class="ferris-explain" alt="Ferris icon marking code that does not produce the desired behavior"/></td><td>This code does not produce the desired behavior.</td></tr>
|
||
</tbody></table>
|
||
<p>In most situations, we’ll lead you to the correct version of any code that
|
||
doesn’t compile.</p>
|
||
<h2><a class="header" href="#source-code" id="source-code">Source Code</a></h2>
|
||
<p>The source files from which this book is generated can be found on
|
||
<a href="https://github.com/rust-lang/book/tree/master/src">GitHub</a>.</p>
|
||
<h1><a class="header" href="#getting-started" id="getting-started">Getting Started</a></h1>
|
||
<p>Let’s start your Rust journey! There’s a lot to learn, but every journey starts
|
||
somewhere. In this chapter, we’ll discuss:</p>
|
||
<ul>
|
||
<li>Installing Rust on Linux, macOS, and Windows</li>
|
||
<li>Writing a program that prints <code>Hello, world!</code></li>
|
||
<li>Using <code>cargo</code>, Rust’s package manager and build system</li>
|
||
</ul>
|
||
<h2><a class="header" href="#installation" id="installation">Installation</a></h2>
|
||
<p>The first step is to install Rust. We’ll download Rust through <code>rustup</code>, a
|
||
command line tool for managing Rust versions and associated tools. You’ll need
|
||
an internet connection for the download.</p>
|
||
<blockquote>
|
||
<p>Note: If you prefer not to use <code>rustup</code> for some reason, please see <a href="https://www.rust-lang.org/tools/install">the Rust
|
||
installation page</a> for other options.</p>
|
||
</blockquote>
|
||
<p>The following steps install the latest stable version of the Rust compiler.
|
||
Rust’s stability guarantees ensure that all the examples in the book that
|
||
compile will continue to compile with newer Rust versions. The output might
|
||
differ slightly between versions, because Rust often improves error messages
|
||
and warnings. In other words, any newer, stable version of Rust you install
|
||
using these steps should work as expected with the content of this book.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#command-line-notation" id="command-line-notation">Command Line Notation</a></h3>
|
||
<p>In this chapter and throughout the book, we’ll show some commands used in the
|
||
terminal. Lines that you should enter in a terminal all start with <code>$</code>. You
|
||
don’t need to type in the <code>$</code> character; it indicates the start of each
|
||
command. Lines that don’t start with <code>$</code> typically show the output of the
|
||
previous command. Additionally, PowerShell-specific examples will use <code>&gt;</code>
|
||
rather than <code>$</code>.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#installing-rustup-on-linux-or-macos" id="installing-rustup-on-linux-or-macos">Installing <code>rustup</code> on Linux or macOS</a></h3>
|
||
<p>If you’re using Linux or macOS, open a terminal and enter the following command:</p>
|
||
<pre><code class="language-text">$ curl https://sh.rustup.rs -sSf | sh
|
||
</code></pre>
|
||
<p>The command downloads a script and starts the installation of the <code>rustup</code>
|
||
tool, which installs the latest stable version of Rust. You might be prompted
|
||
for your password. If the install is successful, the following line will appear:</p>
|
||
<pre><code class="language-text">Rust is installed now. Great!
|
||
</code></pre>
|
||
<p>If you prefer, feel free to download the script and inspect it before running
|
||
it.</p>
|
||
<p>The installation script automatically adds Rust to your system PATH after your
|
||
next login. If you want to start using Rust right away instead of restarting
|
||
your terminal, run the following command in your shell to add Rust to your
|
||
system PATH manually:</p>
|
||
<pre><code class="language-text">$ source $HOME/.cargo/env
|
||
</code></pre>
|
||
<p>Alternatively, you can add the following line to your <em>~/.bash_profile</em>:</p>
|
||
<pre><code class="language-text">export PATH="$HOME/.cargo/bin:$PATH"
|
||
</code></pre>
|
||
<p>Additionally, you’ll need a linker of some kind. It’s likely one is already
|
||
installed, but when you try to compile a Rust program and get errors indicating
|
||
that a linker could not execute, that means a linker isn’t installed on your
|
||
system and you’ll need to install one manually. C compilers usually come with
|
||
the correct linker. Check your platform’s documentation for how to install a C
|
||
compiler. Also, some common Rust packages depend on C code and will need a C
|
||
compiler. Therefore, it might be worth installing one now.</p>
|
||
<h3><a class="header" href="#installing-rustup-on-windows" id="installing-rustup-on-windows">Installing <code>rustup</code> on Windows</a></h3>
|
||
<p>On Windows, go to <a href="https://www.rust-lang.org/tools/install">https://www.rust-lang.org/tools/install</a> and follow
|
||
the instructions for installing Rust. At some point in the installation, you’ll
|
||
receive a message explaining that you’ll also need the C++ build tools for
|
||
Visual Studio 2013 or later. The easiest way to acquire the build tools is to
|
||
install <a href="https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2019">Build Tools for Visual Studio 2019</a>. The tools are in
|
||
the Other Tools and Frameworks section.</p>
|
||
<p>The rest of this book uses commands that work in both <em>cmd.exe</em> and PowerShell.
|
||
If there are specific differences, we’ll explain which to use.</p>
|
||
<h3><a class="header" href="#updating-and-uninstalling" id="updating-and-uninstalling">Updating and Uninstalling</a></h3>
|
||
<p>After you’ve installed Rust via <code>rustup</code>, updating to the latest version is
|
||
easy. From your shell, run the following update script:</p>
|
||
<pre><code class="language-text">$ rustup update
|
||
</code></pre>
|
||
<p>To uninstall Rust and <code>rustup</code>, run the following uninstall script from your
|
||
shell:</p>
|
||
<pre><code class="language-text">$ rustup self uninstall
|
||
</code></pre>
|
||
<h3><a class="header" href="#troubleshooting" id="troubleshooting">Troubleshooting</a></h3>
|
||
<p>To check whether you have Rust installed correctly, open a shell and enter this
|
||
line:</p>
|
||
<pre><code class="language-text">$ rustc --version
|
||
</code></pre>
|
||
<p>You should see the version number, commit hash, and commit date for the latest
|
||
stable version that has been released in the following format:</p>
|
||
<pre><code class="language-text">rustc x.y.z (abcabcabc yyyy-mm-dd)
|
||
</code></pre>
|
||
<p>If you see this information, you have installed Rust successfully! If you don’t
|
||
see this information and you’re on Windows, check that Rust is in your <code>%PATH%</code>
|
||
system variable. If that’s all correct and Rust still isn’t working, there are
|
||
a number of places you can get help. The easiest is the #beginners channel on
|
||
<a href="https://discord.gg/rust-lang">the official Rust Discord</a>. There, you can chat with other Rustaceans
|
||
(a silly nickname we call ourselves) who can help you out. Other great
|
||
resources include <a href="https://users.rust-lang.org/">the Users forum</a> and <a href="https://stackoverflow.com/questions/tagged/rust">Stack Overflow</a>.</p>
|
||
<h3><a class="header" href="#local-documentation" id="local-documentation">Local Documentation</a></h3>
|
||
<p>The installation of Rust also includes a copy of the documentation locally, so
|
||
you can read it offline. Run <code>rustup doc</code> to open the local documentation in
|
||
your browser.</p>
|
||
<p>Any time a type or function is provided by the standard library and you’re not
|
||
sure what it does or how to use it, use the application programming interface
|
||
(API) documentation to find out!</p>
|
||
<h2><a class="header" href="#hello-world" id="hello-world">Hello, World!</a></h2>
|
||
<p>Now that you’ve installed Rust, let’s write your first Rust program. It’s
|
||
traditional when learning a new language to write a little program that prints
|
||
the text <code>Hello, world!</code> to the screen, so we’ll do the same here!</p>
|
||
<blockquote>
|
||
<p>Note: This book assumes basic familiarity with the command line. Rust makes
|
||
no specific demands about your editing or tooling or where your code lives, so
|
||
if you prefer to use an integrated development environment (IDE) instead of
|
||
the command line, feel free to use your favorite IDE. Many IDEs now have some
|
||
degree of Rust support; check the IDE’s documentation for details. Recently,
|
||
the Rust team has been focusing on enabling great IDE support, and progress
|
||
has been made rapidly on that front!</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#creating-a-project-directory" id="creating-a-project-directory">Creating a Project Directory</a></h3>
|
||
<p>You’ll start by making a directory to store your Rust code. It doesn’t matter
|
||
to Rust where your code lives, but for the exercises and projects in this book,
|
||
we suggest making a <em>projects</em> directory in your home directory and keeping all
|
||
your projects there.</p>
|
||
<p>Open a terminal and enter the following commands to make a <em>projects</em> directory
|
||
and a directory for the “Hello, world!” project within the <em>projects</em> directory.</p>
|
||
<p>For Linux, macOS, and PowerShell on Windows, enter this:</p>
|
||
<pre><code class="language-text">$ mkdir ~/projects
|
||
$ cd ~/projects
|
||
$ mkdir hello_world
|
||
$ cd hello_world
|
||
</code></pre>
|
||
<p>For Windows CMD, enter this:</p>
|
||
<pre><code class="language-cmd">&gt; mkdir "%USERPROFILE%\projects"
|
||
&gt; cd /d "%USERPROFILE%\projects"
|
||
&gt; mkdir hello_world
|
||
&gt; cd hello_world
|
||
</code></pre>
|
||
<h3><a class="header" href="#writing-and-running-a-rust-program" id="writing-and-running-a-rust-program">Writing and Running a Rust Program</a></h3>
|
||
<p>Next, make a new source file and call it <em>main.rs</em>. Rust files always end with
|
||
the <em>.rs</em> extension. If you’re using more than one word in your filename, use
|
||
an underscore to separate them. For example, use <em>hello_world.rs</em> rather than
|
||
<em>helloworld.rs</em>.</p>
|
||
<p>Now open the <em>main.rs</em> file you just created and enter the code in Listing 1-1.</p>
|
||
<p><span class="filename">Filename: main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
    println!("Hello, world!");
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 1-1: A program that prints <code>Hello, world!</code></span></p>
|
||
<p>Save the file and go back to your terminal window. On Linux or macOS, enter
|
||
the following commands to compile and run the file:</p>
|
||
<pre><code class="language-text">$ rustc main.rs
|
||
$ ./main
|
||
Hello, world!
|
||
</code></pre>
|
||
<p>On Windows, enter the command <code>.\main.exe</code> instead of <code>./main</code>:</p>
|
||
<pre><code class="language-powershell">&gt; rustc main.rs
|
||
&gt; .\main.exe
|
||
Hello, world!
|
||
</code></pre>
|
||
<p>Regardless of your operating system, the string <code>Hello, world!</code> should print to
|
||
the terminal. If you don’t see this output, refer back to the
|
||
<a href="ch01-01-installation.html#troubleshooting">“Troubleshooting”</a><!-- ignore --> part of the Installation
|
||
section for ways to get help.</p>
|
||
<p>If <code>Hello, world!</code> did print, congratulations! You’ve officially written a Rust
|
||
program. That makes you a Rust programmer—welcome!</p>
|
||
<h3><a class="header" href="#anatomy-of-a-rust-program" id="anatomy-of-a-rust-program">Anatomy of a Rust Program</a></h3>
|
||
<p>Let’s review in detail what just happened in your “Hello, world!” program.
|
||
Here’s the first piece of the puzzle:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
|
||
}
|
||
</code></pre></pre>
|
||
<p>These lines define a function in Rust. The <code>main</code> function is special: it is
|
||
always the first code that runs in every executable Rust program. The first
|
||
line declares a function named <code>main</code> that has no parameters and returns
|
||
nothing. If there were parameters, they would go inside the parentheses, <code>()</code>.</p>
|
||
<p>Also, note that the function body is wrapped in curly brackets, <code>{}</code>. Rust
|
||
requires these around all function bodies. It’s good style to place the opening
|
||
curly bracket on the same line as the function declaration, adding one space in
|
||
between.</p>
|
||
<p>At the time of this writing, an automatic formatter tool called <code>rustfmt</code> is
|
||
under development. If you want to stick to a standard style across Rust
|
||
projects, <code>rustfmt</code> will format your code in a particular style. The Rust team
|
||
plans to eventually include this tool with the standard Rust distribution, like
|
||
<code>rustc</code>. So depending on when you read this book, it might already be installed
|
||
on your computer! Check the online documentation for more details.</p>
|
||
<p>Inside the <code>main</code> function is the following code:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>    println!("Hello, world!");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This line does all the work in this little program: it prints text to the
|
||
screen. There are four important details to notice here. First, Rust style is
|
||
to indent with four spaces, not a tab.</p>
|
||
<p>Second, <code>println!</code> calls a Rust macro. If it called a function instead, it
|
||
would be entered as <code>println</code> (without the <code>!</code>). We’ll discuss Rust macros in
|
||
more detail in Chapter 19. For now, you just need to know that using a <code>!</code>
|
||
means that you’re calling a macro instead of a normal function.</p>
|
||
<p>Third, you see the <code>"Hello, world!"</code> string. We pass this string as an argument
|
||
to <code>println!</code>, and the string is printed to the screen.</p>
|
||
<p>Fourth, we end the line with a semicolon (<code>;</code>), which indicates that this
|
||
expression is over and the next one is ready to begin. Most lines of Rust code
|
||
end with a semicolon.</p>
|
||
<h3><a class="header" href="#compiling-and-running-are-separate-steps" id="compiling-and-running-are-separate-steps">Compiling and Running Are Separate Steps</a></h3>
|
||
<p>You’ve just run a newly created program, so let’s examine each step in the
|
||
process.</p>
|
||
<p>Before running a Rust program, you must compile it using the Rust compiler by
|
||
entering the <code>rustc</code> command and passing it the name of your source file, like
|
||
this:</p>
|
||
<pre><code class="language-text">$ rustc main.rs
|
||
</code></pre>
|
||
<p>If you have a C or C++ background, you’ll notice that this is similar to <code>gcc</code>
|
||
or <code>clang</code>. After compiling successfully, Rust outputs a binary executable.</p>
|
||
<p>On Linux, macOS, and PowerShell on Windows, you can see the executable by
|
||
entering the <code>ls</code> command in your shell. On Linux and macOS, you’ll see two
|
||
files. With PowerShell on Windows, you’ll see the same three files that you
|
||
would see using CMD.</p>
|
||
<pre><code class="language-text">$ ls
|
||
main main.rs
|
||
</code></pre>
|
||
<p>With CMD on Windows, you would enter the following:</p>
|
||
<pre><code class="language-cmd">&gt; dir /B %= the /B option says to only show the file names =%
|
||
main.exe
|
||
main.pdb
|
||
main.rs
|
||
</code></pre>
|
||
<p>This shows the source code file with the <em>.rs</em> extension, the executable file
|
||
(<em>main.exe</em> on Windows, but <em>main</em> on all other platforms), and, when using
|
||
Windows, a file containing debugging information with the <em>.pdb</em> extension.
|
||
From here, you run the <em>main</em> or <em>main.exe</em> file, like this:</p>
|
||
<pre><code class="language-text">$ ./main # or .\main.exe on Windows
|
||
</code></pre>
|
||
<p>If <em>main.rs</em> was your “Hello, world!” program, this line would print <code>Hello, world!</code> to your terminal.</p>
|
||
<p>If you’re more familiar with a dynamic language, such as Ruby, Python, or
|
||
JavaScript, you might not be used to compiling and running a program as
|
||
separate steps. Rust is an <em>ahead-of-time compiled</em> language, meaning you can
|
||
compile a program and give the executable to someone else, and they can run it
|
||
even without having Rust installed. If you give someone a <em>.rb</em>, <em>.py</em>, or
|
||
<em>.js</em> file, they need to have a Ruby, Python, or JavaScript implementation
|
||
installed (respectively). But in those languages, you only need one command to
|
||
compile and run your program. Everything is a trade-off in language design.</p>
|
||
<p>Just compiling with <code>rustc</code> is fine for simple programs, but as your project
|
||
grows, you’ll want to manage all the options and make it easy to share your
|
||
code. Next, we’ll introduce you to the Cargo tool, which will help you write
|
||
real-world Rust programs.</p>
|
||
<h2><a class="header" href="#hello-cargo" id="hello-cargo">Hello, Cargo!</a></h2>
|
||
<p>Cargo is Rust’s build system and package manager. Most Rustaceans use this tool
|
||
to manage their Rust projects because Cargo handles a lot of tasks for you,
|
||
such as building your code, downloading the libraries your code depends on, and
|
||
building those libraries. (We call libraries your code needs <em>dependencies</em>.)</p>
|
||
<p>The simplest Rust programs, like the one we’ve written so far, don’t have any
|
||
dependencies. So if we had built the “Hello, world!” project with Cargo, it
|
||
would only use the part of Cargo that handles building your code. As you write
|
||
more complex Rust programs, you’ll add dependencies, and if you start a project
|
||
using Cargo, adding dependencies will be much easier to do.</p>
|
||
<p>Because the vast majority of Rust projects use Cargo, the rest of this book
|
||
assumes that you’re using Cargo too. Cargo comes installed with Rust if you
|
||
used the official installers discussed in the
|
||
<a href="ch01-01-installation.html#installation">“Installation”</a><!-- ignore --> section. If you installed Rust
|
||
through some other means, check whether Cargo is installed by entering the
|
||
following into your terminal:</p>
|
||
<pre><code class="language-text">$ cargo --version
|
||
</code></pre>
|
||
<p>If you see a version number, you have it! If you see an error, such as <code>command not found</code>, look at the documentation for your method of installation to
|
||
determine how to install Cargo separately.</p>
|
||
<h3><a class="header" href="#creating-a-project-with-cargo" id="creating-a-project-with-cargo">Creating a Project with Cargo</a></h3>
|
||
<p>Let’s create a new project using Cargo and look at how it differs from our
|
||
original “Hello, world!” project. Navigate back to your <em>projects</em> directory (or
|
||
wherever you decided to store your code). Then, on any operating system, run
|
||
the following:</p>
|
||
<pre><code class="language-text">$ cargo new hello_cargo
|
||
$ cd hello_cargo
|
||
</code></pre>
|
||
<p>The first command creates a new directory called <em>hello_cargo</em>. We’ve named
|
||
our project <em>hello_cargo</em>, and Cargo creates its files in a directory of the
|
||
same name.</p>
|
||
<p>Go into the <em>hello_cargo</em> directory and list the files. You’ll see that Cargo
|
||
has generated two files and one directory for us: a <em>Cargo.toml</em> file and a
|
||
<em>src</em> directory with a <em>main.rs</em> file inside. It has also initialized a new Git
|
||
repository along with a <em>.gitignore</em> file.</p>
|
||
<blockquote>
|
||
<p>Note: Git is a common version control system. You can change <code>cargo new</code> to
|
||
use a different version control system or no version control system by using
|
||
the <code>--vcs</code> flag. Run <code>cargo new --help</code> to see the available options.</p>
|
||
</blockquote>
|
||
<p>Open <em>Cargo.toml</em> in your text editor of choice. It should look similar to the
|
||
code in Listing 1-2.</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[package]
|
||
name = "hello_cargo"
|
||
version = "0.1.0"
|
||
authors = ["Your Name &lt;you@example.com&gt;"]
|
||
edition = "2018"
|
||
|
||
[dependencies]
|
||
</code></pre>
|
||
<p><span class="caption">Listing 1-2: Contents of <em>Cargo.toml</em> generated by <code>cargo new</code></span></p>
|
||
<p>This file is in the <a href="https://github.com/toml-lang/toml"><em>TOML</em></a><!-- ignore --> (<em>Tom’s Obvious, Minimal
|
||
Language</em>) format, which is Cargo’s configuration format.</p>
|
||
<p>The first line, <code>[package]</code>, is a section heading that indicates that the
|
||
following statements are configuring a package. As we add more information to
|
||
this file, we’ll add other sections.</p>
|
||
<p>The next four lines set the configuration information Cargo needs to compile
|
||
your program: the name, the version, who wrote it, and the edition of Rust to
|
||
use. Cargo gets your name and email information from your environment, so if
|
||
that information is not correct, fix the information now and then save the
|
||
file. We’ll talk about the <code>edition</code> key in Appendix E.</p>
|
||
<p>The last line, <code>[dependencies]</code>, is the start of a section for you to list any
|
||
of your project’s dependencies. In Rust, packages of code are referred to as
|
||
<em>crates</em>. We won’t need any other crates for this project, but we will in the
|
||
first project in Chapter 2, so we’ll use this dependencies section then.</p>
|
||
<p>Now open <em>src/main.rs</em> and take a look:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
    println!("Hello, world!");
|
||
}
|
||
</code></pre></pre>
|
||
<p>Cargo has generated a “Hello, world!” program for you, just like the one we
|
||
wrote in Listing 1-1! So far, the differences between our previous project and
|
||
the project Cargo generates are that Cargo placed the code in the <em>src</em>
|
||
directory, and we have a <em>Cargo.toml</em> configuration file in the top directory.</p>
|
||
<p>Cargo expects your source files to live inside the <em>src</em> directory. The
|
||
top-level project directory is just for README files, license information,
|
||
configuration files, and anything else not related to your code. Using Cargo
|
||
helps you organize your projects. There’s a place for everything, and
|
||
everything is in its place.</p>
|
||
<p>If you started a project that doesn’t use Cargo, as we did with the “Hello,
|
||
world!” project, you can convert it to a project that does use Cargo. Move the
|
||
project code into the <em>src</em> directory and create an appropriate <em>Cargo.toml</em>
|
||
file.</p>
|
||
<h3><a class="header" href="#building-and-running-a-cargo-project" id="building-and-running-a-cargo-project">Building and Running a Cargo Project</a></h3>
|
||
<p>Now let’s look at what’s different when we build and run the “Hello, world!”
|
||
program with Cargo! From your <em>hello_cargo</em> directory, build your project by
|
||
entering the following command:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling hello_cargo v0.1.0 (file:///projects/hello_cargo)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 2.85 secs
|
||
</code></pre>
|
||
<p>This command creates an executable file in <em>target/debug/hello_cargo</em> (or
|
||
<em>target\debug\hello_cargo.exe</em> on Windows) rather than in your current
|
||
directory. You can run the executable with this command:</p>
|
||
<pre><code class="language-text">$ ./target/debug/hello_cargo # or .\target\debug\hello_cargo.exe on Windows
|
||
Hello, world!
|
||
</code></pre>
|
||
<p>If all goes well, <code>Hello, world!</code> should print to the terminal. Running <code>cargo build</code> for the first time also causes Cargo to create a new file at the top
|
||
level: <em>Cargo.lock</em>. This file keeps track of the exact versions of
|
||
dependencies in your project. This project doesn’t have dependencies, so the
|
||
file is a bit sparse. You won’t ever need to change this file manually; Cargo
|
||
manages its contents for you.</p>
|
||
<p>We just built a project with <code>cargo build</code> and ran it with
|
||
<code>./target/debug/hello_cargo</code>, but we can also use <code>cargo run</code> to compile the
|
||
code and then run the resulting executable all in one command:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/hello_cargo`
|
||
Hello, world!
|
||
</code></pre>
|
||
<p>Notice that this time we didn’t see output indicating that Cargo was compiling
|
||
<code>hello_cargo</code>. Cargo figured out that the files hadn’t changed, so it just ran
|
||
the binary. If you had modified your source code, Cargo would have rebuilt the
|
||
project before running it, and you would have seen this output:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling hello_cargo v0.1.0 (file:///projects/hello_cargo)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.33 secs
|
||
Running `target/debug/hello_cargo`
|
||
Hello, world!
|
||
</code></pre>
|
||
<p>Cargo also provides a command called <code>cargo check</code>. This command quickly checks
|
||
your code to make sure it compiles but doesn’t produce an executable:</p>
|
||
<pre><code class="language-text">$ cargo check
|
||
Checking hello_cargo v0.1.0 (file:///projects/hello_cargo)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.32 secs
|
||
</code></pre>
|
||
<p>Why would you not want an executable? Often, <code>cargo check</code> is much faster than
|
||
<code>cargo build</code>, because it skips the step of producing an executable. If you’re
|
||
continually checking your work while writing the code, using <code>cargo check</code> will
|
||
speed up the process! As such, many Rustaceans run <code>cargo check</code> periodically
|
||
as they write their program to make sure it compiles. Then they run <code>cargo build</code> when they’re ready to use the executable.</p>
|
||
<p>Let’s recap what we’ve learned so far about Cargo:</p>
|
||
<ul>
|
||
<li>We can build a project using <code>cargo build</code> or <code>cargo check</code>.</li>
|
||
<li>We can build and run a project in one step using <code>cargo run</code>.</li>
|
||
<li>Instead of saving the result of the build in the same directory as our code,
|
||
Cargo stores it in the <em>target/debug</em> directory.</li>
|
||
</ul>
|
||
<p>An additional advantage of using Cargo is that the commands are the same no
|
||
matter which operating system you’re working on. So, at this point, we’ll no
|
||
longer provide specific instructions for Linux and macOS versus Windows.</p>
|
||
<h3><a class="header" href="#building-for-release" id="building-for-release">Building for Release</a></h3>
|
||
<p>When your project is finally ready for release, you can use <code>cargo build --release</code> to compile it with optimizations. This command will create an
|
||
executable in <em>target/release</em> instead of <em>target/debug</em>. The optimizations
|
||
make your Rust code run faster, but turning them on lengthens the time it takes
|
||
for your program to compile. This is why there are two different profiles: one
|
||
for development, when you want to rebuild quickly and often, and another for
|
||
building the final program you’ll give to a user that won’t be rebuilt
|
||
repeatedly and that will run as fast as possible. If you’re benchmarking your
|
||
code’s running time, be sure to run <code>cargo build --release</code> and benchmark with
|
||
the executable in <em>target/release</em>.</p>
|
||
<h3><a class="header" href="#cargo-as-convention" id="cargo-as-convention">Cargo as Convention</a></h3>
|
||
<p>With simple projects, Cargo doesn’t provide a lot of value over just using
|
||
<code>rustc</code>, but it will prove its worth as your programs become more intricate.
|
||
With complex projects composed of multiple crates, it’s much easier to let
|
||
Cargo coordinate the build.</p>
|
||
<p>Even though the <code>hello_cargo</code> project is simple, it now uses much of the real
|
||
tooling you’ll use in the rest of your Rust career. In fact, to work on any
|
||
existing projects, you can use the following commands to check out the code
|
||
using Git, change to that project’s directory, and build:</p>
|
||
<pre><code class="language-text">$ git clone someurl.com/someproject
|
||
$ cd someproject
|
||
$ cargo build
|
||
</code></pre>
|
||
<p>For more information about Cargo, check out <a href="https://doc.rust-lang.org/cargo/">its documentation</a>.</p>
|
||
<h2><a class="header" href="#summary" id="summary">Summary</a></h2>
|
||
<p>You’re already off to a great start on your Rust journey! In this chapter,
|
||
you’ve learned how to:</p>
|
||
<ul>
|
||
<li>Install the latest stable version of Rust using <code>rustup</code></li>
|
||
<li>Update to a newer Rust version</li>
|
||
<li>Open locally installed documentation</li>
|
||
<li>Write and run a “Hello, world!” program using <code>rustc</code> directly</li>
|
||
<li>Create and run a new project using the conventions of Cargo</li>
|
||
</ul>
|
||
<p>This is a great time to build a more substantial program to get used to reading
|
||
and writing Rust code. So, in Chapter 2, we’ll build a guessing game program.
|
||
If you would rather start by learning how common programming concepts work in
|
||
Rust, see Chapter 3 and then return to Chapter 2.</p>
|
||
<h1><a class="header" href="#programming-a-guessing-game" id="programming-a-guessing-game">Programming a Guessing Game</a></h1>
|
||
<p>Let’s jump into Rust by working through a hands-on project together! This
|
||
chapter introduces you to a few common Rust concepts by showing you how to use
|
||
them in a real program. You’ll learn about <code>let</code>, <code>match</code>, methods, associated
|
||
functions, using external crates, and more! The following chapters will explore
|
||
these ideas in more detail. In this chapter, you’ll practice the fundamentals.</p>
|
||
<p>We’ll implement a classic beginner programming problem: a guessing game. Here’s
|
||
how it works: the program will generate a random integer between 1 and 100. It
|
||
will then prompt the player to enter a guess. After a guess is entered, the
|
||
program will indicate whether the guess is too low or too high. If the guess is
|
||
correct, the game will print a congratulatory message and exit.</p>
|
||
<h2><a class="header" href="#setting-up-a-new-project" id="setting-up-a-new-project">Setting Up a New Project</a></h2>
|
||
<p>To set up a new project, go to the <em>projects</em> directory that you created in
|
||
Chapter 1 and make a new project using Cargo, like so:</p>
|
||
<pre><code class="language-text">$ cargo new guessing_game
|
||
$ cd guessing_game
|
||
</code></pre>
|
||
<p>The first command, <code>cargo new</code>, takes the name of the project (<code>guessing_game</code>)
|
||
as the first argument. The second command changes to the new project’s
|
||
directory.</p>
|
||
<p>Look at the generated <em>Cargo.toml</em> file:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[package]
name = "guessing_game"
version = "0.1.0"
authors = ["Your Name &lt;you@example.com&gt;"]
edition = "2018"

[dependencies]
</code></pre>
|
||
<p>If the author information that Cargo obtained from your environment is not
|
||
correct, fix that in the file and save it again.</p>
|
||
<p>As you saw in Chapter 1, <code>cargo new</code> generates a “Hello, world!” program for
|
||
you. Check out the <em>src/main.rs</em> file:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
    println!("Hello, world!");
}
</code></pre></pre>
|
||
<p>Now let’s compile this “Hello, world!” program and run it in the same step
|
||
using the <code>cargo run</code> command:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 1.50 secs
|
||
Running `target/debug/guessing_game`
|
||
Hello, world!
|
||
</code></pre>
|
||
<p>The <code>run</code> command comes in handy when you need to rapidly iterate on a project,
|
||
as we’ll do in this game, quickly testing each iteration before moving on to
|
||
the next one.</p>
|
||
<p>Reopen the <em>src/main.rs</em> file. You’ll be writing all the code in this file.</p>
|
||
<h2><a class="header" href="#processing-a-guess" id="processing-a-guess">Processing a Guess</a></h2>
|
||
<p>The first part of the guessing game program will ask for user input, process
|
||
that input, and check that the input is in the expected form. To start, we’ll
|
||
allow the player to input a guess. Enter the code in Listing 2-1 into
|
||
<em>src/main.rs</em>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::io;

fn main() {
    println!("Guess the number!");

    println!("Please input your guess.");

    let mut guess = String::new();

    io::stdin().read_line(&amp;mut guess)
        .expect("Failed to read line");

    println!("You guessed: {}", guess);
}
</code></pre>
|
||
<p><span class="caption">Listing 2-1: Code that gets a guess from the user and
|
||
prints it</span></p>
|
||
<p>This code contains a lot of information, so let’s go over it line by line. To
|
||
obtain user input and then print the result as output, we need to bring the
|
||
<code>io</code> (input/output) library into scope. The <code>io</code> library comes from the
|
||
standard library (which is known as <code>std</code>):</p>
|
||
<pre><code class="language-rust ignore">use std::io;
|
||
</code></pre>
|
||
<p>By default, Rust brings only a few types into the scope of every program in
|
||
<a href="../std/prelude/index.html">the <em>prelude</em></a><!-- ignore -->. If a type you want to use isn’t in the
|
||
prelude, you have to bring that type into scope explicitly with a <code>use</code>
|
||
statement. Using the <code>std::io</code> library provides you with a number of useful
|
||
features, including the ability to accept user input.</p>
|
||
<p>As you saw in Chapter 1, the <code>main</code> function is the entry point into the
|
||
program:</p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
</code></pre>
|
||
<p>The <code>fn</code> syntax declares a new function, the parentheses, <code>()</code>, indicate there
|
||
are no parameters, and the curly bracket, <code>{</code>, starts the body of the function.</p>
|
||
<p>As you also learned in Chapter 1, <code>println!</code> is a macro that prints a string to
|
||
the screen:</p>
|
||
<pre><code class="language-rust ignore">println!("Guess the number!");
|
||
|
||
println!("Please input your guess.");
|
||
</code></pre>
|
||
<p>This code is printing a prompt stating what the game is and requesting input
|
||
from the user.</p>
|
||
<h3><a class="header" href="#storing-values-with-variables" id="storing-values-with-variables">Storing Values with Variables</a></h3>
|
||
<p>Next, we’ll create a place to store the user input, like this:</p>
|
||
<pre><code class="language-rust ignore">let mut guess = String::new();
|
||
</code></pre>
|
||
<p>Now the program is getting interesting! There’s a lot going on in this little
|
||
line. Notice that this is a <code>let</code> statement, which is used to create a
|
||
<em>variable</em>. Here’s another example:</p>
|
||
<pre><code class="language-rust ignore">let foo = bar;
|
||
</code></pre>
|
||
<p>This line creates a new variable named <code>foo</code> and binds it to the value of the
|
||
<code>bar</code> variable. In Rust, variables are immutable by default. We’ll be
|
||
discussing this concept in detail in the <a href="ch03-01-variables-and-mutability.html#variables-and-mutability">“Variables and
|
||
Mutability”</a><!-- ignore --> section in Chapter 3.
|
||
The following example shows how to use <code>mut</code> before the variable name to make
|
||
a variable mutable:</p>
|
||
<pre><code class="language-rust ignore">let foo = 5; // immutable
|
||
let mut bar = 5; // mutable
|
||
</code></pre>
|
||
<blockquote>
|
||
<p>Note: The <code>//</code> syntax starts a comment that continues until the end of the
|
||
line. Rust ignores everything in comments, which are discussed in more detail
|
||
in Chapter 3.</p>
|
||
</blockquote>
|
||
<p>Let’s return to the guessing game program. You now know that <code>let mut guess</code>
|
||
will introduce a mutable variable named <code>guess</code>. On the other side of the equal
|
||
sign (<code>=</code>) is the value that <code>guess</code> is bound to, which is the result of
|
||
calling <code>String::new</code>, a function that returns a new instance of a <code>String</code>.
|
||
<a href="../std/string/struct.String.html"><code>String</code></a><!-- ignore --> is a string type provided by the standard
|
||
library that is a growable, UTF-8 encoded bit of text.</p>
|
||
<p>The <code>::</code> syntax in the <code>::new</code> line indicates that <code>new</code> is an <em>associated
|
||
function</em> of the <code>String</code> type. An associated function is implemented on a type,
|
||
in this case <code>String</code>, rather than on a particular instance of a <code>String</code>. Some
|
||
languages call this a <em>static method</em>.</p>
|
||
<p>This <code>new</code> function creates a new, empty string. You’ll find a <code>new</code> function
|
||
on many types, because it’s a common name for a function that makes a new value
|
||
of some kind.</p>
|
||
<p>To summarize, the <code>let mut guess = String::new();</code> line has created a mutable
|
||
variable that is currently bound to a new, empty instance of a <code>String</code>. Whew!</p>
|
||
<p>Recall that we included the input/output functionality from the standard
|
||
library with <code>use std::io;</code> on the first line of the program. Now we’ll call
|
||
the <code>stdin</code> function from the <code>io</code> module:</p>
|
||
<pre><code class="language-rust ignore">io::stdin().read_line(&amp;mut guess)
    .expect("Failed to read line");
</code></pre>
|
||
<p>If we hadn’t put the <code>use std::io</code> line at the beginning of the program, we
|
||
could have written this function call as <code>std::io::stdin</code>. The <code>stdin</code> function
|
||
returns an instance of <a href="../std/io/struct.Stdin.html"><code>std::io::Stdin</code></a><!-- ignore -->, which is a
|
||
type that represents a handle to the standard input for your terminal.</p>
|
||
<p>The next part of the code, <code>.read_line(&mut guess)</code>, calls the
|
||
<a href="../std/io/struct.Stdin.html#method.read_line"><code>read_line</code></a><!-- ignore --> method on the standard input handle to
|
||
get input from the user. We’re also passing one argument to <code>read_line</code>: <code>&mut guess</code>.</p>
|
||
<p>The job of <code>read_line</code> is to take whatever the user types into standard input
|
||
and place that into a string, so it takes that string as an argument. The
|
||
string argument needs to be mutable so the method can change the string’s
|
||
content by adding the user input.</p>
|
||
<p>The <code>&</code> indicates that this argument is a <em>reference</em>, which gives you a way to
|
||
let multiple parts of your code access one piece of data without needing to
|
||
copy that data into memory multiple times. References are a complex feature,
|
||
and one of Rust’s major advantages is how safe and easy it is to use
|
||
references. You don’t need to know a lot of those details to finish this
|
||
program. For now, all you need to know is that like variables, references are
|
||
immutable by default. Hence, you need to write <code>&mut guess</code> rather than
|
||
<code>&guess</code> to make it mutable. (Chapter 4 will explain references more
|
||
thoroughly.)</p>
|
||
<h3><a class="header" href="#handling-potential-failure-with-the-result-type" id="handling-potential-failure-with-the-result-type">Handling Potential Failure with the <code>Result</code> Type</a></h3>
|
||
<p>We’re not quite done with this line of code. Although what we’ve discussed so
|
||
far is a single line of text, it’s only the first part of the single logical
|
||
line of code. The second part is this method:</p>
|
||
<pre><code class="language-rust ignore">.expect("Failed to read line");
|
||
</code></pre>
|
||
<p>When you call a method with the <code>.foo()</code> syntax, it’s often wise to introduce a
|
||
newline and other whitespace to help break up long lines. We could have
|
||
written this code as:</p>
|
||
<pre><code class="language-rust ignore">io::stdin().read_line(&mut guess).expect("Failed to read line");
|
||
</code></pre>
|
||
<p>However, one long line is difficult to read, so it’s best to divide it: two
|
||
lines for two method calls. Now let’s discuss what this line does.</p>
|
||
<p>As mentioned earlier, <code>read_line</code> puts what the user types into the string
|
||
we’re passing it, but it also returns a value—in this case, an
|
||
<a href="../std/io/type.Result.html"><code>io::Result</code></a><!-- ignore -->. Rust has a number of types named
|
||
<code>Result</code> in its standard library: a generic <a href="../std/result/enum.Result.html"><code>Result</code></a><!-- ignore -->
|
||
as well as specific versions for submodules, such as <code>io::Result</code>.</p>
|
||
<p>The <code>Result</code> types are <a href="ch06-00-enums.html"><em>enumerations</em></a><!-- ignore -->, often referred
|
||
to as <em>enums</em>. An enumeration is a type that can have a fixed set of values,
|
||
and those values are called the enum’s <em>variants</em>. Chapter 6 will cover enums
|
||
in more detail.</p>
|
||
<p>For <code>Result</code>, the variants are <code>Ok</code> or <code>Err</code>. The <code>Ok</code> variant indicates the
|
||
operation was successful, and inside <code>Ok</code> is the successfully generated value.
|
||
The <code>Err</code> variant means the operation failed, and <code>Err</code> contains information
|
||
about how or why the operation failed.</p>
|
||
<p>The purpose of these <code>Result</code> types is to encode error-handling information.
|
||
Values of the <code>Result</code> type, like values of any type, have methods defined on
|
||
them. An instance of <code>io::Result</code> has an <a href="../std/result/enum.Result.html#method.expect"><code>expect</code> method</a><!-- ignore
|
||
--> that you can call. If this instance of <code>io::Result</code> is an <code>Err</code> value,
|
||
<code>expect</code> will cause the program to crash and display the message that you
|
||
passed as an argument to <code>expect</code>. If the <code>read_line</code> method returns an <code>Err</code>,
|
||
it would likely be the result of an error coming from the underlying operating
|
||
system. If this instance of <code>io::Result</code> is an <code>Ok</code> value, <code>expect</code> will take
|
||
the return value that <code>Ok</code> is holding and return just that value to you so you
|
||
can use it. In this case, that value is the number of bytes in what the user
|
||
entered into standard input.</p>
|
||
<p>If you don’t call <code>expect</code>, the program will compile, but you’ll get a warning:</p>
|
||
<pre><code class="language-text">$ cargo build
   Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
warning: unused `std::result::Result` which must be used
  --&gt; src/main.rs:10:5
   |
10 |     io::stdin().read_line(&amp;mut guess);
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |
   = note: #[warn(unused_must_use)] on by default
</code></pre>
|
||
<p>Rust warns that you haven’t used the <code>Result</code> value returned from <code>read_line</code>,
|
||
indicating that the program hasn’t handled a possible error.</p>
|
||
<p>The right way to suppress the warning is to actually write error handling, but
|
||
because you just want to crash this program when a problem occurs, you can use
|
||
<code>expect</code>. You’ll learn about recovering from errors in Chapter 9.</p>
|
||
<h3><a class="header" href="#printing-values-with-println-placeholders" id="printing-values-with-println-placeholders">Printing Values with <code>println!</code> Placeholders</a></h3>
|
||
<p>Aside from the closing curly brackets, there’s only one more line to discuss in
|
||
the code added so far, which is the following:</p>
|
||
<pre><code class="language-rust ignore">println!("You guessed: {}", guess);
|
||
</code></pre>
|
||
<p>This line prints the string we saved the user’s input in. The set of curly
|
||
brackets, <code>{}</code>, is a placeholder: think of <code>{}</code> as little crab pincers that
|
||
hold a value in place. You can print more than one value using curly brackets:
|
||
the first set of curly brackets holds the first value listed after the format
|
||
string, the second set holds the second value, and so on. Printing multiple
|
||
values in one call to <code>println!</code> would look like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
<span class="boring">#![allow(unused_variables)]
</span><span class="boring">fn main() {
</span>let x = 5;
let y = 10;

println!("x = {} and y = {}", x, y);
<span class="boring">}
</span></code></pre></pre>
|
||
<p>This code would print <code>x = 5 and y = 10</code>.</p>
|
||
<h3><a class="header" href="#testing-the-first-part" id="testing-the-first-part">Testing the First Part</a></h3>
|
||
<p>Let’s test the first part of the guessing game. Run it using <code>cargo run</code>:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 2.53 secs
|
||
Running `target/debug/guessing_game`
|
||
Guess the number!
|
||
Please input your guess.
|
||
6
|
||
You guessed: 6
|
||
</code></pre>
|
||
<p>At this point, the first part of the game is done: we’re getting input from the
|
||
keyboard and then printing it.</p>
|
||
<h2><a class="header" href="#generating-a-secret-number" id="generating-a-secret-number">Generating a Secret Number</a></h2>
|
||
<p>Next, we need to generate a secret number that the user will try to guess. The
|
||
secret number should be different every time so the game is fun to play more
|
||
than once. Let’s use a random number between 1 and 100 so the game isn’t too
|
||
difficult. Rust doesn’t yet include random number functionality in its standard
|
||
library. However, the Rust team does provide a <a href="https://crates.io/crates/rand"><code>rand</code> crate</a>.</p>
|
||
<h3><a class="header" href="#using-a-crate-to-get-more-functionality" id="using-a-crate-to-get-more-functionality">Using a Crate to Get More Functionality</a></h3>
|
||
<p>Remember that a crate is a collection of Rust source code files.
|
||
The project we’ve been building is a <em>binary crate</em>, which is an executable.
|
||
The <code>rand</code> crate is a <em>library crate</em>, which contains code intended to be
|
||
used in other programs.</p>
|
||
<p>Cargo’s use of external crates is where it really shines. Before we can write
|
||
code that uses <code>rand</code>, we need to modify the <em>Cargo.toml</em> file to include the
|
||
<code>rand</code> crate as a dependency. Open that file now and add the following line to
|
||
the bottom beneath the <code>[dependencies]</code> section header that Cargo created for
|
||
you:</p>
|
||
<!-- When updating the version of `rand` used, also update the version of
|
||
`rand` used in these files so they all match:
|
||
* ch07-04-bringing-paths-into-scope-with-the-use-keyword.md
|
||
* ch14-03-cargo-workspaces.md
|
||
-->
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[dependencies]
|
||
rand = "0.5.5"
|
||
</code></pre>
|
||
<p>In the <em>Cargo.toml</em> file, everything that follows a header is part of a section
that continues until another section starts. The <code>[dependencies]</code> section is
where you tell Cargo which external crates your project depends on and which
versions of those crates you require. In this case, we’ll specify the <code>rand</code>
crate with the semantic version specifier <code>0.5.5</code>. Cargo understands <a href="https://semver.org">Semantic
Versioning</a><!-- ignore --> (sometimes called <em>SemVer</em>), which is a
standard for writing version numbers. The number <code>0.5.5</code> is actually shorthand
for <code>^0.5.5</code>, which means “any version that has a public API compatible with
version 0.5.5.”</p>
|
||
<p>Now, without changing any of the code, let’s build the project, as shown in
|
||
Listing 2-2.</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Updating crates.io index
|
||
Downloaded rand v0.5.5
|
||
Downloaded libc v0.2.62
|
||
Downloaded rand_core v0.2.2
|
||
Downloaded rand_core v0.3.1
|
||
Downloaded rand_core v0.4.2
|
||
Compiling rand_core v0.4.2
|
||
Compiling libc v0.2.62
|
||
Compiling rand_core v0.3.1
|
||
Compiling rand_core v0.2.2
|
||
Compiling rand v0.5.5
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 2.53 s
|
||
</code></pre>
|
||
<p><span class="caption">Listing 2-2: The output from running <code>cargo build</code> after
|
||
adding the rand crate as a dependency</span></p>
|
||
<p>You may see different version numbers (but they will all be compatible with
|
||
the code, thanks to SemVer!), and the lines may be in a different order.</p>
|
||
<p>Now that we have an external dependency, Cargo fetches the latest versions of
|
||
everything from the <em>registry</em>, which is a copy of data from
|
||
<a href="https://crates.io/">Crates.io</a>. Crates.io is where people in the Rust ecosystem post
|
||
their open source Rust projects for others to use.</p>
|
||
<p>After updating the registry, Cargo checks the <code>[dependencies]</code> section and
|
||
downloads any crates you don’t have yet. In this case, although we only listed
|
||
<code>rand</code> as a dependency, Cargo also grabbed <code>libc</code> and <code>rand_core</code>, because <code>rand</code>
|
||
depends on those to work. After downloading the crates, Rust compiles them and
|
||
then compiles the project with the dependencies available.</p>
|
||
<p>If you immediately run <code>cargo build</code> again without making any changes, you
|
||
won’t get any output aside from the <code>Finished</code> line. Cargo knows it has already
|
||
downloaded and compiled the dependencies, and you haven’t changed anything
|
||
about them in your <em>Cargo.toml</em> file. Cargo also knows that you haven’t changed
|
||
anything about your code, so it doesn’t recompile that either. With nothing to
|
||
do, it simply exits.</p>
|
||
<p>If you open up the <em>src/main.rs</em> file, make a trivial change, and then save it
|
||
and build again, you’ll only see two lines of output:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 2.53s
|
||
</code></pre>
|
||
<p>These lines show Cargo only updates the build with your tiny change to the
|
||
<em>src/main.rs</em> file. Your dependencies haven’t changed, so Cargo knows it can
|
||
reuse what it has already downloaded and compiled for those. It just rebuilds
|
||
your part of the code.</p>
|
||
<h4><a class="header" href="#ensuring-reproducible-builds-with-the-cargolock-file" id="ensuring-reproducible-builds-with-the-cargolock-file">Ensuring Reproducible Builds with the <em>Cargo.lock</em> File</a></h4>
|
||
<p>Cargo has a mechanism that ensures you can rebuild the same artifact every time
|
||
you or anyone else builds your code: Cargo will use only the versions of the
|
||
dependencies you specified until you indicate otherwise. For example, what
|
||
happens if next week version 0.5.6 of the <code>rand</code> crate comes out and
|
||
contains an important bug fix but also contains a regression that will break
|
||
your code?</p>
|
||
<p>The answer to this problem is the <em>Cargo.lock</em> file, which was created the
|
||
first time you ran <code>cargo build</code> and is now in your <em>guessing_game</em> directory.
|
||
When you build a project for the first time, Cargo figures out all the
|
||
versions of the dependencies that fit the criteria and then writes them to
|
||
the <em>Cargo.lock</em> file. When you build your project in the future, Cargo will
|
||
see that the <em>Cargo.lock</em> file exists and use the versions specified there
|
||
rather than doing all the work of figuring out versions again. This lets you
|
||
have a reproducible build automatically. In other words, your project will
|
||
remain at <code>0.5.5</code> until you explicitly upgrade, thanks to the <em>Cargo.lock</em>
|
||
file.</p>
|
||
<h4><a class="header" href="#updating-a-crate-to-get-a-new-version" id="updating-a-crate-to-get-a-new-version">Updating a Crate to Get a New Version</a></h4>
|
||
<p>When you <em>do</em> want to update a crate, Cargo provides another command, <code>update</code>,
|
||
which will ignore the <em>Cargo.lock</em> file and figure out all the latest versions
|
||
that fit your specifications in <em>Cargo.toml</em>. If that works, Cargo will write
|
||
those versions to the <em>Cargo.lock</em> file.</p>
|
||
<p>But by default, Cargo will only look for versions greater than <code>0.5.5</code> and less
|
||
than <code>0.6.0</code>. If the <code>rand</code> crate has released two new versions, <code>0.5.6</code> and
|
||
<code>0.6.0</code>, you would see the following if you ran <code>cargo update</code>:</p>
|
||
<pre><code class="language-text">$ cargo update
|
||
Updating crates.io index
|
||
Updating rand v0.5.5 -> v0.5.6
|
||
</code></pre>
|
||
<p>At this point, you would also notice a change in your <em>Cargo.lock</em> file noting
|
||
that the version of the <code>rand</code> crate you are now using is <code>0.5.6</code>.</p>
|
||
<p>If you wanted to use <code>rand</code> version <code>0.6.0</code> or any version in the <code>0.6.x</code>
|
||
series, you’d have to update the <em>Cargo.toml</em> file to look like this instead:</p>
|
||
<pre><code class="language-toml">[dependencies]
|
||
rand = "0.6.0"
|
||
</code></pre>
|
||
<p>The next time you run <code>cargo build</code>, Cargo will update the registry of crates
|
||
available and reevaluate your <code>rand</code> requirements according to the new version
|
||
you have specified.</p>
|
||
<p>There’s a lot more to say about <a href="https://doc.rust-lang.org/cargo/">Cargo</a><!-- ignore --> and <a href="https://doc.rust-lang.org/cargo/reference/publishing.html">its
ecosystem</a><!-- ignore -->, which we’ll discuss in Chapter 14, but
for now, that’s all you need to know. Cargo makes it very easy to reuse
libraries, so Rustaceans are able to write smaller projects that are assembled
from a number of packages.</p>
|
||
<h3><a class="header" href="#generating-a-random-number" id="generating-a-random-number">Generating a Random Number</a></h3>
|
||
<p>Now that you’ve added the <code>rand</code> crate to <em>Cargo.toml</em>, let’s start using
|
||
<code>rand</code>. The next step is to update <em>src/main.rs</em>, as shown in Listing 2-3.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::io;
use rand::Rng;

fn main() {
    println!("Guess the number!");

    let secret_number = rand::thread_rng().gen_range(1, 101);

    println!("The secret number is: {}", secret_number);

    println!("Please input your guess.");

    let mut guess = String::new();

    io::stdin().read_line(&amp;mut guess)
        .expect("Failed to read line");

    println!("You guessed: {}", guess);
}
</code></pre>
|
||
<p><span class="caption">Listing 2-3: Adding code to generate a random
|
||
number</span></p>
|
||
<p>First, we add a <code>use</code> line: <code>use rand::Rng</code>. The <code>Rng</code> trait defines
|
||
methods that random number generators implement, and this trait must be in
|
||
scope for us to use those methods. Chapter 10 will cover traits in detail.</p>
|
||
<p>Next, we’re adding two lines in the middle. The <code>rand::thread_rng</code> function
|
||
will give us the particular random number generator that we’re going to use:
|
||
one that is local to the current thread of execution and seeded by the
|
||
operating system. Then we call the <code>gen_range</code> method on the random number
|
||
generator. This method is defined by the <code>Rng</code> trait that we brought into
|
||
scope with the <code>use rand::Rng</code> statement. The <code>gen_range</code> method takes two
|
||
numbers as arguments and generates a random number between them. It’s inclusive
|
||
on the lower bound but exclusive on the upper bound, so we need to specify <code>1</code>
|
||
and <code>101</code> to request a number between 1 and 100.</p>
|
||
<blockquote>
|
||
<p>Note: You won’t just know which traits to use and which methods and functions
|
||
to call from a crate. Instructions for using a crate are in each crate’s
|
||
documentation. Another neat feature of Cargo is that you can run the <code>cargo doc --open</code> command, which will build documentation provided by all of your
|
||
dependencies locally and open it in your browser. If you’re interested in
|
||
other functionality in the <code>rand</code> crate, for example, run <code>cargo doc --open</code>
|
||
and click <code>rand</code> in the sidebar on the left.</p>
|
||
</blockquote>
|
||
<p>The second line that we added to the middle of the code prints the secret
|
||
number. This is useful while we’re developing the program to be able to test
|
||
it, but we’ll delete it from the final version. It’s not much of a game if the
|
||
program prints the answer as soon as it starts!</p>
|
||
<p>Try running the program a few times:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 2.53 secs
|
||
Running `target/debug/guessing_game`
|
||
Guess the number!
|
||
The secret number is: 7
|
||
Please input your guess.
|
||
4
|
||
You guessed: 4
|
||
$ cargo run
|
||
Running `target/debug/guessing_game`
|
||
Guess the number!
|
||
The secret number is: 83
|
||
Please input your guess.
|
||
5
|
||
You guessed: 5
|
||
</code></pre>
|
||
<p>You should get different random numbers, and they should all be numbers between
|
||
1 and 100. Great job!</p>
|
||
<h2><a class="header" href="#comparing-the-guess-to-the-secret-number" id="comparing-the-guess-to-the-secret-number">Comparing the Guess to the Secret Number</a></h2>
|
||
<p>Now that we have user input and a random number, we can compare them. That step
|
||
is shown in Listing 2-4. Note that this code won’t compile quite yet, as we
|
||
will explain.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::io;
|
||
use std::cmp::Ordering;
|
||
use rand::Rng;
|
||
|
||
fn main() {
|
||
|
||
// --snip--
|
||
|
||
println!("You guessed: {}", guess);
|
||
|
||
match guess.cmp(&secret_number) {
|
||
Ordering::Less => println!("Too small!"),
|
||
Ordering::Greater => println!("Too big!"),
|
||
Ordering::Equal => println!("You win!"),
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 2-4: Handling the possible return values of
|
||
comparing two numbers</span></p>
|
||
<p>The first new bit here is another <code>use</code> statement, bringing a type called
|
||
<code>std::cmp::Ordering</code> into scope from the standard library. Like <code>Result</code>,
|
||
<code>Ordering</code> is another enum, but the variants for <code>Ordering</code> are <code>Less</code>,
|
||
<code>Greater</code>, and <code>Equal</code>. These are the three outcomes that are possible when you
|
||
compare two values.</p>
|
||
<p>Then we add five new lines at the bottom that use the <code>Ordering</code> type. The
|
||
<code>cmp</code> method compares two values and can be called on anything that can be
|
||
compared. It takes a reference to whatever you want to compare with: here it’s
|
||
comparing the <code>guess</code> to the <code>secret_number</code>. Then it returns a variant of the
|
||
<code>Ordering</code> enum we brought into scope with the <code>use</code> statement. We use a
|
||
<a href="ch06-02-match.html"><code>match</code></a><!-- ignore --> expression to decide what to do next based on
|
||
which variant of <code>Ordering</code> was returned from the call to <code>cmp</code> with the values
|
||
in <code>guess</code> and <code>secret_number</code>.</p>
|
||
<p>A <code>match</code> expression is made up of <em>arms</em>. An arm consists of a <em>pattern</em> and
|
||
the code that should be run if the value given to the beginning of the <code>match</code>
|
||
expression fits that arm’s pattern. Rust takes the value given to <code>match</code> and
|
||
looks through each arm’s pattern in turn. The <code>match</code> construct and patterns
|
||
are powerful features in Rust that let you express a variety of situations your
|
||
code might encounter and make sure that you handle them all. These features
|
||
will be covered in detail in Chapter 6 and Chapter 18, respectively.</p>
|
||
<p>Let’s walk through an example of what would happen with the <code>match</code> expression
|
||
used here. Say that the user has guessed 50 and the randomly generated secret
|
||
number this time is 38. When the code compares 50 to 38, the <code>cmp</code> method will
|
||
return <code>Ordering::Greater</code>, because 50 is greater than 38. The <code>match</code>
|
||
expression gets the <code>Ordering::Greater</code> value and starts checking each arm’s
|
||
pattern. It looks at the first arm’s pattern, <code>Ordering::Less</code>, and sees that
|
||
the value <code>Ordering::Greater</code> does not match <code>Ordering::Less</code>, so it ignores
|
||
the code in that arm and moves to the next arm. The next arm’s pattern,
|
||
<code>Ordering::Greater</code>, <em>does</em> match <code>Ordering::Greater</code>! The associated code in
|
||
that arm will execute and print <code>Too big!</code> to the screen. The <code>match</code>
|
||
expression ends because it has no need to look at the last arm in this scenario.</p>
|
||
<p>However, the code in Listing 2-4 won’t compile yet. Let’s try it:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
error[E0308]: mismatched types
|
||
--> src/main.rs:23:21
|
||
|
|
||
23 | match guess.cmp(&secret_number) {
|
||
| ^^^^^^^^^^^^^^ expected struct `std::string::String`, found integer
|
||
|
|
||
= note: expected type `&std::string::String`
|
||
= note: found type `&{integer}`
|
||
|
||
error: aborting due to previous error
|
||
Could not compile `guessing_game`.
|
||
</code></pre>
|
||
<p>The core of the error states that there are <em>mismatched types</em>. Rust has a
|
||
strong, static type system. However, it also has type inference. When we wrote
|
||
<code>let mut guess = String::new()</code>, Rust was able to infer that <code>guess</code> should be
|
||
a <code>String</code> and didn’t make us write the type. The <code>secret_number</code>, on the other
|
||
hand, is a number type. A few number types can have a value between 1 and 100:
|
||
<code>i32</code>, a 32-bit number; <code>u32</code>, an unsigned 32-bit number; <code>i64</code>, a 64-bit
|
||
number; as well as others. Rust defaults to an <code>i32</code>, which is the type of
|
||
<code>secret_number</code> unless you add type information elsewhere that would cause Rust
|
||
to infer a different numerical type. The reason for the error is that Rust
|
||
cannot compare a string and a number type.</p>
|
||
<p>Ultimately, we want to convert the <code>String</code> the program reads as input into a
|
||
real number type so we can compare it numerically to the secret number. We can
|
||
do that by adding the following two lines to the <code>main</code> function body:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">// --snip--
|
||
|
||
let mut guess = String::new();
|
||
|
||
io::stdin().read_line(&mut guess)
|
||
.expect("Failed to read line");
|
||
|
||
let guess: u32 = guess.trim().parse()
|
||
.expect("Please type a number!");
|
||
|
||
println!("You guessed: {}", guess);
|
||
|
||
match guess.cmp(&secret_number) {
|
||
Ordering::Less => println!("Too small!"),
|
||
Ordering::Greater => println!("Too big!"),
|
||
Ordering::Equal => println!("You win!"),
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>The two new lines are:</p>
|
||
<pre><code class="language-rust ignore">let guess: u32 = guess.trim().parse()
|
||
.expect("Please type a number!");
|
||
</code></pre>
|
||
<p>We create a variable named <code>guess</code>. But wait, doesn’t the program already have
|
||
a variable named <code>guess</code>? It does, but Rust allows us to <em>shadow</em> the previous
|
||
value of <code>guess</code> with a new one. This feature is often used in situations in
|
||
which you want to convert a value from one type to another type. Shadowing lets
|
||
us reuse the <code>guess</code> variable name rather than forcing us to create two unique
|
||
variables, such as <code>guess_str</code> and <code>guess</code>. (Chapter 3 covers
|
||
shadowing in more detail.)</p>
|
||
<p>We bind <code>guess</code> to the expression <code>guess.trim().parse()</code>. The <code>guess</code> in the
|
||
expression refers to the original <code>guess</code> that was a <code>String</code> with the input in
|
||
it. The <code>trim</code> method on a <code>String</code> instance will eliminate any whitespace at
|
||
the beginning and end. Although a <code>u32</code> can be parsed only from numerical characters,
|
||
the user must press <span class="keystroke">enter</span> to satisfy
|
||
<code>read_line</code>. When the user presses <span class="keystroke">enter</span>, a
|
||
newline character is added to the string. For example, if the user types <span
|
||
class="keystroke">5</span> and presses <span class="keystroke">enter</span>,
|
||
<code>guess</code> looks like this: <code>5\n</code>. The <code>\n</code> represents “newline,” the result of
|
||
pressing <span class="keystroke">enter</span>. The <code>trim</code> method eliminates
|
||
<code>\n</code>, resulting in just <code>5</code>.</p>
|
||
<p>The <a href="../std/primitive.str.html#method.parse"><code>parse</code> method on strings</a><!-- ignore --> parses a string into some
|
||
kind of number. Because this method can parse a variety of number types, we
|
||
need to tell Rust the exact number type we want by using <code>let guess: u32</code>. The
|
||
colon (<code>:</code>) after <code>guess</code> tells Rust we’ll annotate the variable’s type. Rust
|
||
has a few built-in number types; the <code>u32</code> seen here is an unsigned, 32-bit
|
||
integer. It’s a good default choice for a small positive number. You’ll learn
|
||
about other number types in Chapter 3. Additionally, the <code>u32</code> annotation in
|
||
this example program and the comparison with <code>secret_number</code> means that Rust
|
||
will infer that <code>secret_number</code> should be a <code>u32</code> as well. So now the
|
||
comparison will be between two values of the same type!</p>
|
||
<p>The call to <code>parse</code> could easily cause an error. If, for example, the string
|
||
contained <code>A👍%</code>, there would be no way to convert that to a number. Because it
|
||
might fail, the <code>parse</code> method returns a <code>Result</code> type, much as the <code>read_line</code>
|
||
method does (discussed earlier in <a href="ch02-00-guessing-game-tutorial.html#handling-potential-failure-with-the-result-type">“Handling Potential Failure with the
|
||
<code>Result</code> Type”</a><!-- ignore
|
||
-->). We’ll treat this <code>Result</code> the same way by using the <code>expect</code> method
|
||
again. If <code>parse</code> returns an <code>Err</code> <code>Result</code> variant because it couldn’t create
|
||
a number from the string, the <code>expect</code> call will crash the game and print the
|
||
message we give it. If <code>parse</code> can successfully convert the string to a number,
|
||
it will return the <code>Ok</code> variant of <code>Result</code>, and <code>expect</code> will return the
|
||
number that we want from the <code>Ok</code> value.</p>
|
||
<p>Let’s run the program now!</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.43 secs
|
||
Running `target/debug/guessing_game`
|
||
Guess the number!
|
||
The secret number is: 58
|
||
Please input your guess.
|
||
  76
|
||
You guessed: 76
|
||
Too big!
|
||
</code></pre>
|
||
<p>Nice! Even though spaces were added before the guess, the program still figured
|
||
out that the user guessed 76. Run the program a few times to verify the
|
||
different behavior with different kinds of input: guess the number correctly,
|
||
guess a number that is too high, and guess a number that is too low.</p>
|
||
<p>We have most of the game working now, but the user can make only one guess.
|
||
Let’s change that by adding a loop!</p>
|
||
<h2><a class="header" href="#allowing-multiple-guesses-with-looping" id="allowing-multiple-guesses-with-looping">Allowing Multiple Guesses with Looping</a></h2>
|
||
<p>The <code>loop</code> keyword creates an infinite loop. We’ll add that now to give users
|
||
more chances at guessing the number:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">// --snip--
|
||
|
||
println!("The secret number is: {}", secret_number);
|
||
|
||
loop {
|
||
println!("Please input your guess.");
|
||
|
||
// --snip--
|
||
|
||
match guess.cmp(&secret_number) {
|
||
Ordering::Less => println!("Too small!"),
|
||
Ordering::Greater => println!("Too big!"),
|
||
Ordering::Equal => println!("You win!"),
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>As you can see, we’ve moved everything into a loop from the guess input prompt
|
||
onward. Be sure to indent the lines inside the loop another four spaces each
|
||
and run the program again. Notice that there is a new problem because the
|
||
program is doing exactly what we told it to do: ask for another guess forever!
|
||
It doesn’t seem like the user can quit!</p>
|
||
<p>The user could always interrupt the program by using the keyboard shortcut <span
|
||
class="keystroke">ctrl-c</span>. But there’s another way to escape this
|
||
insatiable monster, as mentioned in the <code>parse</code> discussion in <a href="ch02-00-guessing-game-tutorial.html#comparing-the-guess-to-the-secret-number">“Comparing the
|
||
Guess to the Secret Number”</a><!--
|
||
ignore -->: if the user enters a non-number answer, the program will crash. The
|
||
user can take advantage of that in order to quit, as shown here:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 1.50 secs
|
||
Running `target/debug/guessing_game`
|
||
Guess the number!
|
||
The secret number is: 59
|
||
Please input your guess.
|
||
45
|
||
You guessed: 45
|
||
Too small!
|
||
Please input your guess.
|
||
60
|
||
You guessed: 60
|
||
Too big!
|
||
Please input your guess.
|
||
59
|
||
You guessed: 59
|
||
You win!
|
||
Please input your guess.
|
||
quit
|
||
thread 'main' panicked at 'Please type a number!: ParseIntError { kind: InvalidDigit }', src/libcore/result.rs:785
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
error: Process didn't exit successfully: `target/debug/guessing_game` (exit code: 101)
|
||
</code></pre>
|
||
<p>Typing <code>quit</code> actually quits the game, but so will any other non-number input.
|
||
However, this is suboptimal to say the least. We want the game to automatically
|
||
stop when the correct number is guessed.</p>
|
||
<h3><a class="header" href="#quitting-after-a-correct-guess" id="quitting-after-a-correct-guess">Quitting After a Correct Guess</a></h3>
|
||
<p>Let’s program the game to quit when the user wins by adding a <code>break</code> statement:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">// --snip--
|
||
|
||
match guess.cmp(&secret_number) {
|
||
Ordering::Less => println!("Too small!"),
|
||
Ordering::Greater => println!("Too big!"),
|
||
Ordering::Equal => {
|
||
println!("You win!");
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>Adding the <code>break</code> line after <code>You win!</code> makes the program exit the loop when
|
||
the user guesses the secret number correctly. Exiting the loop also means
|
||
exiting the program, because the loop is the last part of <code>main</code>.</p>
|
||
<h3><a class="header" href="#handling-invalid-input" id="handling-invalid-input">Handling Invalid Input</a></h3>
|
||
<p>To further refine the game’s behavior, rather than crashing the program when
|
||
the user inputs a non-number, let’s make the game ignore a non-number so the
|
||
user can continue guessing. We can do that by altering the line where <code>guess</code>
|
||
is converted from a <code>String</code> to a <code>u32</code>, as shown in Listing 2-5.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">// --snip--
|
||
|
||
io::stdin().read_line(&mut guess)
|
||
.expect("Failed to read line");
|
||
|
||
let guess: u32 = match guess.trim().parse() {
|
||
Ok(num) => num,
|
||
Err(_) => continue,
|
||
};
|
||
|
||
println!("You guessed: {}", guess);
|
||
|
||
// --snip--
|
||
</code></pre>
|
||
<p><span class="caption">Listing 2-5: Ignoring a non-number guess and asking for
|
||
another guess instead of crashing the program</span></p>
|
||
<p>Switching from an <code>expect</code> call to a <code>match</code> expression is how you generally
|
||
move from crashing on an error to handling the error. Remember that <code>parse</code>
|
||
returns a <code>Result</code> type and <code>Result</code> is an enum that has the variants <code>Ok</code> and
|
||
<code>Err</code>. We’re using a <code>match</code> expression here, as we did with the <code>Ordering</code>
|
||
result of the <code>cmp</code> method.</p>
|
||
<p>If <code>parse</code> is able to successfully turn the string into a number, it will
|
||
return an <code>Ok</code> value that contains the resulting number. That <code>Ok</code> value will
|
||
match the first arm’s pattern, and the <code>match</code> expression will just return the
|
||
<code>num</code> value that <code>parse</code> produced and put inside the <code>Ok</code> value. That number
|
||
will end up right where we want it in the new <code>guess</code> variable we’re creating.</p>
|
||
<p>If <code>parse</code> is <em>not</em> able to turn the string into a number, it will return an
|
||
<code>Err</code> value that contains more information about the error. The <code>Err</code> value
|
||
does not match the <code>Ok(num)</code> pattern in the first <code>match</code> arm, but it does
|
||
match the <code>Err(_)</code> pattern in the second arm. The underscore, <code>_</code>, is a
|
||
catchall value; in this example, we’re saying we want to match all <code>Err</code>
|
||
values, no matter what information they have inside them. So the program will
|
||
execute the second arm’s code, <code>continue</code>, which tells the program to go to the
|
||
next iteration of the <code>loop</code> and ask for another guess. So, effectively, the
|
||
program ignores all errors that <code>parse</code> might encounter!</p>
|
||
<p>Now everything in the program should work as expected. Let’s try it:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Running `target/debug/guessing_game`
|
||
Guess the number!
|
||
The secret number is: 61
|
||
Please input your guess.
|
||
10
|
||
You guessed: 10
|
||
Too small!
|
||
Please input your guess.
|
||
99
|
||
You guessed: 99
|
||
Too big!
|
||
Please input your guess.
|
||
foo
|
||
Please input your guess.
|
||
61
|
||
You guessed: 61
|
||
You win!
|
||
</code></pre>
|
||
<p>Awesome! With one tiny final tweak, we will finish the guessing game. Recall
|
||
that the program is still printing the secret number. That worked well for
|
||
testing, but it ruins the game. Let’s delete the <code>println!</code> that outputs the
|
||
secret number. Listing 2-6 shows the final code.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::io;
|
||
use std::cmp::Ordering;
|
||
use rand::Rng;
|
||
|
||
fn main() {
|
||
println!("Guess the number!");
|
||
|
||
let secret_number = rand::thread_rng().gen_range(1, 101);
|
||
|
||
loop {
|
||
println!("Please input your guess.");
|
||
|
||
let mut guess = String::new();
|
||
|
||
io::stdin().read_line(&mut guess)
|
||
.expect("Failed to read line");
|
||
|
||
let guess: u32 = match guess.trim().parse() {
|
||
Ok(num) => num,
|
||
Err(_) => continue,
|
||
};
|
||
|
||
println!("You guessed: {}", guess);
|
||
|
||
match guess.cmp(&secret_number) {
|
||
Ordering::Less => println!("Too small!"),
|
||
Ordering::Greater => println!("Too big!"),
|
||
Ordering::Equal => {
|
||
println!("You win!");
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 2-6: Complete guessing game code</span></p>
|
||
<h2><a class="header" href="#summary-1" id="summary-1">Summary</a></h2>
|
||
<p>At this point, you’ve successfully built the guessing game. Congratulations!</p>
|
||
<p>This project was a hands-on way to introduce you to many new Rust concepts:
|
||
<code>let</code>, <code>match</code>, methods, associated functions, the use of external crates, and
|
||
more. In the next few chapters, you’ll learn about these concepts in more
|
||
detail. Chapter 3 covers concepts that most programming languages have, such as
|
||
variables, data types, and functions, and shows how to use them in Rust.
|
||
Chapter 4 explores ownership, a feature that makes Rust different from other
|
||
languages. Chapter 5 discusses structs and method syntax, and Chapter 6
|
||
explains how enums work.</p>
|
||
<h1><a class="header" href="#common-programming-concepts" id="common-programming-concepts">Common Programming Concepts</a></h1>
|
||
<p>This chapter covers concepts that appear in almost every programming language
|
||
and how they work in Rust. Many programming languages have much in common at
|
||
their core. None of the concepts presented in this chapter are unique to Rust,
|
||
but we’ll discuss them in the context of Rust and explain the conventions
|
||
around using these concepts.</p>
|
||
<p>Specifically, you’ll learn about variables, basic types, functions, comments,
|
||
and control flow. These foundations will be in every Rust program, and learning
|
||
them early will give you a strong core to start from.</p>
|
||
<blockquote>
|
||
<h4><a class="header" href="#keywords" id="keywords">Keywords</a></h4>
|
||
<p>The Rust language has a set of <em>keywords</em> that are reserved for use by
|
||
the language only, much as in other languages. Keep in mind that you cannot
|
||
use these words as names of variables or functions. Most of the keywords have
|
||
special meanings, and you’ll be using them to do various tasks in your Rust
|
||
programs; a few have no current functionality associated with them but have
|
||
been reserved for functionality that might be added to Rust in the future. You
|
||
can find a list of the keywords in Appendix A.</p>
|
||
</blockquote>
|
||
<h2><a class="header" href="#variables-and-mutability" id="variables-and-mutability">Variables and Mutability</a></h2>
|
||
<p>As mentioned in Chapter 2, by default variables are immutable. This is one of
|
||
many nudges Rust gives you to write your code in a way that takes advantage of
|
||
the safety and easy concurrency that Rust offers. However, you still have the
|
||
option to make your variables mutable. Let’s explore how and why Rust
|
||
encourages you to favor immutability and why sometimes you might want to opt
|
||
out.</p>
|
||
<p>When a variable is immutable, once a value is bound to a name, you can’t change
|
||
that value. To illustrate this, let’s generate a new project called <em>variables</em>
|
||
in your <em>projects</em> directory by using <code>cargo new variables</code>.</p>
|
||
<p>Then, in your new <em>variables</em> directory, open <em>src/main.rs</em> and replace its
|
||
code with the following code that won’t compile just yet:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = 5;
|
||
println!("The value of x is: {}", x);
|
||
x = 6;
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
</code></pre>
|
||
<p>Save and run the program using <code>cargo run</code>. You should receive an error
|
||
message, as shown in this output:</p>
|
||
<pre><code class="language-text">error[E0384]: cannot assign twice to immutable variable `x`
|
||
--> src/main.rs:4:5
|
||
|
|
||
2 | let x = 5;
|
||
| - first assignment to `x`
|
||
3 | println!("The value of x is: {}", x);
|
||
4 | x = 6;
|
||
| ^^^^^ cannot assign twice to immutable variable
|
||
</code></pre>
|
||
<p>This example shows how the compiler helps you find errors in your programs.
|
||
Even though compiler errors can be frustrating, they only mean your program
|
||
isn’t safely doing what you want it to do yet; they do <em>not</em> mean that you’re
|
||
not a good programmer! Experienced Rustaceans still get compiler errors.</p>
|
||
<p>The error message indicates that the cause of the error is that you <code>cannot assign twice to immutable variable x</code>, because you tried to assign a second
|
||
value to the immutable <code>x</code> variable.</p>
|
||
<p>It’s important that we get compile-time errors when we attempt to change a
|
||
value that we previously designated as immutable because this very situation
|
||
can lead to bugs. If one part of our code operates on the assumption that a
|
||
value will never change and another part of our code changes that value, it’s
|
||
possible that the first part of the code won’t do what it was designed to do.
|
||
The cause of this kind of bug can be difficult to track down after the fact,
|
||
especially when the second piece of code changes the value only <em>sometimes</em>.</p>
|
||
<p>In Rust, the compiler guarantees that when you state that a value won’t change,
|
||
it really won’t change. That means that when you’re reading and writing code,
|
||
you don’t have to keep track of how and where a value might change. Your code
|
||
is thus easier to reason through.</p>
|
||
<p>But mutability can be very useful. Variables are immutable only by default; as
|
||
you did in Chapter 2, you can make them mutable by adding <code>mut</code> in front of the
|
||
variable name. In addition to allowing this value to change, <code>mut</code> conveys
|
||
intent to future readers of the code by indicating that other parts of the code
|
||
will be changing this variable’s value.</p>
|
||
<p>For example, let’s change <em>src/main.rs</em> to the following:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let mut x = 5;
|
||
println!("The value of x is: {}", x);
|
||
x = 6;
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
</code></pre></pre>
|
||
<p>When we run the program now, we get this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling variables v0.1.0 (file:///projects/variables)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.30 secs
|
||
Running `target/debug/variables`
|
||
The value of x is: 5
|
||
The value of x is: 6
|
||
</code></pre>
|
||
<p>We’re allowed to change the value that <code>x</code> binds to from <code>5</code> to <code>6</code> when <code>mut</code>
|
||
is used. In some cases, you’ll want to make a variable mutable because it makes
|
||
the code more convenient to write than if it had only immutable variables.</p>
|
||
<p>There are multiple trade-offs to consider in addition to the prevention of
|
||
bugs. For example, in cases where you’re using large data structures, mutating
|
||
an instance in place may be faster than copying and returning newly allocated
|
||
instances. With smaller data structures, creating new instances and writing in
|
||
a more functional programming style may be easier to think through, so lower
|
||
performance might be a worthwhile penalty for gaining that clarity.</p>
|
||
<h3><a class="header" href="#differences-between-variables-and-constants" id="differences-between-variables-and-constants">Differences Between Variables and Constants</a></h3>
|
||
<p>Being unable to change the value of a variable might have reminded you of
|
||
another programming concept that most other languages have: <em>constants</em>. Like
|
||
immutable variables, constants are values that are bound to a name and are not
|
||
allowed to change, but there are a few differences between constants and
|
||
variables.</p>
|
||
<p>First, you aren’t allowed to use <code>mut</code> with constants. Constants aren’t just
|
||
immutable by default—they’re always immutable.</p>
|
||
<p>You declare constants using the <code>const</code> keyword instead of the <code>let</code> keyword,
|
||
and the type of the value <em>must</em> be annotated. We’re about to cover types and
|
||
type annotations in the next section, <a href="ch03-02-data-types.html#data-types">“Data Types,”</a><!-- ignore
|
||
--> so don’t worry about the details right now. Just know that you must always
|
||
annotate the type.</p>
|
||
<p>Constants can be declared in any scope, including the global scope, which makes
|
||
them useful for values that many parts of code need to know about.</p>
|
||
<p>The last difference is that constants may be set only to a constant expression,
|
||
not the result of a function call or any other value that could only be
|
||
computed at runtime.</p>
|
||
<p>Here’s an example of a constant declaration where the constant’s name is
|
||
<code>MAX_POINTS</code> and its value is set to 100,000. (Rust’s naming convention for
|
||
constants is to use all uppercase with underscores between words, and
|
||
underscores can be inserted in numeric literals to improve readability):</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>const MAX_POINTS: u32 = 100_000;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Constants are valid for the entire time a program runs, within the scope they
|
||
were declared in, making them a useful choice for values in your application
|
||
domain that multiple parts of the program might need to know about, such as the
|
||
maximum number of points any player of a game is allowed to earn or the speed
|
||
of light.</p>
|
||
<p>Naming hardcoded values used throughout your program as constants is useful in
|
||
conveying the meaning of that value to future maintainers of the code. It also
|
||
helps to have only one place in your code you would need to change if the
|
||
hardcoded value needed to be updated in the future.</p>
|
||
<h3><a class="header" href="#shadowing" id="shadowing">Shadowing</a></h3>
|
||
<p>As you saw in the guessing game tutorial in the <a href="ch02-00-guessing-game-tutorial.html#comparing-the-guess-to-the-secret-number">“Comparing the Guess to the
|
||
Secret Number”</a><!-- ignore -->
|
||
section in Chapter 2, you can declare a new variable with the same name as a
|
||
previous variable, and the new variable shadows the previous variable.
|
||
Rustaceans say that the first variable is <em>shadowed</em> by the second, which means
|
||
that the second variable’s value is what appears when the variable is used. We
|
||
can shadow a variable by using the same variable’s name and repeating the use
|
||
of the <code>let</code> keyword as follows:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 5;
|
||
|
||
let x = x + 1;
|
||
|
||
let x = x * 2;
|
||
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
</code></pre></pre>
|
||
<p>This program first binds <code>x</code> to a value of <code>5</code>. Then it shadows <code>x</code> by
|
||
repeating <code>let x =</code>, taking the original value and adding <code>1</code> so the value of
|
||
<code>x</code> is then <code>6</code>. The third <code>let</code> statement also shadows <code>x</code>, multiplying the
|
||
previous value by <code>2</code> to give <code>x</code> a final value of <code>12</code>. When we run this
|
||
program, it will output the following:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling variables v0.1.0 (file:///projects/variables)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running `target/debug/variables`
|
||
The value of x is: 12
|
||
</code></pre>
|
||
<p>Shadowing is different from marking a variable as <code>mut</code>, because we’ll get a
|
||
compile-time error if we accidentally try to reassign to this variable without
|
||
using the <code>let</code> keyword. By using <code>let</code>, we can perform a few transformations
|
||
on a value but have the variable be immutable after those transformations have
|
||
been completed.</p>
|
||
<p>The other difference between <code>mut</code> and shadowing is that because we’re
|
||
effectively creating a new variable when we use the <code>let</code> keyword again, we can
|
||
change the type of the value but reuse the same name. For example, say our
|
||
program asks a user to show how many spaces they want between some text by
|
||
inputting space characters, but we really want to store that input as a number:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let spaces = " ";
|
||
let spaces = spaces.len();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This construct is allowed because the first <code>spaces</code> variable is a string type
|
||
and the second <code>spaces</code> variable, which is a brand-new variable that happens to
|
||
have the same name as the first one, is a number type. Shadowing thus spares us
|
||
from having to come up with different names, such as <code>spaces_str</code> and
|
||
<code>spaces_num</code>; instead, we can reuse the simpler <code>spaces</code> name. However, if we
|
||
try to use <code>mut</code> for this, as shown here, we’ll get a compile-time error:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let mut spaces = " ";
|
||
spaces = spaces.len();
|
||
</code></pre>
|
||
<p>The error says we’re not allowed to mutate a variable’s type:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
|
||
--> src/main.rs:3:14
|
||
|
|
||
3 | spaces = spaces.len();
|
||
| ^^^^^^^^^^^^ expected &str, found usize
|
||
|
|
||
= note: expected type `&str`
|
||
found type `usize`
|
||
</code></pre>
|
||
<p>Now that we’ve explored how variables work, let’s look at more data types they
|
||
can have.</p>
|
||
<h2><a class="header" href="#data-types" id="data-types">Data Types</a></h2>
|
||
<p>Every value in Rust is of a certain <em>data type</em>, which tells Rust what kind of
|
||
data is being specified so it knows how to work with that data. We’ll look at
|
||
two data type subsets: scalar and compound.</p>
|
||
<p>Keep in mind that Rust is a <em>statically typed</em> language, which means that it
|
||
must know the types of all variables at compile time. The compiler can usually
|
||
infer what type we want to use based on the value and how we use it. In cases
|
||
when many types are possible, such as when we converted a <code>String</code> to a numeric
|
||
type using <code>parse</code> in the <a href="ch02-00-guessing-game-tutorial.html#comparing-the-guess-to-the-secret-number">“Comparing the Guess to the Secret
|
||
Number”</a><!-- ignore --> section in
|
||
Chapter 2, we must add a type annotation, like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let guess: u32 = "42".parse().expect("Not a number!");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>If we don’t add the type annotation here, Rust will display the following
|
||
error, which means the compiler needs more information from us to know which
|
||
type we want to use:</p>
|
||
<pre><code class="language-text">error[E0282]: type annotations needed
|
||
--> src/main.rs:2:9
|
||
|
|
||
2 | let guess = "42".parse().expect("Not a number!");
|
||
| ^^^^^
|
||
| |
|
||
| cannot infer type for `_`
|
||
| consider giving `guess` a type
|
||
</code></pre>
|
||
<p>You’ll see different type annotations for other data types.</p>
|
||
<h3><a class="header" href="#scalar-types" id="scalar-types">Scalar Types</a></h3>
|
||
<p>A <em>scalar</em> type represents a single value. Rust has four primary scalar types:
|
||
integers, floating-point numbers, Booleans, and characters. You may recognize
|
||
these from other programming languages. Let’s jump into how they work in Rust.</p>
|
||
<h4><a class="header" href="#integer-types" id="integer-types">Integer Types</a></h4>
|
||
<p>An <em>integer</em> is a number without a fractional component. We used one integer
|
||
type in Chapter 2, the <code>u32</code> type. This type declaration indicates that the
|
||
value it’s associated with should be an unsigned integer (signed integer types
|
||
start with <code>i</code>, instead of <code>u</code>) that takes up 32 bits of space. Table 3-1 shows
|
||
the built-in integer types in Rust. Each variant in the Signed and Unsigned
|
||
columns (for example, <code>i16</code>) can be used to declare the type of an integer
|
||
value.</p>
|
||
<p><span class="caption">Table 3-1: Integer Types in Rust</span></p>
|
||
<table><thead><tr><th scope="col">Length</th><th scope="col">Signed</th><th scope="col">Unsigned</th></tr></thead><tbody>
|
||
<tr><td>8-bit</td><td><code>i8</code></td><td><code>u8</code></td></tr>
|
||
<tr><td>16-bit</td><td><code>i16</code></td><td><code>u16</code></td></tr>
|
||
<tr><td>32-bit</td><td><code>i32</code></td><td><code>u32</code></td></tr>
|
||
<tr><td>64-bit</td><td><code>i64</code></td><td><code>u64</code></td></tr>
|
||
<tr><td>128-bit</td><td><code>i128</code></td><td><code>u128</code></td></tr>
|
||
<tr><td>arch</td><td><code>isize</code></td><td><code>usize</code></td></tr>
|
||
</tbody></table>
|
||
<p>Each variant can be either signed or unsigned and has an explicit size.
|
||
<em>Signed</em> and <em>unsigned</em> refer to whether it’s possible for the number to be
|
||
negative—in other words, whether the number needs to have a sign
|
||
with it (signed) or whether it will only ever be positive and can therefore be
|
||
represented without a sign (unsigned). It’s like writing numbers on paper: when
|
||
the sign matters, a number is shown with a plus sign or a minus sign; however,
|
||
when it’s safe to assume the number is positive, it’s shown with no sign.
|
||
Signed numbers are stored using <a href="https://en.wikipedia.org/wiki/Two%27s_complement">two’s complement</a> representation.</p>
|
||
<p>Each signed variant can store numbers from -(2<sup>n - 1</sup>) to 2<sup>n -
|
||
1</sup> - 1 inclusive, where <em>n</em> is the number of bits that variant uses. So an
|
||
<code>i8</code> can store numbers from -(2<sup>7</sup>) to 2<sup>7</sup> - 1, which equals
|
||
-128 to 127. Unsigned variants can store numbers from 0 to 2<sup>n</sup> - 1,
|
||
so a <code>u8</code> can store numbers from 0 to 2<sup>8</sup> - 1, which equals 0 to 255.</p>
|
||
<p>Additionally, the <code>isize</code> and <code>usize</code> types depend on the kind of computer your
|
||
program is running on: 64 bits if you’re on a 64-bit architecture and 32 bits
|
||
if you’re on a 32-bit architecture.</p>
|
||
<p>You can write integer literals in any of the forms shown in Table 3-2. Note
|
||
that all number literals except the byte literal allow a type suffix, such as
|
||
<code>57u8</code>, and <code>_</code> as a visual separator, such as <code>1_000</code>.</p>
|
||
<p><span class="caption">Table 3-2: Integer Literals in Rust</span></p>
|
||
<table><thead><tr><th scope="col">Number literals</th><th scope="col">Example</th></tr></thead><tbody>
|
||
<tr><td>Decimal</td><td><code>98_222</code></td></tr>
|
||
<tr><td>Hex</td><td><code>0xff</code></td></tr>
|
||
<tr><td>Octal</td><td><code>0o77</code></td></tr>
|
||
<tr><td>Binary</td><td><code>0b1111_0000</code></td></tr>
|
||
<tr><td>Byte (<code>u8</code> only)</td><td><code>b'A'</code></td></tr>
|
||
</tbody></table>
|
||
<p>So how do you know which type of integer to use? If you’re unsure, Rust’s
|
||
defaults are generally good choices, and integer types default to <code>i32</code>: this
|
||
type is generally the fastest, even on 64-bit systems. The primary situation in
|
||
which you’d use <code>isize</code> or <code>usize</code> is when indexing some sort of collection.</p>
|
||
<blockquote>
|
||
<h5><a class="header" href="#integer-overflow" id="integer-overflow">Integer Overflow</a></h5>
|
||
<p>Let’s say you have a variable of type <code>u8</code> that can hold values between 0 and 255.
|
||
If you try to change the variable to a value outside of that range, such
|
||
as 256, <em>integer overflow</em> will occur. Rust has some interesting rules
|
||
involving this behavior. When you’re compiling in debug mode, Rust includes
|
||
checks for integer overflow that cause your program to <em>panic</em> at runtime if
|
||
this behavior occurs. Rust uses the term panicking when a program exits with
|
||
an error; we’ll discuss panics in more depth in the <a href="ch09-01-unrecoverable-errors-with-panic.html">“Unrecoverable Errors
|
||
with <code>panic!</code>”</a><!-- ignore --> section in
|
||
Chapter 9.</p>
|
||
<p>When you’re compiling in release mode with the <code>--release</code> flag, Rust does
|
||
<em>not</em> include checks for integer overflow that cause panics. Instead, if
|
||
overflow occurs, Rust performs <em>two’s complement wrapping</em>. In short, values
|
||
greater than the maximum value the type can hold “wrap around” to the minimum
|
||
of the values the type can hold. In the case of a <code>u8</code>, 256 becomes 0, 257
|
||
becomes 1, and so on. The program won’t panic, but the variable will have a
|
||
value that probably isn’t what you were expecting it to have. Relying on
|
||
integer overflow’s wrapping behavior is considered an error. If you want to
|
||
wrap explicitly, you can use the standard library type <a href="../std/num/struct.Wrapping.html"><code>Wrapping</code></a>.</p>
|
||
</blockquote>
|
||
<h4><a class="header" href="#floating-point-types" id="floating-point-types">Floating-Point Types</a></h4>
|
||
<p>Rust also has two primitive types for <em>floating-point numbers</em>, which are
|
||
numbers with decimal points. Rust’s floating-point types are <code>f32</code> and <code>f64</code>,
|
||
which are 32 bits and 64 bits in size, respectively. The default type is <code>f64</code>
|
||
because on modern CPUs it’s roughly the same speed as <code>f32</code> but is capable of
|
||
more precision.</p>
|
||
<p>Here’s an example that shows floating-point numbers in action:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 2.0; // f64
|
||
|
||
let y: f32 = 3.0; // f32
|
||
}
|
||
</code></pre></pre>
|
||
<p>Floating-point numbers are represented according to the IEEE-754 standard. The
|
||
<code>f32</code> type is a single-precision float, and <code>f64</code> has double precision.</p>
|
||
<h4><a class="header" href="#numeric-operations" id="numeric-operations">Numeric Operations</a></h4>
|
||
<p>Rust supports the basic mathematical operations you’d expect for all of the
|
||
number types: addition, subtraction, multiplication, division, and remainder.
|
||
The following code shows how you’d use each one in a <code>let</code> statement:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
// addition
|
||
let sum = 5 + 10;
|
||
|
||
// subtraction
|
||
let difference = 95.5 - 4.3;
|
||
|
||
// multiplication
|
||
let product = 4 * 30;
|
||
|
||
// division
|
||
let quotient = 56.7 / 32.2;
|
||
|
||
// remainder
|
||
let remainder = 43 % 5;
|
||
}
|
||
</code></pre></pre>
|
||
<p>Each expression in these statements uses a mathematical operator and evaluates
|
||
to a single value, which is then bound to a variable. Appendix B contains a
|
||
list of all operators that Rust provides.</p>
|
||
<h4><a class="header" href="#the-boolean-type" id="the-boolean-type">The Boolean Type</a></h4>
|
||
<p>As in most other programming languages, a Boolean type in Rust has two possible
|
||
values: <code>true</code> and <code>false</code>. Booleans are one byte in size. The Boolean type in
|
||
Rust is specified using <code>bool</code>. For example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let t = true;
|
||
|
||
let f: bool = false; // with explicit type annotation
|
||
}
|
||
</code></pre></pre>
|
||
<p>The main way to use Boolean values is through conditionals, such as an <code>if</code>
|
||
expression. We’ll cover how <code>if</code> expressions work in Rust in the <a href="ch03-05-control-flow.html#control-flow">“Control
|
||
Flow”</a><!-- ignore --> section.</p>
|
||
<h4><a class="header" href="#the-character-type" id="the-character-type">The Character Type</a></h4>
|
||
<p>So far we’ve worked only with numbers, but Rust supports letters too. Rust’s
|
||
<code>char</code> type is the language’s most primitive alphabetic type, and the following
|
||
code shows one way to use it. (Note that <code>char</code> literals are specified with
|
||
single quotes, as opposed to string literals, which use double quotes.)</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let c = 'z';
|
||
let z = 'ℤ';
|
||
let heart_eyed_cat = '😻';
|
||
}
|
||
</code></pre></pre>
|
||
<p>Rust’s <code>char</code> type is four bytes in size and represents a Unicode Scalar Value,
|
||
which means it can represent a lot more than just ASCII. Accented letters;
|
||
Chinese, Japanese, and Korean characters; emoji; and zero-width spaces are all
|
||
valid <code>char</code> values in Rust. Unicode Scalar Values range from <code>U+0000</code> to
|
||
<code>U+D7FF</code> and <code>U+E000</code> to <code>U+10FFFF</code> inclusive. However, a “character” isn’t
|
||
really a concept in Unicode, so your human intuition for what a “character” is
|
||
may not match up with what a <code>char</code> is in Rust. We’ll discuss this topic in
|
||
detail in <a href="ch08-02-strings.html#storing-utf-8-encoded-text-with-strings">“Storing UTF-8 Encoded Text with Strings”</a><!-- ignore -->
|
||
in Chapter 8.</p>
|
||
<h3><a class="header" href="#compound-types" id="compound-types">Compound Types</a></h3>
|
||
<p><em>Compound types</em> can group multiple values into one type. Rust has two
|
||
primitive compound types: tuples and arrays.</p>
|
||
<h4><a class="header" href="#the-tuple-type" id="the-tuple-type">The Tuple Type</a></h4>
|
||
<p>A tuple is a general way of grouping together a number of values with a variety
|
||
of types into one compound type. Tuples have a fixed length: once declared,
|
||
they cannot grow or shrink in size.</p>
|
||
<p>We create a tuple by writing a comma-separated list of values inside
|
||
parentheses. Each position in the tuple has a type, and the types of the
|
||
different values in the tuple don’t have to be the same. We’ve added optional
|
||
type annotations in this example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let tup: (i32, f64, u8) = (500, 6.4, 1);
|
||
}
|
||
</code></pre></pre>
|
||
<p>The variable <code>tup</code> binds to the entire tuple, because a tuple is considered a
|
||
single compound element. To get the individual values out of a tuple, we can
|
||
use pattern matching to destructure a tuple value, like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let tup = (500, 6.4, 1);
|
||
|
||
let (x, y, z) = tup;
|
||
|
||
println!("The value of y is: {}", y);
|
||
}
|
||
</code></pre></pre>
|
||
<p>This program first creates a tuple and binds it to the variable <code>tup</code>. It then
|
||
uses a pattern with <code>let</code> to take <code>tup</code> and turn it into three separate
|
||
variables, <code>x</code>, <code>y</code>, and <code>z</code>. This is called <em>destructuring</em>, because it breaks
|
||
the single tuple into three parts. Finally, the program prints the value of
|
||
<code>y</code>, which is <code>6.4</code>.</p>
|
||
<p>In addition to destructuring through pattern matching, we can access a tuple
|
||
element directly by using a period (<code>.</code>) followed by the index of the value we
|
||
want to access. For example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x: (i32, f64, u8) = (500, 6.4, 1);
|
||
|
||
let five_hundred = x.0;
|
||
|
||
let six_point_four = x.1;
|
||
|
||
let one = x.2;
|
||
}
|
||
</code></pre></pre>
|
||
<p>This program creates a tuple, <code>x</code>, and then makes new variables for each
|
||
element by using their respective indices. As with most programming languages,
|
||
the first index in a tuple is 0.</p>
|
||
<h4><a class="header" href="#the-array-type" id="the-array-type">The Array Type</a></h4>
|
||
<p>Another way to have a collection of multiple values is with an <em>array</em>. Unlike
|
||
a tuple, every element of an array must have the same type. Arrays in Rust are
|
||
different from arrays in some other languages because arrays in Rust have a
|
||
fixed length, like tuples.</p>
|
||
<p>In Rust, the values going into an array are written as a comma-separated list
|
||
inside square brackets:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let a = [1, 2, 3, 4, 5];
|
||
}
|
||
</code></pre></pre>
|
||
<p>Arrays are useful when you want your data allocated on the stack rather than
|
||
the heap (we will discuss the stack and the heap more in Chapter 4) or when
|
||
you want to ensure you always have a fixed number of elements. An array isn’t
|
||
as flexible as the vector type, though. A vector is a similar collection type
|
||
provided by the standard library that <em>is</em> allowed to grow or shrink in size.
|
||
If you’re unsure whether to use an array or a vector, you should probably use a
|
||
vector. Chapter 8 discusses vectors in more detail.</p>
|
||
<p>An example of when you might want to use an array rather than a vector is in a
|
||
program that needs to know the names of the months of the year. It’s very
|
||
unlikely that such a program will need to add or remove months, so you can use
|
||
an array because you know it will always contain 12 elements:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let months = ["January", "February", "March", "April", "May", "June", "July",
|
||
"August", "September", "October", "November", "December"];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>You would write an array’s type by using square brackets, and within the
|
||
brackets include the type of each element, a semicolon, and then the number of
|
||
elements in the array, like so:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let a: [i32; 5] = [1, 2, 3, 4, 5];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Here, <code>i32</code> is the type of each element. After the semicolon, the number <code>5</code>
|
||
indicates the array contains five elements.</p>
|
||
<p>Writing an array’s type this way looks similar to an alternative syntax for
|
||
initializing an array: if you want to create an array that contains the same
|
||
value for each element, you can specify the initial value, followed by a
|
||
semicolon, and then the length of the array in square brackets, as shown here:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let a = [3; 5];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The array named <code>a</code> will contain <code>5</code> elements that will all be set to the value
|
||
<code>3</code> initially. This is the same as writing <code>let a = [3, 3, 3, 3, 3];</code> but in a
|
||
more concise way.</p>
|
||
<h5><a class="header" href="#accessing-array-elements" id="accessing-array-elements">Accessing Array Elements</a></h5>
|
||
<p>An array is a single chunk of memory allocated on the stack. You can access
|
||
elements of an array using indexing, like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let a = [1, 2, 3, 4, 5];
|
||
|
||
let first = a[0];
|
||
let second = a[1];
|
||
}
|
||
</code></pre></pre>
|
||
<p>In this example, the variable named <code>first</code> will get the value <code>1</code>, because
|
||
that is the value at index <code>[0]</code> in the array. The variable named <code>second</code> will
|
||
get the value <code>2</code> from index <code>[1]</code> in the array.</p>
|
||
<h5><a class="header" href="#invalid-array-element-access" id="invalid-array-element-access">Invalid Array Element Access</a></h5>
|
||
<p>What happens if you try to access an element of an array that is past the end
|
||
of the array? Say you change the example to the following code, which will
|
||
compile but exit with an error when it runs:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore panics">fn main() {
|
||
let a = [1, 2, 3, 4, 5];
|
||
let index = 10;
|
||
|
||
let element = a[index];
|
||
|
||
println!("The value of element is: {}", element);
|
||
}
|
||
</code></pre>
|
||
<p>Running this code using <code>cargo run</code> produces the following result:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling arrays v0.1.0 (file:///projects/arrays)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running `target/debug/arrays`
|
||
thread 'main' panicked at 'index out of bounds: the len is 5 but the index is
|
||
10', src/main.rs:5:19
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>The compilation didn’t produce any errors, but the program resulted in a
|
||
<em>runtime</em> error and didn’t exit successfully. When you attempt to access an
|
||
element using indexing, Rust will check that the index you’ve specified is less
|
||
than the array length. If the index is greater than or equal to the array
|
||
length, Rust will panic.</p>
|
||
<p>This is the first example of Rust’s safety principles in action. In many
|
||
low-level languages, this kind of check is not done, and when you provide an
|
||
incorrect index, invalid memory can be accessed. Rust protects you against this
|
||
kind of error by immediately exiting instead of allowing the memory access and
|
||
continuing. Chapter 9 discusses more of Rust’s error handling.</p>
|
||
<h2><a class="header" href="#functions" id="functions">Functions</a></h2>
|
||
<p>Functions are pervasive in Rust code. You’ve already seen one of the most
|
||
important functions in the language: the <code>main</code> function, which is the entry
|
||
point of many programs. You’ve also seen the <code>fn</code> keyword, which allows you to
|
||
declare new functions.</p>
|
||
<p>Rust code uses <em>snake case</em> as the conventional style for function and variable
|
||
names. In snake case, all letters are lowercase and underscores separate words.
|
||
Here’s a program that contains an example function definition:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
println!("Hello, world!");
|
||
|
||
another_function();
|
||
}
|
||
|
||
fn another_function() {
|
||
println!("Another function.");
|
||
}
|
||
</code></pre></pre>
|
||
<p>Function definitions in Rust start with <code>fn</code> and have a set of parentheses
|
||
after the function name. The curly brackets tell the compiler where the
|
||
function body begins and ends.</p>
|
||
<p>We can call any function we’ve defined by entering its name followed by a set
|
||
of parentheses. Because <code>another_function</code> is defined in the program, it can be
|
||
called from inside the <code>main</code> function. Note that we defined <code>another_function</code>
|
||
<em>after</em> the <code>main</code> function in the source code; we could have defined it before
|
||
as well. Rust doesn’t care where you define your functions, only that they’re
|
||
defined somewhere.</p>
|
||
<p>Let’s start a new binary project named <em>functions</em> to explore functions
|
||
further. Place the <code>another_function</code> example in <em>src/main.rs</em> and run it. You
|
||
should see the following output:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling functions v0.1.0 (file:///projects/functions)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.28 secs
|
||
Running `target/debug/functions`
|
||
Hello, world!
|
||
Another function.
|
||
</code></pre>
|
||
<p>The lines execute in the order in which they appear in the <code>main</code> function.
|
||
First, the “Hello, world!” message prints, and then <code>another_function</code> is
|
||
called and its message is printed.</p>
|
||
<h3><a class="header" href="#function-parameters" id="function-parameters">Function Parameters</a></h3>
|
||
<p>Functions can also be defined to have <em>parameters</em>, which are special variables
|
||
that are part of a function’s signature. When a function has parameters, you
|
||
can provide it with concrete values for those parameters. Technically, the
|
||
concrete values are called <em>arguments</em>, but in casual conversation, people tend
|
||
to use the words <em>parameter</em> and <em>argument</em> interchangeably for either the
|
||
variables in a function’s definition or the concrete values passed in when you
|
||
call a function.</p>
|
||
<p>The following rewritten version of <code>another_function</code> shows what parameters
|
||
look like in Rust:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
another_function(5);
|
||
}
|
||
|
||
fn another_function(x: i32) {
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
</code></pre></pre>
|
||
<p>Try running this program; you should get the following output:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling functions v0.1.0 (file:///projects/functions)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 1.21 secs
|
||
Running `target/debug/functions`
|
||
The value of x is: 5
|
||
</code></pre>
|
||
<p>The declaration of <code>another_function</code> has one parameter named <code>x</code>. The type of
|
||
<code>x</code> is specified as <code>i32</code>. When <code>5</code> is passed to <code>another_function</code>, the
|
||
<code>println!</code> macro puts <code>5</code> where the pair of curly brackets was in the format
|
||
string.</p>
|
||
<p>In function signatures, you <em>must</em> declare the type of each parameter. This is
|
||
a deliberate decision in Rust’s design: requiring type annotations in function
|
||
definitions means the compiler almost never needs you to use them elsewhere in
|
||
the code to figure out what you mean.</p>
|
||
<p>When you want a function to have multiple parameters, separate the parameter
|
||
declarations with commas, like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
another_function(5, 6);
|
||
}
|
||
|
||
fn another_function(x: i32, y: i32) {
|
||
println!("The value of x is: {}", x);
|
||
println!("The value of y is: {}", y);
|
||
}
|
||
</code></pre></pre>
|
||
<p>This example creates a function with two parameters, both of which are <code>i32</code>
|
||
types. The function then prints the values in both of its parameters. Note that
|
||
function parameters don’t all need to be the same type; they just happen to be
|
||
in this example.</p>
|
||
<p>Let’s try running this code. Replace the program currently in your <em>functions</em>
|
||
project’s <em>src/main.rs</em> file with the preceding example and run it using <code>cargo run</code>:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling functions v0.1.0 (file:///projects/functions)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running `target/debug/functions`
|
||
The value of x is: 5
|
||
The value of y is: 6
|
||
</code></pre>
|
||
<p>Because we called the function with <code>5</code> as the value for <code>x</code> and <code>6</code>
|
||
as the value for <code>y</code>, the two strings are printed with these values.</p>
|
||
<h3><a class="header" href="#function-bodies-contain-statements-and-expressions" id="function-bodies-contain-statements-and-expressions">Function Bodies Contain Statements and Expressions</a></h3>
|
||
<p>Function bodies are made up of a series of statements optionally ending in an
|
||
expression. So far, we’ve only covered functions without an ending expression,
|
||
but you have seen an expression as part of a statement. Because Rust is an
|
||
expression-based language, this is an important distinction to understand.
|
||
Other languages don’t have the same distinctions, so let’s look at what
|
||
statements and expressions are and how their differences affect the bodies of
|
||
functions.</p>
|
||
<p>We’ve actually already used statements and expressions. <em>Statements</em> are
|
||
instructions that perform some action and do not return a value. <em>Expressions</em>
|
||
evaluate to a resulting value. Let’s look at some examples.</p>
|
||
<p>Creating a variable and assigning a value to it with the <code>let</code> keyword is a
|
||
statement. In Listing 3-1, <code>let y = 6;</code> is a statement.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let y = 6;
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 3-1: A <code>main</code> function declaration containing one statement</span></p>
|
||
<p>Function definitions are also statements; the entire preceding example is a
|
||
statement in itself.</p>
|
||
<p>Statements do not return values. Therefore, you can’t assign a <code>let</code> statement
|
||
to another variable, as the following code tries to do; you’ll get an error:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = (let y = 6);
|
||
}
|
||
</code></pre>
|
||
<p>When you run this program, the error you’ll get looks like this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling functions v0.1.0 (file:///projects/functions)
|
||
error: expected expression, found statement (`let`)
|
||
--> src/main.rs:2:14
|
||
|
|
||
2 | let x = (let y = 6);
|
||
| ^^^
|
||
|
|
||
= note: variable declaration using `let` is a statement
|
||
</code></pre>
|
||
<p>The <code>let y = 6</code> statement does not return a value, so there isn’t anything for
|
||
<code>x</code> to bind to. This is different from what happens in other languages, such as
|
||
C and Ruby, where the assignment returns the value of the assignment. In those
|
||
languages, you can write <code>x = y = 6</code> and have both <code>x</code> and <code>y</code> have the value
|
||
<code>6</code>; that is not the case in Rust.</p>
|
||
<p>Expressions evaluate to something and make up most of the rest of the code that
|
||
you’ll write in Rust. Consider a simple math operation, such as <code>5 + 6</code>, which
|
||
is an expression that evaluates to the value <code>11</code>. Expressions can be part of
|
||
statements: in Listing 3-1, the <code>6</code> in the statement <code>let y = 6;</code> is an
|
||
expression that evaluates to the value <code>6</code>. Calling a function is an
|
||
expression. Calling a macro is an expression. The block that we use to create
|
||
new scopes, <code>{}</code>, is an expression, for example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 5;
|
||
|
||
let y = {
|
||
let x = 3;
|
||
x + 1
|
||
};
|
||
|
||
println!("The value of y is: {}", y);
|
||
}
|
||
</code></pre></pre>
|
||
<p>This expression:</p>
|
||
<pre><code class="language-rust ignore">{
|
||
let x = 3;
|
||
x + 1
|
||
}
|
||
</code></pre>
|
||
<p>is a block that, in this case, evaluates to <code>4</code>. That value gets bound to <code>y</code>
|
||
as part of the <code>let</code> statement. Note the <code>x + 1</code> line without a semicolon at
|
||
the end, which is unlike most of the lines you’ve seen so far. Expressions do
|
||
not include ending semicolons. If you add a semicolon to the end of an
|
||
expression, you turn it into a statement, which will then not return a value.
|
||
Keep this in mind as you explore function return values and expressions next.</p>
|
||
<h3><a class="header" href="#functions-with-return-values" id="functions-with-return-values">Functions with Return Values</a></h3>
|
||
<p>Functions can return values to the code that calls them. We don’t name return
|
||
values, but we do declare their type after an arrow (<code>-></code>). In Rust, the return
|
||
value of the function is synonymous with the value of the final expression in
|
||
the block of the body of a function. You can return early from a function by
|
||
using the <code>return</code> keyword and specifying a value, but most functions return
|
||
the last expression implicitly. Here’s an example of a function that returns a
|
||
value:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn five() -> i32 {
|
||
5
|
||
}
|
||
|
||
fn main() {
|
||
let x = five();
|
||
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
</code></pre></pre>
|
||
<p>There are no function calls, macros, or even <code>let</code> statements in the <code>five</code>
|
||
function—just the number <code>5</code> by itself. That’s a perfectly valid function in
|
||
Rust. Note that the function’s return type is specified too, as <code>-> i32</code>. Try
|
||
running this code; the output should look like this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling functions v0.1.0 (file:///projects/functions)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.30 secs
|
||
Running `target/debug/functions`
|
||
The value of x is: 5
|
||
</code></pre>
|
||
<p>The <code>5</code> in <code>five</code> is the function’s return value, which is why the return type
|
||
is <code>i32</code>. Let’s examine this in more detail. There are two important bits:
|
||
first, the line <code>let x = five();</code> shows that we’re using the return value of a
|
||
function to initialize a variable. Because the function <code>five</code> returns a <code>5</code>,
|
||
that line is the same as the following:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 5;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Second, the <code>five</code> function has no parameters and defines the type of the
|
||
return value, but the body of the function is a lonely <code>5</code> with no semicolon
|
||
because it’s an expression whose value we want to return.</p>
|
||
<p>Let’s look at another example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = plus_one(5);
|
||
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
|
||
fn plus_one(x: i32) -> i32 {
|
||
x + 1
|
||
}
|
||
</code></pre></pre>
|
||
<p>Running this code will print <code>The value of x is: 6</code>. But if we place a
|
||
semicolon at the end of the line containing <code>x + 1</code>, changing it from an
|
||
expression to a statement, we’ll get an error.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = plus_one(5);
|
||
|
||
println!("The value of x is: {}", x);
|
||
}
|
||
|
||
fn plus_one(x: i32) -> i32 {
|
||
x + 1;
|
||
}
|
||
</code></pre>
|
||
<p>Compiling this code produces an error, as follows:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
 --&gt; src/main.rs:7:28
  |
7 |   fn plus_one(x: i32) -&gt; i32 {
  |  ____________________________^
8 | |     x + 1;
  | |          - help: consider removing this semicolon
9 | | }
  | |_^ expected i32, found ()
  |
  = note: expected type `i32`
             found type `()`
</code></pre>
|
||
<p>The main error message, “mismatched types,” reveals the core issue with this
|
||
code. The definition of the function <code>plus_one</code> says that it will return an
|
||
<code>i32</code>, but statements don’t evaluate to a value, which is expressed by <code>()</code>,
|
||
an empty tuple. Therefore, nothing is returned, which contradicts the function
|
||
definition and results in an error. In this output, Rust provides a message to
|
||
possibly help rectify this issue: it suggests removing the semicolon, which
|
||
would fix the error.</p>
|
||
<h2><a class="header" href="#comments" id="comments">Comments</a></h2>
|
||
<p>All programmers strive to make their code easy to understand, but sometimes
|
||
extra explanation is warranted. In these cases, programmers leave notes, or
|
||
<em>comments</em>, in their source code that the compiler will ignore but people
|
||
reading the source code may find useful.</p>
|
||
<p>Here’s a simple comment:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>// hello, world
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>In Rust, the idiomatic comment style starts a comment with two slashes, and the comment continues until the end of the
|
||
line. For comments that extend beyond a single line, you’ll need to include
|
||
<code>//</code> on each line, like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>// So we’re doing something complicated here, long enough that we need
|
||
// multiple lines of comments to do it! Whew! Hopefully, this comment will
|
||
// explain what’s going on.
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Comments can also be placed at the end of lines containing code:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let lucky_number = 7; // I’m feeling lucky today
|
||
}
|
||
</code></pre></pre>
|
||
<p>But you’ll more often see them used in this format, with the comment on a
|
||
separate line above the code it’s annotating:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
// I’m feeling lucky today
|
||
let lucky_number = 7;
|
||
}
|
||
</code></pre></pre>
|
||
<p>Rust also has another kind of comment, documentation comments, which we’ll
|
||
discuss in the “Publishing a Crate to Crates.io” section of Chapter 14.</p>
|
||
<h2><a class="header" href="#control-flow" id="control-flow">Control Flow</a></h2>
|
||
<p>Deciding whether or not to run some code depending on whether a condition is true
|
||
and deciding to run some code repeatedly while a condition is true are basic
|
||
building blocks in most programming languages. The most common constructs that
|
||
let you control the flow of execution of Rust code are <code>if</code> expressions and
|
||
loops.</p>
|
||
<h3><a class="header" href="#if-expressions" id="if-expressions"><code>if</code> Expressions</a></h3>
|
||
<p>An <code>if</code> expression allows you to branch your code depending on conditions. You
|
||
provide a condition and then state, “If this condition is met, run this block
|
||
of code. If the condition is not met, do not run this block of code.”</p>
|
||
<p>Create a new project called <em>branches</em> in your <em>projects</em> directory to explore
|
||
the <code>if</code> expression. In the <em>src/main.rs</em> file, input the following:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
    let number = 3;

    if number &lt; 5 {
        println!("condition was true");
    } else {
        println!("condition was false");
    }
}
</code></pre></pre>
|
||
<p>All <code>if</code> expressions start with the keyword <code>if</code>, which is followed by a
|
||
condition. In this case, the condition checks whether or not the variable
|
||
<code>number</code> has a value less than 5. The block of code we want to execute if the
|
||
condition is true is placed immediately after the condition inside curly
|
||
brackets. Blocks of code associated with the conditions in <code>if</code> expressions are
|
||
sometimes called <em>arms</em>, just like the arms in <code>match</code> expressions that we
|
||
discussed in the <a href="ch02-00-guessing-game-tutorial.html#comparing-the-guess-to-the-secret-number">“Comparing the Guess to the Secret Number”</a><!-- ignore --> section of
|
||
Chapter 2.</p>
|
||
<p>Optionally, we can also include an <code>else</code> expression, which we chose
|
||
to do here, to give the program an alternative block of code to execute should
|
||
the condition evaluate to false. If you don’t provide an <code>else</code> expression and
|
||
the condition is false, the program will just skip the <code>if</code> block and move on
|
||
to the next bit of code.</p>
|
||
<p>Try running this code; you should see the following output:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling branches v0.1.0 (file:///projects/branches)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running `target/debug/branches`
|
||
condition was true
|
||
</code></pre>
|
||
<p>Let’s try changing the value of <code>number</code> to a value that makes the condition
|
||
<code>false</code> to see what happens:</p>
|
||
<pre><code class="language-rust ignore">let number = 7;
|
||
</code></pre>
|
||
<p>Run the program again, and look at the output:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling branches v0.1.0 (file:///projects/branches)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running `target/debug/branches`
|
||
condition was false
|
||
</code></pre>
|
||
<p>It’s also worth noting that the condition in this code <em>must</em> be a <code>bool</code>. If
|
||
the condition isn’t a <code>bool</code>, we’ll get an error. For example, try running the
|
||
following code:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let number = 3;
|
||
|
||
if number {
|
||
println!("number was three");
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>The <code>if</code> condition evaluates to a value of <code>3</code> this time, and Rust throws an
|
||
error:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
 --&gt; src/main.rs:4:8
  |
4 |     if number {
  |        ^^^^^^ expected bool, found integer
  |
  = note: expected type `bool`
             found type `{integer}`
</code></pre>
|
||
<p>The error indicates that Rust expected a <code>bool</code> but got an integer. Unlike
|
||
languages such as Ruby and JavaScript, Rust will not automatically try to
|
||
convert non-Boolean types to a Boolean. You must be explicit and always provide
|
||
<code>if</code> with a Boolean as its condition. If we want the <code>if</code> code block to run
|
||
only when a number is not equal to <code>0</code>, for example, we can change the <code>if</code>
|
||
expression to the following:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let number = 3;
|
||
|
||
if number != 0 {
|
||
println!("number was something other than zero");
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>Running this code will print <code>number was something other than zero</code>.</p>
|
||
<h4><a class="header" href="#handling-multiple-conditions-with-else-if" id="handling-multiple-conditions-with-else-if">Handling Multiple Conditions with <code>else if</code></a></h4>
|
||
<p>You can have multiple conditions by combining <code>if</code> and <code>else</code> in an <code>else if</code>
|
||
expression. For example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let number = 6;
|
||
|
||
if number % 4 == 0 {
|
||
println!("number is divisible by 4");
|
||
} else if number % 3 == 0 {
|
||
println!("number is divisible by 3");
|
||
} else if number % 2 == 0 {
|
||
println!("number is divisible by 2");
|
||
} else {
|
||
println!("number is not divisible by 4, 3, or 2");
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>This program has four possible paths it can take. After running it, you should
|
||
see the following output:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling branches v0.1.0 (file:///projects/branches)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running `target/debug/branches`
|
||
number is divisible by 3
|
||
</code></pre>
|
||
<p>When this program executes, it checks each <code>if</code> expression in turn and executes
|
||
the first body for which the condition holds true. Note that even though 6 is
|
||
divisible by 2, we don’t see the output <code>number is divisible by 2</code>, nor do we
|
||
see the <code>number is not divisible by 4, 3, or 2</code> text from the <code>else</code> block.
|
||
That’s because Rust only executes the block for the first true condition, and
|
||
once it finds one, it doesn’t even check the rest.</p>
|
||
<p>Using too many <code>else if</code> expressions can clutter your code, so if you have more
|
||
than one, you might want to refactor your code. Chapter 6 describes a powerful
|
||
Rust branching construct called <code>match</code> for these cases.</p>
|
||
<h4><a class="header" href="#using-if-in-a-let-statement" id="using-if-in-a-let-statement">Using <code>if</code> in a <code>let</code> Statement</a></h4>
|
||
<p>Because <code>if</code> is an expression, we can use it on the right side of a <code>let</code>
|
||
statement, as in Listing 3-2.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let condition = true;
|
||
let number = if condition {
|
||
5
|
||
} else {
|
||
6
|
||
};
|
||
|
||
println!("The value of number is: {}", number);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 3-2: Assigning the result of an <code>if</code> expression
|
||
to a variable</span></p>
|
||
<p>The <code>number</code> variable will be bound to a value based on the outcome of the <code>if</code>
|
||
expression. Run this code to see what happens:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling branches v0.1.0 (file:///projects/branches)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.30 secs
|
||
Running `target/debug/branches`
|
||
The value of number is: 5
|
||
</code></pre>
|
||
<p>Remember that blocks of code evaluate to the last expression in them, and
|
||
numbers by themselves are also expressions. In this case, the value of the
|
||
whole <code>if</code> expression depends on which block of code executes. This means the
|
||
values that have the potential to be results from each arm of the <code>if</code> must be
|
||
the same type; in Listing 3-2, the results of both the <code>if</code> arm and the <code>else</code>
|
||
arm were <code>i32</code> integers. If the types are mismatched, as in the following
|
||
example, we’ll get an error:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let condition = true;
|
||
|
||
let number = if condition {
|
||
5
|
||
} else {
|
||
"six"
|
||
};
|
||
|
||
println!("The value of number is: {}", number);
|
||
}
|
||
</code></pre>
|
||
<p>When we try to compile this code, we’ll get an error. The <code>if</code> and <code>else</code> arms
|
||
have value types that are incompatible, and Rust indicates exactly where to
|
||
find the problem in the program:</p>
|
||
<pre><code class="language-text">error[E0308]: if and else have incompatible types
 --&gt; src/main.rs:4:18
  |
4 |       let number = if condition {
  |  __________________^
5 | |         5
6 | |     } else {
7 | |         "six"
8 | |     };
  | |_____^ expected integer, found &amp;str
  |
  = note: expected type `{integer}`
             found type `&amp;str`
</code></pre>
|
||
<p>The expression in the <code>if</code> block evaluates to an integer, and the expression in
|
||
the <code>else</code> block evaluates to a string. This won’t work because variables must
|
||
have a single type. Rust needs to know at compile time what type the <code>number</code>
|
||
variable is, definitively, so it can verify at compile time that its type is
|
||
valid everywhere we use <code>number</code>. Rust wouldn’t be able to do that if the type
|
||
of <code>number</code> was only determined at runtime; the compiler would be more complex
|
||
and would make fewer guarantees about the code if it had to keep track of
|
||
multiple hypothetical types for any variable.</p>
|
||
<h3><a class="header" href="#repetition-with-loops" id="repetition-with-loops">Repetition with Loops</a></h3>
|
||
<p>It’s often useful to execute a block of code more than once. For this task,
|
||
Rust provides several <em>loops</em>. A loop runs through the code inside the loop
|
||
body to the end and then starts immediately back at the beginning. To
|
||
experiment with loops, let’s make a new project called <em>loops</em>.</p>
|
||
<p>Rust has three kinds of loops: <code>loop</code>, <code>while</code>, and <code>for</code>. Let’s try each one.</p>
|
||
<h4><a class="header" href="#repeating-code-with-loop" id="repeating-code-with-loop">Repeating Code with <code>loop</code></a></h4>
|
||
<p>The <code>loop</code> keyword tells Rust to execute a block of code over and over again
|
||
forever or until you explicitly tell it to stop.</p>
|
||
<p>As an example, change the <em>src/main.rs</em> file in your <em>loops</em> directory to look
|
||
like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
loop {
|
||
println!("again!");
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>When we run this program, we’ll see <code>again!</code> printed over and over continuously
|
||
until we stop the program manually. Most terminals support a keyboard shortcut,
|
||
<span class="keystroke">ctrl-c</span>, to interrupt a program that is stuck in
|
||
a continual loop. Give it a try:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling loops v0.1.0 (file:///projects/loops)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.29 secs
|
||
Running `target/debug/loops`
|
||
again!
|
||
again!
|
||
again!
|
||
again!
|
||
^Cagain!
|
||
</code></pre>
|
||
<p>The symbol <code>^C</code> represents where you pressed <span class="keystroke">ctrl-c</span>.
You may or may not see the word <code>again!</code> printed after the <code>^C</code>,
|
||
depending on where the code was in the loop when it received the interrupt
|
||
signal.</p>
|
||
<p>Fortunately, Rust provides another, more reliable way to break out of a loop.
|
||
You can place the <code>break</code> keyword within the loop to tell the program when to
|
||
stop executing the loop. Recall that we did this in the guessing game in the
|
||
<a href="ch02-00-guessing-game-tutorial.html#quitting-after-a-correct-guess">“Quitting After a Correct Guess”</a><!-- ignore
|
||
--> section of Chapter 2 to exit the program when the user won the game by
|
||
guessing the correct number.</p>
|
||
<h4><a class="header" href="#returning-values-from-loops" id="returning-values-from-loops">Returning Values from Loops</a></h4>
|
||
<p>One of the uses of a <code>loop</code> is to retry an operation you know might fail, such
|
||
as checking whether a thread has completed its job. However, you might need to
|
||
pass the result of that operation to the rest of your code. To do this, you can
|
||
add the value you want returned after the <code>break</code> expression you use to stop
|
||
the loop; that value will be returned out of the loop so you can use it, as
|
||
shown here:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let mut counter = 0;
|
||
|
||
let result = loop {
|
||
counter += 1;
|
||
|
||
if counter == 10 {
|
||
break counter * 2;
|
||
}
|
||
};
|
||
|
||
println!("The result is {}", result);
|
||
}
|
||
</code></pre></pre>
|
||
<p>Before the loop, we declare a variable named <code>counter</code> and initialize it to
|
||
<code>0</code>. Then we declare a variable named <code>result</code> to hold the value returned from
|
||
the loop. On every iteration of the loop, we add <code>1</code> to the <code>counter</code> variable,
|
||
and then check whether the counter is equal to <code>10</code>. When it is, we use the
|
||
<code>break</code> keyword with the value <code>counter * 2</code>. After the loop, we use a
|
||
semicolon to end the statement that assigns the value to <code>result</code>. Finally, we
|
||
print the value in <code>result</code>, which in this case is 20.</p>
|
||
<h4><a class="header" href="#conditional-loops-with-while" id="conditional-loops-with-while">Conditional Loops with <code>while</code></a></h4>
|
||
<p>It’s often useful for a program to evaluate a condition within a loop. While
|
||
the condition is true, the loop runs. When the condition ceases to be true, the
|
||
program calls <code>break</code>, stopping the loop. This loop type could be implemented
|
||
using a combination of <code>loop</code>, <code>if</code>, <code>else</code>, and <code>break</code>; you could try that
|
||
now in a program, if you’d like.</p>
|
||
<p>However, this pattern is so common that Rust has a built-in language construct
|
||
for it, called a <code>while</code> loop. Listing 3-3 uses <code>while</code>: the program loops
|
||
three times, counting down each time, and then, after the loop, it prints
|
||
another message and exits.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let mut number = 3;
|
||
|
||
while number != 0 {
|
||
println!("{}!", number);
|
||
|
||
number -= 1;
|
||
}
|
||
|
||
println!("LIFTOFF!!!");
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 3-3: Using a <code>while</code> loop to run code while a
|
||
condition holds true</span></p>
|
||
<p>This construct eliminates a lot of nesting that would be necessary if you used
|
||
<code>loop</code>, <code>if</code>, <code>else</code>, and <code>break</code>, and it’s clearer. While a condition holds
|
||
true, the code runs; otherwise, it exits the loop.</p>
|
||
<h4><a class="header" href="#looping-through-a-collection-with-for" id="looping-through-a-collection-with-for">Looping Through a Collection with <code>for</code></a></h4>
|
||
<p>You could use the <code>while</code> construct to loop over the elements of a collection,
|
||
such as an array. For example, let’s look at Listing 3-4.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
    let a = [10, 20, 30, 40, 50];
    let mut index = 0;

    while index &lt; 5 {
        println!("the value is: {}", a[index]);

        index += 1;
    }
}
</code></pre></pre>
|
||
<p><span class="caption">Listing 3-4: Looping through each element of a collection
|
||
using a <code>while</code> loop</span></p>
|
||
<p>Here, the code counts up through the elements in the array. It starts at index
|
||
<code>0</code>, and then loops until it reaches the final index in the array (that is,
|
||
when <code>index &lt; 5</code> is no longer true). Running this code will print every element
|
||
in the array:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling loops v0.1.0 (file:///projects/loops)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.32 secs
|
||
Running `target/debug/loops`
|
||
the value is: 10
|
||
the value is: 20
|
||
the value is: 30
|
||
the value is: 40
|
||
the value is: 50
|
||
</code></pre>
|
||
<p>All five array values appear in the terminal, as expected. Even though <code>index</code>
|
||
will reach a value of <code>5</code> at some point, the loop stops executing before trying
|
||
to fetch a sixth value from the array.</p>
|
||
<p>But this approach is error prone; we could cause the program to panic if the
index value or test condition is incorrect. It’s also slow, because the compiler
adds runtime code to perform the conditional check of whether the index is
within the bounds of the array on every iteration through the loop.</p>
|
||
<p>As a more concise alternative, you can use a <code>for</code> loop and execute some code
|
||
for each item in a collection. A <code>for</code> loop looks like the code in Listing 3-5.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let a = [10, 20, 30, 40, 50];
|
||
|
||
for element in a.iter() {
|
||
println!("the value is: {}", element);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 3-5: Looping through each element of a collection
|
||
using a <code>for</code> loop</span></p>
|
||
<p>When we run this code, we’ll see the same output as in Listing 3-4. More
|
||
importantly, we’ve now increased the safety of the code and eliminated the
|
||
chance of bugs that might result from going beyond the end of the array or not
|
||
going far enough and missing some items.</p>
|
||
<p>For example, in the code in Listing 3-4, if you removed an item from the <code>a</code>
|
||
array but forgot to update the condition to <code>while index &lt; 4</code>, the code would
|
||
panic. Using the <code>for</code> loop, you wouldn’t need to remember to change any other
|
||
code if you changed the number of values in the array.</p>
|
||
<p>The safety and conciseness of <code>for</code> loops make them the most commonly used loop
|
||
construct in Rust. Even in situations in which you want to run some code a
|
||
certain number of times, as in the countdown example that used a <code>while</code> loop
|
||
in Listing 3-3, most Rustaceans would use a <code>for</code> loop. The way to do that
|
||
would be to use a <code>Range</code>, which is a type provided by the standard library
|
||
that generates all numbers in sequence starting from one number and ending
|
||
before another number.</p>
|
||
<p>Here’s what the countdown would look like using a <code>for</code> loop and another method
|
||
we’ve not yet talked about, <code>rev</code>, to reverse the range:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
for number in (1..4).rev() {
|
||
println!("{}!", number);
|
||
}
|
||
println!("LIFTOFF!!!");
|
||
}
|
||
</code></pre></pre>
|
||
<p>This code is a bit nicer, isn’t it?</p>
|
||
<h2><a class="header" href="#summary-2" id="summary-2">Summary</a></h2>
|
||
<p>You made it! That was a sizable chapter: you learned about variables, scalar
|
||
and compound data types, functions, comments, <code>if</code> expressions, and loops! If
|
||
you want to practice with the concepts discussed in this chapter, try building
|
||
programs to do the following:</p>
|
||
<ul>
|
||
<li>Convert temperatures between Fahrenheit and Celsius.</li>
|
||
<li>Generate the nth Fibonacci number.</li>
|
||
<li>Print the lyrics to the Christmas carol “The Twelve Days of Christmas,”
|
||
taking advantage of the repetition in the song.</li>
|
||
</ul>
|
||
<p>When you’re ready to move on, we’ll talk about a concept in Rust that <em>doesn’t</em>
|
||
commonly exist in other programming languages: ownership.</p>
|
||
<h1><a class="header" href="#understanding-ownership" id="understanding-ownership">Understanding Ownership</a></h1>
|
||
<p>Ownership is Rust’s most unique feature, and it enables Rust to make memory
|
||
safety guarantees without needing a garbage collector. Therefore, it’s
|
||
important to understand how ownership works in Rust. In this chapter, we’ll
|
||
talk about ownership as well as several related features: borrowing, slices,
|
||
and how Rust lays data out in memory.</p>
|
||
<h2><a class="header" href="#what-is-ownership" id="what-is-ownership">What Is Ownership?</a></h2>
|
||
<p>Rust’s central feature is <em>ownership</em>. Although the feature is straightforward
|
||
to explain, it has deep implications for the rest of the language.</p>
|
||
<p>All programs have to manage the way they use a computer’s memory while running.
|
||
Some languages have garbage collection that constantly looks for no longer used
|
||
memory as the program runs; in other languages, the programmer must explicitly
|
||
allocate and free the memory. Rust uses a third approach: memory is managed
|
||
through a system of ownership with a set of rules that the compiler checks at
|
||
compile time. None of the ownership features slow down your program while it’s
|
||
running.</p>
|
||
<p>Because ownership is a new concept for many programmers, it does take some time
|
||
to get used to. The good news is that the more experienced you become with Rust
|
||
and the rules of the ownership system, the more you’ll be able to naturally
|
||
develop code that is safe and efficient. Keep at it!</p>
|
||
<p>When you understand ownership, you’ll have a solid foundation for understanding
|
||
the features that make Rust unique. In this chapter, you’ll learn ownership by
|
||
working through some examples that focus on a very common data structure:
|
||
strings.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#the-stack-and-the-heap" id="the-stack-and-the-heap">The Stack and the Heap</a></h3>
|
||
<p>In many programming languages, you don’t have to think about the stack and
|
||
the heap very often. But in a systems programming language like Rust, whether
|
||
a value is on the stack or the heap has more of an effect on how the language
|
||
behaves and why you have to make certain decisions. Parts of ownership will
|
||
be described in relation to the stack and the heap later in this chapter, so
|
||
here is a brief explanation in preparation.</p>
|
||
<p>Both the stack and the heap are parts of memory that are available to your code
|
||
to use at runtime, but they are structured in different ways. The stack stores
|
||
values in the order it gets them and removes the values in the opposite order.
|
||
This is referred to as <em>last in, first out</em>. Think of a stack of plates: when
|
||
you add more plates, you put them on top of the pile, and when you need a
|
||
plate, you take one off the top. Adding or removing plates from the middle or
|
||
bottom wouldn’t work as well! Adding data is called <em>pushing onto the stack</em>,
|
||
and removing data is called <em>popping off the stack</em>.</p>
|
||
<p>All data stored on the stack must have a known, fixed size. Data with an
|
||
unknown size at compile time or a size that might change must be stored on
|
||
the heap instead. The heap is less organized: when you put data on the heap,
|
||
you request a certain amount of space. The operating system finds an empty
|
||
spot in the heap that is big enough, marks it as being in use, and returns a
|
||
<em>pointer</em>, which is the address of that location. This process is called
|
||
<em>allocating on the heap</em> and is sometimes abbreviated as just <em>allocating</em>.
|
||
Pushing values onto the stack is not considered allocating. Because the
|
||
pointer is a known, fixed size, you can store the pointer on the stack, but
|
||
when you want the actual data, you must follow the pointer.</p>
|
||
<p>Think of being seated at a restaurant. When you enter, you state the number of
|
||
people in your group, and the staff finds an empty table that fits everyone
|
||
and leads you there. If someone in your group comes late, they can ask where
|
||
you’ve been seated to find you.</p>
|
||
<p>Pushing to the stack is faster than allocating on the heap because the
|
||
operating system never has to search for a place to store new data; that
|
||
location is always at the top of the stack. Comparatively, allocating space
|
||
on the heap requires more work, because the operating system must first find
|
||
a big enough space to hold the data and then perform bookkeeping to prepare
|
||
for the next allocation.</p>
|
||
<p>Accessing data in the heap is slower than accessing data on the stack because
|
||
you have to follow a pointer to get there. Contemporary processors are faster
|
||
if they jump around less in memory. Continuing the analogy, consider a server
|
||
at a restaurant taking orders from many tables. It’s most efficient to get
|
||
all the orders at one table before moving on to the next table. Taking an
|
||
order from table A, then an order from table B, then one from A again, and
|
||
then one from B again would be a much slower process. By the same token, a
|
||
processor can do its job better if it works on data that’s close to other
|
||
data (as it is on the stack) rather than farther away (as it can be on the
|
||
heap). Allocating a large amount of space on the heap can also take time.</p>
|
||
<p>When your code calls a function, the values passed into the function
|
||
(including, potentially, pointers to data on the heap) and the function’s
|
||
local variables get pushed onto the stack. When the function is over, those
|
||
values get popped off the stack.</p>
|
||
<p>Keeping track of what parts of code are using what data on the heap,
|
||
minimizing the amount of duplicate data on the heap, and cleaning up unused
|
||
data on the heap so you don’t run out of space are all problems that ownership
|
||
addresses. Once you understand ownership, you won’t need to think about the
|
||
stack and the heap very often, but knowing that managing heap data is why
|
||
ownership exists can help explain why it works the way it does.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#ownership-rules" id="ownership-rules">Ownership Rules</a></h3>
|
||
<p>First, let’s take a look at the ownership rules. Keep these rules in mind as we
|
||
work through the examples that illustrate them:</p>
|
||
<ul>
|
||
<li>Each value in Rust has a variable that’s called its <em>owner</em>.</li>
|
||
<li>There can only be one owner at a time.</li>
|
||
<li>When the owner goes out of scope, the value will be dropped.</li>
|
||
</ul>
|
||
<h3><a class="header" href="#variable-scope" id="variable-scope">Variable Scope</a></h3>
|
||
<p>We’ve walked through an example of a Rust program already in Chapter 2. Now
|
||
that we’re past basic syntax, we won’t include all the <code>fn main() {</code> code in
|
||
examples, so if you’re following along, you’ll have to put the following
|
||
examples inside a <code>main</code> function manually. As a result, our examples will be a
|
||
bit more concise, letting us focus on the actual details rather than
|
||
boilerplate code.</p>
|
||
<p>As a first example of ownership, we’ll look at the <em>scope</em> of some variables. A
|
||
scope is the range within a program for which an item is valid. Let’s say we
|
||
have a variable that looks like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = "hello";
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The variable <code>s</code> refers to a string literal, where the value of the string is
|
||
hardcoded into the text of our program. The variable is valid from the point at
|
||
which it’s declared until the end of the current <em>scope</em>. Listing 4-1 has
|
||
comments annotating where the variable <code>s</code> is valid.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>{ // s is not valid here, it’s not yet declared
|
||
let s = "hello"; // s is valid from this point forward
|
||
|
||
// do stuff with s
|
||
} // this scope is now over, and s is no longer valid
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 4-1: A variable and the scope in which it is
|
||
valid</span></p>
|
||
<p>In other words, there are two important points in time here:</p>
|
||
<ul>
|
||
<li>When <code>s</code> comes <em>into scope</em>, it is valid.</li>
|
||
<li>It remains valid until it goes <em>out of scope</em>.</li>
|
||
</ul>
|
||
<p>At this point, the relationship between scopes and when variables are valid is
|
||
similar to that in other programming languages. Now we’ll build on top of this
|
||
understanding by introducing the <code>String</code> type.</p>
|
||
<h3><a class="header" href="#the-string-type" id="the-string-type">The <code>String</code> Type</a></h3>
|
||
<p>To illustrate the rules of ownership, we need a data type that is more complex
|
||
than the ones we covered in the <a href="ch03-02-data-types.html#data-types">“Data Types”</a><!-- ignore -->
|
||
section of Chapter 3. The types covered previously are all stored on the stack
|
||
and popped off the stack when their scope is over, but we want to look at data
|
||
that is stored on the heap and explore how Rust knows when to clean up that
|
||
data.</p>
|
||
<p>We’ll use <code>String</code> as the example here and concentrate on the parts of <code>String</code>
|
||
that relate to ownership. These aspects also apply to other complex data types,
|
||
whether they are provided by the standard library or created by you. We’ll
|
||
discuss <code>String</code> in more depth in Chapter 8.</p>
|
||
<p>We’ve already seen string literals, where a string value is hardcoded into our
|
||
program. String literals are convenient, but they aren’t suitable for every
|
||
situation in which we may want to use text. One reason is that they’re
|
||
immutable. Another is that not every string value can be known when we write
|
||
our code: for example, what if we want to take user input and store it? For
|
||
these situations, Rust has a second string type, <code>String</code>. This type is
|
||
allocated on the heap and as such is able to store an amount of text that is
|
||
unknown to us at compile time. You can create a <code>String</code> from a string literal
|
||
using the <code>from</code> function, like so:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = String::from("hello");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The double colon (<code>::</code>) is an operator that allows us to namespace this
|
||
particular <code>from</code> function under the <code>String</code> type rather than using some sort
|
||
of name like <code>string_from</code>. We’ll discuss this syntax more in the <a href="ch05-03-method-syntax.html#method-syntax">“Method
|
||
Syntax”</a><!-- ignore --> section of Chapter 5 and when we talk
|
||
about namespacing with modules in <a href="ch07-03-paths-for-referring-to-an-item-in-the-module-tree.html">“Paths for Referring to an Item in the
|
||
Module Tree”</a><!-- ignore --> in Chapter 7.</p>
|
||
<p>This kind of string <em>can</em> be mutated:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut s = String::from("hello");
|
||
|
||
s.push_str(", world!"); // push_str() appends a literal to a String
|
||
|
||
println!("{}", s); // This will print `hello, world!`
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>So, what’s the difference here? Why can <code>String</code> be mutated but literals
|
||
cannot? The difference is how these two types deal with memory.</p>
|
||
<h3><a class="header" href="#memory-and-allocation" id="memory-and-allocation">Memory and Allocation</a></h3>
|
||
<p>In the case of a string literal, we know the contents at compile time, so the
|
||
text is hardcoded directly into the final executable. This is why string
|
||
literals are fast and efficient. But these properties only come from the string
|
||
literal’s immutability. Unfortunately, we can’t put a blob of memory into the
|
||
binary for each piece of text whose size is unknown at compile time and whose
|
||
size might change while running the program.</p>
|
||
<p>With the <code>String</code> type, in order to support a mutable, growable piece of text,
|
||
we need to allocate an amount of memory on the heap, unknown at compile time,
|
||
to hold the contents. This means:</p>
|
||
<ul>
|
||
<li>The memory must be requested from the operating system at runtime.</li>
|
||
<li>We need a way of returning this memory to the operating system when we’re
|
||
done with our <code>String</code>.</li>
|
||
</ul>
|
||
<p>That first part is done by us: when we call <code>String::from</code>, its implementation
|
||
requests the memory it needs. This is pretty much universal in programming
|
||
languages.</p>
|
||
<p>However, the second part is different. In languages with a <em>garbage collector
|
||
(GC)</em>, the GC keeps track of and cleans up memory that isn’t being used anymore,
|
||
and we don’t need to think about it. Without a GC, it’s our responsibility to
|
||
identify when memory is no longer being used and call code to explicitly return
|
||
it, just as we did to request it. Doing this correctly has historically been a
|
||
difficult programming problem. If we forget, we’ll waste memory. If we do it
|
||
too early, we’ll have an invalid variable. If we do it twice, that’s a bug too.
|
||
We need to pair exactly one <code>allocate</code> with exactly one <code>free</code>.</p>
|
||
<p>Rust takes a different path: the memory is automatically returned once the
|
||
variable that owns it goes out of scope. Here’s a version of our scope example
|
||
from Listing 4-1 using a <code>String</code> instead of a string literal:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>{
|
||
let s = String::from("hello"); // s is valid from this point forward
|
||
|
||
// do stuff with s
|
||
} // this scope is now over, and s is no
|
||
// longer valid
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>There is a natural point at which we can return the memory our <code>String</code> needs
|
||
to the operating system: when <code>s</code> goes out of scope. When a variable goes out
|
||
of scope, Rust calls a special function for us. This function is called <code>drop</code>,
|
||
and it’s where the author of <code>String</code> can put the code to return the memory.
|
||
Rust calls <code>drop</code> automatically at the closing curly bracket.</p>
|
||
<blockquote>
|
||
<p>Note: In C++, this pattern of deallocating resources at the end of an item’s
|
||
lifetime is sometimes called <em>Resource Acquisition Is Initialization (RAII)</em>.
|
||
The <code>drop</code> function in Rust will be familiar to you if you’ve used RAII
|
||
patterns.</p>
|
||
</blockquote>
|
||
<p>This pattern has a profound impact on the way Rust code is written. It may seem
|
||
simple right now, but the behavior of code can be unexpected in more
|
||
complicated situations when we want to have multiple variables use the data
|
||
we’ve allocated on the heap. Let’s explore some of those situations now.</p>
|
||
<h4><a class="header" href="#ways-variables-and-data-interact-move" id="ways-variables-and-data-interact-move">Ways Variables and Data Interact: Move</a></h4>
|
||
<p>Multiple variables can interact with the same data in different ways in Rust.
|
||
Let’s look at an example using an integer in Listing 4-2.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 5;
|
||
let y = x;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 4-2: Assigning the integer value of variable <code>x</code>
|
||
to <code>y</code></span></p>
|
||
<p>We can probably guess what this is doing: “bind the value <code>5</code> to <code>x</code>; then make
|
||
a copy of the value in <code>x</code> and bind it to <code>y</code>.” We now have two variables, <code>x</code>
|
||
and <code>y</code>, and both equal <code>5</code>. This is indeed what is happening, because integers
|
||
are simple values with a known, fixed size, and these two <code>5</code> values are pushed
|
||
onto the stack.</p>
|
||
<p>Now let’s look at the <code>String</code> version:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s1 = String::from("hello");
|
||
let s2 = s1;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This looks very similar to the previous code, so we might assume that the way
|
||
it works would be the same: that is, the second line would make a copy of the
|
||
value in <code>s1</code> and bind it to <code>s2</code>. But this isn’t quite what happens.</p>
|
||
<p>Take a look at Figure 4-1 to see what is happening to <code>String</code> under the
|
||
covers. A <code>String</code> is made up of three parts, shown on the left: a pointer to
|
||
the memory that holds the contents of the string, a length, and a capacity.
|
||
This group of data is stored on the stack. On the right is the memory on the
|
||
heap that holds the contents.</p>
|
||
<img alt="String in memory" src="img/trpl04-01.svg" class="center" style="width: 50%;" />
|
||
<p><span class="caption">Figure 4-1: Representation in memory of a <code>String</code>
|
||
holding the value <code>"hello"</code> bound to <code>s1</code></span></p>
|
||
<p>The length is how much memory, in bytes, the contents of the <code>String</code> are
|
||
currently using. The capacity is the total amount of memory, in bytes, that the
|
||
<code>String</code> has received from the operating system. The difference between length
|
||
and capacity matters, but not in this context, so for now, it’s fine to ignore
|
||
the capacity.</p>
|
||
<p>When we assign <code>s1</code> to <code>s2</code>, the <code>String</code> data is copied, meaning we copy the
|
||
pointer, the length, and the capacity that are on the stack. We do not copy the
|
||
data on the heap that the pointer refers to. In other words, the data
|
||
representation in memory looks like Figure 4-2.</p>
|
||
<img alt="s1 and s2 pointing to the same value" src="img/trpl04-02.svg" class="center" style="width: 50%;" />
|
||
<p><span class="caption">Figure 4-2: Representation in memory of the variable <code>s2</code>
|
||
that has a copy of the pointer, length, and capacity of <code>s1</code></span></p>
|
||
<p>The representation does <em>not</em> look like Figure 4-3, which is what memory would
|
||
look like if Rust instead copied the heap data as well. If Rust did this, the
|
||
operation <code>s2 = s1</code> could be very expensive in terms of runtime performance if
|
||
the data on the heap were large.</p>
|
||
<img alt="s1 and s2 pointing to two places" src="img/trpl04-03.svg" class="center" style="width: 50%;" />
|
||
<p><span class="caption">Figure 4-3: Another possibility for what <code>s2 = s1</code> might
|
||
do if Rust copied the heap data as well</span></p>
|
||
<p>Earlier, we said that when a variable goes out of scope, Rust automatically
|
||
calls the <code>drop</code> function and cleans up the heap memory for that variable. But
|
||
Figure 4-2 shows both data pointers pointing to the same location. This is a
|
||
problem: when <code>s2</code> and <code>s1</code> go out of scope, they will both try to free the
|
||
same memory. This is known as a <em>double free</em> error and is one of the memory
|
||
safety bugs we mentioned previously. Freeing memory twice can lead to memory
|
||
corruption, which can potentially lead to security vulnerabilities.</p>
|
||
<p>To ensure memory safety, there’s one more detail to what happens in this
|
||
situation in Rust. Instead of trying to copy the allocated memory, Rust
|
||
considers <code>s1</code> to no longer be valid and, therefore, Rust doesn’t need to free
|
||
anything when <code>s1</code> goes out of scope. Check out what happens when you try to
|
||
use <code>s1</code> after <code>s2</code> is created; it won’t work:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let s1 = String::from("hello");
|
||
let s2 = s1;
|
||
|
||
println!("{}, world!", s1);
|
||
</code></pre>
|
||
<p>You’ll get an error like this because Rust prevents you from using the
|
||
invalidated variable:</p>
|
||
<pre><code class="language-text">error[E0382]: use of moved value: `s1`
|
||
--> src/main.rs:5:28
|
||
|
|
||
3 | let s2 = s1;
|
||
| -- value moved here
|
||
4 |
|
||
5 | println!("{}, world!", s1);
|
||
| ^^ value used here after move
|
||
|
|
||
= note: move occurs because `s1` has type `std::string::String`, which does
|
||
not implement the `Copy` trait
|
||
</code></pre>
|
||
<p>If you’ve heard the terms <em>shallow copy</em> and <em>deep copy</em> while working with
|
||
other languages, the concept of copying the pointer, length, and capacity
|
||
without copying the data probably sounds like making a shallow copy. But
|
||
because Rust also invalidates the first variable, instead of being called a
|
||
shallow copy, it’s known as a <em>move</em>. In this example, we would say that
|
||
<code>s1</code> was <em>moved</em> into <code>s2</code>. So what actually happens is shown in Figure 4-4.</p>
|
||
<img alt="s1 moved to s2" src="img/trpl04-04.svg" class="center" style="width: 50%;" />
|
||
<p><span class="caption">Figure 4-4: Representation in memory after <code>s1</code> has been
|
||
invalidated</span></p>
|
||
<p>That solves our problem! With only <code>s2</code> valid, when it goes out of scope, it
|
||
alone will free the memory, and we’re done.</p>
|
||
<p>In addition, there’s a design choice that’s implied by this: Rust will never
|
||
automatically create “deep” copies of your data. Therefore, any <em>automatic</em>
|
||
copying can be assumed to be inexpensive in terms of runtime performance.</p>
|
||
<h4><a class="header" href="#ways-variables-and-data-interact-clone" id="ways-variables-and-data-interact-clone">Ways Variables and Data Interact: Clone</a></h4>
|
||
<p>If we <em>do</em> want to deeply copy the heap data of the <code>String</code>, not just the
|
||
stack data, we can use a common method called <code>clone</code>. We’ll discuss method
|
||
syntax in Chapter 5, but because methods are a common feature in many
|
||
programming languages, you’ve probably seen them before.</p>
|
||
<p>Here’s an example of the <code>clone</code> method in action:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s1 = String::from("hello");
|
||
let s2 = s1.clone();
|
||
|
||
println!("s1 = {}, s2 = {}", s1, s2);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This works just fine and explicitly produces the behavior shown in Figure 4-3,
|
||
where the heap data <em>does</em> get copied.</p>
|
||
<p>When you see a call to <code>clone</code>, you know that some arbitrary code is being
|
||
executed and that code may be expensive. It’s a visual indicator that something
|
||
different is going on.</p>
|
||
<h4><a class="header" href="#stack-only-data-copy" id="stack-only-data-copy">Stack-Only Data: Copy</a></h4>
|
||
<p>There’s another wrinkle we haven’t talked about yet. This code using integers,
|
||
part of which was shown in Listing 4-2, works and is valid:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 5;
|
||
let y = x;
|
||
|
||
println!("x = {}, y = {}", x, y);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>But this code seems to contradict what we just learned: we don’t have a call to
|
||
<code>clone</code>, but <code>x</code> is still valid and wasn’t moved into <code>y</code>.</p>
|
||
<p>The reason is that types such as integers that have a known size at compile
|
||
time are stored entirely on the stack, so copies of the actual values are quick
|
||
to make. That means there’s no reason we would want to prevent <code>x</code> from being
|
||
valid after we create the variable <code>y</code>. In other words, there’s no difference
|
||
between deep and shallow copying here, so calling <code>clone</code> wouldn’t do anything
|
||
different from the usual shallow copying and we can leave it out.</p>
|
||
<p>Rust has a special annotation called the <code>Copy</code> trait that we can place on
|
||
types like integers that are stored on the stack (we’ll talk more about traits
|
||
in Chapter 10). If a type has the <code>Copy</code> trait, an older variable is still
|
||
usable after assignment. Rust won’t let us annotate a type with the <code>Copy</code>
|
||
trait if the type, or any of its parts, has implemented the <code>Drop</code> trait. If
|
||
the type needs something special to happen when the value goes out of scope and
|
||
we add the <code>Copy</code> annotation to that type, we’ll get a compile-time error. To
|
||
learn about how to add the <code>Copy</code> annotation to your type, see <a href="appendix-03-derivable-traits.html">“Derivable
|
||
Traits”</a><!-- ignore --> in Appendix C.</p>
|
||
<p>So what types are <code>Copy</code>? You can check the documentation for the given type to
|
||
be sure, but as a general rule, any group of simple scalar values can be
|
||
<code>Copy</code>, and nothing that requires allocation or is some form of resource is
|
||
<code>Copy</code>. Here are some of the types that are <code>Copy</code>:</p>
|
||
<ul>
|
||
<li>All the integer types, such as <code>u32</code>.</li>
|
||
<li>The Boolean type, <code>bool</code>, with values <code>true</code> and <code>false</code>.</li>
|
||
<li>All the floating point types, such as <code>f64</code>.</li>
|
||
<li>The character type, <code>char</code>.</li>
|
||
<li>Tuples, if they only contain types that are also <code>Copy</code>. For example,
|
||
<code>(i32, i32)</code> is <code>Copy</code>, but <code>(i32, String)</code> is not.</li>
|
||
</ul>
|
||
<h3><a class="header" href="#ownership-and-functions" id="ownership-and-functions">Ownership and Functions</a></h3>
|
||
<p>The semantics for passing a value to a function are similar to those for
|
||
assigning a value to a variable. Passing a variable to a function will move or
|
||
copy, just as assignment does. Listing 4-3 has an example with some annotations
|
||
showing where variables go into and out of scope.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let s = String::from("hello"); // s comes into scope
|
||
|
||
takes_ownership(s); // s's value moves into the function...
|
||
// ... and so is no longer valid here
|
||
|
||
let x = 5; // x comes into scope
|
||
|
||
makes_copy(x); // x would move into the function,
|
||
// but i32 is Copy, so it’s okay to still
|
||
// use x afterward
|
||
|
||
} // Here, x goes out of scope, then s. But because s's value was moved, nothing
|
||
// special happens.
|
||
|
||
fn takes_ownership(some_string: String) { // some_string comes into scope
|
||
println!("{}", some_string);
|
||
} // Here, some_string goes out of scope and `drop` is called. The backing
|
||
// memory is freed.
|
||
|
||
fn makes_copy(some_integer: i32) { // some_integer comes into scope
|
||
println!("{}", some_integer);
|
||
} // Here, some_integer goes out of scope. Nothing special happens.
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 4-3: Functions with ownership and scope
|
||
annotated</span></p>
|
||
<p>If we tried to use <code>s</code> after the call to <code>takes_ownership</code>, Rust would throw a
|
||
compile-time error. These static checks protect us from mistakes. Try adding
|
||
code to <code>main</code> that uses <code>s</code> and <code>x</code> to see where you can use them and where
|
||
the ownership rules prevent you from doing so.</p>
|
||
<h3><a class="header" href="#return-values-and-scope" id="return-values-and-scope">Return Values and Scope</a></h3>
|
||
<p>Returning values can also transfer ownership. Listing 4-4 is an example with
|
||
similar annotations to those in Listing 4-3.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let s1 = gives_ownership(); // gives_ownership moves its return
|
||
// value into s1
|
||
|
||
let s2 = String::from("hello"); // s2 comes into scope
|
||
|
||
let s3 = takes_and_gives_back(s2); // s2 is moved into
|
||
// takes_and_gives_back, which also
|
||
// moves its return value into s3
|
||
} // Here, s3 goes out of scope and is dropped. s2 goes out of scope but was
|
||
// moved, so nothing happens. s1 goes out of scope and is dropped.
|
||
|
||
fn gives_ownership() -> String { // gives_ownership will move its
|
||
// return value into the function
|
||
// that calls it
|
||
|
||
let some_string = String::from("hello"); // some_string comes into scope
|
||
|
||
some_string // some_string is returned and
|
||
// moves out to the calling
|
||
// function
|
||
}
|
||
|
||
// takes_and_gives_back will take a String and return one
|
||
fn takes_and_gives_back(a_string: String) -> String { // a_string comes into
|
||
// scope
|
||
|
||
a_string // a_string is returned and moves out to the calling function
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 4-4: Transferring ownership of return
|
||
values</span></p>
|
||
<p>The ownership of a variable follows the same pattern every time: assigning a
|
||
value to another variable moves it. When a variable that includes data on the
|
||
heap goes out of scope, the value will be cleaned up by <code>drop</code> unless the data
|
||
has been moved to be owned by another variable.</p>
|
||
<p>Taking ownership and then returning ownership with every function is a bit
|
||
tedious. What if we want to let a function use a value but not take ownership?
|
||
It’s quite annoying that anything we pass in also needs to be passed back if we
|
||
want to use it again, in addition to any data resulting from the body of the
|
||
function that we might want to return as well.</p>
|
||
<p>It’s possible to return multiple values using a tuple, as shown in Listing 4-5.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let s1 = String::from("hello");
|
||
|
||
let (s2, len) = calculate_length(s1);
|
||
|
||
println!("The length of '{}' is {}.", s2, len);
|
||
}
|
||
|
||
fn calculate_length(s: String) -> (String, usize) {
|
||
let length = s.len(); // len() returns the length of a String
|
||
|
||
(s, length)
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 4-5: Returning ownership of parameters</span></p>
|
||
<p>But this is too much ceremony and a lot of work for a concept that should be
|
||
common. Luckily for us, Rust has a feature for this concept, called
|
||
<em>references</em>.</p>
|
||
<h2><a class="header" href="#references-and-borrowing" id="references-and-borrowing">References and Borrowing</a></h2>
|
||
<p>The issue with the tuple code in Listing 4-5 is that we have to return the
|
||
<code>String</code> to the calling function so we can still use the <code>String</code> after the
|
||
call to <code>calculate_length</code>, because the <code>String</code> was moved into
|
||
<code>calculate_length</code>.</p>
|
||
<p>Here is how you would define and use a <code>calculate_length</code> function that has a
|
||
reference to an object as a parameter instead of taking ownership of the
|
||
value:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let s1 = String::from("hello");
|
||
|
||
let len = calculate_length(&s1);
|
||
|
||
println!("The length of '{}' is {}.", s1, len);
|
||
}
|
||
|
||
fn calculate_length(s: &String) -> usize {
|
||
s.len()
|
||
}
|
||
</code></pre></pre>
|
||
<p>First, notice that all the tuple code in the variable declaration and the
|
||
function return value is gone. Second, note that we pass <code>&s1</code> into
|
||
<code>calculate_length</code> and, in its definition, we take <code>&String</code> rather than
|
||
<code>String</code>.</p>
|
||
<p>These ampersands are <em>references</em>, and they allow you to refer to some value
|
||
without taking ownership of it. Figure 4-5 shows a diagram.</p>
|
||
<img alt="&String s pointing at String s1" src="img/trpl04-05.svg" class="center" />
|
||
<p><span class="caption">Figure 4-5: A diagram of <code>&String s</code> pointing at <code>String s1</code></span></p>
|
||
<blockquote>
|
||
<p>Note: The opposite of referencing by using <code>&</code> is <em>dereferencing</em>, which is
|
||
accomplished with the dereference operator, <code>*</code>. We’ll see some uses of the
|
||
dereference operator in Chapter 8 and discuss details of dereferencing in
|
||
Chapter 15.</p>
|
||
</blockquote>
|
||
<p>Let’s take a closer look at the function call here:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">fn calculate_length(s: &String) -> usize {
|
||
</span><span class="boring"> s.len()
|
||
</span><span class="boring">}
|
||
</span>let s1 = String::from("hello");
|
||
|
||
let len = calculate_length(&s1);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>&s1</code> syntax lets us create a reference that <em>refers</em> to the value of <code>s1</code>
|
||
but does not own it. Because it does not own it, the value it points to will
|
||
not be dropped when the reference goes out of scope.</p>
|
||
<p>Likewise, the signature of the function uses <code>&</code> to indicate that the type of
|
||
the parameter <code>s</code> is a reference. Let’s add some explanatory annotations:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn calculate_length(s: &String) -> usize { // s is a reference to a String
|
||
s.len()
|
||
} // Here, s goes out of scope. But because it does not have ownership of what
|
||
// it refers to, nothing happens.
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The scope in which the variable <code>s</code> is valid is the same as any function
|
||
parameter’s scope, but we don’t drop what the reference points to when it goes
|
||
out of scope because we don’t have ownership. When functions have references as
|
||
parameters instead of the actual values, we won’t need to return the values in
|
||
order to give back ownership, because we never had ownership.</p>
|
||
<p>We call having references as function parameters <em>borrowing</em>. As in real life,
|
||
if a person owns something, you can borrow it from them. When you’re done, you
|
||
have to give it back.</p>
|
||
<p>So what happens if we try to modify something we’re borrowing? Try the code in
|
||
Listing 4-6. Spoiler alert: it doesn’t work!</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let s = String::from("hello");
|
||
|
||
change(&s);
|
||
}
|
||
|
||
fn change(some_string: &String) {
|
||
some_string.push_str(", world");
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 4-6: Attempting to modify a borrowed value</span></p>
|
||
<p>Here’s the error:</p>
|
||
<pre><code class="language-text">error[E0596]: cannot borrow immutable borrowed content `*some_string` as mutable
|
||
--> src/main.rs:8:5
|
||
|
|
||
7 | fn change(some_string: &String) {
|
||
| ------- use `&mut String` here to make mutable
|
||
8 | some_string.push_str(", world");
|
||
| ^^^^^^^^^^^ cannot borrow as mutable
|
||
</code></pre>
|
||
<p>Just as variables are immutable by default, so are references. We’re not
|
||
allowed to modify something we have a reference to.</p>
|
||
<h3><a class="header" href="#mutable-references" id="mutable-references">Mutable References</a></h3>
|
||
<p>We can fix the error in the code from Listing 4-6 with just a small tweak:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let mut s = String::from("hello");
|
||
|
||
change(&mut s);
|
||
}
|
||
|
||
fn change(some_string: &mut String) {
|
||
some_string.push_str(", world");
|
||
}
|
||
</code></pre></pre>
|
||
<p>First, we had to change <code>s</code> to be <code>mut</code>. Then we had to create a mutable
|
||
reference with <code>&mut s</code> and accept a mutable reference with <code>some_string: &mut String</code>.</p>
|
||
<p>But mutable references have one big restriction: you can have only one mutable
|
||
reference to a particular piece of data in a particular scope. This code will
|
||
fail:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">let mut s = String::from("hello");
|
||
|
||
let r1 = &mut s;
|
||
let r2 = &mut s;
|
||
|
||
println!("{}, {}", r1, r2);
|
||
</code></pre>
|
||
<p>Here’s the error:</p>
|
||
<pre><code class="language-text">error[E0499]: cannot borrow `s` as mutable more than once at a time
|
||
--> src/main.rs:5:14
|
||
|
|
||
4 | let r1 = &mut s;
|
||
| ------ first mutable borrow occurs here
|
||
5 | let r2 = &mut s;
|
||
| ^^^^^^ second mutable borrow occurs here
|
||
6 |
|
||
7 | println!("{}, {}", r1, r2);
|
||
| -- first borrow later used here
|
||
</code></pre>
|
||
<p>This restriction allows for mutation but in a very controlled fashion. It’s
|
||
something that new Rustaceans struggle with, because most languages let you
|
||
mutate whenever you’d like.</p>
|
||
<p>The benefit of having this restriction is that Rust can prevent data races at
|
||
compile time. A <em>data race</em> is similar to a race condition and happens when
|
||
these three behaviors occur:</p>
|
||
<ul>
|
||
<li>Two or more pointers access the same data at the same time.</li>
|
||
<li>At least one of the pointers is being used to write to the data.</li>
|
||
<li>There’s no mechanism being used to synchronize access to the data.</li>
|
||
</ul>
|
||
<p>Data races cause undefined behavior and can be difficult to diagnose and fix
|
||
when you’re trying to track them down at runtime; Rust prevents this problem
|
||
from happening because it won’t even compile code with data races!</p>
|
||
<p>As always, we can use curly brackets to create a new scope, allowing for
|
||
multiple mutable references, just not <em>simultaneous</em> ones:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut s = String::from("hello");
|
||
|
||
{
|
||
let r1 = &mut s;
|
||
|
||
} // r1 goes out of scope here, so we can make a new reference with no problems.
|
||
|
||
let r2 = &mut s;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>A similar rule exists for combining mutable and immutable references. This code
|
||
results in an error:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let mut s = String::from("hello");
|
||
|
||
let r1 = &s; // no problem
|
||
let r2 = &s; // no problem
|
||
let r3 = &mut s; // BIG PROBLEM
|
||
|
||
println!("{}, {}, and {}", r1, r2, r3);
|
||
</code></pre>
|
||
<p>Here’s the error:</p>
|
||
<pre><code class="language-text">error[E0502]: cannot borrow `s` as mutable because it is also borrowed as immutable
|
||
--> src/main.rs:6:14
|
||
|
|
||
4 | let r1 = &s; // no problem
|
||
| -- immutable borrow occurs here
|
||
5 | let r2 = &s; // no problem
|
||
6 | let r3 = &mut s; // BIG PROBLEM
|
||
| ^^^^^^ mutable borrow occurs here
|
||
7 |
|
||
8 | println!("{}, {}, and {}", r1, r2, r3);
|
||
| -- immutable borrow later used here
|
||
</code></pre>
|
||
<p>Whew! We <em>also</em> cannot have a mutable reference while we have an immutable one.
|
||
Users of an immutable reference don’t expect the values to suddenly change out
|
||
from under them! However, multiple immutable references are okay because no one
|
||
who is just reading the data has the ability to affect anyone else’s reading of
|
||
the data.</p>
|
||
<p>Note that a reference’s scope starts from where it is introduced and continues
|
||
through the last time that reference is used. For instance, this code will
|
||
compile because the last usage of the immutable references occurs before the
|
||
mutable reference is introduced:</p>
|
||
<!-- This example is being ignored because there's a bug in rustdoc making the
|
||
edition2018 not work. The bug is currently fixed in nightly, so when we update
|
||
the book to >= 1.35, `ignore` can be removed from this example. -->
|
||
<pre><code class="language-rust edition2018 ignore">let mut s = String::from("hello");
|
||
|
||
let r1 = &s; // no problem
|
||
let r2 = &s; // no problem
|
||
println!("{} and {}", r1, r2);
|
||
// r1 and r2 are no longer used after this point
|
||
|
||
let r3 = &mut s; // no problem
|
||
println!("{}", r3);
|
||
</code></pre>
|
||
<p>The scopes of the immutable references <code>r1</code> and <code>r2</code> end after the <code>println!</code>
|
||
where they are last used, which is before the mutable reference <code>r3</code> is
|
||
created. These scopes don’t overlap, so this code is allowed.</p>
|
||
<p>Even though borrowing errors may be frustrating at times, remember that it’s
|
||
the Rust compiler pointing out a potential bug early (at compile time rather
|
||
than at runtime) and showing you exactly where the problem is. Then you don’t
|
||
have to track down why your data isn’t what you thought it was.</p>
|
||
<h3><a class="header" href="#dangling-references" id="dangling-references">Dangling References</a></h3>
|
||
<p>In languages with pointers, it’s easy to erroneously create a <em>dangling
|
||
pointer</em>, a pointer that references a location in memory that may have been
|
||
given to someone else, by freeing some memory while preserving a pointer to
|
||
that memory. In Rust, by contrast, the compiler guarantees that references will
|
||
never be dangling references: if you have a reference to some data, the
|
||
compiler will ensure that the data will not go out of scope before the
|
||
reference to the data does.</p>
|
||
<p>Let’s try to create a dangling reference, which Rust will prevent with a
|
||
compile-time error:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let reference_to_nothing = dangle();
|
||
}
|
||
|
||
fn dangle() -> &String {
|
||
let s = String::from("hello");
|
||
|
||
&s
|
||
}
|
||
</code></pre>
|
||
<p>Here’s the error:</p>
|
||
<pre><code class="language-text">error[E0106]: missing lifetime specifier
|
||
--> main.rs:5:16
|
||
|
|
||
5 | fn dangle() -> &String {
|
||
| ^ expected lifetime parameter
|
||
|
|
||
= help: this function's return type contains a borrowed value, but there is
|
||
no value for it to be borrowed from
|
||
= help: consider giving it a 'static lifetime
|
||
</code></pre>
|
||
<p>This error message refers to a feature we haven’t covered yet: lifetimes. We’ll
|
||
discuss lifetimes in detail in Chapter 10. But, if you disregard the parts
|
||
about lifetimes, the message does contain the key to why this code is a problem:</p>
|
||
<pre><code class="language-text">this function's return type contains a borrowed value, but there is no value
|
||
for it to be borrowed from.
|
||
</code></pre>
|
||
<p>Let’s take a closer look at exactly what’s happening at each stage of our
|
||
<code>dangle</code> code:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn dangle() -> &String { // dangle returns a reference to a String
|
||
|
||
let s = String::from("hello"); // s is a new String
|
||
|
||
&s // we return a reference to the String, s
|
||
} // Here, s goes out of scope, and is dropped. Its memory goes away.
|
||
// Danger!
|
||
</code></pre>
|
||
<p>Because <code>s</code> is created inside <code>dangle</code>, when the code of <code>dangle</code> is finished,
|
||
<code>s</code> will be deallocated. But we tried to return a reference to it. That means
|
||
this reference would be pointing to an invalid <code>String</code>. That’s no good! Rust
|
||
won’t let us do this.</p>
|
||
<p>The solution here is to return the <code>String</code> directly:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn no_dangle() -> String {
|
||
let s = String::from("hello");
|
||
|
||
s
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This works without any problems. Ownership is moved out, and nothing is
|
||
deallocated.</p>
|
||
<h3><a class="header" href="#the-rules-of-references" id="the-rules-of-references">The Rules of References</a></h3>
|
||
<p>Let’s recap what we’ve discussed about references:</p>
|
||
<ul>
|
||
<li>At any given time, you can have <em>either</em> one mutable reference <em>or</em> any
|
||
number of immutable references.</li>
|
||
<li>References must always be valid.</li>
|
||
</ul>
|
||
<p>Next, we’ll look at a different kind of reference: slices.</p>
|
||
<h2><a class="header" href="#the-slice-type" id="the-slice-type">The Slice Type</a></h2>
|
||
<p>Another data type that does not have ownership is the <em>slice</em>. Slices let you
|
||
reference a contiguous sequence of elements in a collection rather than the
|
||
whole collection.</p>
|
||
<p>Here’s a small programming problem: write a function that takes a string and
|
||
returns the first word it finds in that string. If the function doesn’t find a
|
||
space in the string, the whole string must be one word, so the entire string
|
||
should be returned.</p>
|
||
<p>Let’s think about the signature of this function:</p>
|
||
<pre><code class="language-rust ignore">fn first_word(s: &String) -> ?
|
||
</code></pre>
|
||
<p>This function, <code>first_word</code>, has a <code>&String</code> as a parameter. We don’t want
|
||
ownership, so this is fine. But what should we return? We don’t really have a
|
||
way to talk about <em>part</em> of a string. However, we could return the index of the
|
||
end of the word. Let’s try that, as shown in Listing 4-7.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn first_word(s: &String) -> usize {
|
||
let bytes = s.as_bytes();
|
||
|
||
for (i, &item) in bytes.iter().enumerate() {
|
||
if item == b' ' {
|
||
return i;
|
||
}
|
||
}
|
||
|
||
s.len()
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 4-7: The <code>first_word</code> function that returns a
|
||
byte index value into the <code>String</code> parameter</span></p>
|
||
<p>Because we need to go through the <code>String</code> element by element and check whether
|
||
a value is a space, we’ll convert our <code>String</code> to an array of bytes using the
|
||
<code>as_bytes</code> method:</p>
|
||
<pre><code class="language-rust ignore">let bytes = s.as_bytes();
|
||
</code></pre>
|
||
<p>Next, we create an iterator over the array of bytes using the <code>iter</code> method:</p>
|
||
<pre><code class="language-rust ignore">for (i, &item) in bytes.iter().enumerate() {
|
||
</code></pre>
|
||
<p>We’ll discuss iterators in more detail in Chapter 13. For now, know that <code>iter</code>
|
||
is a method that returns each element in a collection and that <code>enumerate</code>
|
||
wraps the result of <code>iter</code> and returns each element as part of a tuple instead.
|
||
The first element of the tuple returned from <code>enumerate</code> is the index, and the
|
||
second element is a reference to the element. This is a bit more convenient
|
||
than calculating the index ourselves.</p>
|
||
<p>Because the <code>enumerate</code> method returns a tuple, we can use patterns to
|
||
destructure that tuple, just like everywhere else in Rust. So in the <code>for</code>
|
||
loop, we specify a pattern that has <code>i</code> for the index in the tuple and <code>&item</code>
|
||
for the single byte in the tuple. Because we get a reference to the element
|
||
from <code>.iter().enumerate()</code>, we use <code>&</code> in the pattern.</p>
|
||
<p>Inside the <code>for</code> loop, we search for the byte that represents the space by
|
||
using the byte literal syntax. If we find a space, we return the position.
|
||
Otherwise, we return the length of the string by using <code>s.len()</code>:</p>
|
||
<pre><code class="language-rust ignore"> if item == b' ' {
|
||
return i;
|
||
}
|
||
}
|
||
|
||
s.len()
|
||
</code></pre>
|
||
<p>We now have a way to find out the index of the end of the first word in the
|
||
string, but there’s a problem. We’re returning a <code>usize</code> on its own, but it’s
|
||
only a meaningful number in the context of the <code>&String</code>. In other words,
|
||
because it’s a separate value from the <code>String</code>, there’s no guarantee that it
|
||
will still be valid in the future. Consider the program in Listing 4-8 that
|
||
uses the <code>first_word</code> function from Listing 4-7.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn first_word(s: &String) -> usize {
|
||
</span><span class="boring"> let bytes = s.as_bytes();
|
||
</span><span class="boring">
|
||
</span><span class="boring"> for (i, &item) in bytes.iter().enumerate() {
|
||
</span><span class="boring"> if item == b' ' {
|
||
</span><span class="boring"> return i;
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">
|
||
</span><span class="boring"> s.len()
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let mut s = String::from("hello world");
|
||
|
||
let word = first_word(&s); // word will get the value 5
|
||
|
||
s.clear(); // this empties the String, making it equal to ""
|
||
|
||
// word still has the value 5 here, but there's no more string that
|
||
// we could meaningfully use the value 5 with. word is now totally invalid!
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 4-8: Storing the result from calling the
|
||
<code>first_word</code> function and then changing the <code>String</code> contents</span></p>
|
||
<p>This program compiles without any errors and would also do so if we used <code>word</code>
|
||
after calling <code>s.clear()</code>. Because <code>word</code> isn’t connected to the state of <code>s</code>
|
||
at all, <code>word</code> still contains the value <code>5</code>. We could use that value <code>5</code> with
|
||
the variable <code>s</code> to try to extract the first word out, but this would be a bug
|
||
because the contents of <code>s</code> have changed since we saved <code>5</code> in <code>word</code>.</p>
|
||
<p>Having to worry about the index in <code>word</code> getting out of sync with the data in
|
||
<code>s</code> is tedious and error prone! Managing these indices is even more brittle if
|
||
we write a <code>second_word</code> function. Its signature would have to look like this:</p>
|
||
<pre><code class="language-rust ignore">fn second_word(s: &String) -> (usize, usize) {
|
||
</code></pre>
|
||
<p>Now we’re tracking a starting <em>and</em> an ending index, and we have even more
|
||
values that were calculated from data in a particular state but aren’t tied to
|
||
that state at all. We now have three unrelated variables floating around that
|
||
need to be kept in sync.</p>
|
||
<p>Luckily, Rust has a solution to this problem: string slices.</p>
|
||
<h3><a class="header" href="#string-slices" id="string-slices">String Slices</a></h3>
|
||
<p>A <em>string slice</em> is a reference to part of a <code>String</code>, and it looks like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = String::from("hello world");
|
||
|
||
let hello = &s[0..5];
|
||
let world = &s[6..11];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This is similar to taking a reference to the whole <code>String</code> but with the extra
|
||
<code>[0..5]</code> bit. Rather than a reference to the entire <code>String</code>, it’s a reference
|
||
to a portion of the <code>String</code>.</p>
|
||
<p>We can create slices using a range within brackets by specifying
|
||
<code>[starting_index..ending_index]</code>, where <code>starting_index</code> is the first position
|
||
in the slice and <code>ending_index</code> is one more than the last position in the
|
||
slice. Internally, the slice data structure stores the starting position and
|
||
the length of the slice, which corresponds to <code>ending_index</code> minus
|
||
<code>starting_index</code>. So in the case of <code>let world = &s[6..11];</code>, <code>world</code> would be
|
||
a slice that contains a pointer to the 7th byte of <code>s</code> with a length value of 5.</p>
|
||
<p>Figure 4-6 shows this in a diagram.</p>
|
||
<img alt="world containing a pointer to the byte at index 6 of String s and a length of 5" src="img/trpl04-06.svg" class="center" style="width: 50%;" />
|
||
<p><span class="caption">Figure 4-6: String slice referring to part of a
|
||
<code>String</code></span></p>
|
||
<p>With Rust’s <code>..</code> range syntax, if you want to start at the first index (zero),
|
||
you can drop the value before the two periods. In other words, these are equal:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = String::from("hello");
|
||
|
||
let slice = &s[0..2];
|
||
let slice = &s[..2];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>By the same token, if your slice includes the last byte of the <code>String</code>, you
|
||
can drop the trailing number. That means these are equal:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = String::from("hello");
|
||
|
||
let len = s.len();
|
||
|
||
let slice = &s[3..len];
|
||
let slice = &s[3..];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>You can also drop both values to take a slice of the entire string. So these
|
||
are equal:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = String::from("hello");
|
||
|
||
let len = s.len();
|
||
|
||
let slice = &s[0..len];
|
||
let slice = &s[..];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<blockquote>
|
||
<p>Note: String slice range indices must occur at valid UTF-8 character
|
||
boundaries. If you attempt to create a string slice in the middle of a
|
||
multibyte character, your program will exit with an error. For the purposes
|
||
of introducing string slices, we are assuming ASCII only in this section; a
|
||
more thorough discussion of UTF-8 handling is in the <a href="ch08-02-strings.html#storing-utf-8-encoded-text-with-strings">“Storing UTF-8 Encoded
|
||
Text with Strings”</a><!-- ignore --> section of Chapter 8.</p>
|
||
</blockquote>
|
||
<p>With all this information in mind, let’s rewrite <code>first_word</code> to return a
|
||
slice. The type that signifies “string slice” is written as <code>&str</code>:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn first_word(s: &String) -> &str {
|
||
let bytes = s.as_bytes();
|
||
|
||
for (i, &item) in bytes.iter().enumerate() {
|
||
if item == b' ' {
|
||
return &s[0..i];
|
||
}
|
||
}
|
||
|
||
&s[..]
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We get the index for the end of the word in the same way as we did in Listing
|
||
4-7, by looking for the first occurrence of a space. When we find a space, we
|
||
return a string slice using the start of the string and the index of the space
|
||
as the starting and ending indices.</p>
|
||
<p>Now when we call <code>first_word</code>, we get back a single value that is tied to the
|
||
underlying data. The value is made up of a reference to the starting point of
|
||
the slice and the number of elements in the slice.</p>
|
||
<p>Returning a slice would also work for a <code>second_word</code> function:</p>
|
||
<pre><code class="language-rust ignore">fn second_word(s: &String) -> &str {
|
||
</code></pre>
|
||
<p>We now have a straightforward API that’s much harder to mess up, because the
|
||
compiler will ensure the references into the <code>String</code> remain valid. Remember
|
||
the bug in the program in Listing 4-8, when we got the index to the end of the
|
||
first word but then cleared the string so our index was invalid? That code was
|
||
logically incorrect but didn’t show any immediate errors. The problems would
|
||
show up later if we kept trying to use the first word index with an emptied
|
||
string. Slices make this bug impossible and let us know we have a problem with
|
||
our code much sooner. Using the slice version of <code>first_word</code> will throw a
|
||
compile-time error:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let mut s = String::from("hello world");
|
||
|
||
let word = first_word(&s);
|
||
|
||
s.clear(); // error!
|
||
|
||
println!("the first word is: {}", word);
|
||
}
|
||
</code></pre>
|
||
<p>Here’s the compiler error:</p>
|
||
<pre><code class="language-text">error[E0502]: cannot borrow `s` as mutable because it is also borrowed as immutable
|
||
--> src/main.rs:18:5
|
||
|
|
||
16 | let word = first_word(&s);
|
||
| -- immutable borrow occurs here
|
||
17 |
|
||
18 | s.clear(); // error!
|
||
| ^^^^^^^^^ mutable borrow occurs here
|
||
19 |
|
||
20 | println!("the first word is: {}", word);
|
||
| ---- immutable borrow later used here
|
||
</code></pre>
|
||
<p>Recall from the borrowing rules that if we have an immutable reference to
|
||
something, we cannot also take a mutable reference. Because <code>clear</code> needs to
|
||
truncate the <code>String</code>, it needs to get a mutable reference. Rust disallows
|
||
this, and compilation fails. Not only has Rust made our API easier to use, but
|
||
it has also eliminated an entire class of errors at compile time!</p>
|
||
<h4><a class="header" href="#string-literals-are-slices" id="string-literals-are-slices">String Literals Are Slices</a></h4>
|
||
<p>Recall that we talked about string literals being stored inside the binary. Now
|
||
that we know about slices, we can properly understand string literals:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = "Hello, world!";
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The type of <code>s</code> here is <code>&str</code>: it’s a slice pointing to that specific point of
|
||
the binary. This is also why string literals are immutable; <code>&str</code> is an
|
||
immutable reference.</p>
|
||
<h4><a class="header" href="#string-slices-as-parameters" id="string-slices-as-parameters">String Slices as Parameters</a></h4>
|
||
<p>Knowing that you can take slices of literals and <code>String</code> values leads us to
|
||
one more improvement on <code>first_word</code>, and that’s its signature:</p>
|
||
<pre><code class="language-rust ignore">fn first_word(s: &String) -> &str {
|
||
</code></pre>
|
||
<p>A more experienced Rustacean would write the signature shown in Listing 4-9
|
||
instead because it allows us to use the same function on both <code>&String</code> values
|
||
and <code>&str</code> values.</p>
|
||
<pre><code class="language-rust ignore">fn first_word(s: &str) -> &str {
|
||
</code></pre>
|
||
<p><span class="caption">Listing 4-9: Improving the <code>first_word</code> function by using
|
||
a string slice for the type of the <code>s</code> parameter</span></p>
|
||
<p>If we have a string slice, we can pass that directly. If we have a <code>String</code>, we
|
||
can pass a slice of the entire <code>String</code>. Defining a function to take a string
|
||
slice instead of a reference to a <code>String</code> makes our API more general and useful
|
||
without losing any functionality:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn first_word(s: &str) -> &str {
|
||
</span><span class="boring"> let bytes = s.as_bytes();
|
||
</span><span class="boring">
|
||
</span><span class="boring"> for (i, &item) in bytes.iter().enumerate() {
|
||
</span><span class="boring"> if item == b' ' {
|
||
</span><span class="boring"> return &s[0..i];
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">
|
||
</span><span class="boring"> &s[..]
|
||
</span><span class="boring">}
|
||
</span>fn main() {
|
||
let my_string = String::from("hello world");
|
||
|
||
// first_word works on slices of `String`s
|
||
let word = first_word(&my_string[..]);
|
||
|
||
let my_string_literal = "hello world";
|
||
|
||
// first_word works on slices of string literals
|
||
let word = first_word(&my_string_literal[..]);
|
||
|
||
// Because string literals *are* string slices already,
|
||
// this works too, without the slice syntax!
|
||
let word = first_word(my_string_literal);
|
||
}
|
||
</code></pre></pre>
|
||
<h3><a class="header" href="#other-slices" id="other-slices">Other Slices</a></h3>
|
||
<p>String slices, as you might imagine, are specific to strings. But there’s a
|
||
more general slice type, too. Consider this array:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let a = [1, 2, 3, 4, 5];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Just as we might want to refer to a part of a string, we might want to refer
|
||
to part of an array. We’d do so like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let a = [1, 2, 3, 4, 5];
|
||
|
||
let slice = &a[1..3];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This slice has the type <code>&[i32]</code>. It works the same way as string slices do, by
|
||
storing a reference to the first element and a length. You’ll use this kind of
|
||
slice for all sorts of other collections. We’ll discuss these collections in
|
||
detail when we talk about vectors in Chapter 8.</p>
|
||
<h2><a class="header" href="#summary-3" id="summary-3">Summary</a></h2>
|
||
<p>The concepts of ownership, borrowing, and slices ensure memory safety in Rust
|
||
programs at compile time. The Rust language gives you control over your memory
|
||
usage in the same way as other systems programming languages, but having the
|
||
owner of data automatically clean up that data when the owner goes out of scope
|
||
means you don’t have to write and debug extra code to get this control.</p>
|
||
<p>Ownership affects how lots of other parts of Rust work, so we’ll talk about
|
||
these concepts further throughout the rest of the book. Let’s move on to
|
||
Chapter 5 and look at grouping pieces of data together in a <code>struct</code>.</p>
|
||
<h1><a class="header" href="#using-structs-to-structure-related-data" id="using-structs-to-structure-related-data">Using Structs to Structure Related Data</a></h1>
|
||
<p>A <em>struct</em>, or <em>structure</em>, is a custom data type that lets you name and
|
||
package together multiple related values that make up a meaningful group. If
|
||
you’re familiar with an object-oriented language, a <em>struct</em> is like an
|
||
object’s data attributes. In this chapter, we’ll compare and contrast tuples
|
||
with structs, demonstrate how to use structs, and discuss how to define methods
|
||
and associated functions to specify behavior associated with a struct’s data.
|
||
Structs and enums (discussed in Chapter 6) are the building blocks for creating
|
||
new types in your program’s domain to take full advantage of Rust’s
|
||
compile-time type checking.</p>
|
||
<h2><a class="header" href="#defining-and-instantiating-structs" id="defining-and-instantiating-structs">Defining and Instantiating Structs</a></h2>
|
||
<p>Structs are similar to tuples, which were discussed in Chapter 3. Like tuples,
|
||
the pieces of a struct can be different types. Unlike with tuples, you’ll name
|
||
each piece of data so it’s clear what the values mean. As a result of these
|
||
names, structs are more flexible than tuples: you don’t have to rely on the
|
||
order of the data to specify or access the values of an instance.</p>
|
||
<p>To define a struct, we enter the keyword <code>struct</code> and name the entire struct. A
|
||
struct’s name should describe the significance of the pieces of data being
|
||
grouped together. Then, inside curly brackets, we define the names and types of
|
||
the pieces of data, which we call <em>fields</em>. For example, Listing 5-1 shows a
|
||
struct that stores information about a user account.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct User {
|
||
username: String,
|
||
email: String,
|
||
sign_in_count: u64,
|
||
active: bool,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-1: A <code>User</code> struct definition</span></p>
|
||
<p>To use a struct after we’ve defined it, we create an <em>instance</em> of that struct
|
||
by specifying concrete values for each of the fields. We create an instance by
|
||
stating the name of the struct and then adding curly brackets containing <code>key: value</code> pairs, where the keys are the names of the fields and the values are the
|
||
data we want to store in those fields. We don’t have to specify the fields in
|
||
the same order in which we declared them in the struct. In other words, the
|
||
struct definition is like a general template for the type, and instances fill
|
||
in that template with particular data to create values of the type. For
|
||
example, we can declare a particular user as shown in Listing 5-2.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct User {
|
||
</span><span class="boring"> username: String,
|
||
</span><span class="boring"> email: String,
|
||
</span><span class="boring"> sign_in_count: u64,
|
||
</span><span class="boring"> active: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>let user1 = User {
|
||
email: String::from("someone@example.com"),
|
||
username: String::from("someusername123"),
|
||
active: true,
|
||
sign_in_count: 1,
|
||
};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-2: Creating an instance of the <code>User</code>
|
||
struct</span></p>
|
||
<p>To get a specific value from a struct, we can use dot notation. If we wanted
|
||
just this user’s email address, we could use <code>user1.email</code> wherever we wanted
|
||
to use this value. If the instance is mutable, we can change a value by using
|
||
the dot notation and assigning into a particular field. Listing 5-3 shows how
|
||
to change the value in the <code>email</code> field of a mutable <code>User</code> instance.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct User {
|
||
</span><span class="boring"> username: String,
|
||
</span><span class="boring"> email: String,
|
||
</span><span class="boring"> sign_in_count: u64,
|
||
</span><span class="boring"> active: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>let mut user1 = User {
|
||
email: String::from("someone@example.com"),
|
||
username: String::from("someusername123"),
|
||
active: true,
|
||
sign_in_count: 1,
|
||
};
|
||
|
||
user1.email = String::from("anotheremail@example.com");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-3: Changing the value in the <code>email</code> field of a
|
||
<code>User</code> instance</span></p>
|
||
<p>Note that the entire instance must be mutable; Rust doesn’t allow us to mark
|
||
only certain fields as mutable. As with any expression, we can construct a new
|
||
instance of the struct as the last expression in the function body to
|
||
implicitly return that new instance.</p>
|
||
<p>Listing 5-4 shows a <code>build_user</code> function that returns a <code>User</code> instance with
|
||
the given email and username. The <code>active</code> field gets the value of <code>true</code>, and
|
||
the <code>sign_in_count</code> gets a value of <code>1</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct User {
|
||
</span><span class="boring"> username: String,
|
||
</span><span class="boring"> email: String,
|
||
</span><span class="boring"> sign_in_count: u64,
|
||
</span><span class="boring"> active: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn build_user(email: String, username: String) -> User {
|
||
User {
|
||
email: email,
|
||
username: username,
|
||
active: true,
|
||
sign_in_count: 1,
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-4: A <code>build_user</code> function that takes an email
|
||
and username and returns a <code>User</code> instance</span></p>
|
||
<p>It makes sense to name the function parameters with the same name as the struct
|
||
fields, but having to repeat the <code>email</code> and <code>username</code> field names and
|
||
variables is a bit tedious. If the struct had more fields, repeating each name
|
||
would get even more annoying. Luckily, there’s a convenient shorthand!</p>
|
||
<h3><a class="header" href="#using-the-field-init-shorthand-when-variables-and-fields-have-the-same-name" id="using-the-field-init-shorthand-when-variables-and-fields-have-the-same-name">Using the Field Init Shorthand when Variables and Fields Have the Same Name</a></h3>
|
||
<p>Because the parameter names and the struct field names are exactly the same in
|
||
Listing 5-4, we can use the <em>field init shorthand</em> syntax to rewrite
|
||
<code>build_user</code> so that it behaves exactly the same but doesn’t have the
|
||
repetition of <code>email</code> and <code>username</code>, as shown in Listing 5-5.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct User {
|
||
</span><span class="boring"> username: String,
|
||
</span><span class="boring"> email: String,
|
||
</span><span class="boring"> sign_in_count: u64,
|
||
</span><span class="boring"> active: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn build_user(email: String, username: String) -> User {
|
||
User {
|
||
email,
|
||
username,
|
||
active: true,
|
||
sign_in_count: 1,
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-5: A <code>build_user</code> function that uses field init
|
||
shorthand because the <code>email</code> and <code>username</code> parameters have the same name as
|
||
struct fields</span></p>
|
||
<p>Here, we’re creating a new instance of the <code>User</code> struct, which has a field
|
||
named <code>email</code>. We want to set the <code>email</code> field’s value to the value in the
|
||
<code>email</code> parameter of the <code>build_user</code> function. Because the <code>email</code> field and
|
||
the <code>email</code> parameter have the same name, we only need to write <code>email</code> rather
|
||
than <code>email: email</code>.</p>
|
||
<h3><a class="header" href="#creating-instances-from-other-instances-with-struct-update-syntax" id="creating-instances-from-other-instances-with-struct-update-syntax">Creating Instances From Other Instances With Struct Update Syntax</a></h3>
|
||
<p>It’s often useful to create a new instance of a struct that uses most of an old
|
||
instance’s values but changes some. You’ll do this using <em>struct update syntax</em>.</p>
|
||
<p>First, Listing 5-6 shows how we create a new <code>User</code> instance in <code>user2</code> without
|
||
the update syntax. We set new values for <code>email</code> and <code>username</code> but otherwise
|
||
use the same values from <code>user1</code> that we created in Listing 5-2.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct User {
|
||
</span><span class="boring"> username: String,
|
||
</span><span class="boring"> email: String,
|
||
</span><span class="boring"> sign_in_count: u64,
|
||
</span><span class="boring"> active: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">let user1 = User {
|
||
</span><span class="boring"> email: String::from("someone@example.com"),
|
||
</span><span class="boring"> username: String::from("someusername123"),
|
||
</span><span class="boring"> active: true,
|
||
</span><span class="boring"> sign_in_count: 1,
|
||
</span><span class="boring">};
|
||
</span><span class="boring">
|
||
</span>let user2 = User {
|
||
email: String::from("another@example.com"),
|
||
username: String::from("anotherusername567"),
|
||
active: user1.active,
|
||
sign_in_count: user1.sign_in_count,
|
||
};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-6: Creating a new <code>User</code> instance using some of
|
||
the values from <code>user1</code></span></p>
|
||
<p>Using struct update syntax, we can achieve the same effect with less code, as
|
||
shown in Listing 5-7. The syntax <code>..</code> specifies that the remaining fields not
|
||
explicitly set should have the same value as the fields in the given instance.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct User {
|
||
</span><span class="boring"> username: String,
|
||
</span><span class="boring"> email: String,
|
||
</span><span class="boring"> sign_in_count: u64,
|
||
</span><span class="boring"> active: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">let user1 = User {
|
||
</span><span class="boring"> email: String::from("someone@example.com"),
|
||
</span><span class="boring"> username: String::from("someusername123"),
|
||
</span><span class="boring"> active: true,
|
||
</span><span class="boring"> sign_in_count: 1,
|
||
</span><span class="boring">};
|
||
</span><span class="boring">
|
||
</span>let user2 = User {
|
||
email: String::from("another@example.com"),
|
||
username: String::from("anotherusername567"),
|
||
..user1
|
||
};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-7: Using struct update syntax to set new
|
||
<code>email</code> and <code>username</code> values for a <code>User</code> instance but use the rest of the
|
||
values from the fields of the instance in the <code>user1</code> variable</span></p>
|
||
<p>The code in Listing 5-7 also creates an instance in <code>user2</code> that has a
|
||
different value for <code>email</code> and <code>username</code> but has the same values for the
|
||
<code>active</code> and <code>sign_in_count</code> fields from <code>user1</code>.</p>
|
||
<h3><a class="header" href="#using-tuple-structs-without-named-fields-to-create-different-types" id="using-tuple-structs-without-named-fields-to-create-different-types">Using Tuple Structs without Named Fields to Create Different Types</a></h3>
|
||
<p>You can also define structs that look similar to tuples, called <em>tuple
|
||
structs</em>. Tuple structs have the added meaning the struct name provides but
|
||
don’t have names associated with their fields; rather, they just have the types
|
||
of the fields. Tuple structs are useful when you want to give the whole tuple a
|
||
name and make the tuple a different type from other tuples, and when naming each
|
||
field as in a regular struct would be verbose or redundant.</p>
|
||
<p>To define a tuple struct, start with the <code>struct</code> keyword and the struct name
|
||
followed by the types in the tuple. For example, here are definitions and
|
||
usages of two tuple structs named <code>Color</code> and <code>Point</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct Color(i32, i32, i32);
|
||
struct Point(i32, i32, i32);
|
||
|
||
let black = Color(0, 0, 0);
|
||
let origin = Point(0, 0, 0);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Note that the <code>black</code> and <code>origin</code> values are different types, because they’re
|
||
instances of different tuple structs. Each struct you define is its own type,
|
||
even though the fields within the struct have the same types. For example, a
|
||
function that takes a parameter of type <code>Color</code> cannot take a <code>Point</code> as an
|
||
argument, even though both types are made up of three <code>i32</code> values. Otherwise,
|
||
tuple struct instances behave like tuples: you can destructure them into their
|
||
individual pieces, you can use a <code>.</code> followed by the index to access an
|
||
individual value, and so on.</p>
|
||
<h3><a class="header" href="#unit-like-structs-without-any-fields" id="unit-like-structs-without-any-fields">Unit-Like Structs Without Any Fields</a></h3>
|
||
<p>You can also define structs that don’t have any fields! These are called
|
||
<em>unit-like structs</em> because they behave similarly to <code>()</code>, the unit type.
|
||
Unit-like structs can be useful in situations in which you need to implement a
|
||
trait on some type but don’t have any data that you want to store in the type
|
||
itself. We’ll discuss traits in Chapter 10.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#ownership-of-struct-data" id="ownership-of-struct-data">Ownership of Struct Data</a></h3>
|
||
<p>In the <code>User</code> struct definition in Listing 5-1, we used the owned <code>String</code>
|
||
type rather than the <code>&str</code> string slice type. This is a deliberate choice
|
||
because we want each instance of this struct to own all of its data and for that
|
||
data to be valid for as long as the entire struct is valid.</p>
|
||
<p>It’s possible for structs to store references to data owned by something else,
|
||
but to do so requires the use of <em>lifetimes</em>, a Rust feature that we’ll
|
||
discuss in Chapter 10. Lifetimes ensure that the data referenced by a struct
|
||
is valid for as long as the struct is. Let’s say you try to store a reference
|
||
in a struct without specifying lifetimes, like this, which won’t work:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">struct User {
|
||
username: &str,
|
||
email: &str,
|
||
sign_in_count: u64,
|
||
active: bool,
|
||
}
|
||
|
||
fn main() {
|
||
let user1 = User {
|
||
email: "someone@example.com",
|
||
username: "someusername123",
|
||
active: true,
|
||
sign_in_count: 1,
|
||
};
|
||
}
|
||
</code></pre>
|
||
<p>The compiler will complain that it needs lifetime specifiers:</p>
|
||
<pre><code class="language-text">error[E0106]: missing lifetime specifier
|
||
-->
|
||
|
|
||
2 | username: &str,
|
||
| ^ expected lifetime parameter
|
||
|
||
error[E0106]: missing lifetime specifier
|
||
-->
|
||
|
|
||
3 | email: &str,
|
||
| ^ expected lifetime parameter
|
||
</code></pre>
|
||
<p>In Chapter 10, we’ll discuss how to fix these errors so you can store
|
||
references in structs, but for now, we’ll fix errors like these using owned
|
||
types like <code>String</code> instead of references like <code>&str</code>.</p>
|
||
</blockquote>
|
||
<h2><a class="header" href="#an-example-program-using-structs" id="an-example-program-using-structs">An Example Program Using Structs</a></h2>
|
||
<p>To understand when we might want to use structs, let’s write a program that
|
||
calculates the area of a rectangle. We’ll start with single variables, and then
|
||
refactor the program until we’re using structs instead.</p>
|
||
<p>Let’s make a new binary project with Cargo called <em>rectangles</em> that will take
|
||
the width and height of a rectangle specified in pixels and calculate the area
|
||
of the rectangle. Listing 5-8 shows a short program with one way of doing
|
||
exactly that in our project’s <em>src/main.rs</em>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let width1 = 30;
|
||
let height1 = 50;
|
||
|
||
println!(
|
||
"The area of the rectangle is {} square pixels.",
|
||
area(width1, height1)
|
||
);
|
||
}
|
||
|
||
fn area(width: u32, height: u32) -> u32 {
|
||
width * height
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 5-8: Calculating the area of a rectangle
|
||
specified by separate width and height variables</span></p>
|
||
<p>Now, run this program using <code>cargo run</code>:</p>
|
||
<pre><code class="language-text">The area of the rectangle is 1500 square pixels.
|
||
</code></pre>
|
||
<p>Even though Listing 5-8 works and figures out the area of the rectangle by
|
||
calling the <code>area</code> function with each dimension, we can do better. The width
|
||
and the height are related to each other because together they describe one
|
||
rectangle.</p>
|
||
<p>The issue with this code is evident in the signature of <code>area</code>:</p>
|
||
<pre><code class="language-rust ignore">fn area(width: u32, height: u32) -> u32 {
|
||
</code></pre>
|
||
<p>The <code>area</code> function is supposed to calculate the area of one rectangle, but the
|
||
function we wrote has two parameters. The parameters are related, but that’s
|
||
not expressed anywhere in our program. It would be more readable and more
|
||
manageable to group width and height together. We’ve already discussed one way
|
||
we might do that in the <a href="ch03-02-data-types.html#the-tuple-type">“The Tuple Type”</a><!-- ignore --> section
|
||
of Chapter 3: by using tuples.</p>
|
||
<h3><a class="header" href="#refactoring-with-tuples" id="refactoring-with-tuples">Refactoring with Tuples</a></h3>
|
||
<p>Listing 5-9 shows another version of our program that uses tuples.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let rect1 = (30, 50);
|
||
|
||
println!(
|
||
"The area of the rectangle is {} square pixels.",
|
||
area(rect1)
|
||
);
|
||
}
|
||
|
||
fn area(dimensions: (u32, u32)) -> u32 {
|
||
dimensions.0 * dimensions.1
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 5-9: Specifying the width and height of the
|
||
rectangle with a tuple</span></p>
|
||
<p>In one way, this program is better. Tuples let us add a bit of structure, and
|
||
we’re now passing just one argument. But in another way, this version is less
|
||
clear: tuples don’t name their elements, so our calculation has become more
|
||
confusing because we have to index into the parts of the tuple.</p>
|
||
<p>It doesn’t matter if we mix up width and height for the area calculation, but
|
||
if we want to draw the rectangle on the screen, it would matter! We would have
|
||
to keep in mind that <code>width</code> is the tuple index <code>0</code> and <code>height</code> is the tuple
|
||
index <code>1</code>. If someone else worked on this code, they would have to figure this
|
||
out and keep it in mind as well. It would be easy to forget or mix up these
|
||
values and cause errors, because we haven’t conveyed the meaning of our data in
|
||
our code.</p>
|
||
<h3><a class="header" href="#refactoring-with-structs-adding-more-meaning" id="refactoring-with-structs-adding-more-meaning">Refactoring with Structs: Adding More Meaning</a></h3>
|
||
<p>We use structs to add meaning by labeling the data. We can transform the tuple
|
||
we’re using into a data type with a name for the whole as well as names for the
|
||
parts, as shown in Listing 5-10.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Rectangle {
|
||
width: u32,
|
||
height: u32,
|
||
}
|
||
|
||
fn main() {
|
||
let rect1 = Rectangle { width: 30, height: 50 };
|
||
|
||
println!(
|
||
"The area of the rectangle is {} square pixels.",
|
||
area(&rect1)
|
||
);
|
||
}
|
||
|
||
fn area(rectangle: &Rectangle) -> u32 {
|
||
rectangle.width * rectangle.height
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 5-10: Defining a <code>Rectangle</code> struct</span></p>
|
||
<p>Here we’ve defined a struct and named it <code>Rectangle</code>. Inside the curly
|
||
brackets, we defined the fields as <code>width</code> and <code>height</code>, both of which have
|
||
type <code>u32</code>. Then in <code>main</code>, we created a particular instance of <code>Rectangle</code>
|
||
that has a width of 30 and a height of 50.</p>
|
||
<p>Our <code>area</code> function is now defined with one parameter, which we’ve named
|
||
<code>rectangle</code>, whose type is an immutable borrow of a struct <code>Rectangle</code>
|
||
instance. As mentioned in Chapter 4, we want to borrow the struct rather than
|
||
take ownership of it. This way, <code>main</code> retains its ownership and can continue
|
||
using <code>rect1</code>, which is the reason we use the <code>&</code> in the function signature and
|
||
where we call the function.</p>
|
||
<p>The <code>area</code> function accesses the <code>width</code> and <code>height</code> fields of the <code>Rectangle</code>
|
||
instance. Our function signature for <code>area</code> now says exactly what we mean:
|
||
calculate the area of <code>Rectangle</code>, using its <code>width</code> and <code>height</code> fields. This
|
||
conveys that the width and height are related to each other, and it gives
|
||
descriptive names to the values rather than using the tuple index values of <code>0</code>
|
||
and <code>1</code>. This is a win for clarity.</p>
|
||
<h3><a class="header" href="#adding-useful-functionality-with-derived-traits" id="adding-useful-functionality-with-derived-traits">Adding Useful Functionality with Derived Traits</a></h3>
|
||
<p>It’d be nice to be able to print an instance of <code>Rectangle</code> while we’re
|
||
debugging our program and see the values for all its fields. Listing 5-11 tries
|
||
using the <code>println!</code> macro as we have used in previous chapters. This won’t
|
||
work, however.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">struct Rectangle {
|
||
width: u32,
|
||
height: u32,
|
||
}
|
||
|
||
fn main() {
|
||
let rect1 = Rectangle { width: 30, height: 50 };
|
||
|
||
println!("rect1 is {}", rect1);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 5-11: Attempting to print a <code>Rectangle</code>
|
||
instance</span></p>
|
||
<p>When we compile this code, we get an error with this core message:</p>
|
||
<pre><code class="language-text">error[E0277]: `Rectangle` doesn't implement `std::fmt::Display`
|
||
</code></pre>
|
||
<p>The <code>println!</code> macro can do many kinds of formatting, and by default, the curly
|
||
brackets tell <code>println!</code> to use formatting known as <code>Display</code>: output intended
|
||
for direct end user consumption. The primitive types we’ve seen so far
|
||
implement <code>Display</code> by default, because there’s only one way you’d want to show
|
||
a <code>1</code> or any other primitive type to a user. But with structs, the way
|
||
<code>println!</code> should format the output is less clear because there are more
|
||
display possibilities: Do you want commas or not? Do you want to print the
|
||
curly brackets? Should all the fields be shown? Due to this ambiguity, Rust
|
||
doesn’t try to guess what we want, and structs don’t have a provided
|
||
implementation of <code>Display</code>.</p>
|
||
<p>If we continue reading the errors, we’ll find this helpful note:</p>
|
||
<pre><code class="language-text">= help: the trait `std::fmt::Display` is not implemented for `Rectangle`
|
||
= note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead
|
||
</code></pre>
|
||
<p>Let’s try it! The <code>println!</code> macro call will now look like <code>println!("rect1 is {:?}", rect1);</code>. Putting the specifier <code>:?</code> inside the curly brackets tells
|
||
<code>println!</code> we want to use an output format called <code>Debug</code>. The <code>Debug</code> trait
|
||
enables us to print our struct in a way that is useful for developers so we can
|
||
see its value while we’re debugging our code.</p>
|
||
<p>Compile the code with this change. Drat! We still get an error:</p>
|
||
<pre><code class="language-text">error[E0277]: `Rectangle` doesn't implement `std::fmt::Debug`
|
||
</code></pre>
|
||
<p>But again, the compiler gives us a helpful note:</p>
|
||
<pre><code class="language-text">= help: the trait `std::fmt::Debug` is not implemented for `Rectangle`
|
||
= note: add `#[derive(Debug)]` or manually implement `std::fmt::Debug`
|
||
</code></pre>
|
||
<p>Rust <em>does</em> include functionality to print out debugging information, but we
|
||
have to explicitly opt in to make that functionality available for our struct.
|
||
To do that, we add the annotation <code>#[derive(Debug)]</code> just before the struct
|
||
definition, as shown in Listing 5-12.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">#[derive(Debug)]
|
||
struct Rectangle {
|
||
width: u32,
|
||
height: u32,
|
||
}
|
||
|
||
fn main() {
|
||
let rect1 = Rectangle { width: 30, height: 50 };
|
||
|
||
println!("rect1 is {:?}", rect1);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 5-12: Adding the annotation to derive the <code>Debug</code>
|
||
trait and printing the <code>Rectangle</code> instance using debug formatting</span></p>
|
||
<p>Now when we run the program, we won’t get any errors, and we’ll see the
|
||
following output:</p>
|
||
<pre><code class="language-text">rect1 is Rectangle { width: 30, height: 50 }
|
||
</code></pre>
|
||
<p>Nice! It’s not the prettiest output, but it shows the values of all the fields
|
||
for this instance, which would definitely help during debugging. When we have
|
||
larger structs, it’s useful to have output that’s a bit easier to read; in
|
||
those cases, we can use <code>{:#?}</code> instead of <code>{:?}</code> in the <code>println!</code> string.
|
||
When we use the <code>{:#?}</code> style in the example, the output will look like this:</p>
|
||
<pre><code class="language-text">rect1 is Rectangle {
|
||
width: 30,
|
||
height: 50
|
||
}
|
||
</code></pre>
|
||
<p>Rust has provided a number of traits for us to use with the <code>derive</code> annotation
|
||
that can add useful behavior to our custom types. Those traits and their
|
||
behaviors are listed in Appendix C. We’ll cover how to implement these traits
|
||
with custom behavior as well as how to create your own traits in Chapter 10.</p>
|
||
<p>Our <code>area</code> function is very specific: it only computes the area of rectangles.
|
||
It would be helpful to tie this behavior more closely to our <code>Rectangle</code>
|
||
struct, because it won’t work with any other type. Let’s look at how we can
|
||
continue to refactor this code by turning the <code>area</code> function into an <code>area</code>
|
||
<em>method</em> defined on our <code>Rectangle</code> type.</p>
|
||
<h2><a class="header" href="#method-syntax" id="method-syntax">Method Syntax</a></h2>
|
||
<p><em>Methods</em> are similar to functions: they’re declared with the <code>fn</code> keyword and
|
||
their name, they can have parameters and a return value, and they contain some
|
||
code that is run when they’re called from somewhere else. However, methods are
|
||
different from functions in that they’re defined within the context of a struct
|
||
(or an enum or a trait object, which we cover in Chapters 6 and 17,
|
||
respectively), and their first parameter is always <code>self</code>, which represents the
|
||
instance of the struct the method is being called on.</p>
|
||
<h3><a class="header" href="#defining-methods" id="defining-methods">Defining Methods</a></h3>
|
||
<p>Let’s change the <code>area</code> function that has a <code>Rectangle</code> instance as a parameter
|
||
and instead make an <code>area</code> method defined on the <code>Rectangle</code> struct, as shown
|
||
in Listing 5-13.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">#[derive(Debug)]
|
||
struct Rectangle {
|
||
width: u32,
|
||
height: u32,
|
||
}
|
||
|
||
impl Rectangle {
|
||
fn area(&self) -> u32 {
|
||
self.width * self.height
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
let rect1 = Rectangle { width: 30, height: 50 };
|
||
|
||
println!(
|
||
"The area of the rectangle is {} square pixels.",
|
||
rect1.area()
|
||
);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 5-13: Defining an <code>area</code> method on the
|
||
<code>Rectangle</code> struct</span></p>
|
||
<p>To define the function within the context of <code>Rectangle</code>, we start an <code>impl</code>
|
||
(implementation) block. Then we move the <code>area</code> function within the <code>impl</code>
|
||
curly brackets and change the first (and in this case, only) parameter to be
|
||
<code>self</code> in the signature and everywhere within the body. In <code>main</code>, where we
|
||
called the <code>area</code> function and passed <code>rect1</code> as an argument, we can instead
|
||
use <em>method syntax</em> to call the <code>area</code> method on our <code>Rectangle</code> instance.
|
||
The method syntax goes after an instance: we add a dot followed by the method
|
||
name, parentheses, and any arguments.</p>
|
||
<p>In the signature for <code>area</code>, we use <code>&self</code> instead of <code>rectangle: &Rectangle</code>
|
||
because Rust knows the type of <code>self</code> is <code>Rectangle</code> due to this method’s being
|
||
inside the <code>impl Rectangle</code> context. Note that we still need to use the <code>&</code>
|
||
before <code>self</code>, just as we did in <code>&Rectangle</code>. Methods can take ownership of
|
||
<code>self</code>, borrow <code>self</code> immutably as we’ve done here, or borrow <code>self</code> mutably,
|
||
just as they can any other parameter.</p>
|
||
<p>We’ve chosen <code>&self</code> here for the same reason we used <code>&Rectangle</code> in the
|
||
function version: we don’t want to take ownership, and we just want to read the
|
||
data in the struct, not write to it. If we wanted to change the instance that
|
||
we’ve called the method on as part of what the method does, we’d use <code>&mut self</code> as the first parameter. Having a method that takes ownership of the
|
||
instance by using just <code>self</code> as the first parameter is rare; this technique is
|
||
usually used when the method transforms <code>self</code> into something else and you want
|
||
to prevent the caller from using the original instance after the transformation.</p>
|
||
<p>The main benefit of using methods instead of functions, in addition to using
|
||
method syntax and not having to repeat the type of <code>self</code> in every method’s
|
||
signature, is organization. We’ve put all the things we can do with an
|
||
instance of a type in one <code>impl</code> block rather than making future users of our
|
||
code search for capabilities of <code>Rectangle</code> in various places in the library we
|
||
provide.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#wheres-the---operator" id="wheres-the---operator">Where’s the <code>-></code> Operator?</a></h3>
|
||
<p>In C and C++, two different operators are used for calling methods: you use
|
||
<code>.</code> if you’re calling a method on the object directly and <code>-></code> if you’re
|
||
calling the method on a pointer to the object and need to dereference the
|
||
pointer first. In other words, if <code>object</code> is a pointer,
|
||
<code>object->something()</code> is similar to <code>(*object).something()</code>.</p>
|
||
<p>Rust doesn’t have an equivalent to the <code>-></code> operator; instead, Rust has a
|
||
feature called <em>automatic referencing and dereferencing</em>. Calling methods is
|
||
one of the few places in Rust that has this behavior.</p>
|
||
<p>Here’s how it works: when you call a method with <code>object.something()</code>, Rust
|
||
automatically adds in <code>&</code>, <code>&mut</code>, or <code>*</code> so <code>object</code> matches the signature of
|
||
the method. In other words, the following are the same:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug,Copy,Clone)]
|
||
</span><span class="boring">struct Point {
|
||
</span><span class="boring"> x: f64,
|
||
</span><span class="boring"> y: f64,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Point {
|
||
</span><span class="boring"> fn distance(&self, other: &Point) -> f64 {
|
||
</span><span class="boring"> let x_squared = f64::powi(other.x - self.x, 2);
|
||
</span><span class="boring"> let y_squared = f64::powi(other.y - self.y, 2);
|
||
</span><span class="boring">
|
||
</span><span class="boring"> f64::sqrt(x_squared + y_squared)
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">let p1 = Point { x: 0.0, y: 0.0 };
|
||
</span><span class="boring">let p2 = Point { x: 5.0, y: 6.5 };
|
||
</span>p1.distance(&p2);
|
||
(&p1).distance(&p2);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The first one looks much cleaner. This automatic referencing behavior works
|
||
because methods have a clear receiver—the type of <code>self</code>. Given the receiver
|
||
and name of a method, Rust can figure out definitively whether the method is
|
||
reading (<code>&self</code>), mutating (<code>&mut self</code>), or consuming (<code>self</code>). The fact
|
||
that Rust makes borrowing implicit for method receivers is a big part of
|
||
making ownership ergonomic in practice.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#methods-with-more-parameters" id="methods-with-more-parameters">Methods with More Parameters</a></h3>
|
||
<p>Let’s practice using methods by implementing a second method on the <code>Rectangle</code>
|
||
struct. This time, we want an instance of <code>Rectangle</code> to take another instance
|
||
of <code>Rectangle</code> and return <code>true</code> if the second <code>Rectangle</code> can fit completely
|
||
within <code>self</code>; otherwise it should return <code>false</code>. That is, we want to be able
|
||
to write the program shown in Listing 5-14, once we’ve defined the <code>can_hold</code>
|
||
method.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let rect1 = Rectangle { width: 30, height: 50 };
|
||
let rect2 = Rectangle { width: 10, height: 40 };
|
||
let rect3 = Rectangle { width: 60, height: 45 };
|
||
|
||
println!("Can rect1 hold rect2? {}", rect1.can_hold(&rect2));
|
||
println!("Can rect1 hold rect3? {}", rect1.can_hold(&rect3));
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 5-14: Using the as-yet-unwritten <code>can_hold</code>
|
||
method</span></p>
|
||
<p>And the expected output would look like the following, because both dimensions
|
||
of <code>rect2</code> are smaller than the dimensions of <code>rect1</code> but <code>rect3</code> is wider than
|
||
<code>rect1</code>:</p>
|
||
<pre><code class="language-text">Can rect1 hold rect2? true
|
||
Can rect1 hold rect3? false
|
||
</code></pre>
|
||
<p>We know we want to define a method, so it will be within the <code>impl Rectangle</code>
|
||
block. The method name will be <code>can_hold</code>, and it will take an immutable borrow
|
||
of another <code>Rectangle</code> as a parameter. We can tell what the type of the
|
||
parameter will be by looking at the code that calls the method:
|
||
<code>rect1.can_hold(&rect2)</code> passes in <code>&rect2</code>, which is an immutable borrow of
|
||
<code>rect2</code>, an instance of <code>Rectangle</code>. This makes sense because we only need to
|
||
read <code>rect2</code> (rather than write, which would mean we’d need a mutable borrow),
|
||
and we want <code>main</code> to retain ownership of <code>rect2</code> so we can use it again after
|
||
calling the <code>can_hold</code> method. The return value of <code>can_hold</code> will be a
|
||
Boolean, and the implementation will check whether the width and height of
|
||
<code>self</code> are both greater than the width and height of the other <code>Rectangle</code>,
|
||
respectively. Let’s add the new <code>can_hold</code> method to the <code>impl</code> block from
|
||
Listing 5-13, shown in Listing 5-15.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Rectangle {
|
||
</span><span class="boring"> width: u32,
|
||
</span><span class="boring"> height: u32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Rectangle {
|
||
fn area(&self) -> u32 {
|
||
self.width * self.height
|
||
}
|
||
|
||
fn can_hold(&self, other: &Rectangle) -> bool {
|
||
self.width > other.width && self.height > other.height
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-15: Implementing the <code>can_hold</code> method on
|
||
<code>Rectangle</code> that takes another <code>Rectangle</code> instance as a parameter</span></p>
|
||
<p>When we run this code with the <code>main</code> function in Listing 5-14, we’ll get our
|
||
desired output. Methods can take multiple parameters that we add to the
|
||
signature after the <code>self</code> parameter, and those parameters work just like
|
||
parameters in functions.</p>
|
||
<h3><a class="header" href="#associated-functions" id="associated-functions">Associated Functions</a></h3>
|
||
<p>Another useful feature of <code>impl</code> blocks is that we’re allowed to define
|
||
functions within <code>impl</code> blocks that <em>don’t</em> take <code>self</code> as a parameter. These
|
||
are called <em>associated functions</em> because they’re associated with the struct.
|
||
They’re still functions, not methods, because they don’t have an instance of
|
||
the struct to work with. You’ve already used the <code>String::from</code> associated
|
||
function.</p>
|
||
<p>Associated functions are often used for constructors that will return a new
|
||
instance of the struct. For example, we could provide an associated function
|
||
that would have one dimension parameter and use that as both width and height,
|
||
thus making it easier to create a square <code>Rectangle</code> rather than having to
|
||
specify the same value twice:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Rectangle {
|
||
</span><span class="boring"> width: u32,
|
||
</span><span class="boring"> height: u32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Rectangle {
|
||
fn square(size: u32) -> Rectangle {
|
||
Rectangle { width: size, height: size }
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>To call this associated function, we use the <code>::</code> syntax with the struct name;
|
||
<code>let sq = Rectangle::square(3);</code> is an example. This function is namespaced by
|
||
the struct: the <code>::</code> syntax is used for both associated functions and
|
||
namespaces created by modules. We’ll discuss modules in Chapter 7.</p>
|
||
<h3><a class="header" href="#multiple-impl-blocks" id="multiple-impl-blocks">Multiple <code>impl</code> Blocks</a></h3>
|
||
<p>Each struct is allowed to have multiple <code>impl</code> blocks. For example, Listing
|
||
5-15 is equivalent to the code shown in Listing 5-16, which has each method
|
||
in its own <code>impl</code> block.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Rectangle {
|
||
</span><span class="boring"> width: u32,
|
||
</span><span class="boring"> height: u32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Rectangle {
|
||
fn area(&self) -> u32 {
|
||
self.width * self.height
|
||
}
|
||
}
|
||
|
||
impl Rectangle {
|
||
fn can_hold(&self, other: &Rectangle) -> bool {
|
||
self.width > other.width && self.height > other.height
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 5-16: Rewriting Listing 5-15 using multiple <code>impl</code>
|
||
blocks</span></p>
|
||
<p>There’s no reason to separate these methods into multiple <code>impl</code> blocks here,
|
||
but this is valid syntax. We’ll see a case in which multiple <code>impl</code> blocks are
|
||
useful in Chapter 10, where we discuss generic types and traits.</p>
|
||
<h2><a class="header" href="#summary-4" id="summary-4">Summary</a></h2>
|
||
<p>Structs let you create custom types that are meaningful for your domain. By
|
||
using structs, you can keep associated pieces of data connected to each other
|
||
and name each piece to make your code clear. Methods let you specify the
|
||
behavior that instances of your structs have, and associated functions let you
|
||
namespace functionality that is particular to your struct without having an
|
||
instance available.</p>
|
||
<p>But structs aren’t the only way you can create custom types: let’s turn to
|
||
Rust’s enum feature to add another tool to your toolbox.</p>
|
||
<h1><a class="header" href="#enums-and-pattern-matching" id="enums-and-pattern-matching">Enums and Pattern Matching</a></h1>
|
||
<p>In this chapter we’ll look at <em>enumerations</em>, also referred to as <em>enums</em>.
|
||
Enums allow you to define a type by enumerating its possible <em>variants</em>. First,
|
||
we’ll define and use an enum to show how an enum can encode meaning along with
|
||
data. Next, we’ll explore a particularly useful enum, called <code>Option</code>, which
|
||
expresses that a value can be either something or nothing. Then we’ll look at
|
||
how pattern matching in the <code>match</code> expression makes it easy to run different
|
||
code for different values of an enum. Finally, we’ll cover how the <code>if let</code>
|
||
construct is another convenient and concise idiom available to you to handle
|
||
enums in your code.</p>
|
||
<p>Enums are a feature in many languages, but their capabilities differ in each
|
||
language. Rust’s enums are most similar to <em>algebraic data types</em> in functional
|
||
languages, such as F#, OCaml, and Haskell.</p>
|
||
<h2><a class="header" href="#defining-an-enum" id="defining-an-enum">Defining an Enum</a></h2>
|
||
<p>Let’s look at a situation we might want to express in code and see why enums
|
||
are useful and more appropriate than structs in this case. Say we need to work
|
||
with IP addresses. Currently, two major standards are used for IP addresses:
|
||
version four and version six. These are the only possibilities for an IP
|
||
address that our program will come across: we can <em>enumerate</em> all possible
|
||
variants, which is where enumeration gets its name.</p>
|
||
<p>Any IP address can be either a version four or a version six address, but not
|
||
both at the same time. That property of IP addresses makes the enum data
|
||
structure appropriate, because an enum value can only be one of its variants.
|
||
Both version four and version six addresses are still fundamentally IP
|
||
addresses, so they should be treated as the same type when the code is handling
|
||
situations that apply to any kind of IP address.</p>
|
||
<p>We can express this concept in code by defining an <code>IpAddrKind</code> enumeration and
|
||
listing the possible kinds an IP address can be, <code>V4</code> and <code>V6</code>. These are the
|
||
variants of the enum:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum IpAddrKind {
|
||
V4,
|
||
V6,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><code>IpAddrKind</code> is now a custom data type that we can use elsewhere in our code.</p>
|
||
<h3><a class="header" href="#enum-values" id="enum-values">Enum Values</a></h3>
|
||
<p>We can create instances of each of the two variants of <code>IpAddrKind</code> like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">enum IpAddrKind {
|
||
</span><span class="boring"> V4,
|
||
</span><span class="boring"> V6,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>let four = IpAddrKind::V4;
|
||
let six = IpAddrKind::V6;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Note that the variants of the enum are namespaced under its identifier, and we
|
||
use a double colon to separate the two. The reason this is useful is that now
|
||
both values <code>IpAddrKind::V4</code> and <code>IpAddrKind::V6</code> are of the same type:
|
||
<code>IpAddrKind</code>. We can then, for instance, define a function that takes any
|
||
<code>IpAddrKind</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">enum IpAddrKind {
|
||
</span><span class="boring"> V4,
|
||
</span><span class="boring"> V6,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn route(ip_kind: IpAddrKind) { }
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>And we can call this function with either variant:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">enum IpAddrKind {
|
||
</span><span class="boring"> V4,
|
||
</span><span class="boring"> V6,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">fn route(ip_kind: IpAddrKind) { }
|
||
</span><span class="boring">
|
||
</span>route(IpAddrKind::V4);
|
||
route(IpAddrKind::V6);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Using enums has even more advantages. Thinking more about our IP address type,
|
||
at the moment we don’t have a way to store the actual IP address <em>data</em>; we
|
||
only know what <em>kind</em> it is. Given that you just learned about structs in
|
||
Chapter 5, you might tackle this problem as shown in Listing 6-1.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum IpAddrKind {
|
||
V4,
|
||
V6,
|
||
}
|
||
|
||
struct IpAddr {
|
||
kind: IpAddrKind,
|
||
address: String,
|
||
}
|
||
|
||
let home = IpAddr {
|
||
kind: IpAddrKind::V4,
|
||
address: String::from("127.0.0.1"),
|
||
};
|
||
|
||
let loopback = IpAddr {
|
||
kind: IpAddrKind::V6,
|
||
address: String::from("::1"),
|
||
};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 6-1: Storing the data and <code>IpAddrKind</code> variant of
|
||
an IP address using a <code>struct</code></span></p>
|
||
<p>Here, we’ve defined a struct <code>IpAddr</code> that has two fields: a <code>kind</code> field that
|
||
is of type <code>IpAddrKind</code> (the enum we defined previously) and an <code>address</code> field
|
||
of type <code>String</code>. We have two instances of this struct. The first, <code>home</code>, has
|
||
the value <code>IpAddrKind::V4</code> as its <code>kind</code> with associated address data of
|
||
<code>127.0.0.1</code>. The second instance, <code>loopback</code>, has the other variant of
|
||
<code>IpAddrKind</code> as its <code>kind</code> value, <code>V6</code>, and has address <code>::1</code> associated with
|
||
it. We’ve used a struct to bundle the <code>kind</code> and <code>address</code> values together, so
|
||
now the variant is associated with the value.</p>
|
||
<p>We can represent the same concept in a more concise way using just an enum,
|
||
rather than an enum inside a struct, by putting data directly into each enum
|
||
variant. This new definition of the <code>IpAddr</code> enum says that both <code>V4</code> and <code>V6</code>
|
||
variants will have associated <code>String</code> values:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum IpAddr {
|
||
V4(String),
|
||
V6(String),
|
||
}
|
||
|
||
let home = IpAddr::V4(String::from("127.0.0.1"));
|
||
|
||
let loopback = IpAddr::V6(String::from("::1"));
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We attach data to each variant of the enum directly, so there is no need for an
|
||
extra struct.</p>
|
||
<p>There’s another advantage to using an enum rather than a struct: each variant
|
||
can have different types and amounts of associated data. Version four IP
|
||
addresses will always have four numeric components that will have values
|
||
between 0 and 255. If we wanted to store <code>V4</code> addresses as four <code>u8</code> values but
|
||
still express <code>V6</code> addresses as one <code>String</code> value, we wouldn’t be able to with
|
||
a struct. Enums handle this case with ease:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum IpAddr {
|
||
V4(u8, u8, u8, u8),
|
||
V6(String),
|
||
}
|
||
|
||
let home = IpAddr::V4(127, 0, 0, 1);
|
||
|
||
let loopback = IpAddr::V6(String::from("::1"));
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We’ve shown several different ways to define data structures to store version
|
||
four and version six IP addresses. However, as it turns out, wanting to store
|
||
IP addresses and encode which kind they are is so common that <a href="../std/net/enum.IpAddr.html">the standard
|
||
library has a definition we can use!</a><!-- ignore --> Let’s look at how
|
||
the standard library defines <code>IpAddr</code>: it has the exact enum and variants that
|
||
we’ve defined and used, but it embeds the address data inside the variants in
|
||
the form of two different structs, which are defined differently for each
|
||
variant:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct Ipv4Addr {
|
||
// --snip--
|
||
}
|
||
|
||
struct Ipv6Addr {
|
||
// --snip--
|
||
}
|
||
|
||
enum IpAddr {
|
||
V4(Ipv4Addr),
|
||
V6(Ipv6Addr),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code illustrates that you can put any kind of data inside an enum variant:
|
||
strings, numeric types, or structs, for example. You can even include another
|
||
enum! Also, standard library types are often not much more complicated than
|
||
what you might come up with.</p>
|
||
<p>Note that even though the standard library contains a definition for <code>IpAddr</code>,
|
||
we can still create and use our own definition without conflict because we
|
||
haven’t brought the standard library’s definition into our scope. We’ll talk
|
||
more about bringing types into scope in Chapter 7.</p>
|
||
<p>Let’s look at another example of an enum in Listing 6-2: this one has a wide
|
||
variety of types embedded in its variants.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Message {
|
||
Quit,
|
||
Move { x: i32, y: i32 },
|
||
Write(String),
|
||
ChangeColor(i32, i32, i32),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 6-2: A <code>Message</code> enum whose variants each store
|
||
different amounts and types of values</span></p>
|
||
<p>This enum has four variants with different types:</p>
|
||
<ul>
|
||
<li><code>Quit</code> has no data associated with it at all.</li>
|
||
<li><code>Move</code> has named fields, like a struct does.</li>
|
||
<li><code>Write</code> includes a single <code>String</code>.</li>
|
||
<li><code>ChangeColor</code> includes three <code>i32</code> values.</li>
|
||
</ul>
|
||
<p>Defining an enum with variants such as the ones in Listing 6-2 is similar to
|
||
defining different kinds of struct definitions, except the enum doesn’t use the
|
||
<code>struct</code> keyword and all the variants are grouped together under the <code>Message</code>
|
||
type. The following structs could hold the same data that the preceding enum
|
||
variants hold:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct QuitMessage; // unit struct
|
||
struct MoveMessage {
|
||
x: i32,
|
||
y: i32,
|
||
}
|
||
struct WriteMessage(String); // tuple struct
|
||
struct ChangeColorMessage(i32, i32, i32); // tuple struct
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>But if we used the different structs, which each have their own type, we
|
||
couldn’t as easily define a function to take any of these kinds of messages as
|
||
we could with the <code>Message</code> enum defined in Listing 6-2, which is a single type.</p>
|
||
<p>There is one more similarity between enums and structs: just as we’re able to
|
||
define methods on structs using <code>impl</code>, we’re also able to define methods on
|
||
enums. Here’s a method named <code>call</code> that we could define on our <code>Message</code> enum:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">enum Message {
|
||
</span><span class="boring"> Quit,
|
||
</span><span class="boring"> Move { x: i32, y: i32 },
|
||
</span><span class="boring"> Write(String),
|
||
</span><span class="boring"> ChangeColor(i32, i32, i32),
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Message {
|
||
fn call(&self) {
|
||
// method body would be defined here
|
||
}
|
||
}
|
||
|
||
let m = Message::Write(String::from("hello"));
|
||
m.call();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The body of the method would use <code>self</code> to get the value that we called the
|
||
method on. In this example, we’ve created a variable <code>m</code> that has the value
|
||
<code>Message::Write(String::from("hello"))</code>, and that is what <code>self</code> will be in the
|
||
body of the <code>call</code> method when <code>m.call()</code> runs.</p>
|
||
<p>Let’s look at another enum in the standard library that is very common and
|
||
useful: <code>Option</code>.</p>
|
||
<h3><a class="header" href="#the-option-enum-and-its-advantages-over-null-values" id="the-option-enum-and-its-advantages-over-null-values">The <code>Option</code> Enum and Its Advantages Over Null Values</a></h3>
|
||
<p>In the previous section, we looked at how the <code>IpAddr</code> enum let us use Rust’s
|
||
type system to encode more information than just the data into our program.
|
||
This section explores a case study of <code>Option</code>, which is another enum defined
|
||
by the standard library. The <code>Option</code> type is used in many places because it
|
||
encodes the very common scenario in which a value could be something or it
|
||
could be nothing. Expressing this concept in terms of the type system means the
|
||
compiler can check whether you’ve handled all the cases you should be handling;
|
||
this functionality can prevent bugs that are extremely common in other
|
||
programming languages.</p>
|
||
<p>Programming language design is often thought of in terms of which features you
|
||
include, but the features you exclude are important too. Rust doesn’t have the
|
||
null feature that many other languages have. <em>Null</em> is a value that means there
|
||
is no value there. In languages with null, variables can always be in one of
|
||
two states: null or not-null.</p>
|
||
<p>In his 2009 presentation “Null References: The Billion Dollar Mistake,” Tony
|
||
Hoare, the inventor of null, has this to say:</p>
|
||
<blockquote>
|
||
<p>I call it my billion-dollar mistake. At that time, I was designing the first
|
||
comprehensive type system for references in an object-oriented language. My
|
||
goal was to ensure that all use of references should be absolutely safe, with
|
||
checking performed automatically by the compiler. But I couldn’t resist the
|
||
temptation to put in a null reference, simply because it was so easy to
|
||
implement. This has led to innumerable errors, vulnerabilities, and system
|
||
crashes, which have probably caused a billion dollars of pain and damage in
|
||
the last forty years.</p>
|
||
</blockquote>
|
||
<p>The problem with null values is that if you try to use a null value as a
|
||
not-null value, you’ll get an error of some kind. Because this null or not-null
|
||
property is pervasive, it’s extremely easy to make this kind of error.</p>
|
||
<p>However, the concept that null is trying to express is still a useful one: a
|
||
null is a value that is currently invalid or absent for some reason.</p>
|
||
<p>The problem isn’t really with the concept but with the particular
|
||
implementation. As such, Rust does not have nulls, but it does have an enum
|
||
that can encode the concept of a value being present or absent. This enum is
|
||
<code>Option&lt;T&gt;</code>, and it is <a href="../std/option/enum.Option.html">defined by the standard library</a><!-- ignore -->
|
||
as follows:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Option&lt;T&gt; {
|
||
Some(T),
|
||
None,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>Option&lt;T&gt;</code> enum is so useful that it’s even included in the prelude; you
|
||
don’t need to bring it into scope explicitly. In addition, so are its variants:
|
||
you can use <code>Some</code> and <code>None</code> directly without the <code>Option::</code> prefix. The
|
||
<code>Option&lt;T&gt;</code> enum is still just a regular enum, and <code>Some(T)</code> and <code>None</code> are
|
||
still variants of type <code>Option&lt;T&gt;</code>.</p>
|
||
<p>The <code>&lt;T&gt;</code> syntax is a feature of Rust we haven’t talked about yet. It’s a
|
||
generic type parameter, and we’ll cover generics in more detail in Chapter 10.
|
||
For now, all you need to know is that <code>&lt;T&gt;</code> means the <code>Some</code> variant of the
|
||
<code>Option</code> enum can hold one piece of data of any type. Here are some examples of
|
||
using <code>Option</code> values to hold number types and string types:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let some_number = Some(5);
|
||
let some_string = Some("a string");
|
||
|
||
let absent_number: Option&lt;i32&gt; = None;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>If we use <code>None</code> rather than <code>Some</code>, we need to tell Rust what type of
|
||
<code>Option&lt;T&gt;</code> we have, because the compiler can’t infer the type that the <code>Some</code>
|
||
variant will hold by looking only at a <code>None</code> value.</p>
|
||
<p>When we have a <code>Some</code> value, we know that a value is present and the value is
|
||
held within the <code>Some</code>. When we have a <code>None</code> value, in some sense, it means
|
||
the same thing as null: we don’t have a valid value. So why is having
|
||
<code>Option&lt;T&gt;</code> any better than having null?</p>
|
||
<p>In short, because <code>Option&lt;T&gt;</code> and <code>T</code> (where <code>T</code> can be any type) are different
|
||
types, the compiler won’t let us use an <code>Option&lt;T&gt;</code> value as if it were
|
||
definitely a valid value. For example, this code won’t compile because it’s
|
||
trying to add an <code>i8</code> to an <code>Option&lt;i8&gt;</code>:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let x: i8 = 5;
|
||
let y: Option&lt;i8&gt; = Some(5);
|
||
|
||
let sum = x + y;
|
||
</code></pre>
|
||
<p>If we run this code, we get an error message like this:</p>
|
||
<pre><code class="language-text">error[E0277]: the trait bound `i8: std::ops::Add&lt;std::option::Option&lt;i8&gt;&gt;` is
|
||
not satisfied
|
||
-->
|
||
|
|
||
5 | let sum = x + y;
|
||
| ^ no implementation for `i8 + std::option::Option<i8>`
|
||
|
|
||
</code></pre>
|
||
<p>Intense! In effect, this error message means that Rust doesn’t understand how
|
||
to add an <code>i8</code> and an <code>Option&lt;i8&gt;</code>, because they’re different types. When we
|
||
have a value of a type like <code>i8</code> in Rust, the compiler will ensure that we
|
||
always have a valid value. We can proceed confidently without having to check
|
||
for null before using that value. Only when we have an <code>Option&lt;i8&gt;</code> (or
|
||
whatever type of value we’re working with) do we have to worry about possibly
|
||
not having a value, and the compiler will make sure we handle that case before
|
||
using the value.</p>
|
||
<p>In other words, you have to convert an <code>Option&lt;T&gt;</code> to a <code>T</code> before you can
|
||
perform <code>T</code> operations with it. Generally, this helps catch one of the most
|
||
common issues with null: assuming that something isn’t null when it actually
|
||
is.</p>
|
||
<p>Not having to worry about incorrectly assuming a not-null value helps you to be
|
||
more confident in your code. In order to have a value that can possibly be
|
||
null, you must explicitly opt in by making the type of that value <code>Option&lt;T&gt;</code>.
|
||
Then, when you use that value, you are required to explicitly handle the case
|
||
when the value is null. Everywhere that a value has a type that isn’t an
|
||
<code>Option&lt;T&gt;</code>, you <em>can</em> safely assume that the value isn’t null. This was a
|
||
deliberate design decision for Rust to limit null’s pervasiveness and increase
|
||
the safety of Rust code.</p>
|
||
<p>So, how do you get the <code>T</code> value out of a <code>Some</code> variant when you have a value
|
||
of type <code>Option&lt;T&gt;</code> so you can use that value? The <code>Option&lt;T&gt;</code> enum has a large
|
||
number of methods that are useful in a variety of situations; you can check
|
||
them out in <a href="../std/option/enum.Option.html">its documentation</a><!-- ignore -->. Becoming familiar with
|
||
the methods on <code>Option&lt;T&gt;</code> will be extremely useful in your journey with Rust.</p>
|
||
<p>In general, in order to use an <code>Option&lt;T&gt;</code> value, you want to have code that
|
||
will handle each variant. You want some code that will run only when you have a
|
||
<code>Some(T)</code> value, and this code is allowed to use the inner <code>T</code>. You want some
|
||
other code to run if you have a <code>None</code> value, and that code doesn’t have a <code>T</code>
|
||
value available. The <code>match</code> expression is a control flow construct that does
|
||
just this when used with enums: it will run different code depending on which
|
||
variant of the enum it has, and that code can use the data inside the matching
|
||
value.</p>
|
||
<h2><a class="header" href="#the-match-control-flow-operator" id="the-match-control-flow-operator">The <code>match</code> Control Flow Operator</a></h2>
|
||
<p>Rust has an extremely powerful control flow operator called <code>match</code> that allows
|
||
you to compare a value against a series of patterns and then execute code based
|
||
on which pattern matches. Patterns can be made up of literal values, variable
|
||
names, wildcards, and many other things; Chapter 18 covers all the different
|
||
kinds of patterns and what they do. The power of <code>match</code> comes from the
|
||
expressiveness of the patterns and the fact that the compiler confirms that all
|
||
possible cases are handled.</p>
|
||
<p>Think of a <code>match</code> expression as being like a coin-sorting machine: coins slide
|
||
down a track with variously sized holes along it, and each coin falls through
|
||
the first hole it encounters that it fits into. In the same way, values go
|
||
through each pattern in a <code>match</code>, and at the first pattern the value “fits,”
|
||
the value falls into the associated code block to be used during execution.</p>
|
||
<p>Because we just mentioned coins, let’s use them as an example using <code>match</code>! We
|
||
can write a function that can take an unknown United States coin and, in a
|
||
similar way as the coin-sorting machine, determine which coin it is and return its
|
||
value in cents, as shown here in Listing 6-3.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Coin {
|
||
Penny,
|
||
Nickel,
|
||
Dime,
|
||
Quarter,
|
||
}
|
||
|
||
fn value_in_cents(coin: Coin) -> u8 {
|
||
match coin {
|
||
Coin::Penny => 1,
|
||
Coin::Nickel => 5,
|
||
Coin::Dime => 10,
|
||
Coin::Quarter => 25,
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 6-3: An enum and a <code>match</code> expression that has
|
||
the variants of the enum as its patterns</span></p>
|
||
<p>Let’s break down the <code>match</code> in the <code>value_in_cents</code> function. First, we list
|
||
the <code>match</code> keyword followed by an expression, which in this case is the value
|
||
<code>coin</code>. This seems very similar to an expression used with <code>if</code>, but there’s a
|
||
big difference: with <code>if</code>, the expression needs to return a Boolean value, but
|
||
here, it can be any type. The type of <code>coin</code> in this example is the <code>Coin</code> enum
|
||
that we defined at the top of Listing 6-3.</p>
|
||
<p>Next are the <code>match</code> arms. An arm has two parts: a pattern and some code. The
|
||
first arm here has a pattern that is the value <code>Coin::Penny</code> and then the <code>=></code>
|
||
operator that separates the pattern and the code to run. The code in this case
|
||
is just the value <code>1</code>. Each arm is separated from the next with a comma.</p>
|
||
<p>When the <code>match</code> expression executes, it compares the resulting value against
|
||
the pattern of each arm, in order. If a pattern matches the value, the code
|
||
associated with that pattern is executed. If that pattern doesn’t match the
|
||
value, execution continues to the next arm, much as in a coin-sorting machine.
|
||
We can have as many arms as we need: in Listing 6-3, our <code>match</code> has four arms.</p>
|
||
<p>The code associated with each arm is an expression, and the resulting value of
|
||
the expression in the matching arm is the value that gets returned for the
|
||
entire <code>match</code> expression.</p>
|
||
<p>Curly brackets typically aren’t used if the match arm code is short, as it is
|
||
in Listing 6-3 where each arm just returns a value. If you want to run multiple
|
||
lines of code in a match arm, you can use curly brackets. For example, the
|
||
following code would print “Lucky penny!” every time the function was called with
|
||
a <code>Coin::Penny</code> but would still return the last value of the block, <code>1</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">enum Coin {
|
||
</span><span class="boring"> Penny,
|
||
</span><span class="boring"> Nickel,
|
||
</span><span class="boring"> Dime,
|
||
</span><span class="boring"> Quarter,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn value_in_cents(coin: Coin) -> u8 {
|
||
match coin {
|
||
Coin::Penny => {
|
||
println!("Lucky penny!");
|
||
1
|
||
},
|
||
Coin::Nickel => 5,
|
||
Coin::Dime => 10,
|
||
Coin::Quarter => 25,
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<h3><a class="header" href="#patterns-that-bind-to-values" id="patterns-that-bind-to-values">Patterns that Bind to Values</a></h3>
|
||
<p>Another useful feature of match arms is that they can bind to the parts of the
|
||
values that match the pattern. This is how we can extract values out of enum
|
||
variants.</p>
|
||
<p>As an example, let’s change one of our enum variants to hold data inside it.
|
||
From 1999 through 2008, the United States minted quarters with different
|
||
designs for each of the 50 states on one side. No other coins got state
|
||
designs, so only quarters have this extra value. We can add this information to
|
||
our <code>enum</code> by changing the <code>Quarter</code> variant to include a <code>UsState</code> value stored
|
||
inside it, which we’ve done here in Listing 6-4.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[derive(Debug)] // so we can inspect the state in a minute
|
||
enum UsState {
|
||
Alabama,
|
||
Alaska,
|
||
// --snip--
|
||
}
|
||
|
||
enum Coin {
|
||
Penny,
|
||
Nickel,
|
||
Dime,
|
||
Quarter(UsState),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 6-4: A <code>Coin</code> enum in which the <code>Quarter</code> variant
|
||
also holds a <code>UsState</code> value</span></p>
|
||
<p>Let’s imagine that a friend of ours is trying to collect all 50 state quarters.
|
||
While we sort our loose change by coin type, we’ll also call out the name of
|
||
the state associated with each quarter so if it’s one our friend doesn’t have,
|
||
they can add it to their collection.</p>
|
||
<p>In the match expression for this code, we add a variable called <code>state</code> to the
|
||
pattern that matches values of the variant <code>Coin::Quarter</code>. When a
|
||
<code>Coin::Quarter</code> matches, the <code>state</code> variable will bind to the value of that
|
||
quarter’s state. Then we can use <code>state</code> in the code for that arm, like so:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">enum UsState {
|
||
</span><span class="boring"> Alabama,
|
||
</span><span class="boring"> Alaska,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">enum Coin {
|
||
</span><span class="boring"> Penny,
|
||
</span><span class="boring"> Nickel,
|
||
</span><span class="boring"> Dime,
|
||
</span><span class="boring"> Quarter(UsState),
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn value_in_cents(coin: Coin) -> u8 {
|
||
match coin {
|
||
Coin::Penny => 1,
|
||
Coin::Nickel => 5,
|
||
Coin::Dime => 10,
|
||
Coin::Quarter(state) => {
|
||
println!("State quarter from {:?}!", state);
|
||
25
|
||
},
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>If we were to call <code>value_in_cents(Coin::Quarter(UsState::Alaska))</code>, <code>coin</code>
|
||
would be <code>Coin::Quarter(UsState::Alaska)</code>. When we compare that value with each
|
||
of the match arms, none of them match until we reach <code>Coin::Quarter(state)</code>. At
|
||
that point, the binding for <code>state</code> will be the value <code>UsState::Alaska</code>. We can
|
||
then use that binding in the <code>println!</code> expression, thus getting the inner
|
||
state value out of the <code>Coin</code> enum variant for <code>Quarter</code>.</p>
|
||
<h3><a class="header" href="#matching-with-optiont" id="matching-with-optiont">Matching with <code>Option&lt;T&gt;</code></a></h3>
|
||
<p>In the previous section, we wanted to get the inner <code>T</code> value out of the <code>Some</code>
|
||
case when using <code>Option&lt;T&gt;</code>; we can also handle <code>Option&lt;T&gt;</code> using <code>match</code> as we
|
||
did with the <code>Coin</code> enum! Instead of comparing coins, we’ll compare the
|
||
variants of <code>Option&lt;T&gt;</code>, but the way that the <code>match</code> expression works remains
|
||
the same.</p>
|
||
<p>Let’s say we want to write a function that takes an <code>Option&lt;i32&gt;</code> and, if
|
||
there’s a value inside, adds 1 to that value. If there isn’t a value inside,
|
||
the function should return the <code>None</code> value and not attempt to perform any
|
||
operations.</p>
|
||
<p>This function is very easy to write, thanks to <code>match</code>, and will look like
|
||
Listing 6-5.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn plus_one(x: Option<i32>) -> Option<i32> {
|
||
match x {
|
||
None => None,
|
||
Some(i) => Some(i + 1),
|
||
}
|
||
}
|
||
|
||
let five = Some(5);
|
||
let six = plus_one(five);
|
||
let none = plus_one(None);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 6-5: A function that uses a <code>match</code> expression on
|
||
an <code>Option&lt;i32&gt;</code></span></p>
|
||
<p>Let’s examine the first execution of <code>plus_one</code> in more detail. When we call
|
||
<code>plus_one(five)</code>, the variable <code>x</code> in the body of <code>plus_one</code> will have the
|
||
value <code>Some(5)</code>. We then compare that against each match arm.</p>
|
||
<pre><code class="language-rust ignore">None => None,
|
||
</code></pre>
|
||
<p>The <code>Some(5)</code> value doesn’t match the pattern <code>None</code>, so we continue to the
|
||
next arm.</p>
|
||
<pre><code class="language-rust ignore">Some(i) => Some(i + 1),
|
||
</code></pre>
|
||
<p>Does <code>Some(5)</code> match <code>Some(i)</code>? Why yes it does! We have the same variant. The
|
||
<code>i</code> binds to the value contained in <code>Some</code>, so <code>i</code> takes the value <code>5</code>. The
|
||
code in the match arm is then executed, so we add 1 to the value of <code>i</code> and
|
||
create a new <code>Some</code> value with our total <code>6</code> inside.</p>
|
||
<p>Now let’s consider the second call of <code>plus_one</code> in Listing 6-5, where <code>x</code> is
|
||
<code>None</code>. We enter the <code>match</code> and compare to the first arm.</p>
|
||
<pre><code class="language-rust ignore">None => None,
|
||
</code></pre>
|
||
<p>It matches! There’s no value to add to, so the program stops and returns the
|
||
<code>None</code> value on the right side of <code>=></code>. Because the first arm matched, no other
|
||
arms are compared.</p>
|
||
<p>Combining <code>match</code> and enums is useful in many situations. You’ll see this
|
||
pattern a lot in Rust code: <code>match</code> against an enum, bind a variable to the
|
||
data inside, and then execute code based on it. It’s a bit tricky at first, but
|
||
once you get used to it, you’ll wish you had it in all languages. It’s
|
||
consistently a user favorite.</p>
|
||
<h3><a class="header" href="#matches-are-exhaustive" id="matches-are-exhaustive">Matches Are Exhaustive</a></h3>
|
||
<p>There’s one other aspect of <code>match</code> we need to discuss. Consider this version
|
||
of our <code>plus_one</code> function that has a bug and won’t compile:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn plus_one(x: Option<i32>) -> Option<i32> {
|
||
match x {
|
||
Some(i) => Some(i + 1),
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>We didn’t handle the <code>None</code> case, so this code will cause a bug. Luckily, it’s
|
||
a bug Rust knows how to catch. If we try to compile this code, we’ll get this
|
||
error:</p>
|
||
<pre><code class="language-text">error[E0004]: non-exhaustive patterns: `None` not covered
|
||
-->
|
||
|
|
||
6 | match x {
|
||
| ^ pattern `None` not covered
|
||
</code></pre>
|
||
<p>Rust knows that we didn’t cover every possible case and even knows which
|
||
pattern we forgot! Matches in Rust are <em>exhaustive</em>: we must exhaust every last
|
||
possibility in order for the code to be valid. Especially in the case of
|
||
<code>Option&lt;T&gt;</code>, when Rust prevents us from forgetting to explicitly handle the
|
||
<code>None</code> case, it protects us from assuming that we have a value when we might
|
||
have null, thus making the billion-dollar mistake discussed earlier impossible.</p>
|
||
<h3><a class="header" href="#the-_-placeholder" id="the-_-placeholder">The <code>_</code> Placeholder</a></h3>
|
||
<p>Rust also has a pattern we can use when we don’t want to list all possible
|
||
values. For example, a <code>u8</code> can have valid values of 0 through 255. If we only
|
||
care about the values 1, 3, 5, and 7, we don’t want to have to list out 0, 2,
|
||
4, 6, 8, 9 all the way up to 255. Fortunately, we don’t have to: we can use the
|
||
special pattern <code>_</code> instead:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let some_u8_value = 0u8;
|
||
match some_u8_value {
|
||
1 => println!("one"),
|
||
3 => println!("three"),
|
||
5 => println!("five"),
|
||
7 => println!("seven"),
|
||
_ => (),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>_</code> pattern will match any value. By putting it after our other arms, the
|
||
<code>_</code> will match all the possible cases that aren’t specified before it. The <code>()</code>
|
||
is just the unit value, so nothing will happen in the <code>_</code> case. As a result, we
|
||
can say that we want to do nothing for all the possible values that we don’t
|
||
list before the <code>_</code> placeholder.</p>
|
||
<p>However, the <code>match</code> expression can be a bit wordy in a situation in which we
|
||
care about only <em>one</em> of the cases. For this situation, Rust provides <code>if let</code>.</p>
|
||
<h2><a class="header" href="#concise-control-flow-with-if-let" id="concise-control-flow-with-if-let">Concise Control Flow with <code>if let</code></a></h2>
|
||
<p>The <code>if let</code> syntax lets you combine <code>if</code> and <code>let</code> into a less verbose way to
|
||
handle values that match one pattern while ignoring the rest. Consider the
|
||
program in Listing 6-6 that matches on an <code>Option&lt;u8&gt;</code> value but only wants to
|
||
execute code if the value is 3.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let some_u8_value = Some(0u8);
|
||
match some_u8_value {
|
||
Some(3) => println!("three"),
|
||
_ => (),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 6-6: A <code>match</code> that only cares about executing
|
||
code when the value is <code>Some(3)</code></span></p>
|
||
<p>We want to do something with the <code>Some(3)</code> match but do nothing with any other
|
||
<code>Some&lt;u8&gt;</code> value or the <code>None</code> value. To satisfy the <code>match</code> expression, we
|
||
have to add <code>_ => ()</code> after processing just one variant, which is a lot of
|
||
boilerplate code to add.</p>
|
||
<p>Instead, we could write this in a shorter way using <code>if let</code>. The following
|
||
code behaves the same as the <code>match</code> in Listing 6-6:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">let some_u8_value = Some(0u8);
|
||
</span>if let Some(3) = some_u8_value {
|
||
println!("three");
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The syntax <code>if let</code> takes a pattern and an expression separated by an equal
|
||
sign. It works the same way as a <code>match</code>, where the expression is given to the
|
||
<code>match</code> and the pattern is its first arm.</p>
|
||
<p>Using <code>if let</code> means less typing, less indentation, and less boilerplate code.
|
||
However, you lose the exhaustive checking that <code>match</code> enforces. Choosing
|
||
between <code>match</code> and <code>if let</code> depends on what you’re doing in your particular
|
||
situation and whether gaining conciseness is an appropriate trade-off for
|
||
losing exhaustive checking.</p>
|
||
<p>In other words, you can think of <code>if let</code> as syntax sugar for a <code>match</code> that
|
||
runs code when the value matches one pattern and then ignores all other values.</p>
|
||
<p>We can include an <code>else</code> with an <code>if let</code>. The block of code that goes with the
|
||
<code>else</code> is the same as the block of code that would go with the <code>_</code> case in the
|
||
<code>match</code> expression that is equivalent to the <code>if let</code> and <code>else</code>. Recall the
|
||
<code>Coin</code> enum definition in Listing 6-4, where the <code>Quarter</code> variant also held a
|
||
<code>UsState</code> value. If we wanted to count all non-quarter coins we see while also
|
||
announcing the state of the quarters, we could do that with a <code>match</code>
|
||
expression like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">enum UsState {
|
||
</span><span class="boring"> Alabama,
|
||
</span><span class="boring"> Alaska,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">enum Coin {
|
||
</span><span class="boring"> Penny,
|
||
</span><span class="boring"> Nickel,
|
||
</span><span class="boring"> Dime,
|
||
</span><span class="boring"> Quarter(UsState),
|
||
</span><span class="boring">}
|
||
</span><span class="boring">let coin = Coin::Penny;
|
||
</span>let mut count = 0;
|
||
match coin {
|
||
Coin::Quarter(state) => println!("State quarter from {:?}!", state),
|
||
_ => count += 1,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Or we could use an <code>if let</code> and <code>else</code> expression like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">enum UsState {
|
||
</span><span class="boring"> Alabama,
|
||
</span><span class="boring"> Alaska,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">enum Coin {
|
||
</span><span class="boring"> Penny,
|
||
</span><span class="boring"> Nickel,
|
||
</span><span class="boring"> Dime,
|
||
</span><span class="boring"> Quarter(UsState),
|
||
</span><span class="boring">}
|
||
</span><span class="boring">let coin = Coin::Penny;
|
||
</span>let mut count = 0;
|
||
if let Coin::Quarter(state) = coin {
|
||
println!("State quarter from {:?}!", state);
|
||
} else {
|
||
count += 1;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>If you have a situation in which your program has logic that is too verbose to
|
||
express using a <code>match</code>, remember that <code>if let</code> is in your Rust toolbox as well.</p>
|
||
<h2><a class="header" href="#summary-5" id="summary-5">Summary</a></h2>
|
||
<p>We’ve now covered how to use enums to create custom types that can be one of a
|
||
set of enumerated values. We’ve shown how the standard library’s <code>Option&lt;T&gt;</code>
|
||
type helps you use the type system to prevent errors. When enum values have
|
||
data inside them, you can use <code>match</code> or <code>if let</code> to extract and use those
|
||
values, depending on how many cases you need to handle.</p>
|
||
<p>Your Rust programs can now express concepts in your domain using structs and
|
||
enums. Creating custom types to use in your API ensures type safety: the
|
||
compiler will make certain your functions get only values of the type each
|
||
function expects.</p>
|
||
<p>In order to provide a well-organized API to your users that is straightforward
|
||
to use and only exposes exactly what your users will need, let’s now turn to
|
||
Rust’s modules.</p>
|
||
<h1><a class="header" href="#managing-growing-projects-with-packages-crates-and-modules" id="managing-growing-projects-with-packages-crates-and-modules">Managing Growing Projects with Packages, Crates, and Modules</a></h1>
|
||
<p>As you write large programs, organizing your code will be important because
|
||
keeping track of your entire program in your head will become impossible. By
|
||
grouping related functionality and separating code with distinct features,
|
||
you’ll clarify where to find code that implements a particular feature and
|
||
where to go to change how a feature works.</p>
|
||
<p>The programs we’ve written so far have been in one module in one file. As a
|
||
project grows, you can organize code by splitting it into multiple modules and
|
||
then multiple files. A package can contain multiple binary crates and
|
||
optionally one library crate. As a package grows, you can extract parts into
|
||
separate crates that become external dependencies. This chapter covers all
|
||
these techniques. For very large projects comprising a set of interrelated packages
|
||
that evolve together, Cargo provides workspaces, which we’ll cover in the
|
||
<a href="ch14-03-cargo-workspaces.html">“Cargo Workspaces”</a><!-- ignore --> section in Chapter 14.</p>
|
||
<p>In addition to grouping functionality, encapsulating implementation details
|
||
lets you reuse code at a higher level: once you’ve implemented an operation,
|
||
other code can call that code via the code’s public interface without knowing
|
||
how the implementation works. The way you write code defines which parts are
|
||
public for other code to use and which parts are private implementation details
|
||
that you reserve the right to change. This is another way to limit the amount
|
||
of detail you have to keep in your head.</p>
|
||
<p>A related concept is scope: the nested context in which code is written has a
|
||
set of names that are defined as “in scope.” When reading, writing, and
|
||
compiling code, programmers and compilers need to know whether a particular
|
||
name at a particular spot refers to a variable, function, struct, enum, module,
|
||
constant, or other item and what that item means. You can create scopes and
|
||
change which names are in or out of scope. You can’t have two items with the
|
||
same name in the same scope; tools are available to resolve name conflicts.</p>
|
||
<p>Rust has a number of features that allow you to manage your code’s
|
||
organization, including which details are exposed, which details are private,
|
||
and what names are in each scope in your programs. These features, sometimes
|
||
collectively referred to as the <em>module system</em>, include:</p>
|
||
<ul>
|
||
<li><strong>Packages:</strong> A Cargo feature that lets you build, test, and share crates</li>
|
||
<li><strong>Crates:</strong> A tree of modules that produces a library or executable</li>
|
||
<li><strong>Modules</strong> and <strong>use:</strong> Let you control the organization, scope, and
|
||
privacy of paths</li>
|
||
<li><strong>Paths:</strong> A way of naming an item, such as a struct, function, or module</li>
|
||
</ul>
|
||
<p>In this chapter, we’ll cover all these features, discuss how they interact, and
|
||
explain how to use them to manage scope. By the end, you should have a solid
|
||
understanding of the module system and be able to work with scopes like a pro!</p>
|
||
<h2><a class="header" href="#packages-and-crates" id="packages-and-crates">Packages and Crates</a></h2>
|
||
<p>The first parts of the module system we’ll cover are packages and crates. A
|
||
crate is a binary or library. The <em>crate root</em> is a source file that the Rust
|
||
compiler starts from and makes up the root module of your crate (we’ll explain
|
||
modules in depth in the <a href="ch07-02-defining-modules-to-control-scope-and-privacy.html">“Defining Modules to Control Scope and
|
||
Privacy”</a><!-- ignore --> section). A <em>package</em> is one or more crates
|
||
that provide a set of functionality. A package contains a <em>Cargo.toml</em> file
|
||
that describes how to build those crates.</p>
|
||
<p>Several rules determine what a package can contain. A package <em>must</em> contain
|
||
zero or one library crates, and no more. It can contain as many binary crates
|
||
as you’d like, but it must contain at least one crate (either library or
|
||
binary).</p>
|
||
<p>Let’s walk through what happens when we create a package. First, we enter the
|
||
command <code>cargo new</code>:</p>
|
||
<pre><code class="language-text">$ cargo new my-project
|
||
Created binary (application) `my-project` package
|
||
$ ls my-project
|
||
Cargo.toml
|
||
src
|
||
$ ls my-project/src
|
||
main.rs
|
||
</code></pre>
|
||
<p>When we entered the command, Cargo created a <em>Cargo.toml</em> file, giving us a
|
||
package. Looking at the contents of <em>Cargo.toml</em>, there’s no mention of
|
||
<em>src/main.rs</em> because Cargo follows a convention that <em>src/main.rs</em> is the
|
||
crate root of a binary crate with the same name as the package. Likewise, Cargo
|
||
knows that if the package directory contains <em>src/lib.rs</em>, the package contains
|
||
a library crate with the same name as the package, and <em>src/lib.rs</em> is its
|
||
crate root. Cargo passes the crate root files to <code>rustc</code> to build the library
|
||
or binary.</p>
|
||
<p>Here, we have a package that only contains <em>src/main.rs</em>, meaning it only
|
||
contains a binary crate named <code>my-project</code>. If a package contains <em>src/main.rs</em>
|
||
and <em>src/lib.rs</em>, it has two crates: a library and a binary, both with the same
|
||
name as the package. A package can have multiple binary crates by placing files
|
||
in the <em>src/bin</em> directory: each file will be a separate binary crate.</p>
|
||
<p>A crate will group related functionality together in a scope so the
|
||
functionality is easy to share between multiple projects. For example, the
|
||
<code>rand</code> crate we used in <a href="ch02-00-guessing-game-tutorial.html#generating-a-random-number">Chapter 2</a><!-- ignore --> provides functionality
|
||
that generates random numbers. We can use that functionality in our own
|
||
projects by bringing the <code>rand</code> crate into our project’s scope. All the
|
||
functionality provided by the <code>rand</code> crate is accessible through the crate’s
|
||
name, <code>rand</code>.</p>
|
||
<p>Keeping a crate’s functionality in its own scope clarifies whether particular
|
||
functionality is defined in our crate or the <code>rand</code> crate and prevents
|
||
potential conflicts. For example, the <code>rand</code> crate provides a trait named
|
||
<code>Rng</code>. We can also define a <code>struct</code> named <code>Rng</code> in our own crate. Because a
|
||
crate’s functionality is namespaced in its own scope, when we add <code>rand</code> as a
|
||
dependency, the compiler isn’t confused about what the name <code>Rng</code> refers to. In
|
||
our crate, it refers to the <code>struct Rng</code> that we defined. We would access the
|
||
<code>Rng</code> trait from the <code>rand</code> crate as <code>rand::Rng</code>.</p>
|
||
<p>Let’s move on and talk about the module system!</p>
|
||
<h2><a class="header" href="#defining-modules-to-control-scope-and-privacy" id="defining-modules-to-control-scope-and-privacy">Defining Modules to Control Scope and Privacy</a></h2>
|
||
<p>In this section, we’ll talk about modules and other parts of the module system,
|
||
namely <em>paths</em> that allow you to name items; the <code>use</code> keyword that brings a
|
||
path into scope; and the <code>pub</code> keyword to make items public. We’ll also discuss
|
||
the <code>as</code> keyword, external packages, and the glob operator. For now, let’s
|
||
focus on modules!</p>
|
||
<p><em>Modules</em> let us organize code within a crate into groups for readability and
|
||
easy reuse. Modules also control the <em>privacy</em> of items, which is whether an
|
||
item can be used by outside code (<em>public</em>) or is an internal implementation
|
||
detail and not available for outside use (<em>private</em>).</p>
|
||
<p>As an example, let’s write a library crate that provides the functionality of a
|
||
restaurant. We’ll define the signatures of functions but leave their bodies
|
||
empty to concentrate on the organization of the code, rather than actually
|
||
implement a restaurant in code.</p>
|
||
<p>In the restaurant industry, some parts of a restaurant are referred to as
|
||
<em>front of house</em> and others as <em>back of house</em>. Front of house is where
|
||
customers are; this is where hosts seat customers, servers take orders and
|
||
payment, and bartenders make drinks. Back of house is where the chefs and cooks
|
||
work in the kitchen, dishwashers clean up, and managers do administrative work.</p>
|
||
<p>To structure our crate in the same way that a real restaurant works, we can
|
||
organize the functions into nested modules. Create a new library named
|
||
<code>restaurant</code> by running <code>cargo new --lib restaurant</code>; then put the code in
|
||
Listing 7-1 into <em>src/lib.rs</em> to define some modules and function signatures.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>mod front_of_house {
|
||
mod hosting {
|
||
fn add_to_waitlist() {}
|
||
|
||
fn seat_at_table() {}
|
||
}
|
||
|
||
mod serving {
|
||
fn take_order() {}
|
||
|
||
fn serve_order() {}
|
||
|
||
fn take_payment() {}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-1: A <code>front_of_house</code> module containing other
|
||
modules that then contain functions</span></p>
|
||
<p>We define a module by starting with the <code>mod</code> keyword and then specify the
|
||
name of the module (in this case, <code>front_of_house</code>) and place curly brackets
|
||
around the body of the module. Inside modules, we can have other modules, as in
|
||
this case with the modules <code>hosting</code> and <code>serving</code>. Modules can also hold
|
||
definitions for other items, such as structs, enums, constants, traits, or—as
|
||
in Listing 7-1—functions.</p>
|
||
<p>By using modules, we can group related definitions together and name why
|
||
they’re related. Programmers using this code would have an easier time finding
|
||
the definitions they wanted to use because they could navigate the code based
|
||
on the groups rather than having to read through all the definitions.
|
||
Programmers adding new functionality to this code would know where to place the
|
||
code to keep the program organized.</p>
|
||
<p>Earlier, we mentioned that <em>src/main.rs</em> and <em>src/lib.rs</em> are called crate
|
||
roots. The reason for their name is that the contents of either of these two
|
||
files form a module named <code>crate</code> at the root of the crate’s module structure,
|
||
known as the <em>module tree</em>.</p>
|
||
<p>Listing 7-2 shows the module tree for the structure in Listing 7-1.</p>
|
||
<pre><code class="language-text">crate
|
||
└── front_of_house
|
||
├── hosting
|
||
│ ├── add_to_waitlist
|
||
│ └── seat_at_table
|
||
└── serving
|
||
├── take_order
|
||
├── serve_order
|
||
└── take_payment
|
||
</code></pre>
|
||
<p><span class="caption">Listing 7-2: The module tree for the code in Listing
|
||
7-1</span></p>
|
||
<p>This tree shows how some of the modules nest inside one another (for example,
|
||
<code>hosting</code> nests inside <code>front_of_house</code>). The tree also shows that some modules
|
||
are <em>siblings</em> to each other, meaning they’re defined in the same module
|
||
(<code>hosting</code> and <code>serving</code> are defined within <code>front_of_house</code>). To continue the
|
||
family metaphor, if module A is contained inside module B, we say that module A
|
||
is the <em>child</em> of module B and that module B is the <em>parent</em> of module A.
|
||
Notice that the entire module tree is rooted under the implicit module named
|
||
<code>crate</code>.</p>
|
||
<p>The module tree might remind you of the filesystem’s directory tree on your
|
||
computer; this is a very apt comparison! Just like directories in a filesystem,
|
||
you use modules to organize your code. And just like files in a directory, we
|
||
need a way to find our modules.</p>
|
||
<h2><a class="header" href="#paths-for-referring-to-an-item-in-the-module-tree" id="paths-for-referring-to-an-item-in-the-module-tree">Paths for Referring to an Item in the Module Tree</a></h2>
|
||
<p>To show Rust where to find an item in a module tree, we use a path in the same
|
||
way we use a path when navigating a filesystem. If we want to call a function,
|
||
we need to know its path.</p>
|
||
<p>A path can take two forms:</p>
|
||
<ul>
|
||
<li>An <em>absolute path</em> starts from a crate root by using a crate name or a
|
||
literal <code>crate</code>.</li>
|
||
<li>A <em>relative path</em> starts from the current module and uses <code>self</code>, <code>super</code>, or
|
||
an identifier in the current module.</li>
|
||
</ul>
|
||
<p>Both absolute and relative paths are followed by one or more identifiers
|
||
separated by double colons (<code>::</code>).</p>
|
||
<p>Let’s return to the example in Listing 7-1. How do we call the
|
||
<code>add_to_waitlist</code> function? This is the same as asking, what’s the path of the
|
||
<code>add_to_waitlist</code> function? In Listing 7-3, we simplified our code a bit by
|
||
removing some of the modules and functions. We’ll show two ways to call the
|
||
<code>add_to_waitlist</code> function from a new function <code>eat_at_restaurant</code> defined in
|
||
the crate root. The <code>eat_at_restaurant</code> function is part of our library crate’s
|
||
public API, so we mark it with the <code>pub</code> keyword. In the <a href="ch07-03-paths-for-referring-to-an-item-in-the-module-tree.html#exposing-paths-with-the-pub-keyword">“Exposing Paths with
|
||
the <code>pub</code> Keyword”</a><!-- ignore --> section, we’ll go into more detail
|
||
about <code>pub</code>. Note that this example won’t compile just yet; we’ll explain why
|
||
in a bit.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">mod front_of_house {
|
||
mod hosting {
|
||
fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
pub fn eat_at_restaurant() {
|
||
// Absolute path
|
||
crate::front_of_house::hosting::add_to_waitlist();
|
||
|
||
// Relative path
|
||
front_of_house::hosting::add_to_waitlist();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 7-3: Calling the <code>add_to_waitlist</code> function using
|
||
absolute and relative paths</span></p>
|
||
<p>The first time we call the <code>add_to_waitlist</code> function in <code>eat_at_restaurant</code>,
|
||
we use an absolute path. The <code>add_to_waitlist</code> function is defined in the same
|
||
crate as <code>eat_at_restaurant</code>, which means we can use the <code>crate</code> keyword to
|
||
start an absolute path.</p>
|
||
<p>After <code>crate</code>, we include each of the successive modules until we make our way
|
||
to <code>add_to_waitlist</code>. You can imagine a filesystem with the same structure, and
|
||
we’d specify the path <code>/front_of_house/hosting/add_to_waitlist</code> to run the
|
||
<code>add_to_waitlist</code> program; using the <code>crate</code> name to start from the crate root
|
||
is like using <code>/</code> to start from the filesystem root in your shell.</p>
|
||
<p>The second time we call <code>add_to_waitlist</code> in <code>eat_at_restaurant</code>, we use a
|
||
relative path. The path starts with <code>front_of_house</code>, the name of the module
|
||
defined at the same level of the module tree as <code>eat_at_restaurant</code>. Here the
|
||
filesystem equivalent would be using the path
|
||
<code>front_of_house/hosting/add_to_waitlist</code>. Starting with a name means that the
|
||
path is relative.</p>
|
||
<p>Choosing whether to use a relative or absolute path is a decision you’ll make
|
||
based on your project. The decision should depend on whether you’re more likely
|
||
to move item definition code separately from or together with the code that
|
||
uses the item. For example, if we move the <code>front_of_house</code> module and the
|
||
<code>eat_at_restaurant</code> function into a module named <code>customer_experience</code>, we’d
|
||
need to update the absolute path to <code>add_to_waitlist</code>, but the relative path
|
||
would still be valid. However, if we moved the <code>eat_at_restaurant</code> function
|
||
separately into a module named <code>dining</code>, the absolute path to the
|
||
<code>add_to_waitlist</code> call would stay the same, but the relative path would need to
|
||
be updated. Our preference is to specify absolute paths because we’re more
|
||
likely to move code definitions and item calls independently of each other.</p>
|
||
<p>Let’s try to compile Listing 7-3 and find out why it won’t compile yet! The
|
||
error we get is shown in Listing 7-4.</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling restaurant v0.1.0 (file:///projects/restaurant)
|
||
error[E0603]: module `hosting` is private
|
||
--> src/lib.rs:9:28
|
||
|
|
||
9 | crate::front_of_house::hosting::add_to_waitlist();
|
||
| ^^^^^^^
|
||
|
||
error[E0603]: module `hosting` is private
|
||
--> src/lib.rs:12:21
|
||
|
|
||
12 | front_of_house::hosting::add_to_waitlist();
|
||
| ^^^^^^^
|
||
</code></pre>
|
||
<p><span class="caption">Listing 7-4: Compiler errors from building the code in
|
||
Listing 7-3</span></p>
|
||
<p>The error messages say that module <code>hosting</code> is private. In other words, we
|
||
have the correct paths for the <code>hosting</code> module and the <code>add_to_waitlist</code>
|
||
function, but Rust won’t let us use them because it doesn’t have access to the
|
||
private sections.</p>
|
||
<p>Modules aren’t useful only for organizing your code. They also define Rust’s
|
||
<em>privacy boundary</em>: the line that encapsulates the implementation details
|
||
external code isn’t allowed to know about, call, or rely on. So, if you want to
|
||
make an item like a function or struct private, you put it in a module.</p>
|
||
<p>The way privacy works in Rust is that all items (functions, methods, structs,
|
||
enums, modules, and constants) are private by default. Items in a parent module
|
||
can’t use the private items inside child modules, but items in child modules
|
||
can use the items in their ancestor modules. The reason is that child modules
|
||
wrap and hide their implementation details, but the child modules can see the
|
||
context in which they’re defined. To continue with the restaurant metaphor,
|
||
think of the privacy rules as being like the back office of a restaurant: what
|
||
goes on in there is private to restaurant customers, but office managers can
|
||
see and do everything in the restaurant in which they operate.</p>
|
||
<p>Rust chose to have the module system function this way so that hiding inner
|
||
implementation details is the default. That way, you know which parts of the
|
||
inner code you can change without breaking outer code. But you can expose inner
|
||
parts of child modules’ code to outer ancestor modules by using the <code>pub</code>
|
||
keyword to make an item public.</p>
|
||
<h3><a class="header" href="#exposing-paths-with-the-pub-keyword" id="exposing-paths-with-the-pub-keyword">Exposing Paths with the <code>pub</code> Keyword</a></h3>
|
||
<p>Let’s return to the error in Listing 7-4 that told us the <code>hosting</code> module is
|
||
private. We want the <code>eat_at_restaurant</code> function in the parent module to have
|
||
access to the <code>add_to_waitlist</code> function in the child module, so we mark the
|
||
<code>hosting</code> module with the <code>pub</code> keyword, as shown in Listing 7-5.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">mod front_of_house {
|
||
pub mod hosting {
|
||
fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
pub fn eat_at_restaurant() {
|
||
// Absolute path
|
||
crate::front_of_house::hosting::add_to_waitlist();
|
||
|
||
// Relative path
|
||
front_of_house::hosting::add_to_waitlist();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 7-5: Declaring the <code>hosting</code> module as <code>pub</code> to
|
||
use it from <code>eat_at_restaurant</code></span></p>
|
||
<p>Unfortunately, the code in Listing 7-5 still results in an error, as shown in
|
||
Listing 7-6.</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling restaurant v0.1.0 (file:///projects/restaurant)
|
||
error[E0603]: function `add_to_waitlist` is private
|
||
--> src/lib.rs:9:37
|
||
|
|
||
9 | crate::front_of_house::hosting::add_to_waitlist();
|
||
| ^^^^^^^^^^^^^^^
|
||
|
||
error[E0603]: function `add_to_waitlist` is private
|
||
--> src/lib.rs:12:30
|
||
|
|
||
12 | front_of_house::hosting::add_to_waitlist();
|
||
| ^^^^^^^^^^^^^^^
|
||
</code></pre>
|
||
<p><span class="caption">Listing 7-6: Compiler errors from building the code in
|
||
Listing 7-5</span></p>
|
||
<p>What happened? Adding the <code>pub</code> keyword in front of <code>mod hosting</code> makes the
|
||
module public. With this change, if we can access <code>front_of_house</code>, we can
|
||
access <code>hosting</code>. But the <em>contents</em> of <code>hosting</code> are still private; making the
|
||
module public doesn’t make its contents public. The <code>pub</code> keyword on a module
|
||
only lets code in its ancestor modules refer to it.</p>
|
||
<p>The errors in Listing 7-6 say that the <code>add_to_waitlist</code> function is private.
|
||
The privacy rules apply to structs, enums, functions, and methods as well as
|
||
modules.</p>
|
||
<p>Let’s also make the <code>add_to_waitlist</code> function public by adding the <code>pub</code>
|
||
keyword before its definition, as in Listing 7-7.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">mod front_of_house {
|
||
pub mod hosting {
|
||
pub fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
pub fn eat_at_restaurant() {
|
||
// Absolute path
|
||
crate::front_of_house::hosting::add_to_waitlist();
|
||
|
||
// Relative path
|
||
front_of_house::hosting::add_to_waitlist();
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-7: Adding the <code>pub</code> keyword to <code>mod hosting</code>
|
||
and <code>fn add_to_waitlist</code> lets us call the function from
|
||
<code>eat_at_restaurant</code></span></p>
|
||
<p>Now the code will compile! Let’s look at the absolute and the relative path and
|
||
double-check why adding the <code>pub</code> keyword lets us use these paths in
|
||
<code>eat_at_restaurant</code> with respect to the privacy rules.</p>
|
||
<p>In the absolute path, we start with <code>crate</code>, the root of our crate’s module
|
||
tree. Then the <code>front_of_house</code> module is defined in the crate root. The
|
||
<code>front_of_house</code> module isn’t public, but because the <code>eat_at_restaurant</code>
|
||
function is defined in the same module as <code>front_of_house</code> (that is,
|
||
<code>eat_at_restaurant</code> and <code>front_of_house</code> are siblings), we can refer to
|
||
<code>front_of_house</code> from <code>eat_at_restaurant</code>. Next is the <code>hosting</code> module marked
|
||
with <code>pub</code>. We can access the parent module of <code>hosting</code>, so we can access
|
||
<code>hosting</code>. Finally, the <code>add_to_waitlist</code> function is marked with <code>pub</code> and we
|
||
can access its parent module, so this function call works!</p>
|
||
<p>In the relative path, the logic is the same as the absolute path except for the
|
||
first step: rather than starting from the crate root, the path starts from
|
||
<code>front_of_house</code>. The <code>front_of_house</code> module is defined within the same module
|
||
as <code>eat_at_restaurant</code>, so the relative path starting from the module in which
|
||
<code>eat_at_restaurant</code> is defined works. Then, because <code>hosting</code> and
|
||
<code>add_to_waitlist</code> are marked with <code>pub</code>, the rest of the path works, and this
|
||
function call is valid!</p>
|
||
<h3><a class="header" href="#starting-relative-paths-with-super" id="starting-relative-paths-with-super">Starting Relative Paths with <code>super</code></a></h3>
|
||
<p>We can also construct relative paths that begin in the parent module by using
|
||
<code>super</code> at the start of the path. This is like starting a filesystem path with
|
||
the <code>..</code> syntax. Why would we want to do this?</p>
|
||
<p>Consider the code in Listing 7-8 that models the situation in which a chef
|
||
fixes an incorrect order and personally brings it out to the customer. The
|
||
function <code>fix_incorrect_order</code> calls the function <code>serve_order</code> by specifying
|
||
the path to <code>serve_order</code> starting with <code>super</code>:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn serve_order() {}
|
||
|
||
mod back_of_house {
|
||
fn fix_incorrect_order() {
|
||
cook_order();
|
||
super::serve_order();
|
||
}
|
||
|
||
fn cook_order() {}
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-8: Calling a function using a relative path
|
||
starting with <code>super</code></span></p>
|
||
<p>The <code>fix_incorrect_order</code> function is in the <code>back_of_house</code> module, so we can
|
||
use <code>super</code> to go to the parent module of <code>back_of_house</code>, which in this case
|
||
is <code>crate</code>, the root. From there, we look for <code>serve_order</code> and find it.
|
||
Success! We think the <code>back_of_house</code> module and the <code>serve_order</code> function are
|
||
likely to stay in the same relationship to each other and get moved together
|
||
should we decide to reorganize the crate’s module tree. Therefore, we used
|
||
<code>super</code> so we’ll have fewer places to update code in the future if this code
|
||
gets moved to a different module.</p>
|
||
<h3><a class="header" href="#making-structs-and-enums-public" id="making-structs-and-enums-public">Making Structs and Enums Public</a></h3>
|
||
<p>We can also use <code>pub</code> to designate structs and enums as public, but there are a
|
||
few extra details. If we use <code>pub</code> before a struct definition, we make the
|
||
struct public, but the struct’s fields will still be private. We can make each
|
||
field public or not on a case-by-case basis. In Listing 7-9, we’ve defined a
|
||
public <code>back_of_house::Breakfast</code> struct with a public <code>toast</code> field but a
|
||
private <code>seasonal_fruit</code> field. This models the case in a restaurant where the
|
||
customer can pick the type of bread that comes with a meal, but the chef
|
||
decides which fruit accompanies the meal based on what’s in season and in
|
||
stock. The available fruit changes quickly, so customers can’t choose the fruit
|
||
or even see which fruit they’ll get.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>mod back_of_house {
|
||
pub struct Breakfast {
|
||
pub toast: String,
|
||
seasonal_fruit: String,
|
||
}
|
||
|
||
impl Breakfast {
|
||
pub fn summer(toast: &str) -> Breakfast {
|
||
Breakfast {
|
||
toast: String::from(toast),
|
||
seasonal_fruit: String::from("peaches"),
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn eat_at_restaurant() {
|
||
// Order a breakfast in the summer with Rye toast
|
||
let mut meal = back_of_house::Breakfast::summer("Rye");
|
||
// Change our mind about what bread we'd like
|
||
meal.toast = String::from("Wheat");
|
||
println!("I'd like {} toast please", meal.toast);
|
||
|
||
// The next line won't compile if we uncomment it; we're not allowed
|
||
// to see or modify the seasonal fruit that comes with the meal
|
||
// meal.seasonal_fruit = String::from("blueberries");
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-9: A struct with some public fields and some
|
||
private fields</span></p>
|
||
<p>Because the <code>toast</code> field in the <code>back_of_house::Breakfast</code> struct is public,
|
||
in <code>eat_at_restaurant</code> we can write to and read from the <code>toast</code> field using dot
|
||
notation. Notice that we can’t use the <code>seasonal_fruit</code> field in
|
||
<code>eat_at_restaurant</code> because <code>seasonal_fruit</code> is private. Try uncommenting the
|
||
line modifying the <code>seasonal_fruit</code> field value to see what error you get!</p>
|
||
<p>Also, note that because <code>back_of_house::Breakfast</code> has a private field, the
|
||
struct needs to provide a public associated function that constructs an
|
||
instance of <code>Breakfast</code> (we’ve named it <code>summer</code> here). If <code>Breakfast</code> didn’t
|
||
have such a function, we couldn’t create an instance of <code>Breakfast</code> in
|
||
<code>eat_at_restaurant</code> because we couldn’t set the value of the private
|
||
<code>seasonal_fruit</code> field in <code>eat_at_restaurant</code>.</p>
|
||
<p>In contrast, if we make an enum public, all of its variants are then public. We
|
||
only need the <code>pub</code> before the <code>enum</code> keyword, as shown in Listing 7-10.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>mod back_of_house {
|
||
pub enum Appetizer {
|
||
Soup,
|
||
Salad,
|
||
}
|
||
}
|
||
|
||
pub fn eat_at_restaurant() {
|
||
let order1 = back_of_house::Appetizer::Soup;
|
||
let order2 = back_of_house::Appetizer::Salad;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-10: Designating an enum as public makes all its
|
||
variants public</span></p>
|
||
<p>Because we made the <code>Appetizer</code> enum public, we can use the <code>Soup</code> and <code>Salad</code>
|
||
variants in <code>eat_at_restaurant</code>. Enums aren’t very useful unless their variants
|
||
are public; it would be annoying to have to annotate all enum variants with
|
||
<code>pub</code> in every case, so the default for enum variants is to be public. Structs
|
||
are often useful without their fields being public, so struct fields follow the
|
||
general rule of everything being private by default unless annotated with <code>pub</code>.</p>
|
||
<p>There’s one more situation involving <code>pub</code> that we haven’t covered, and that is
|
||
our last module system feature: the <code>use</code> keyword. We’ll cover <code>use</code> by itself
|
||
first, and then we’ll show how to combine <code>pub</code> and <code>use</code>.</p>
|
||
<h2><a class="header" href="#bringing-paths-into-scope-with-the-use-keyword" id="bringing-paths-into-scope-with-the-use-keyword">Bringing Paths into Scope with the <code>use</code> Keyword</a></h2>
|
||
<p>It might seem like the paths we’ve written to call functions so far are
|
||
inconveniently long and repetitive. For example, in Listing 7-7, whether we
|
||
chose the absolute or relative path to the <code>add_to_waitlist</code> function, every
|
||
time we wanted to call <code>add_to_waitlist</code> we had to specify <code>front_of_house</code> and
|
||
<code>hosting</code> too. Fortunately, there’s a way to simplify this process. We can
|
||
bring a path into a scope once and then call the items in that path as if
|
||
they’re local items with the <code>use</code> keyword.</p>
|
||
<p>In Listing 7-11, we bring the <code>crate::front_of_house::hosting</code> module into the
|
||
scope of the <code>eat_at_restaurant</code> function so we only have to specify
|
||
<code>hosting::add_to_waitlist</code> to call the <code>add_to_waitlist</code> function in
|
||
<code>eat_at_restaurant</code>.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">mod front_of_house {
|
||
pub mod hosting {
|
||
pub fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
use crate::front_of_house::hosting;
|
||
|
||
pub fn eat_at_restaurant() {
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-11: Bringing a module into scope with
|
||
<code>use</code></span></p>
|
||
<p>Adding <code>use</code> and a path in a scope is similar to creating a symbolic link in
|
||
the filesystem. By adding <code>use crate::front_of_house::hosting</code> in the crate
|
||
root, <code>hosting</code> is now a valid name in that scope, just as though the <code>hosting</code>
|
||
module had been defined in the crate root. Paths brought into scope with <code>use</code>
|
||
also check privacy, like any other paths.</p>
|
||
<p>You can also bring an item into scope with <code>use</code> and a relative path. Listing
|
||
7-12 shows how to specify a relative path to get the same behavior as in
|
||
Listing 7-11.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">mod front_of_house {
|
||
pub mod hosting {
|
||
pub fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
use front_of_house::hosting;
|
||
|
||
pub fn eat_at_restaurant() {
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-12: Bringing a module into scope with <code>use</code> and
|
||
a relative path</span></p>
|
||
<h3><a class="header" href="#creating-idiomatic-use-paths" id="creating-idiomatic-use-paths">Creating Idiomatic <code>use</code> Paths</a></h3>
|
||
<p>In Listing 7-11, you might have wondered why we specified <code>use crate::front_of_house::hosting</code> and then called <code>hosting::add_to_waitlist</code> in
|
||
<code>eat_at_restaurant</code> rather than specifying the <code>use</code> path all the way out to
|
||
the <code>add_to_waitlist</code> function to achieve the same result, as in Listing 7-13.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">mod front_of_house {
|
||
pub mod hosting {
|
||
pub fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
use crate::front_of_house::hosting::add_to_waitlist;
|
||
|
||
pub fn eat_at_restaurant() {
|
||
add_to_waitlist();
|
||
add_to_waitlist();
|
||
add_to_waitlist();
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-13: Bringing the <code>add_to_waitlist</code> function
|
||
into scope with <code>use</code>, which is unidiomatic</span></p>
|
||
<p>Although both Listing 7-11 and 7-13 accomplish the same task, Listing 7-11 is
|
||
the idiomatic way to bring a function into scope with <code>use</code>. Bringing the
|
||
function’s parent module into scope with <code>use</code> so we have to specify the parent
|
||
module when calling the function makes it clear that the function isn’t locally
|
||
defined while still minimizing repetition of the full path. The code in Listing
|
||
7-13 is unclear as to where <code>add_to_waitlist</code> is defined.</p>
|
||
<p>On the other hand, when bringing in structs, enums, and other items with <code>use</code>,
|
||
it’s idiomatic to specify the full path. Listing 7-14 shows the idiomatic way
|
||
to bring the standard library’s <code>HashMap</code> struct into the scope of a binary
|
||
crate.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::collections::HashMap;
|
||
|
||
fn main() {
|
||
let mut map = HashMap::new();
|
||
map.insert(1, 2);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 7-14: Bringing <code>HashMap</code> into scope in an
|
||
idiomatic way</span></p>
|
||
<p>There’s no strong reason behind this idiom: it’s just the convention that has
|
||
emerged, and folks have gotten used to reading and writing Rust code this way.</p>
|
||
<p>The exception to this idiom is if we’re bringing two items with the same name
|
||
into scope with <code>use</code> statements, because Rust doesn’t allow that. Listing 7-15
|
||
shows how to bring two <code>Result</code> types into scope that have the same name but
|
||
different parent modules and how to refer to them.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::fmt;
|
||
use std::io;
|
||
|
||
fn function1() -> fmt::Result {
|
||
// --snip--
|
||
<span class="boring"> Ok(())
|
||
</span>}
|
||
|
||
fn function2() -&gt; io::Result&lt;()&gt; {
|
||
// --snip--
|
||
<span class="boring"> Ok(())
|
||
</span>}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-15: Bringing two types with the same name into
|
||
the same scope requires using their parent modules</span></p>
|
||
<p>As you can see, using the parent modules distinguishes the two <code>Result</code> types.
|
||
If instead we specified <code>use std::fmt::Result</code> and <code>use std::io::Result</code>, we’d
|
||
have two <code>Result</code> types in the same scope and Rust wouldn’t know which one we
|
||
meant when we used <code>Result</code>.</p>
|
||
<h3><a class="header" href="#providing-new-names-with-the-as-keyword" id="providing-new-names-with-the-as-keyword">Providing New Names with the <code>as</code> Keyword</a></h3>
|
||
<p>There’s another solution to the problem of bringing two types of the same name
|
||
into the same scope with <code>use</code>: after the path, we can specify <code>as</code> and a new
|
||
local name, or alias, for the type. Listing 7-16 shows another way to write the
|
||
code in Listing 7-15 by renaming one of the two <code>Result</code> types using <code>as</code>.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::fmt::Result;
|
||
use std::io::Result as IoResult;
|
||
|
||
fn function1() -> Result {
|
||
// --snip--
|
||
<span class="boring"> Ok(())
|
||
</span>}
|
||
|
||
fn function2() -&gt; IoResult&lt;()&gt; {
|
||
// --snip--
|
||
<span class="boring"> Ok(())
|
||
</span>}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-16: Renaming a type when it’s brought into
|
||
scope with the <code>as</code> keyword</span></p>
|
||
<p>In the second <code>use</code> statement, we chose the new name <code>IoResult</code> for the
|
||
<code>std::io::Result</code> type, which won’t conflict with the <code>Result</code> from <code>std::fmt</code>
|
||
that we’ve also brought into scope. Listing 7-15 and Listing 7-16 are
|
||
considered idiomatic, so the choice is up to you!</p>
|
||
<h3><a class="header" href="#re-exporting-names-with-pub-use" id="re-exporting-names-with-pub-use">Re-exporting Names with <code>pub use</code></a></h3>
|
||
<p>When we bring a name into scope with the <code>use</code> keyword, the name available in
|
||
the new scope is private. To enable the code that calls our code to refer to
|
||
that name as if it had been defined in that code’s scope, we can combine <code>pub</code>
|
||
and <code>use</code>. This technique is called <em>re-exporting</em> because we’re bringing
|
||
an item into scope but also making that item available for others to bring into
|
||
their scope.</p>
|
||
<p>Listing 7-17 shows the code in Listing 7-11 with <code>use</code> in the root module
|
||
changed to <code>pub use</code>.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">mod front_of_house {
|
||
pub mod hosting {
|
||
pub fn add_to_waitlist() {}
|
||
}
|
||
}
|
||
|
||
pub use crate::front_of_house::hosting;
|
||
|
||
pub fn eat_at_restaurant() {
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-17: Making a name available for any code to use
|
||
from a new scope with <code>pub use</code></span></p>
|
||
<p>By using <code>pub use</code>, external code can now call the <code>add_to_waitlist</code> function
|
||
using <code>hosting::add_to_waitlist</code>. If we hadn’t specified <code>pub use</code>, the
|
||
<code>eat_at_restaurant</code> function could call <code>hosting::add_to_waitlist</code> in its
|
||
scope, but external code couldn’t take advantage of this new path.</p>
|
||
<p>Re-exporting is useful when the internal structure of your code is different
|
||
from how programmers calling your code would think about the domain. For
|
||
example, in this restaurant metaphor, the people running the restaurant think
|
||
about “front of house” and “back of house.” But customers visiting a restaurant
|
||
probably won’t think about the parts of the restaurant in those terms. With
|
||
<code>pub use</code>, we can write our code with one structure but expose a different
|
||
structure. Doing so makes our library well organized for programmers working on
|
||
the library and programmers calling the library.</p>
|
||
<h3><a class="header" href="#using-external-packages" id="using-external-packages">Using External Packages</a></h3>
|
||
<p>In Chapter 2, we programmed a guessing game project that used an external
|
||
package called <code>rand</code> to get random numbers. To use <code>rand</code> in our project, we
|
||
added this line to <em>Cargo.toml</em>:</p>
|
||
<!-- When updating the version of `rand` used, also update the version of
|
||
`rand` used in these files so they all match:
|
||
* ch02-00-guessing-game-tutorial.md
|
||
* ch14-03-cargo-workspaces.md
|
||
-->
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[dependencies]
|
||
rand = "0.5.5"
|
||
</code></pre>
|
||
<p>Adding <code>rand</code> as a dependency in <em>Cargo.toml</em> tells Cargo to download the
|
||
<code>rand</code> package and any dependencies from <a href="https://crates.io/">crates.io</a> and
|
||
make <code>rand</code> available to our project.</p>
|
||
<p>Then, to bring <code>rand</code> definitions into the scope of our package, we added a
|
||
<code>use</code> line starting with the name of the package, <code>rand</code>, and listed the items
|
||
we wanted to bring into scope. Recall that in the <a href="ch02-00-guessing-game-tutorial.html#generating-a-random-number">“Generating a Random
|
||
Number”</a><!-- ignore --> section in Chapter 2, we brought the <code>Rng</code> trait
|
||
into scope and called the <code>rand::thread_rng</code> function:</p>
|
||
<pre><code class="language-rust ignore">use rand::Rng;
|
||
fn main() {
|
||
let secret_number = rand::thread_rng().gen_range(1, 101);
|
||
}
|
||
</code></pre>
|
||
<p>Members of the Rust community have made many packages available at
|
||
<a href="https://crates.io/">crates.io</a>, and pulling any of them into your package
|
||
involves these same steps: listing them in your package’s <em>Cargo.toml</em> file and
|
||
using <code>use</code> to bring items into scope.</p>
|
||
<p>Note that the standard library (<code>std</code>) is also a crate that’s external to our
|
||
package. Because the standard library is shipped with the Rust language, we
|
||
don’t need to change <em>Cargo.toml</em> to include <code>std</code>. But we do need to refer to
|
||
it with <code>use</code> to bring items from there into our package’s scope. For example,
|
||
with <code>HashMap</code> we would use this line:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This is an absolute path starting with <code>std</code>, the name of the standard library
|
||
crate.</p>
|
||
<h3><a class="header" href="#using-nested-paths-to-clean-up-large-use-lists" id="using-nested-paths-to-clean-up-large-use-lists">Using Nested Paths to Clean Up Large <code>use</code> Lists</a></h3>
|
||
<p>If we’re using multiple items defined in the same package or same module,
|
||
listing each item on its own line can take up a lot of vertical space in our
|
||
files. For example, these two <code>use</code> statements we had in the Guessing Game in
|
||
Listing 2-4 bring items from <code>std</code> into scope:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io;
|
||
use std::cmp::Ordering;
|
||
// ---snip---
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Instead, we can use nested paths to bring the same items into scope in one
|
||
line. We do this by specifying the common part of the path, followed by two
|
||
colons, and then curly brackets around a list of the parts of the paths that
|
||
differ, as shown in Listing 7-18.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::{cmp::Ordering, io};
|
||
// ---snip---
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-18: Specifying a nested path to bring multiple
|
||
items with the same prefix into scope</span></p>
|
||
<p>In bigger programs, bringing many items into scope from the same package or
|
||
module using nested paths can reduce the number of separate <code>use</code> statements
|
||
needed by a lot!</p>
|
||
<p>We can use a nested path at any level in a path, which is useful when combining
|
||
two <code>use</code> statements that share a subpath. For example, Listing 7-19 shows two
|
||
<code>use</code> statements: one that brings <code>std::io</code> into scope and one that brings
|
||
<code>std::io::Write</code> into scope.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io;
|
||
use std::io::Write;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-19: Two <code>use</code> statements where one is a subpath
|
||
of the other</span></p>
|
||
<p>The common part of these two paths is <code>std::io</code>, and that’s the complete first
|
||
path. To merge these two paths into one <code>use</code> statement, we can use <code>self</code> in
|
||
the nested path, as shown in Listing 7-20.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io::{self, Write};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-20: Combining the paths in Listing 7-19 into
|
||
one <code>use</code> statement</span></p>
|
||
<p>This line brings <code>std::io</code> and <code>std::io::Write</code> into scope.</p>
|
||
<h3><a class="header" href="#the-glob-operator" id="the-glob-operator">The Glob Operator</a></h3>
|
||
<p>If we want to bring <em>all</em> public items defined in a path into scope, we can
|
||
specify that path followed by <code>*</code>, the glob operator:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::*;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This <code>use</code> statement brings all public items defined in <code>std::collections</code> into
|
||
the current scope. Be careful when using the glob operator! Glob can make it
|
||
harder to tell what names are in scope and where a name used in your program
|
||
was defined.</p>
|
||
<p>The glob operator is often used when testing to bring everything under test
|
||
into the <code>tests</code> module; we’ll talk about that in the <a href="ch11-01-writing-tests.html#how-to-write-tests">“How to Write
|
||
Tests”</a><!-- ignore --> section in Chapter 11. The glob operator
|
||
is also sometimes used as part of the prelude pattern: see <a href="../std/prelude/index.html#other-preludes">the standard
|
||
library documentation</a><!-- ignore -->
|
||
for more information on that pattern.</p>
|
||
<h2><a class="header" href="#separating-modules-into-different-files" id="separating-modules-into-different-files">Separating Modules into Different Files</a></h2>
|
||
<p>So far, all the examples in this chapter defined multiple modules in one file.
|
||
When modules get large, you might want to move their definitions to a separate
|
||
file to make the code easier to navigate.</p>
|
||
<p>For example, let’s start from the code in Listing 7-17 and move the
|
||
<code>front_of_house</code> module to its own file <em>src/front_of_house.rs</em> by changing the
|
||
crate root file so it contains the code shown in Listing 7-21. In this case,
|
||
the crate root file is <em>src/lib.rs</em>, but this procedure also works with binary
|
||
crates whose crate root file is <em>src/main.rs</em>.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">mod front_of_house;
|
||
|
||
pub use crate::front_of_house::hosting;
|
||
|
||
pub fn eat_at_restaurant() {
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
hosting::add_to_waitlist();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 7-21: Declaring the <code>front_of_house</code> module whose
|
||
body will be in <em>src/front_of_house.rs</em></span></p>
|
||
<p>And <em>src/front_of_house.rs</em> gets the definitions from the body of the
|
||
<code>front_of_house</code> module, as shown in Listing 7-22.</p>
|
||
<p><span class="filename">Filename: src/front_of_house.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub mod hosting {
|
||
pub fn add_to_waitlist() {}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 7-22: Definitions inside the <code>front_of_house</code>
|
||
module in <em>src/front_of_house.rs</em></span></p>
|
||
<p>Using a semicolon after <code>mod front_of_house</code> rather than using a block tells
|
||
Rust to load the contents of the module from another file with the same name as
|
||
the module. To continue with our example and extract the <code>hosting</code> module to
|
||
its own file as well, we change <em>src/front_of_house.rs</em> to contain only the
|
||
declaration of the <code>hosting</code> module:</p>
|
||
<p><span class="filename">Filename: src/front_of_house.rs</span></p>
|
||
<pre><code>pub mod hosting;
|
||
</code></pre>
|
||
<p>Then we create a <em>src/front_of_house</em> directory and a file
|
||
<em>src/front_of_house/hosting.rs</em> to contain the definitions made in the
|
||
<code>hosting</code> module:</p>
|
||
<p><span class="filename">Filename: src/front_of_house/hosting.rs</span></p>
|
||
<pre><code>pub fn add_to_waitlist() {}
|
||
</code></pre>
|
||
<p>The module tree remains the same, and the function calls in <code>eat_at_restaurant</code>
|
||
will work without any modification, even though the definitions live in
|
||
different files. This technique lets you move modules to new files as they grow
|
||
in size.</p>
|
||
<p>Note that the <code>pub use crate::front_of_house::hosting</code> statement in
|
||
<em>src/lib.rs</em> also hasn’t changed, nor does <code>use</code> have any impact on what files
|
||
are compiled as part of the crate. The <code>mod</code> keyword declares modules, and Rust
|
||
looks in a file with the same name as the module for the code that goes into
|
||
that module.</p>
|
||
<h2><a class="header" href="#summary-6" id="summary-6">Summary</a></h2>
|
||
<p>Rust lets you split a package into multiple crates and a crate into modules
|
||
so you can refer to items defined in one module from another module. You can do
|
||
this by specifying absolute or relative paths. These paths can be brought into
|
||
scope with a <code>use</code> statement so you can use a shorter path for multiple uses of
|
||
the item in that scope. Module code is private by default, but you can make
|
||
definitions public by adding the <code>pub</code> keyword.</p>
|
||
<p>In the next chapter, we’ll look at some collection data structures in the
|
||
standard library that you can use in your neatly organized code.</p>
|
||
<h1><a class="header" href="#common-collections" id="common-collections">Common Collections</a></h1>
|
||
<p>Rust’s standard library includes a number of very useful data structures called
|
||
<em>collections</em>. Most other data types represent one specific value, but
|
||
collections can contain multiple values. Unlike the built-in array and tuple
|
||
types, the data these collections point to is stored on the heap, which means
|
||
the amount of data does not need to be known at compile time and can grow or
|
||
shrink as the program runs. Each kind of collection has different capabilities
|
||
and costs, and choosing an appropriate one for your current situation is a
|
||
skill you’ll develop over time. In this chapter, we’ll discuss three
|
||
collections that are used very often in Rust programs:</p>
|
||
<ul>
|
||
<li>A <em>vector</em> allows you to store a variable number of values next to each other.</li>
|
||
<li>A <em>string</em> is a collection of characters. We’ve mentioned the <code>String</code> type
|
||
previously, but in this chapter we’ll talk about it in depth.</li>
|
||
<li>A <em>hash map</em> allows you to associate a value with a particular key. It’s a
|
||
particular implementation of the more general data structure called a <em>map</em>.</li>
|
||
</ul>
|
||
<p>To learn about the other kinds of collections provided by the standard library,
|
||
see <a href="../std/collections/index.html">the documentation</a>.</p>
|
||
<p>We’ll discuss how to create and update vectors, strings, and hash maps, as well
|
||
as what makes each special.</p>
|
||
<h2><a class="header" href="#storing-lists-of-values-with-vectors" id="storing-lists-of-values-with-vectors">Storing Lists of Values with Vectors</a></h2>
|
||
<p>The first collection type we’ll look at is <code>Vec&lt;T&gt;</code>, also known as a <em>vector</em>.
|
||
Vectors allow you to store more than one value in a single data structure that
|
||
puts all the values next to each other in memory. Vectors can only store values
|
||
of the same type. They are useful when you have a list of items, such as the
|
||
lines of text in a file or the prices of items in a shopping cart.</p>
|
||
<h3><a class="header" href="#creating-a-new-vector" id="creating-a-new-vector">Creating a New Vector</a></h3>
|
||
<p>To create a new, empty vector, we can call the <code>Vec::new</code> function, as shown in
|
||
Listing 8-1.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v: Vec&lt;i32&gt; = Vec::new();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-1: Creating a new, empty vector to hold values
|
||
of type <code>i32</code></span></p>
|
||
<p>Note that we added a type annotation here. Because we aren’t inserting any
|
||
values into this vector, Rust doesn’t know what kind of elements we intend to
|
||
store. This is an important point. Vectors are implemented using generics;
|
||
we’ll cover how to use generics with your own types in Chapter 10. For now,
|
||
know that the <code>Vec&lt;T&gt;</code> type provided by the standard library can hold any type,
|
||
and when a specific vector holds a specific type, the type is specified within
|
||
angle brackets. In Listing 8-1, we’ve told Rust that the <code>Vec&lt;T&gt;</code> in <code>v</code> will
|
||
hold elements of the <code>i32</code> type.</p>
|
||
<p>In more realistic code, Rust can often infer the type of value you want to
|
||
store once you insert values, so you rarely need to do this type annotation.
|
||
It’s more common to create a <code>Vec&lt;T&gt;</code> that has initial values, and Rust
|
||
provides the <code>vec!</code> macro for convenience. The macro will create a new vector
|
||
that holds the values you give it. Listing 8-2 creates a new <code>Vec&lt;i32&gt;</code> that
|
||
holds the values <code>1</code>, <code>2</code>, and <code>3</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v = vec![1, 2, 3];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-2: Creating a new vector containing
|
||
values</span></p>
|
||
<p>Because we’ve given initial <code>i32</code> values, Rust can infer that the type of <code>v</code>
|
||
is <code>Vec&lt;i32&gt;</code>, and the type annotation isn’t necessary. Next, we’ll look at how
|
||
to modify a vector.</p>
|
||
<h3><a class="header" href="#updating-a-vector" id="updating-a-vector">Updating a Vector</a></h3>
|
||
<p>To create a vector and then add elements to it, we can use the <code>push</code> method,
|
||
as shown in Listing 8-3.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut v = Vec::new();
|
||
|
||
v.push(5);
|
||
v.push(6);
|
||
v.push(7);
|
||
v.push(8);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-3: Using the <code>push</code> method to add values to a
|
||
vector</span></p>
|
||
<p>As with any variable, if we want to be able to change its value, we need to
|
||
make it mutable using the <code>mut</code> keyword, as discussed in Chapter 3. The numbers
|
||
we place inside are all of type <code>i32</code>, and Rust infers this from the data, so
|
||
we don’t need the <code>Vec&lt;i32&gt;</code> annotation.</p>
|
||
<h3><a class="header" href="#dropping-a-vector-drops-its-elements" id="dropping-a-vector-drops-its-elements">Dropping a Vector Drops Its Elements</a></h3>
|
||
<p>Like any other <code>struct</code>, a vector is freed when it goes out of scope, as
|
||
annotated in Listing 8-4.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>{
|
||
let v = vec![1, 2, 3, 4];
|
||
|
||
// do stuff with v
|
||
|
||
} // &lt;- v goes out of scope and is freed here
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-4: Showing where the vector and its elements
|
||
are dropped</span></p>
|
||
<p>When the vector gets dropped, all of its contents are also dropped, meaning
|
||
those integers it holds will be cleaned up. This may seem like a
|
||
straightforward point but can get a bit more complicated when you start to
|
||
introduce references to the elements of the vector. Let’s tackle that next!</p>
|
||
<h3><a class="header" href="#reading-elements-of-vectors" id="reading-elements-of-vectors">Reading Elements of Vectors</a></h3>
|
||
<p>Now that you know how to create, update, and destroy vectors, knowing how to
|
||
read their contents is a good next step. There are two ways to reference a
|
||
value stored in a vector. In the examples, we’ve annotated the types of the
|
||
values that are returned from these functions for extra clarity.</p>
|
||
<p>Listing 8-5 shows both methods of accessing a value in a vector, either with
|
||
indexing syntax or the <code>get</code> method.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v = vec![1, 2, 3, 4, 5];
|
||
|
||
let third: &i32 = &v[2];
|
||
println!("The third element is {}", third);
|
||
|
||
match v.get(2) {
|
||
Some(third) => println!("The third element is {}", third),
|
||
None => println!("There is no third element."),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-5: Using indexing syntax or the <code>get</code> method to
|
||
access an item in a vector</span></p>
|
||
<p>Note two details here. First, we use the index value of <code>2</code> to get the third
|
||
element: vectors are indexed by number, starting at zero. Second, the two ways
|
||
to get the third element are by using <code>&</code> and <code>[]</code>, which gives us a reference,
|
||
or by using the <code>get</code> method with the index passed as an argument, which gives
|
||
us an <code>Option&lt;&amp;T&gt;</code>.</p>
|
||
<p>Rust has two ways to reference an element so you can choose how the program
|
||
behaves when you try to use an index value that the vector doesn’t have an
|
||
element for. As an example, let’s see what a program will do if it has a vector
|
||
that holds five elements and then tries to access an element at index 100, as
|
||
shown in Listing 8-6.</p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic panics">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v = vec![1, 2, 3, 4, 5];
|
||
|
||
let does_not_exist = &v[100];
|
||
let does_not_exist = v.get(100);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-6: Attempting to access the element at index
|
||
100 in a vector containing five elements</span></p>
|
||
<p>When we run this code, the first <code>[]</code> method will cause the program to panic
|
||
because it references a nonexistent element. This method is best used when you
|
||
want your program to crash if there’s an attempt to access an element past the
|
||
end of the vector.</p>
|
||
<p>When the <code>get</code> method is passed an index that is outside the vector, it returns
|
||
<code>None</code> without panicking. You would use this method if accessing an element
|
||
beyond the range of the vector happens occasionally under normal circumstances.
|
||
Your code will then have logic to handle having either <code>Some(&element)</code> or
|
||
<code>None</code>, as discussed in Chapter 6. For example, the index could be coming from
|
||
a person entering a number. If they accidentally enter a number that’s too
|
||
large and the program gets a <code>None</code> value, you could tell the user how many
|
||
items are in the current vector and give them another chance to enter a valid
|
||
value. That would be more user-friendly than crashing the program due to a typo!</p>
|
||
<p>When the program has a valid reference, the borrow checker enforces the
|
||
ownership and borrowing rules (covered in Chapter 4) to ensure this reference
|
||
and any other references to the contents of the vector remain valid. Recall the
|
||
rule that states you can’t have mutable and immutable references in the same
|
||
scope. That rule applies in Listing 8-7, where we hold an immutable reference to
|
||
the first element in a vector and try to add an element to the end, which won’t
|
||
work.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let mut v = vec![1, 2, 3, 4, 5];
|
||
|
||
let first = &v[0];
|
||
|
||
v.push(6);
|
||
|
||
println!("The first element is: {}", first);
|
||
</code></pre>
|
||
<p><span class="caption">Listing 8-7: Attempting to add an element to a vector
|
||
while holding a reference to an item</span></p>
|
||
<p>Compiling this code will result in this error:</p>
|
||
<pre><code class="language-text">error[E0502]: cannot borrow `v` as mutable because it is also borrowed as immutable
|
||
--> src/main.rs:6:5
|
||
|
|
||
4 | let first = &v[0];
|
||
| - immutable borrow occurs here
|
||
5 |
|
||
6 | v.push(6);
|
||
| ^^^^^^^^^ mutable borrow occurs here
|
||
7 |
|
||
8 | println!("The first element is: {}", first);
|
||
| ----- immutable borrow later used here
|
||
</code></pre>
|
||
<p>The code in Listing 8-7 might look like it should work: why should a reference
|
||
to the first element care about what changes at the end of the vector? This
|
||
error is due to the way vectors work: adding a new element onto the end of the
|
||
vector might require allocating new memory and copying the old elements to the
|
||
new space, if there isn’t enough room to put all the elements next to each
|
||
other where the vector currently is. In that case, the reference to the first
|
||
element would be pointing to deallocated memory. The borrowing rules prevent
|
||
programs from ending up in that situation.</p>
|
||
<blockquote>
|
||
<p>Note: For more on the implementation details of the <code>Vec&lt;T&gt;</code> type, see “The
|
||
Rustonomicon” at <a href="https://doc.rust-lang.org/stable/nomicon/vec.html">https://doc.rust-lang.org/stable/nomicon/vec.html</a>.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#iterating-over-the-values-in-a-vector" id="iterating-over-the-values-in-a-vector">Iterating over the Values in a Vector</a></h3>
|
||
<p>If we want to access each element in a vector in turn, we can iterate through
|
||
all of the elements rather than use indices to access one at a time. Listing
|
||
8-8 shows how to use a <code>for</code> loop to get immutable references to each element
|
||
in a vector of <code>i32</code> values and print them.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v = vec![100, 32, 57];
|
||
for i in &v {
|
||
println!("{}", i);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-8: Printing each element in a vector by
|
||
iterating over the elements using a <code>for</code> loop</span></p>
|
||
<p>We can also iterate over mutable references to each element in a mutable vector
|
||
in order to make changes to all the elements. The <code>for</code> loop in Listing 8-9
|
||
will add <code>50</code> to each element.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut v = vec![100, 32, 57];
|
||
for i in &mut v {
|
||
*i += 50;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-9: Iterating over mutable references to
|
||
elements in a vector</span></p>
|
||
<p>To change the value that the mutable reference refers to, we have to use the
|
||
dereference operator (<code>*</code>) to get to the value in <code>i</code> before we can use the
|
||
<code>+=</code> operator. We’ll talk more about the dereference operator in the
|
||
<a href="ch15-02-deref.html#following-the-pointer-to-the-value-with-the-dereference-operator">“Following the Pointer to the Value with the Dereference Operator”</a>
|
||
section of Chapter 15.</p>
|
||
<h3><a class="header" href="#using-an-enum-to-store-multiple-types" id="using-an-enum-to-store-multiple-types">Using an Enum to Store Multiple Types</a></h3>
|
||
<p>At the beginning of this chapter, we said that vectors can only store values
|
||
that are the same type. This can be inconvenient; there are definitely use
|
||
cases for needing to store a list of items of different types. Fortunately, the
|
||
variants of an enum are defined under the same enum type, so when we need to
|
||
store elements of a different type in a vector, we can define and use an enum!</p>
|
||
<p>For example, say we want to get values from a row in a spreadsheet in which
|
||
some of the columns in the row contain integers, some floating-point numbers,
|
||
and some strings. We can define an enum whose variants will hold the different
|
||
value types, and then all the enum variants will be considered the same type:
|
||
that of the enum. Then we can create a vector that holds that enum and so,
|
||
ultimately, holds different types. We’ve demonstrated this in Listing 8-10.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum SpreadsheetCell {
|
||
Int(i32),
|
||
Float(f64),
|
||
Text(String),
|
||
}
|
||
|
||
let row = vec![
|
||
SpreadsheetCell::Int(3),
|
||
SpreadsheetCell::Text(String::from("blue")),
|
||
SpreadsheetCell::Float(10.12),
|
||
];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-10: Defining an <code>enum</code> to store values of
|
||
different types in one vector</span></p>
|
||
<p>Rust needs to know what types will be in the vector at compile time so it knows
|
||
exactly how much memory on the heap will be needed to store each element. A
|
||
secondary advantage is that we can be explicit about what types are allowed in
|
||
this vector. If Rust allowed a vector to hold any type, there would be a chance
|
||
that one or more of the types would cause errors with the operations performed
|
||
on the elements of the vector. Using an enum plus a <code>match</code> expression means
|
||
that Rust will ensure at compile time that every possible case is handled, as
|
||
discussed in Chapter 6.</p>
|
||
<p>When you’re writing a program, if you don’t know the exhaustive set of types
|
||
the program will get at runtime to store in a vector, the enum technique won’t
|
||
work. Instead, you can use a trait object, which we’ll cover in Chapter 17.</p>
|
||
<p>Now that we’ve discussed some of the most common ways to use vectors, be sure
|
||
to review the API documentation for all the many useful methods defined on
|
||
<code>Vec&lt;T&gt;</code> by the standard library. For example, in addition to <code>push</code>, a <code>pop</code>
|
||
method removes and returns the last element. Let’s move on to the next
|
||
collection type: <code>String</code>!</p>
|
||
<h2><a class="header" href="#storing-utf-8-encoded-text-with-strings" id="storing-utf-8-encoded-text-with-strings">Storing UTF-8 Encoded Text with Strings</a></h2>
|
||
<p>We talked about strings in Chapter 4, but we’ll look at them in more depth now.
|
||
New Rustaceans commonly get stuck on strings for a combination of three
|
||
reasons: Rust’s propensity for exposing possible errors, strings being a more
|
||
complicated data structure than many programmers give them credit for, and
|
||
UTF-8. These factors combine in a way that can seem difficult when you’re
|
||
coming from other programming languages.</p>
|
||
<p>It’s useful to discuss strings in the context of collections because strings
|
||
are implemented as a collection of bytes, plus some methods to provide useful
|
||
functionality when those bytes are interpreted as text. In this section, we’ll
|
||
talk about the operations on <code>String</code> that every collection type has, such as
|
||
creating, updating, and reading. We’ll also discuss the ways in which <code>String</code>
|
||
is different from the other collections, namely how indexing into a <code>String</code> is
|
||
complicated by the differences between how people and computers interpret
|
||
<code>String</code> data.</p>
|
||
<h3><a class="header" href="#what-is-a-string" id="what-is-a-string">What Is a String?</a></h3>
|
||
<p>We’ll first define what we mean by the term <em>string</em>. Rust has only one string
|
||
type in the core language, which is the string slice <code>str</code> that is usually seen
|
||
in its borrowed form <code>&str</code>. In Chapter 4, we talked about <em>string slices</em>,
|
||
which are references to some UTF-8 encoded string data stored elsewhere. String
|
||
literals, for example, are stored in the program’s binary and are therefore
|
||
string slices.</p>
|
||
<p>The <code>String</code> type, which is provided by Rust’s standard library rather than
|
||
coded into the core language, is a growable, mutable, owned, UTF-8 encoded
|
||
string type. When Rustaceans refer to “strings” in Rust, they usually mean the
|
||
<code>String</code> and the string slice <code>&str</code> types, not just one of those types.
|
||
Although this section is largely about <code>String</code>, both types are used heavily in
|
||
Rust’s standard library, and both <code>String</code> and string slices are UTF-8 encoded.</p>
|
||
<p>Rust’s standard library also includes a number of other string types, such as
|
||
<code>OsString</code>, <code>OsStr</code>, <code>CString</code>, and <code>CStr</code>. Library crates can provide even
|
||
more options for storing string data. See how those names all end in <code>String</code>
|
||
or <code>Str</code>? They refer to owned and borrowed variants, just like the <code>String</code> and
|
||
<code>str</code> types you’ve seen previously. These string types can store text in
|
||
different encodings or be represented in memory in a different way, for
|
||
example. We won’t discuss these other string types in this chapter; see their
|
||
API documentation for more about how to use them and when each is appropriate.</p>
|
||
<h3><a class="header" href="#creating-a-new-string" id="creating-a-new-string">Creating a New String</a></h3>
|
||
<p>Many of the same operations available with <code>Vec&lt;T&gt;</code> are available with <code>String</code>
|
||
as well, starting with the <code>new</code> function to create a string, shown in Listing
|
||
8-11.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut s = String::new();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-11: Creating a new, empty <code>String</code></span></p>
|
||
<p>This line creates a new empty string called <code>s</code>, which we can then load data
|
||
into. Often, we’ll have some initial data that we want to start the string
|
||
with. For that, we use the <code>to_string</code> method, which is available on any type
|
||
that implements the <code>Display</code> trait, as string literals do. Listing 8-12 shows
|
||
two examples.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let data = "initial contents";
|
||
|
||
let s = data.to_string();
|
||
|
||
// the method also works on a literal directly:
|
||
let s = "initial contents".to_string();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-12: Using the <code>to_string</code> method to create a
|
||
<code>String</code> from a string literal</span></p>
|
||
<p>This code creates a string containing <code>initial contents</code>.</p>
|
||
<p>We can also use the function <code>String::from</code> to create a <code>String</code> from a string
|
||
literal. The code in Listing 8-13 is equivalent to the code from Listing 8-12
|
||
that uses <code>to_string</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = String::from("initial contents");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-13: Using the <code>String::from</code> function to create
|
||
a <code>String</code> from a string literal</span></p>
|
||
<p>Because strings are used for so many things, we can use many different generic
|
||
APIs for strings, providing us with a lot of options. Some of them can seem
|
||
redundant, but they all have their place! In this case, <code>String::from</code> and
|
||
<code>to_string</code> do the same thing, so which you choose is a matter of style.</p>
|
||
<p>Remember that strings are UTF-8 encoded, so we can include any properly encoded
|
||
data in them, as shown in Listing 8-14.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let hello = String::from("السلام عليكم");
|
||
let hello = String::from("Dobrý den");
|
||
let hello = String::from("Hello");
|
||
let hello = String::from("שָׁלוֹם");
|
||
let hello = String::from("नमस्ते");
|
||
let hello = String::from("こんにちは");
|
||
let hello = String::from("안녕하세요");
|
||
let hello = String::from("你好");
|
||
let hello = String::from("Olá");
|
||
let hello = String::from("Здравствуйте");
|
||
let hello = String::from("Hola");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-14: Storing greetings in different languages in
|
||
strings</span></p>
|
||
<p>All of these are valid <code>String</code> values.</p>
|
||
<h3><a class="header" href="#updating-a-string" id="updating-a-string">Updating a String</a></h3>
|
||
<p>A <code>String</code> can grow in size and its contents can change, just like the contents
|
||
of a <code>Vec&lt;T&gt;</code>, if you push more data into it. In addition, you can conveniently
|
||
use the <code>+</code> operator or the <code>format!</code> macro to concatenate <code>String</code> values.</p>
|
||
<h4><a class="header" href="#appending-to-a-string-with-push_str-and-push" id="appending-to-a-string-with-push_str-and-push">Appending to a String with <code>push_str</code> and <code>push</code></a></h4>
|
||
<p>We can grow a <code>String</code> by using the <code>push_str</code> method to append a string slice,
|
||
as shown in Listing 8-15.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut s = String::from("foo");
|
||
s.push_str("bar");
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-15: Appending a string slice to a <code>String</code>
|
||
using the <code>push_str</code> method</span></p>
|
||
<p>After these two lines, <code>s</code> will contain <code>foobar</code>. The <code>push_str</code> method takes a
|
||
string slice because we don’t necessarily want to take ownership of the
|
||
parameter. For example, the code in Listing 8-16 shows that it would be
|
||
unfortunate if we weren’t able to use <code>s2</code> after appending its contents to <code>s1</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut s1 = String::from("foo");
|
||
let s2 = "bar";
|
||
s1.push_str(s2);
|
||
println!("s2 is {}", s2);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-16: Using a string slice after appending its
|
||
contents to a <code>String</code></span></p>
|
||
<p>If the <code>push_str</code> method took ownership of <code>s2</code>, we wouldn’t be able to print
|
||
its value on the last line. However, this code works as we’d expect!</p>
|
||
<p>The <code>push</code> method takes a single character as a parameter and adds it to the
|
||
<code>String</code>. Listing 8-17 shows code that adds the letter <em>l</em> to a <code>String</code> using
|
||
the <code>push</code> method.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut s = String::from("lo");
|
||
s.push('l');
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-17: Adding one character to a <code>String</code> value
|
||
using <code>push</code></span></p>
|
||
<p>As a result of this code, <code>s</code> will contain <code>lol</code>.</p>
|
||
<h4><a class="header" href="#concatenation-with-the--operator-or-the-format-macro" id="concatenation-with-the--operator-or-the-format-macro">Concatenation with the <code>+</code> Operator or the <code>format!</code> Macro</a></h4>
|
||
<p>Often, you’ll want to combine two existing strings. One way is to use the <code>+</code>
|
||
operator, as shown in Listing 8-18.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s1 = String::from("Hello, ");
|
||
let s2 = String::from("world!");
|
||
let s3 = s1 + &amp;s2; // note s1 has been moved here and can no longer be used
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-18: Using the <code>+</code> operator to combine two
|
||
<code>String</code> values into a new <code>String</code> value</span></p>
|
||
<p>The string <code>s3</code> will contain <code>Hello, world!</code> as a result of this code. The
|
||
reason <code>s1</code> is no longer valid after the addition and the reason we used a
|
||
reference to <code>s2</code> has to do with the signature of the method that gets called
|
||
when we use the <code>+</code> operator. The <code>+</code> operator uses the <code>add</code> method, whose
|
||
signature looks something like this:</p>
|
||
<pre><code class="language-rust ignore">fn add(self, s: &str) -> String {
|
||
</code></pre>
|
||
<p>This isn’t the exact signature that’s in the standard library: in the standard
|
||
library, <code>add</code> is defined using generics. Here, we’re looking at the signature
|
||
of <code>add</code> with concrete types substituted for the generic ones, which is what
|
||
happens when we call this method with <code>String</code> values. We’ll discuss generics
|
||
in Chapter 10. This signature gives us the clues we need to understand the
|
||
tricky bits of the <code>+</code> operator.</p>
|
||
<p>First, <code>s2</code> has an <code>&</code>, meaning that we’re adding a <em>reference</em> of the second
|
||
string to the first string because of the <code>s</code> parameter in the <code>add</code> function:
|
||
we can only add a <code>&str</code> to a <code>String</code>; we can’t add two <code>String</code> values
|
||
together. But wait—the type of <code>&s2</code> is <code>&String</code>, not <code>&str</code>, as specified in
|
||
the second parameter to <code>add</code>. So why does Listing 8-18 compile?</p>
|
||
<p>The reason we’re able to use <code>&s2</code> in the call to <code>add</code> is that the compiler
|
||
can <em>coerce</em> the <code>&String</code> argument into a <code>&str</code>. When we call the <code>add</code>
|
||
method, Rust uses a <em>deref coercion</em>, which here turns <code>&s2</code> into <code>&s2[..]</code>.
|
||
We’ll discuss deref coercion in more depth in Chapter 15. Because <code>add</code> does
|
||
not take ownership of the <code>s</code> parameter, <code>s2</code> will still be a valid <code>String</code>
|
||
after this operation.</p>
|
||
<p>Second, we can see in the signature that <code>add</code> takes ownership of <code>self</code>,
|
||
because <code>self</code> does <em>not</em> have an <code>&</code>. This means <code>s1</code> in Listing 8-18 will be
|
||
moved into the <code>add</code> call and no longer be valid after that. So although <code>let s3 = s1 + &amp;s2;</code> looks like it will copy both strings and create a new one, this
|
||
statement actually takes ownership of <code>s1</code>, appends a copy of the contents of
|
||
<code>s2</code>, and then returns ownership of the result. In other words, it looks like
|
||
it’s making a lot of copies but isn’t; the implementation is more efficient
|
||
than copying.</p>
|
||
<p>If we need to concatenate multiple strings, the behavior of the <code>+</code> operator
|
||
gets unwieldy:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s1 = String::from("tic");
|
||
let s2 = String::from("tac");
|
||
let s3 = String::from("toe");
|
||
|
||
let s = s1 + "-" + &amp;s2 + "-" + &amp;s3;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>At this point, <code>s</code> will be <code>tic-tac-toe</code>. With all of the <code>+</code> and <code>"</code>
|
||
characters, it’s difficult to see what’s going on. For more complicated string
|
||
combining, we can use the <code>format!</code> macro:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s1 = String::from("tic");
|
||
let s2 = String::from("tac");
|
||
let s3 = String::from("toe");
|
||
|
||
let s = format!("{}-{}-{}", s1, s2, s3);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code also sets <code>s</code> to <code>tic-tac-toe</code>. The <code>format!</code> macro works in the same
|
||
way as <code>println!</code>, but instead of printing the output to the screen, it returns
|
||
a <code>String</code> with the contents. The version of the code using <code>format!</code> is much
|
||
easier to read and doesn’t take ownership of any of its parameters.</p>
|
||
<h3><a class="header" href="#indexing-into-strings" id="indexing-into-strings">Indexing into Strings</a></h3>
|
||
<p>In many other programming languages, accessing individual characters in a
|
||
string by referencing them by index is a valid and common operation. However,
|
||
if you try to access parts of a <code>String</code> using indexing syntax in Rust, you’ll
|
||
get an error. Consider the invalid code in Listing 8-19.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let s1 = String::from("hello");
|
||
let h = s1[0];
|
||
</code></pre>
|
||
<p><span class="caption">Listing 8-19: Attempting to use indexing syntax with a
|
||
String</span></p>
|
||
<p>This code will result in the following error:</p>
|
||
<pre><code class="language-text">error[E0277]: the trait bound `std::string::String: std::ops::Index&lt;{integer}&gt;` is not satisfied
|
||
-->
|
||
|
|
||
3 | let h = s1[0];
|
||
| ^^^^^ the type `std::string::String` cannot be indexed by `{integer}`
|
||
|
|
||
= help: the trait `std::ops::Index&lt;{integer}&gt;` is not implemented for `std::string::String`
|
||
</code></pre>
|
||
<p>The error and the note tell the story: Rust strings don’t support indexing. But
|
||
why not? To answer that question, we need to discuss how Rust stores strings in
|
||
memory.</p>
|
||
<h4><a class="header" href="#internal-representation" id="internal-representation">Internal Representation</a></h4>
|
||
<p>A <code>String</code> is a wrapper over a <code>Vec&lt;u8&gt;</code>. Let’s look at some of our properly
|
||
encoded UTF-8 example strings from Listing 8-14. First, this one:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let len = String::from("Hola").len();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>In this case, <code>len</code> will be 4, which means the vector storing the string “Hola”
|
||
is 4 bytes long. Each of these letters takes 1 byte when encoded in UTF-8. But
|
||
what about the following line? (Note that this string begins with the capital
|
||
Cyrillic letter Ze, not the Arabic number 3.)</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let len = String::from("Здравствуйте").len();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Asked how long the string is, you might say 12. However, Rust’s answer is 24:
|
||
that’s the number of bytes it takes to encode “Здравствуйте” in UTF-8, because
|
||
each Unicode scalar value in that string takes 2 bytes of storage. Therefore,
|
||
an index into the string’s bytes will not always correlate to a valid Unicode
|
||
scalar value. To demonstrate, consider this invalid Rust code:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let hello = "Здравствуйте";
|
||
let answer = &hello[0];
|
||
</code></pre>
|
||
<p>What should the value of <code>answer</code> be? Should it be <code>З</code>, the first letter? When
|
||
encoded in UTF-8, the first byte of <code>З</code> is <code>208</code> and the second is <code>151</code>, so
|
||
<code>answer</code> should in fact be <code>208</code>, but <code>208</code> is not a valid character on its
|
||
own. Returning <code>208</code> is likely not what a user would want if they asked for the
|
||
first letter of this string; however, that’s the only data that Rust has at
|
||
byte index 0. Users generally don’t want the byte value returned, even if the
|
||
string contains only Latin letters: if <code>&"hello"[0]</code> were valid code that
|
||
returned the byte value, it would return <code>104</code>, not <code>h</code>. To avoid returning an
|
||
unexpected value and causing bugs that might not be discovered immediately,
|
||
Rust doesn’t compile this code at all and prevents misunderstandings early in
|
||
the development process.</p>
|
||
<h4><a class="header" href="#bytes-and-scalar-values-and-grapheme-clusters-oh-my" id="bytes-and-scalar-values-and-grapheme-clusters-oh-my">Bytes and Scalar Values and Grapheme Clusters! Oh My!</a></h4>
|
||
<p>Another point about UTF-8 is that there are actually three relevant ways to
|
||
look at strings from Rust’s perspective: as bytes, scalar values, and grapheme
|
||
clusters (the closest thing to what we would call <em>letters</em>).</p>
|
||
<p>If we look at the Hindi word “नमस्ते” written in the Devanagari script, it is
|
||
stored as a vector of <code>u8</code> values that looks like this:</p>
|
||
<pre><code class="language-text">[224, 164, 168, 224, 164, 174, 224, 164, 184, 224, 165, 141, 224, 164, 164,
|
||
224, 165, 135]
|
||
</code></pre>
|
||
<p>That’s 18 bytes and is how computers ultimately store this data. If we look at
|
||
them as Unicode scalar values, which are what Rust’s <code>char</code> type is, those
|
||
bytes look like this:</p>
|
||
<pre><code class="language-text">['न', 'म', 'स', '्', 'त', 'े']
|
||
</code></pre>
|
||
<p>There are six <code>char</code> values here, but the fourth and sixth are not letters:
|
||
they’re diacritics that don’t make sense on their own. Finally, if we look at
|
||
them as grapheme clusters, we’d get what a person would call the four letters
|
||
that make up the Hindi word:</p>
|
||
<pre><code class="language-text">["न", "म", "स्", "ते"]
|
||
</code></pre>
|
||
<p>Rust provides different ways of interpreting the raw string data that computers
|
||
store so that each program can choose the interpretation it needs, no matter
|
||
what human language the data is in.</p>
|
||
<p>A final reason Rust doesn’t allow us to index into a <code>String</code> to get a
|
||
character is that indexing operations are expected to always take constant time
|
||
(O(1)). But it isn’t possible to guarantee that performance with a <code>String</code>,
|
||
because Rust would have to walk through the contents from the beginning to the
|
||
index to determine how many valid characters there were.</p>
|
||
<h3><a class="header" href="#slicing-strings" id="slicing-strings">Slicing Strings</a></h3>
|
||
<p>Indexing into a string is often a bad idea because it’s not clear what the
|
||
return type of the string-indexing operation should be: a byte value, a
|
||
character, a grapheme cluster, or a string slice. Therefore, Rust asks you to
|
||
be more specific if you really need to use indices to create string slices. To
|
||
be more specific in your indexing and indicate that you want a string slice,
|
||
rather than indexing using <code>[]</code> with a single number, you can use <code>[]</code> with a
|
||
range to create a string slice containing particular bytes:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let hello = "Здравствуйте";
|
||
|
||
let s = &hello[0..4];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Here, <code>s</code> will be a <code>&str</code> that contains the first 4 bytes of the string.
|
||
Earlier, we mentioned that each of these characters was 2 bytes, which means
|
||
<code>s</code> will be <code>Зд</code>.</p>
|
||
<p>What would happen if we used <code>&hello[0..1]</code>? The answer: Rust would panic at
|
||
runtime in the same way as if an invalid index were accessed in a vector:</p>
|
||
<pre><code class="language-text">thread 'main' panicked at 'byte index 1 is not a char boundary; it is inside 'З' (bytes 0..2) of `Здравствуйте`', src/libcore/str/mod.rs:2188:4
|
||
</code></pre>
|
||
<p>You should use ranges to create string slices with caution, because doing so
|
||
can crash your program.</p>
|
||
<h3><a class="header" href="#methods-for-iterating-over-strings" id="methods-for-iterating-over-strings">Methods for Iterating Over Strings</a></h3>
|
||
<p>Fortunately, you can access elements in a string in other ways.</p>
|
||
<p>If you need to perform operations on individual Unicode scalar values, the best
|
||
way to do so is to use the <code>chars</code> method. Calling <code>chars</code> on “नमस्ते” separates
|
||
out and returns six values of type <code>char</code>, and you can iterate over the result
|
||
to access each element:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>for c in "नमस्ते".chars() {
|
||
println!("{}", c);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code will print the following:</p>
|
||
<pre><code class="language-text">न
|
||
म
|
||
स
|
||
्
|
||
त
|
||
े
|
||
</code></pre>
|
||
<p>The <code>bytes</code> method returns each raw byte, which might be appropriate for your
|
||
domain:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>for b in "नमस्ते".bytes() {
|
||
println!("{}", b);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code will print the 18 bytes that make up this <code>String</code>:</p>
|
||
<pre><code class="language-text">224
|
||
164
|
||
// --snip--
|
||
165
|
||
135
|
||
</code></pre>
|
||
<p>But be sure to remember that valid Unicode scalar values may be made up of more
|
||
than 1 byte.</p>
|
||
<p>Getting grapheme clusters from strings is complex, so this functionality is not
|
||
provided by the standard library. Crates are available on
|
||
<a href="https://crates.io/">crates.io</a> if this is the functionality you need.</p>
|
||
<h3><a class="header" href="#strings-are-not-so-simple" id="strings-are-not-so-simple">Strings Are Not So Simple</a></h3>
|
||
<p>To summarize, strings are complicated. Different programming languages make
|
||
different choices about how to present this complexity to the programmer. Rust
|
||
has chosen to make the correct handling of <code>String</code> data the default behavior
|
||
for all Rust programs, which means programmers have to put more thought into
|
||
handling UTF-8 data upfront. This trade-off exposes more of the complexity of
|
||
strings than is apparent in other programming languages, but it prevents you
|
||
from having to handle errors involving non-ASCII characters later in your
|
||
development life cycle.</p>
|
||
<p>Let’s switch to something a bit less complex: hash maps!</p>
|
||
<h2><a class="header" href="#storing-keys-with-associated-values-in-hash-maps" id="storing-keys-with-associated-values-in-hash-maps">Storing Keys with Associated Values in Hash Maps</a></h2>
|
||
<p>The last of our common collections is the <em>hash map</em>. The type <code>HashMap&lt;K, V&gt;</code>
|
||
stores a mapping of keys of type <code>K</code> to values of type <code>V</code>. It does this via a
|
||
<em>hashing function</em>, which determines how it places these keys and values into
|
||
memory. Many programming languages support this kind of data structure, but
|
||
they often use a different name, such as hash, map, object, hash table,
|
||
dictionary, or associative array, just to name a few.</p>
|
||
<p>Hash maps are useful when you want to look up data not by using an index, as
|
||
you can with vectors, but by using a key that can be of any type. For example,
|
||
in a game, you could keep track of each team’s score in a hash map in which
|
||
each key is a team’s name and the values are each team’s score. Given a team
|
||
name, you can retrieve its score.</p>
|
||
<p>We’ll go over the basic API of hash maps in this section, but many more goodies
|
||
are hiding in the functions defined on <code>HashMap&lt;K, V&gt;</code> by the standard library.
|
||
As always, check the standard library documentation for more information.</p>
|
||
<h3><a class="header" href="#creating-a-new-hash-map" id="creating-a-new-hash-map">Creating a New Hash Map</a></h3>
|
||
<p>You can create an empty hash map with <code>new</code> and add elements with <code>insert</code>. In
|
||
Listing 8-20, we’re keeping track of the scores of two teams whose names are
|
||
Blue and Yellow. The Blue team starts with 10 points, and the Yellow team
|
||
starts with 50.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let mut scores = HashMap::new();
|
||
|
||
scores.insert(String::from("Blue"), 10);
|
||
scores.insert(String::from("Yellow"), 50);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-20: Creating a new hash map and inserting some
|
||
keys and values</span></p>
|
||
<p>Note that we need to first <code>use</code> the <code>HashMap</code> from the collections portion of
|
||
the standard library. Of our three common collections, this one is the least
|
||
often used, so it’s not included in the features brought into scope
|
||
automatically in the prelude. Hash maps also have less support from the
|
||
standard library; there’s no built-in macro to construct them, for example.</p>
|
||
<p>Just like vectors, hash maps store their data on the heap. This <code>HashMap</code> has
|
||
keys of type <code>String</code> and values of type <code>i32</code>. Like vectors, hash maps are
|
||
homogeneous: all of the keys must have the same type, and all of the values
|
||
must have the same type.</p>
|
||
<p>Another way of constructing a hash map is by using the <code>collect</code> method on a
|
||
vector of tuples, where each tuple consists of a key and its value. The
|
||
<code>collect</code> method gathers data into a number of collection types, including
|
||
<code>HashMap</code>. For example, if we had the team names and initial scores in two
|
||
separate vectors, we could use the <code>zip</code> method to create an iterator of tuples
|
||
where “Blue” is paired with 10, and so forth. Then we could use the <code>collect</code>
|
||
method to turn that iterator of tuples into a hash map, as shown in Listing 8-21.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let teams = vec![String::from("Blue"), String::from("Yellow")];
|
||
let initial_scores = vec![10, 50];
|
||
|
||
let scores: HashMap&lt;_, _&gt; = teams.iter().zip(initial_scores.iter()).collect();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-21: Creating a hash map from a list of teams
|
||
and a list of scores</span></p>
|
||
<p>The type annotation <code>HashMap&lt;_, _&gt;</code> is needed here because it’s possible to
|
||
<code>collect</code> into many different data structures and Rust doesn’t know which you
|
||
want unless you specify. For the parameters for the key and value types,
|
||
however, we use underscores, and Rust can infer the types that the hash map
|
||
contains based on the types of the data in the vectors.</p>
|
||
<h3><a class="header" href="#hash-maps-and-ownership" id="hash-maps-and-ownership">Hash Maps and Ownership</a></h3>
|
||
<p>For types that implement the <code>Copy</code> trait, like <code>i32</code>, the values are copied
|
||
into the hash map. For owned values like <code>String</code>, the values will be moved and
|
||
the hash map will be the owner of those values, as demonstrated in Listing 8-22.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let field_name = String::from("Favorite color");
|
||
let field_value = String::from("Blue");
|
||
|
||
let mut map = HashMap::new();
|
||
map.insert(field_name, field_value);
|
||
// field_name and field_value are invalid at this point, try using them and
|
||
// see what compiler error you get!
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-22: Showing that keys and values are owned by
|
||
the hash map once they’re inserted</span></p>
|
||
<p>We aren’t able to use the variables <code>field_name</code> and <code>field_value</code> after
|
||
they’ve been moved into the hash map with the call to <code>insert</code>.</p>
|
||
<p>If we insert references to values into the hash map, the values won’t be moved
|
||
into the hash map. The values that the references point to must be valid for at
|
||
least as long as the hash map is valid. We’ll talk more about these issues in
|
||
the <a href="ch10-03-lifetime-syntax.html#validating-references-with-lifetimes">“Validating References with
|
||
Lifetimes”</a><!-- ignore --> section in
|
||
Chapter 10.</p>
|
||
<h3><a class="header" href="#accessing-values-in-a-hash-map" id="accessing-values-in-a-hash-map">Accessing Values in a Hash Map</a></h3>
|
||
<p>We can get a value out of the hash map by providing its key to the <code>get</code>
|
||
method, as shown in Listing 8-23.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let mut scores = HashMap::new();
|
||
|
||
scores.insert(String::from("Blue"), 10);
|
||
scores.insert(String::from("Yellow"), 50);
|
||
|
||
let team_name = String::from("Blue");
|
||
let score = scores.get(&team_name);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-23: Accessing the score for the Blue team
|
||
stored in the hash map</span></p>
|
||
<p>Here, <code>score</code> will have the value that’s associated with the Blue team, and the
|
||
result will be <code>Some(&10)</code>. The result is wrapped in <code>Some</code> because <code>get</code>
|
||
returns an <code>Option&lt;&amp;V&gt;</code>; if there’s no value for that key in the hash map,
|
||
<code>get</code> will return <code>None</code>. The program will need to handle the <code>Option</code> in one
|
||
of the ways that we covered in Chapter 6.</p>
|
||
<p>We can iterate over each key/value pair in a hash map in a similar manner as we
|
||
do with vectors, using a <code>for</code> loop:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let mut scores = HashMap::new();
|
||
|
||
scores.insert(String::from("Blue"), 10);
|
||
scores.insert(String::from("Yellow"), 50);
|
||
|
||
for (key, value) in &scores {
|
||
println!("{}: {}", key, value);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code will print each pair in an arbitrary order:</p>
|
||
<pre><code class="language-text">Yellow: 50
|
||
Blue: 10
|
||
</code></pre>
|
||
<h3><a class="header" href="#updating-a-hash-map" id="updating-a-hash-map">Updating a Hash Map</a></h3>
|
||
<p>Although the number of keys and values is growable, each key can only have one
|
||
value associated with it at a time. When you want to change the data in a hash
|
||
map, you have to decide how to handle the case when a key already has a value
|
||
assigned. You could replace the old value with the new value, completely
|
||
disregarding the old value. You could keep the old value and ignore the new
|
||
value, only adding the new value if the key <em>doesn’t</em> already have a value. Or
|
||
you could combine the old value and the new value. Let’s look at how to do each
|
||
of these!</p>
|
||
<h4><a class="header" href="#overwriting-a-value" id="overwriting-a-value">Overwriting a Value</a></h4>
|
||
<p>If we insert a key and a value into a hash map and then insert that same key
|
||
with a different value, the value associated with that key will be replaced.
|
||
Even though the code in Listing 8-24 calls <code>insert</code> twice, the hash map will
|
||
only contain one key/value pair because we’re inserting the value for the Blue
|
||
team’s key both times.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let mut scores = HashMap::new();
|
||
|
||
scores.insert(String::from("Blue"), 10);
|
||
scores.insert(String::from("Blue"), 25);
|
||
|
||
println!("{:?}", scores);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-24: Replacing a value stored with a particular
|
||
key</span></p>
|
||
<p>This code will print <code>{"Blue": 25}</code>. The original value of <code>10</code> has been
|
||
overwritten.</p>
|
||
<h4><a class="header" href="#only-inserting-a-value-if-the-key-has-no-value" id="only-inserting-a-value-if-the-key-has-no-value">Only Inserting a Value If the Key Has No Value</a></h4>
|
||
<p>It’s common to check whether a particular key has a value and, if it doesn’t,
|
||
insert a value for it. Hash maps have a special API for this called <code>entry</code>
|
||
that takes the key you want to check as a parameter. The return value of the
|
||
<code>entry</code> method is an enum called <code>Entry</code> that represents a value that might or
|
||
might not exist. Let’s say we want to check whether the key for the Yellow team
|
||
has a value associated with it. If it doesn’t, we want to insert the value 50,
|
||
and the same for the Blue team. Using the <code>entry</code> API, the code looks like
|
||
Listing 8-25.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let mut scores = HashMap::new();
|
||
scores.insert(String::from("Blue"), 10);
|
||
|
||
scores.entry(String::from("Yellow")).or_insert(50);
|
||
scores.entry(String::from("Blue")).or_insert(50);
|
||
|
||
println!("{:?}", scores);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-25: Using the <code>entry</code> method to only insert if
|
||
the key does not already have a value</span></p>
|
||
<p>The <code>or_insert</code> method on <code>Entry</code> is defined to return a mutable reference to
|
||
the value for the corresponding <code>Entry</code> key if that key exists, and if not,
|
||
inserts the parameter as the new value for this key and returns a mutable
|
||
reference to the new value. This technique is much cleaner than writing the
|
||
logic ourselves and, in addition, plays more nicely with the borrow checker.</p>
|
||
<p>Running the code in Listing 8-25 will print <code>{"Yellow": 50, "Blue": 10}</code>. The
|
||
first call to <code>entry</code> will insert the key for the Yellow team with the value
|
||
50 because the Yellow team doesn’t have a value already. The second call to
|
||
<code>entry</code> will not change the hash map because the Blue team already has the
|
||
value 10.</p>
|
||
<h4><a class="header" href="#updating-a-value-based-on-the-old-value" id="updating-a-value-based-on-the-old-value">Updating a Value Based on the Old Value</a></h4>
|
||
<p>Another common use case for hash maps is to look up a key’s value and then
|
||
update it based on the old value. For instance, Listing 8-26 shows code that
|
||
counts how many times each word appears in some text. We use a hash map with
|
||
the words as keys and increment the value to keep track of how many times we’ve
|
||
seen that word. If it’s the first time we’ve seen a word, we’ll first insert
|
||
the value 0.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::collections::HashMap;
|
||
|
||
let text = "hello world wonderful world";
|
||
|
||
let mut map = HashMap::new();
|
||
|
||
for word in text.split_whitespace() {
|
||
let count = map.entry(word).or_insert(0);
|
||
*count += 1;
|
||
}
|
||
|
||
println!("{:?}", map);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 8-26: Counting occurrences of words using a hash
|
||
map that stores words and counts</span></p>
|
||
<p>This code will print <code>{"world": 2, "hello": 1, "wonderful": 1}</code>. The
|
||
<code>or_insert</code> method actually returns a mutable reference (<code>&mut V</code>) to the value
|
||
for this key. Here we store that mutable reference in the <code>count</code> variable, so
|
||
in order to assign to that value, we must first dereference <code>count</code> using the
|
||
asterisk (<code>*</code>). The mutable reference goes out of scope at the end of the <code>for</code>
|
||
loop, so all of these changes are safe and allowed by the borrowing rules.</p>
|
||
<h3><a class="header" href="#hashing-functions" id="hashing-functions">Hashing Functions</a></h3>
|
||
<p>By default, <code>HashMap</code> uses a “cryptographically strong”<sup class="footnote-reference"><a href="#siphash">1</a></sup> hashing
|
||
function that can provide resistance to Denial of Service (DoS) attacks. This
|
||
is not the fastest hashing algorithm available, but the trade-off for better
|
||
security that comes with the drop in performance is worth it. If you profile
|
||
your code and find that the default hash function is too slow for your
|
||
purposes, you can switch to another function by specifying a different
|
||
<em>hasher</em>. A hasher is a type that implements the <code>BuildHasher</code> trait. We’ll
|
||
talk about traits and how to implement them in Chapter 10. You don’t
|
||
necessarily have to implement your own hasher from scratch;
|
||
<a href="https://crates.io/">crates.io</a> has libraries shared by other Rust users that
|
||
provide hashers implementing many common hashing algorithms.</p>
|
||
<div class="footnote-definition" id="siphash"><sup class="footnote-definition-label">1</sup>
|
||
<p><a href="https://www.131002.net/siphash/siphash.pdf">https://www.131002.net/siphash/siphash.pdf</a></p>
|
||
</div>
|
||
<h2><a class="header" href="#summary-7" id="summary-7">Summary</a></h2>
|
||
<p>Vectors, strings, and hash maps will provide a large amount of functionality
|
||
necessary in programs when you need to store, access, and modify data. Here are
|
||
some exercises you should now be equipped to solve:</p>
|
||
<ul>
|
||
<li>Given a list of integers, use a vector and return the mean (the average
|
||
value), median (when sorted, the value in the middle position), and mode (the
|
||
value that occurs most often; a hash map will be helpful here) of the list.</li>
|
||
<li>Convert strings to pig latin. The first consonant of each word is moved to
|
||
the end of the word and “ay” is added, so “first” becomes “irst-fay.” Words
|
||
that start with a vowel have “hay” added to the end instead (“apple” becomes
|
||
“apple-hay”). Keep in mind the details about UTF-8 encoding!</li>
|
||
<li>Using a hash map and vectors, create a text interface to allow a user to add
|
||
employee names to a department in a company. For example, “Add Sally to
|
||
Engineering” or “Add Amir to Sales.” Then let the user retrieve a list of all
|
||
people in a department or all people in the company by department, sorted
|
||
alphabetically.</li>
|
||
</ul>
|
||
<p>The standard library API documentation describes methods that vectors, strings,
|
||
and hash maps have that will be helpful for these exercises!</p>
|
||
<p>We’re getting into more complex programs in which operations can fail, so it’s
|
||
a perfect time to discuss error handling. We’ll do that next!</p>
|
||
<h1><a class="header" href="#error-handling" id="error-handling">Error Handling</a></h1>
|
||
<p>Rust’s commitment to reliability extends to error handling. Errors are a fact
|
||
of life in software, so Rust has a number of features for handling situations
|
||
in which something goes wrong. In many cases, Rust requires you to acknowledge
|
||
the possibility of an error and take some action before your code will compile.
|
||
This requirement makes your program more robust by ensuring that you’ll
|
||
discover errors and handle them appropriately before you’ve deployed your code
|
||
to production!</p>
|
||
<p>Rust groups errors into two major categories: <em>recoverable</em> and <em>unrecoverable</em>
|
||
errors. For a recoverable error, such as a file not found error, it’s
|
||
reasonable to report the problem to the user and retry the operation.
|
||
Unrecoverable errors are always symptoms of bugs, like trying to access a
|
||
location beyond the end of an array.</p>
|
||
<p>Most languages don’t distinguish between these two kinds of errors and handle
|
||
both in the same way, using mechanisms such as exceptions. Rust doesn’t have
|
||
exceptions. Instead, it has the type <code>Result&lt;T, E&gt;</code> for recoverable errors and
|
||
the <code>panic!</code> macro that stops execution when the program encounters an
|
||
unrecoverable error. This chapter covers calling <code>panic!</code> first and then talks
|
||
about returning <code>Result&lt;T, E&gt;</code> values. Additionally, we’ll explore
|
||
considerations when deciding whether to try to recover from an error or to stop
|
||
execution.</p>
|
||
<h2><a class="header" href="#unrecoverable-errors-with-panic" id="unrecoverable-errors-with-panic">Unrecoverable Errors with <code>panic!</code></a></h2>
|
||
<p>Sometimes, bad things happen in your code, and there’s nothing you can do about
|
||
it. In these cases, Rust has the <code>panic!</code> macro. When the <code>panic!</code> macro
|
||
executes, your program will print a failure message, unwind and clean up the
|
||
stack, and then quit. This most commonly occurs when a bug of some kind has
|
||
been detected and it’s not clear to the programmer how to handle the error.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#unwinding-the-stack-or-aborting-in-response-to-a-panic" id="unwinding-the-stack-or-aborting-in-response-to-a-panic">Unwinding the Stack or Aborting in Response to a Panic</a></h3>
|
||
<p>By default, when a panic occurs, the program starts <em>unwinding</em>, which
|
||
means Rust walks back up the stack and cleans up the data from each function
|
||
it encounters. But this walking back and cleanup is a lot of work. The
|
||
alternative is to immediately <em>abort</em>, which ends the program without
|
||
cleaning up. Memory that the program was using will then need to be cleaned
|
||
up by the operating system. If in your project you need to make the resulting
|
||
binary as small as possible, you can switch from unwinding to aborting upon a
|
||
panic by adding <code>panic = 'abort'</code> to the appropriate <code>[profile]</code> sections in
|
||
your <em>Cargo.toml</em> file. For example, if you want to abort on panic in release
|
||
mode, add this:</p>
|
||
<pre><code class="language-toml">[profile.release]
|
||
panic = 'abort'
|
||
</code></pre>
|
||
</blockquote>
|
||
<p>Let’s try calling <code>panic!</code> in a simple program:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic panics">fn main() {
|
||
panic!("crash and burn");
|
||
}
|
||
</code></pre></pre>
|
||
<p>When you run the program, you’ll see something like this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling panic v0.1.0 (file:///projects/panic)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.25s
|
||
Running `target/debug/panic`
|
||
thread 'main' panicked at 'crash and burn', src/main.rs:2:5
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>The call to <code>panic!</code> causes the error message contained in the last two lines.
|
||
The first of these lines shows our panic message and the place in our source code where
|
||
the panic occurred: <em>src/main.rs:2:5</em> indicates that it’s the second line,
|
||
fifth character of our <em>src/main.rs</em> file.</p>
|
||
<p>In this case, the line indicated is part of our code, and if we go to that
|
||
line, we see the <code>panic!</code> macro call. In other cases, the <code>panic!</code> call might
|
||
be in code that our code calls, and the filename and line number reported by
|
||
the error message will be someone else’s code where the <code>panic!</code> macro is
|
||
called, not the line of our code that eventually led to the <code>panic!</code> call. We
|
||
can use the backtrace of the functions the <code>panic!</code> call came from to figure
|
||
out the part of our code that is causing the problem. We’ll discuss what a
|
||
backtrace is in more detail next.</p>
|
||
<h3><a class="header" href="#using-a-panic-backtrace" id="using-a-panic-backtrace">Using a <code>panic!</code> Backtrace</a></h3>
|
||
<p>Let’s look at another example to see what it’s like when a <code>panic!</code> call comes
|
||
from a library because of a bug in our code instead of from our code calling
|
||
the macro directly. Listing 9-1 has some code that attempts to access an
|
||
element by index in a vector.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic panics">fn main() {
|
||
let v = vec![1, 2, 3];
|
||
|
||
v[99];
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 9-1: Attempting to access an element beyond the
|
||
end of a vector, which will cause a call to <code>panic!</code></span></p>
|
||
<p>Here, we’re attempting to access the 100th element of our vector (which is at
|
||
index 99 because indexing starts at zero), but it has only 3 elements. In this
|
||
situation, Rust will panic. Using <code>[]</code> is supposed to return an element, but if
|
||
you pass an invalid index, there’s no element that Rust could return here that
|
||
would be correct.</p>
|
||
<p>Other languages, like C, will attempt to give you exactly what you asked for in
|
||
this situation, even though it isn’t what you want: you’ll get whatever is at
|
||
the location in memory that would correspond to that element in the vector,
|
||
even though the memory doesn’t belong to the vector. This is called a <em>buffer
|
||
overread</em> and can lead to security vulnerabilities if an attacker is able to
|
||
manipulate the index in such a way as to read data stored after the array that
|
||
they shouldn’t be allowed to access.</p>
|
||
<p>To protect your program from this sort of vulnerability, if you try to read an
|
||
element at an index that doesn’t exist, Rust will stop execution and refuse to
|
||
continue. Let’s try it and see:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling panic v0.1.0 (file:///projects/panic)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.27s
|
||
Running `target/debug/panic`
|
||
thread 'main' panicked at 'index out of bounds: the len is 3 but the index is 99', libcore/slice/mod.rs:2448:10
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>This error points at a file we didn’t write, <em>libcore/slice/mod.rs</em>. That’s the
|
||
implementation of <code>slice</code> in the Rust source code. The code that gets run when
|
||
we use <code>[]</code> on our vector <code>v</code> is in <em>libcore/slice/mod.rs</em>, and that is where
|
||
the <code>panic!</code> is actually happening.</p>
|
||
<p>The next note line tells us that we can set the <code>RUST_BACKTRACE</code> environment
|
||
variable to get a backtrace of exactly what happened to cause the error. A
|
||
<em>backtrace</em> is a list of all the functions that have been called to get to this
|
||
point. Backtraces in Rust work as they do in other languages: the key to
|
||
reading the backtrace is to start from the top and read until you see files you
|
||
wrote. That’s the spot where the problem originated. The lines above the lines
|
||
mentioning your files are code that your code called; the lines below are code
|
||
that called your code. These lines might include core Rust code, standard
|
||
library code, or crates that you’re using. Let’s try getting a backtrace by
|
||
setting the <code>RUST_BACKTRACE</code> environment variable to any value except 0.
|
||
Listing 9-2 shows output similar to what you’ll see.</p>
|
||
<pre><code class="language-text">$ RUST_BACKTRACE=1 cargo run
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.00s
|
||
Running `target/debug/panic`
|
||
thread 'main' panicked at 'index out of bounds: the len is 3 but the index is 99', libcore/slice/mod.rs:2448:10
|
||
stack backtrace:
|
||
0: std::sys::unix::backtrace::tracing::imp::unwind_backtrace
|
||
at libstd/sys/unix/backtrace/tracing/gcc_s.rs:49
|
||
1: std::sys_common::backtrace::print
|
||
at libstd/sys_common/backtrace.rs:71
|
||
at libstd/sys_common/backtrace.rs:59
|
||
2: std::panicking::default_hook::{{closure}}
|
||
at libstd/panicking.rs:211
|
||
3: std::panicking::default_hook
|
||
at libstd/panicking.rs:227
|
||
4: &lt;std::panicking::begin_panic::PanicPayload&lt;A&gt; as core::panic::BoxMeUp&gt;::get
|
||
at libstd/panicking.rs:476
|
||
5: std::panicking::continue_panic_fmt
|
||
at libstd/panicking.rs:390
|
||
6: std::panicking::try::do_call
|
||
at libstd/panicking.rs:325
|
||
7: core::ptr::drop_in_place
|
||
at libcore/panicking.rs:77
|
||
8: core::ptr::drop_in_place
|
||
at libcore/panicking.rs:59
|
||
9: &lt;usize as core::slice::SliceIndex&lt;[T]&gt;&gt;::index
|
||
at libcore/slice/mod.rs:2448
|
||
10: core::slice::&lt;impl core::ops::index::Index&lt;I&gt; for [T]&gt;::index
|
||
at libcore/slice/mod.rs:2316
|
||
11: &lt;alloc::vec::Vec&lt;T&gt; as core::ops::index::Index&lt;I&gt;&gt;::index
|
||
at liballoc/vec.rs:1653
|
||
12: panic::main
|
||
at src/main.rs:4
|
||
13: std::rt::lang_start::{{closure}}
|
||
at libstd/rt.rs:74
|
||
14: std::panicking::try::do_call
|
||
at libstd/rt.rs:59
|
||
at libstd/panicking.rs:310
|
||
15: macho_symbol_search
|
||
at libpanic_unwind/lib.rs:102
|
||
16: std::alloc::default_alloc_error_hook
|
||
at libstd/panicking.rs:289
|
||
at libstd/panic.rs:392
|
||
at libstd/rt.rs:58
|
||
17: std::rt::lang_start
|
||
at libstd/rt.rs:74
|
||
18: panic::main
|
||
</code></pre>
|
||
<p><span class="caption">Listing 9-2: The backtrace generated by a call to
|
||
<code>panic!</code> displayed when the environment variable <code>RUST_BACKTRACE</code> is set</span></p>
|
||
<p>That’s a lot of output! The exact output you see might be different depending
|
||
on your operating system and Rust version. In order to get backtraces with this
|
||
information, debug symbols must be enabled. Debug symbols are enabled by
|
||
default when using <code>cargo build</code> or <code>cargo run</code> without the <code>--release</code> flag,
|
||
as we have here.</p>
|
||
<p>In the output in Listing 9-2, line 12 of the backtrace points to the line in
|
||
our project that’s causing the problem: line 4 of <em>src/main.rs</em>. If we don’t
|
||
want our program to panic, the location pointed to by the first line mentioning
|
||
a file we wrote is where we should start investigating. In Listing 9-1, where
|
||
we deliberately wrote code that would panic in order to demonstrate how to use
|
||
backtraces, the way to fix the panic is to not request an element at index 99
|
||
from a vector that only contains 3 items. When your code panics in the future,
|
||
you’ll need to figure out what action the code is taking with what values to
|
||
cause the panic and what the code should do instead.</p>
|
||
<p>We’ll come back to <code>panic!</code> and when we should and should not use <code>panic!</code> to
|
||
handle error conditions in the <a href="ch09-03-to-panic-or-not-to-panic.html#to-panic-or-not-to-panic">“To <code>panic!</code> or Not to
|
||
<code>panic!</code>”</a><!-- ignore --> section later in this
|
||
chapter. Next, we’ll look at how to recover from an error using <code>Result</code>.</p>
|
||
<h2><a class="header" href="#recoverable-errors-with-result" id="recoverable-errors-with-result">Recoverable Errors with <code>Result</code></a></h2>
|
||
<p>Most errors aren’t serious enough to require the program to stop entirely.
|
||
Sometimes, when a function fails, it’s for a reason that you can easily
|
||
interpret and respond to. For example, if you try to open a file and that
|
||
operation fails because the file doesn’t exist, you might want to create the
|
||
file instead of terminating the process.</p>
|
||
<p>Recall from <a href="ch02-00-guessing-game-tutorial.html#handling-potential-failure-with-the-result-type">“Handling Potential Failure with the <code>Result</code>
|
||
Type”</a><!-- ignore --> in Chapter 2 that the <code>Result</code> enum is
|
||
defined as having two variants, <code>Ok</code> and <code>Err</code>, as follows:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Result&lt;T, E&gt; {
|
||
Ok(T),
|
||
Err(E),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>T</code> and <code>E</code> are generic type parameters: we’ll discuss generics in more
|
||
detail in Chapter 10. What you need to know right now is that <code>T</code> represents
|
||
the type of the value that will be returned in a success case within the <code>Ok</code>
|
||
variant, and <code>E</code> represents the type of the error that will be returned in a
|
||
failure case within the <code>Err</code> variant. Because <code>Result</code> has these generic type
|
||
parameters, we can use the <code>Result</code> type and the functions that the standard
|
||
library has defined on it in many different situations where the successful
|
||
value and error value we want to return may differ.</p>
|
||
<p>Let’s call a function that returns a <code>Result</code> value because the function could
|
||
fail. In Listing 9-3 we try to open a file.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::fs::File;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt");
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 9-3: Opening a file</span></p>
|
||
<p>How do we know <code>File::open</code> returns a <code>Result</code>? We could look at the <a href="../std/index.html">standard
|
||
library API documentation</a><!-- ignore -->, or we could ask
|
||
the compiler! If we give <code>f</code> a type annotation that we know is <em>not</em> the return
|
||
type of the function and then try to compile the code, the compiler will tell
|
||
us that the types don’t match. The error message will then tell us what the
|
||
type of <code>f</code> <em>is</em>. Let’s try it! We know that the return type of <code>File::open</code>
|
||
isn’t of type <code>u32</code>, so let’s change the <code>let f</code> statement to this:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let f: u32 = File::open("hello.txt");
|
||
</code></pre>
|
||
<p>Attempting to compile now gives us the following output:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
|
||
--> src/main.rs:4:18
|
||
|
|
||
4 | let f: u32 = File::open("hello.txt");
|
||
| ^^^^^^^^^^^^^^^^^^^^^^^ expected u32, found enum
|
||
`std::result::Result`
|
||
|
|
||
= note: expected type `u32`
|
||
found type `std::result::Result&lt;std::fs::File, std::io::Error&gt;`
|
||
</code></pre>
|
||
<p>This tells us the return type of the <code>File::open</code> function is a <code>Result&lt;T, E&gt;</code>.
|
||
The generic parameter <code>T</code> has been filled in here with the type of the success
|
||
value, <code>std::fs::File</code>, which is a file handle. The type of <code>E</code> used in the
|
||
error value is <code>std::io::Error</code>.</p>
|
||
<p>This return type means the call to <code>File::open</code> might succeed and return a file
|
||
handle that we can read from or write to. The function call also might fail:
|
||
for example, the file might not exist, or we might not have permission to
|
||
access the file. The <code>File::open</code> function needs to have a way to tell us
|
||
whether it succeeded or failed and at the same time give us either the file
|
||
handle or error information. This information is exactly what the <code>Result</code> enum
|
||
conveys.</p>
|
||
<p>In the case where <code>File::open</code> succeeds, the value in the variable <code>f</code> will be
|
||
an instance of <code>Ok</code> that contains a file handle. In the case where it fails,
|
||
the value in <code>f</code> will be an instance of <code>Err</code> that contains more information
|
||
about the kind of error that happened.</p>
|
||
<p>We need to add to the code in Listing 9-3 to take different actions depending
|
||
on the value <code>File::open</code> returns. Listing 9-4 shows one way to handle the
|
||
<code>Result</code> using a basic tool, the <code>match</code> expression that we discussed in
|
||
Chapter 6.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic">use std::fs::File;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt");
|
||
|
||
let f = match f {
|
||
Ok(file) => file,
|
||
Err(error) => {
|
||
panic!("Problem opening the file: {:?}", error)
|
||
},
|
||
};
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 9-4: Using a <code>match</code> expression to handle the
|
||
<code>Result</code> variants that might be returned</span></p>
|
||
<p>Note that, like the <code>Option</code> enum, the <code>Result</code> enum and its variants have been
|
||
brought into scope by the prelude, so we don’t need to specify <code>Result::</code>
|
||
before the <code>Ok</code> and <code>Err</code> variants in the <code>match</code> arms.</p>
|
||
<p>Here we tell Rust that when the result is <code>Ok</code>, return the inner <code>file</code> value
|
||
out of the <code>Ok</code> variant, and we then assign that file handle value to the
|
||
variable <code>f</code>. After the <code>match</code>, we can use the file handle for reading or
|
||
writing.</p>
|
||
<p>The other arm of the <code>match</code> handles the case where we get an <code>Err</code> value from
|
||
<code>File::open</code>. In this example, we’ve chosen to call the <code>panic!</code> macro. If
|
||
there’s no file named <em>hello.txt</em> in our current directory and we run this
|
||
code, we’ll see the following output from the <code>panic!</code> macro:</p>
|
||
<pre><code class="language-text">thread 'main' panicked at 'Problem opening the file: Error { repr:
|
||
Os { code: 2, message: "No such file or directory" } }', src/main.rs:9:12
|
||
</code></pre>
|
||
<p>As usual, this output tells us exactly what has gone wrong.</p>
|
||
<h3><a class="header" href="#matching-on-different-errors" id="matching-on-different-errors">Matching on Different Errors</a></h3>
|
||
<p>The code in Listing 9-4 will <code>panic!</code> no matter why <code>File::open</code> failed. What
|
||
we want to do instead is take different actions for different failure reasons:
|
||
if <code>File::open</code> failed because the file doesn’t exist, we want to create the
|
||
file and return the handle to the new file. If <code>File::open</code> failed for any
|
||
other reason—for example, because we didn’t have permission to open the file—we
|
||
still want the code to <code>panic!</code> in the same way as it did in Listing 9-4. Look
|
||
at Listing 9-5, which adds an inner <code>match</code> expression.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<!-- ignore this test because otherwise it creates hello.txt which causes other
|
||
tests to fail lol -->
|
||
<pre><code class="language-rust ignore">use std::fs::File;
|
||
use std::io::ErrorKind;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt");
|
||
|
||
let f = match f {
|
||
Ok(file) => file,
|
||
Err(error) => match error.kind() {
|
||
ErrorKind::NotFound => match File::create("hello.txt") {
|
||
Ok(fc) => fc,
|
||
Err(e) => panic!("Problem creating the file: {:?}", e),
|
||
},
|
||
other_error => panic!("Problem opening the file: {:?}", other_error),
|
||
},
|
||
};
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 9-5: Handling different kinds of errors in
|
||
different ways</span></p>
|
||
<p>The type of the value that <code>File::open</code> returns inside the <code>Err</code> variant is
|
||
<code>io::Error</code>, which is a struct provided by the standard library. This struct
|
||
has a method <code>kind</code> that we can call to get an <code>io::ErrorKind</code> value. The enum
|
||
<code>io::ErrorKind</code> is provided by the standard library and has variants
|
||
representing the different kinds of errors that might result from an <code>io</code>
|
||
operation. The variant we want to use is <code>ErrorKind::NotFound</code>, which indicates
|
||
the file we’re trying to open doesn’t exist yet. So we match on <code>f</code>, but we
|
||
also have an inner match on <code>error.kind()</code>.</p>
|
||
<p>The condition we want to check in the inner match is whether the value returned
|
||
by <code>error.kind()</code> is the <code>NotFound</code> variant of the <code>ErrorKind</code> enum. If it is,
|
||
we try to create the file with <code>File::create</code>. However, because <code>File::create</code>
|
||
could also fail, we need a second arm in the inner <code>match</code> expression. When the
|
||
file can’t be created, a different error message is printed. The second arm of
|
||
the outer <code>match</code> stays the same, so the program panics on any error besides
|
||
the missing file error.</p>
|
||
<p>That’s a lot of <code>match</code>! The <code>match</code> expression is very useful but also very
|
||
much a primitive. In Chapter 13, you’ll learn about closures; the <code>Result&lt;T, E&gt;</code> type has many methods that accept a closure and are implemented using
|
||
<code>match</code> expressions. Using those methods will make your code more concise. A
|
||
more seasoned Rustacean might write this code instead of Listing 9-5:</p>
|
||
<pre><code class="language-rust ignore">use std::fs::File;
|
||
use std::io::ErrorKind;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt").unwrap_or_else(|error| {
|
||
if error.kind() == ErrorKind::NotFound {
|
||
File::create("hello.txt").unwrap_or_else(|error| {
|
||
panic!("Problem creating the file: {:?}", error);
|
||
})
|
||
} else {
|
||
panic!("Problem opening the file: {:?}", error);
|
||
}
|
||
});
|
||
}
|
||
</code></pre>
|
||
<p>Although this code has the same behavior as Listing 9-5, it doesn’t contain any
|
||
<code>match</code> expressions and is cleaner to read. Come back to this example after
|
||
you’ve read Chapter 13, and look up the <code>unwrap_or_else</code> method in the standard
|
||
library documentation. Many more of these methods can clean up huge nested
|
||
<code>match</code> expressions when you’re dealing with errors.</p>
|
||
<h3><a class="header" href="#shortcuts-for-panic-on-error-unwrap-and-expect" id="shortcuts-for-panic-on-error-unwrap-and-expect">Shortcuts for Panic on Error: <code>unwrap</code> and <code>expect</code></a></h3>
|
||
<p>Using <code>match</code> works well enough, but it can be a bit verbose and doesn’t always
|
||
communicate intent well. The <code>Result&lt;T, E&gt;</code> type has many helper methods
|
||
defined on it to do various tasks. One of those methods, called <code>unwrap</code>, is a
|
||
shortcut method that is implemented just like the <code>match</code> expression we wrote in
|
||
Listing 9-4. If the <code>Result</code> value is the <code>Ok</code> variant, <code>unwrap</code> will return
|
||
the value inside the <code>Ok</code>. If the <code>Result</code> is the <code>Err</code> variant, <code>unwrap</code> will
|
||
call the <code>panic!</code> macro for us. Here is an example of <code>unwrap</code> in action:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic">use std::fs::File;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt").unwrap();
|
||
}
|
||
</code></pre></pre>
|
||
<p>If we run this code without a <em>hello.txt</em> file, we’ll see an error message from
|
||
the <code>panic!</code> call that the <code>unwrap</code> method makes:</p>
|
||
<pre><code class="language-text">thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Error {
|
||
repr: Os { code: 2, message: "No such file or directory" } }',
|
||
src/libcore/result.rs:906:4
|
||
</code></pre>
|
||
<p>Another method, <code>expect</code>, which is similar to <code>unwrap</code>, lets us also choose the
|
||
<code>panic!</code> error message. Using <code>expect</code> instead of <code>unwrap</code> and providing good
|
||
error messages can convey your intent and make tracking down the source of a
|
||
panic easier. The syntax of <code>expect</code> looks like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic">use std::fs::File;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt").expect("Failed to open hello.txt");
|
||
}
|
||
</code></pre></pre>
|
||
<p>We use <code>expect</code> in the same way as <code>unwrap</code>: to return the file handle or call
|
||
the <code>panic!</code> macro. The error message used by <code>expect</code> in its call to <code>panic!</code>
|
||
will be the parameter that we pass to <code>expect</code>, rather than the default
|
||
<code>panic!</code> message that <code>unwrap</code> uses. Here’s what it looks like:</p>
|
||
<pre><code class="language-text">thread 'main' panicked at 'Failed to open hello.txt: Error { repr: Os { code:
|
||
2, message: "No such file or directory" } }', src/libcore/result.rs:906:4
|
||
</code></pre>
|
||
<p>Because this error message starts with the text we specified, <code>Failed to open hello.txt</code>, it will be easier to find where in the code this error message is
|
||
coming from. If we use <code>unwrap</code> in multiple places, it can take more time to
|
||
figure out exactly which <code>unwrap</code> is causing the panic because all <code>unwrap</code>
|
||
calls that panic print the same message.</p>
|
||
<h3><a class="header" href="#propagating-errors" id="propagating-errors">Propagating Errors</a></h3>
|
||
<p>When you’re writing a function whose implementation calls something that might
|
||
fail, instead of handling the error within this function, you can return the
|
||
error to the calling code so that it can decide what to do. This is known as
|
||
<em>propagating</em> the error and gives more control to the calling code, where there
|
||
might be more information or logic that dictates how the error should be
|
||
handled than what you have available in the context of your code.</p>
|
||
<p>For example, Listing 9-6 shows a function that reads a username from a file. If
|
||
the file doesn’t exist or can’t be read, this function will return those errors
|
||
to the code that called this function.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io;
|
||
use std::io::Read;
|
||
use std::fs::File;
|
||
|
||
fn read_username_from_file() -&gt; Result&lt;String, io::Error&gt; {
|
||
let f = File::open("hello.txt");
|
||
|
||
let mut f = match f {
|
||
Ok(file) => file,
|
||
Err(e) => return Err(e),
|
||
};
|
||
|
||
let mut s = String::new();
|
||
|
||
match f.read_to_string(&mut s) {
|
||
Ok(_) => Ok(s),
|
||
Err(e) => Err(e),
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 9-6: A function that returns errors to the
|
||
calling code using <code>match</code></span></p>
|
||
<p>This function can be written in a much shorter way, but we’re going to start by
|
||
doing a lot of it manually in order to explore error handling; at the end,
|
||
we’ll show the shorter way. Let’s look at the return type of the function first:
|
||
<code>Result&lt;String, io::Error&gt;</code>. This means the function is returning a value of
|
||
the type <code>Result&lt;T, E&gt;</code> where the generic parameter <code>T</code> has been filled in
|
||
with the concrete type <code>String</code> and the generic type <code>E</code> has been filled in
|
||
with the concrete type <code>io::Error</code>. If this function succeeds without any
|
||
problems, the code that calls this function will receive an <code>Ok</code> value that
|
||
holds a <code>String</code>—the username that this function read from the file. If this
|
||
function encounters any problems, the code that calls this function will
|
||
receive an <code>Err</code> value that holds an instance of <code>io::Error</code> that contains
|
||
more information about what the problems were. We chose <code>io::Error</code> as the
|
||
return type of this function because that happens to be the type of the error
|
||
value returned from both of the operations we’re calling in this function’s
|
||
body that might fail: the <code>File::open</code> function and the <code>read_to_string</code>
|
||
method.</p>
|
||
<p>The body of the function starts by calling the <code>File::open</code> function. Then we
|
||
handle the <code>Result</code> value returned with a <code>match</code> similar to the <code>match</code> in
|
||
Listing 9-4, only instead of calling <code>panic!</code> in the <code>Err</code> case, we return
|
||
early from this function and pass the error value from <code>File::open</code> back to the
|
||
calling code as this function’s error value. If <code>File::open</code> succeeds, we store
|
||
the file handle in the variable <code>f</code> and continue.</p>
|
||
<p>Then we create a new <code>String</code> in variable <code>s</code> and call the <code>read_to_string</code>
|
||
method on the file handle in <code>f</code> to read the contents of the file into <code>s</code>. The
|
||
<code>read_to_string</code> method also returns a <code>Result</code> because it might fail, even
|
||
though <code>File::open</code> succeeded. So we need another <code>match</code> to handle that
|
||
<code>Result</code>: if <code>read_to_string</code> succeeds, then our function has succeeded, and we
|
||
return the username from the file that’s now in <code>s</code> wrapped in an <code>Ok</code>. If
|
||
<code>read_to_string</code> fails, we return the error value in the same way that we
|
||
returned the error value in the <code>match</code> that handled the return value of
|
||
<code>File::open</code>. However, we don’t need to explicitly say <code>return</code>, because this
|
||
is the last expression in the function.</p>
|
||
<p>The code that calls this code will then handle getting either an <code>Ok</code> value
|
||
that contains a username or an <code>Err</code> value that contains an <code>io::Error</code>. We
|
||
don’t know what the calling code will do with those values. If the calling code
|
||
gets an <code>Err</code> value, it could call <code>panic!</code> and crash the program, use a
|
||
default username, or look up the username from somewhere other than a file, for
|
||
example. We don’t have enough information on what the calling code is actually
|
||
trying to do, so we propagate all the success or error information upward for
|
||
it to handle appropriately.</p>
|
||
<p>This pattern of propagating errors is so common in Rust that Rust provides the
|
||
question mark operator <code>?</code> to make this easier.</p>
|
||
<h4><a class="header" href="#a-shortcut-for-propagating-errors-the--operator" id="a-shortcut-for-propagating-errors-the--operator">A Shortcut for Propagating Errors: the <code>?</code> Operator</a></h4>
|
||
<p>Listing 9-7 shows an implementation of <code>read_username_from_file</code> that has the
|
||
same functionality as it had in Listing 9-6, but this implementation uses the
|
||
<code>?</code> operator.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io;
|
||
use std::io::Read;
|
||
use std::fs::File;
|
||
|
||
fn read_username_from_file() -&gt; Result&lt;String, io::Error&gt; {
|
||
let mut f = File::open("hello.txt")?;
|
||
let mut s = String::new();
|
||
f.read_to_string(&mut s)?;
|
||
Ok(s)
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 9-7: A function that returns errors to the
|
||
calling code using the <code>?</code> operator</span></p>
|
||
<p>The <code>?</code> placed after a <code>Result</code> value is defined to work in almost the same way
|
||
as the <code>match</code> expressions we defined to handle the <code>Result</code> values in Listing
|
||
9-6. If the value of the <code>Result</code> is an <code>Ok</code>, the value inside the <code>Ok</code> will
|
||
get returned from this expression, and the program will continue. If the value
|
||
is an <code>Err</code>, the <code>Err</code> will be returned from the whole function as if we had
|
||
used the <code>return</code> keyword so the error value gets propagated to the calling
|
||
code.</p>
|
||
<p>There is a difference between what the <code>match</code> expression from Listing 9-6 and
|
||
the <code>?</code> operator do: error values that have the <code>?</code> operator called on them go
|
||
through the <code>from</code> function, defined in the <code>From</code> trait in the standard
|
||
library, which is used to convert errors from one type into another. When the
|
||
<code>?</code> operator calls the <code>from</code> function, the error type received is converted
|
||
into the error type defined in the return type of the current function. This is
|
||
useful when a function returns one error type to represent all the ways a
|
||
function might fail, even if parts might fail for many different reasons. As
|
||
long as the returned error type implements the <code>From</code> trait to define how to convert
from each underlying error type, the <code>?</code> operator takes care of the
|
||
conversion automatically.</p>
|
||
<p>In the context of Listing 9-7, the <code>?</code> at the end of the <code>File::open</code> call will
|
||
return the value inside an <code>Ok</code> to the variable <code>f</code>. If an error occurs, the
|
||
<code>?</code> operator will return early out of the whole function and give any <code>Err</code>
|
||
value to the calling code. The same thing applies to the <code>?</code> at the end of the
|
||
<code>read_to_string</code> call.</p>
|
||
<p>The <code>?</code> operator eliminates a lot of boilerplate and makes this function’s
|
||
implementation simpler. We could even shorten this code further by chaining
|
||
method calls immediately after the <code>?</code>, as shown in Listing 9-8.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io;
|
||
use std::io::Read;
|
||
use std::fs::File;
|
||
|
||
fn read_username_from_file() -&gt; Result&lt;String, io::Error&gt; {
|
||
let mut s = String::new();
|
||
|
||
File::open("hello.txt")?.read_to_string(&mut s)?;
|
||
|
||
Ok(s)
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 9-8: Chaining method calls after the <code>?</code>
|
||
operator</span></p>
|
||
<p>We’ve moved the creation of the new <code>String</code> in <code>s</code> to the beginning of the
|
||
function; that part hasn’t changed. Instead of creating a variable <code>f</code>, we’ve
|
||
chained the call to <code>read_to_string</code> directly onto the result of
|
||
<code>File::open("hello.txt")?</code>. We still have a <code>?</code> at the end of the
|
||
<code>read_to_string</code> call, and we still return an <code>Ok</code> value containing the
|
||
username in <code>s</code> when both <code>File::open</code> and <code>read_to_string</code> succeed rather than
|
||
returning errors. The functionality is again the same as in Listing 9-6 and
|
||
Listing 9-7; this is just a different, more ergonomic way to write it.</p>
|
||
<p>Speaking of different ways to write this function, Listing 9-9 shows that
|
||
there’s a way to make this even shorter.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io;
|
||
use std::fs;
|
||
|
||
fn read_username_from_file() -&gt; Result&lt;String, io::Error&gt; {
|
||
fs::read_to_string("hello.txt")
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 9-9: Using <code>fs::read_to_string</code> instead of
|
||
opening and then reading the file</span></p>
|
||
<p>Reading a file into a string is a fairly common operation, so Rust provides the
|
||
convenient <code>fs::read_to_string</code> function that opens the file, creates a new
|
||
<code>String</code>, reads the contents of the file, puts the contents into that <code>String</code>,
|
||
and returns it. Of course, using <code>fs::read_to_string</code> doesn’t give us the
|
||
opportunity to explain all the error handling, so we did it the longer way
|
||
first.</p>
|
||
<h4><a class="header" href="#the--operator-can-be-used-in-functions-that-return-result" id="the--operator-can-be-used-in-functions-that-return-result">The <code>?</code> Operator Can Be Used in Functions That Return <code>Result</code></a></h4>
|
||
<p>The <code>?</code> operator can be used in functions that have a return type of
|
||
<code>Result</code>, because it is defined to work in the same way as the <code>match</code>
|
||
expression we defined in Listing 9-6. The part of the <code>match</code> that requires a
|
||
return type of <code>Result</code> is <code>return Err(e)</code>, so the return type of the function
|
||
has to be a <code>Result</code> to be compatible with this <code>return</code>.</p>
|
||
<p>Let’s look at what happens if we use the <code>?</code> operator in the <code>main</code> function,
|
||
which you’ll recall has a return type of <code>()</code>:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::fs::File;
|
||
|
||
fn main() {
|
||
let f = File::open("hello.txt")?;
|
||
}
|
||
</code></pre>
|
||
<p>When we compile this code, we get the following error message:</p>
|
||
<pre><code class="language-text">error[E0277]: the `?` operator can only be used in a function that returns
|
||
`Result` or `Option` (or another type that implements `std::ops::Try`)
|
||
--> src/main.rs:4:13
|
||
|
|
||
4 | let f = File::open("hello.txt")?;
|
||
| ^^^^^^^^^^^^^^^^^^^^^^^^ cannot use the `?` operator in a
|
||
function that returns `()`
|
||
|
|
||
= help: the trait `std::ops::Try` is not implemented for `()`
|
||
= note: required by `std::ops::Try::from_error`
|
||
</code></pre>
|
||
<p>This error points out that we’re only allowed to use the <code>?</code> operator in a
|
||
function that returns <code>Result</code> or <code>Option</code> or another type that implements
|
||
<code>std::ops::Try</code>. When you’re writing code in a function
|
||
that doesn’t return one of these types, and you want to use <code>?</code> when you call other
|
||
functions that return <code>Result&lt;T, E&gt;</code>, you have two choices to fix this problem.
|
||
One technique is to change the return type of your function to be <code>Result&lt;T, E&gt;</code> if you have no restrictions preventing that. The other technique is to use
|
||
a <code>match</code> or one of the <code>Result&lt;T, E&gt;</code> methods to handle the <code>Result&lt;T, E&gt;</code> in
|
||
whatever way is appropriate.</p>
|
||
<p>The <code>main</code> function is special, and there are restrictions on what its return
|
||
type must be. One valid return type for <code>main</code> is <code>()</code>, and conveniently, another
|
||
valid return type is <code>Result&lt;T, E&gt;</code>, as shown here:</p>
|
||
<pre><code class="language-rust ignore">use std::error::Error;
|
||
use std::fs::File;
|
||
|
||
fn main() -&gt; Result&lt;(), Box&lt;dyn Error&gt;&gt; {
|
||
let f = File::open("hello.txt")?;
|
||
|
||
Ok(())
|
||
}
|
||
</code></pre>
|
||
<p>The <code>Box&lt;dyn Error&gt;</code> type is called a trait object, which we’ll talk about in
|
||
the <a href="ch17-02-trait-objects.html#using-trait-objects-that-allow-for-values-of-different-types">“Using Trait Objects that Allow for Values of Different
|
||
Types”</a><!-- ignore --> section in Chapter 17. For now, you can
|
||
read <code>Box&lt;dyn Error&gt;</code> to mean “any kind of error.” Using <code>?</code> in a <code>main</code>
|
||
function with this return type is allowed.</p>
|
||
<p>Now that we’ve discussed the details of calling <code>panic!</code> or returning <code>Result</code>,
|
||
let’s return to the topic of how to decide which is appropriate to use in which
|
||
cases.</p>
|
||
<h2><a class="header" href="#to-panic-or-not-to-panic" id="to-panic-or-not-to-panic">To <code>panic!</code> or Not to <code>panic!</code></a></h2>
|
||
<p>So how do you decide when you should call <code>panic!</code> and when you should return
|
||
<code>Result</code>? When code panics, there’s no way to recover. You could call <code>panic!</code>
|
||
for any error situation, whether there’s a possible way to recover or not, but
|
||
then you’re making the decision on behalf of the code calling your code that a
|
||
situation is unrecoverable. When you choose to return a <code>Result</code> value, you
|
||
give the calling code options rather than making the decision for it. The
|
||
calling code could choose to attempt to recover in a way that’s appropriate for
|
||
its situation, or it could decide that an <code>Err</code> value in this case is
|
||
unrecoverable, so it can call <code>panic!</code> and turn your recoverable error into an
|
||
unrecoverable one. Therefore, returning <code>Result</code> is a good default choice when
|
||
you’re defining a function that might fail.</p>
|
||
<p>In rare situations, it’s more appropriate to write code that panics instead of
|
||
returning a <code>Result</code>. Let’s explore why it’s appropriate to panic in examples,
|
||
prototype code, and tests. Then we’ll discuss situations in which the compiler
|
||
can’t tell that failure is impossible, but you as a human can. The chapter will
|
||
conclude with some general guidelines on how to decide whether to panic in
|
||
library code.</p>
|
||
<h3><a class="header" href="#examples-prototype-code-and-tests" id="examples-prototype-code-and-tests">Examples, Prototype Code, and Tests</a></h3>
|
||
<p>When you’re writing an example to illustrate some concept, having robust
|
||
error-handling code in the example as well can make the example less clear. In
|
||
examples, it’s understood that a call to a method like <code>unwrap</code> that could
|
||
panic is meant as a placeholder for the way you’d want your application to
|
||
handle errors, which can differ based on what the rest of your code is doing.</p>
|
||
<p>Similarly, the <code>unwrap</code> and <code>expect</code> methods are very handy when prototyping,
|
||
before you’re ready to decide how to handle errors. They leave clear markers in
|
||
your code for when you’re ready to make your program more robust.</p>
|
||
<p>If a method call fails in a test, you’d want the whole test to fail, even if
|
||
that method isn’t the functionality under test. Because <code>panic!</code> is how a test
|
||
is marked as a failure, calling <code>unwrap</code> or <code>expect</code> is exactly what should
|
||
happen.</p>
|
||
<h3><a class="header" href="#cases-in-which-you-have-more-information-than-the-compiler" id="cases-in-which-you-have-more-information-than-the-compiler">Cases in Which You Have More Information Than the Compiler</a></h3>
|
||
<p>It would also be appropriate to call <code>unwrap</code> when you have some other logic
|
||
that ensures the <code>Result</code> will have an <code>Ok</code> value, but the logic isn’t
|
||
something the compiler understands. You’ll still have a <code>Result</code> value that you
|
||
need to handle: whatever operation you’re calling still has the possibility of
|
||
failing in general, even though it’s logically impossible in your particular
|
||
situation. If you can ensure by manually inspecting the code that you’ll never
|
||
have an <code>Err</code> variant, it’s perfectly acceptable to call <code>unwrap</code>. Here’s an
|
||
example:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::net::IpAddr;
|
||
|
||
let home: IpAddr = "127.0.0.1".parse().unwrap();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We’re creating an <code>IpAddr</code> instance by parsing a hardcoded string. We can see
|
||
that <code>127.0.0.1</code> is a valid IP address, so it’s acceptable to use <code>unwrap</code>
|
||
here. However, having a hardcoded, valid string doesn’t change the return type
|
||
of the <code>parse</code> method: we still get a <code>Result</code> value, and the compiler will
|
||
still make us handle the <code>Result</code> as if the <code>Err</code> variant is a possibility
|
||
because the compiler isn’t smart enough to see that this string is always a
|
||
valid IP address. If the IP address string came from a user rather than being
|
||
hardcoded into the program and therefore <em>did</em> have a possibility of failure,
|
||
we’d definitely want to handle the <code>Result</code> in a more robust way instead.</p>
|
||
<h3><a class="header" href="#guidelines-for-error-handling" id="guidelines-for-error-handling">Guidelines for Error Handling</a></h3>
|
||
<p>It’s advisable to have your code panic when it’s possible that your code
|
||
could end up in a bad state. In this context, a <em>bad state</em> is when some
|
||
assumption, guarantee, contract, or invariant has been broken, such as when
|
||
invalid values, contradictory values, or missing values are passed to your
|
||
code—plus one or more of the following:</p>
|
||
<ul>
|
||
<li>The bad state is not something that’s <em>expected</em> to happen occasionally.</li>
|
||
<li>Your code after this point needs to rely on not being in this bad state.</li>
|
||
<li>There’s not a good way to encode this information in the types you use.</li>
|
||
</ul>
|
||
<p>If someone calls your code and passes in values that don’t make sense, the best
|
||
choice might be to call <code>panic!</code> and alert the person using your library to the
|
||
bug in their code so they can fix it during development. Similarly, <code>panic!</code> is
|
||
often appropriate if you’re calling external code that is out of your control
|
||
and it returns an invalid state that you have no way of fixing.</p>
|
||
<p>However, when failure is expected, it’s more appropriate to return a <code>Result</code>
|
||
than to make a <code>panic!</code> call. Examples include a parser being given malformed
|
||
data or an HTTP request returning a status that indicates you have hit a rate
|
||
limit. In these cases, returning a <code>Result</code> indicates that failure is an
|
||
expected possibility that the calling code must decide how to handle.</p>
|
||
<p>When your code performs operations on values, your code should verify the
|
||
values are valid first and panic if the values aren’t valid. This is mostly for
|
||
safety reasons: attempting to operate on invalid data can expose your code to
|
||
vulnerabilities. This is the main reason the standard library will call
|
||
<code>panic!</code> if you attempt an out-of-bounds memory access: trying to access memory
|
||
that doesn’t belong to the current data structure is a common security problem.
|
||
Functions often have <em>contracts</em>: their behavior is only guaranteed if the
|
||
inputs meet particular requirements. Panicking when the contract is violated
|
||
makes sense because a contract violation always indicates a caller-side bug and
|
||
it’s not a kind of error you want the calling code to have to explicitly
|
||
handle. In fact, there’s no reasonable way for calling code to recover; the
|
||
calling <em>programmers</em> need to fix the code. Contracts for a function,
|
||
especially when a violation will cause a panic, should be explained in the API
|
||
documentation for the function.</p>
|
||
<p>However, having lots of error checks in all of your functions would be verbose
|
||
and annoying. Fortunately, you can use Rust’s type system (and thus the type
|
||
checking the compiler does) to do many of the checks for you. If your function
|
||
has a particular type as a parameter, you can proceed with your code’s logic
|
||
knowing that the compiler has already ensured you have a valid value. For
|
||
example, if you have a type rather than an <code>Option</code>, your program expects to
|
||
have <em>something</em> rather than <em>nothing</em>. Your code then doesn’t have to handle
|
||
two cases for the <code>Some</code> and <code>None</code> variants: it will only have one case for
|
||
definitely having a value. Code trying to pass nothing to your function won’t
|
||
even compile, so your function doesn’t have to check for that case at runtime.
|
||
Another example is using an unsigned integer type such as <code>u32</code>, which ensures
|
||
the parameter is never negative.</p>
|
||
<h3><a class="header" href="#creating-custom-types-for-validation" id="creating-custom-types-for-validation">Creating Custom Types for Validation</a></h3>
|
||
<p>Let’s take the idea of using Rust’s type system to ensure we have a valid value
|
||
one step further and look at creating a custom type for validation. Recall the
|
||
guessing game in Chapter 2 in which our code asked the user to guess a number
|
||
between 1 and 100. We never validated that the user’s guess was between those
|
||
numbers before checking it against our secret number; we only validated that
|
||
the guess was positive. In this case, the consequences were not very dire: our
|
||
output of “Too high” or “Too low” would still be correct. But it would be a
|
||
useful enhancement to guide the user toward valid guesses and have different
|
||
behavior when a user guesses a number that’s out of range versus when a user
|
||
types, for example, letters instead.</p>
|
||
<p>One way to do this would be to parse the guess as an <code>i32</code> instead of only a
|
||
<code>u32</code> to allow potentially negative numbers, and then add a check for the
|
||
number being in range, like so:</p>
|
||
<pre><code class="language-rust ignore">loop {
|
||
// --snip--
|
||
|
||
let guess: i32 = match guess.trim().parse() {
|
||
Ok(num) => num,
|
||
Err(_) => continue,
|
||
};
|
||
|
||
if guess < 1 || guess > 100 {
|
||
println!("The secret number will be between 1 and 100.");
|
||
continue;
|
||
}
|
||
|
||
match guess.cmp(&secret_number) {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>The <code>if</code> expression checks whether our value is out of range, tells the user
|
||
about the problem, and calls <code>continue</code> to start the next iteration of the loop
|
||
and ask for another guess. After the <code>if</code> expression, we can proceed with the
|
||
comparisons between <code>guess</code> and the secret number knowing that <code>guess</code> is
|
||
between 1 and 100.</p>
|
||
<p>However, this is not an ideal solution: if it was absolutely critical that the
|
||
program only operated on values between 1 and 100, and it had many functions
|
||
with this requirement, having a check like this in every function would be
|
||
tedious (and might impact performance).</p>
|
||
<p>Instead, we can make a new type and put the validations in a function to create
|
||
an instance of the type rather than repeating the validations everywhere. That
|
||
way, it’s safe for functions to use the new type in their signatures and
|
||
confidently use the values they receive. Listing 9-10 shows one way to define a
|
||
<code>Guess</code> type that will only create an instance of <code>Guess</code> if the <code>new</code> function
|
||
receives a value between 1 and 100.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct Guess {
|
||
value: i32,
|
||
}
|
||
|
||
impl Guess {
|
||
pub fn new(value: i32) -> Guess {
|
||
if value < 1 || value > 100 {
|
||
panic!("Guess value must be between 1 and 100, got {}.", value);
|
||
}
|
||
|
||
Guess {
|
||
value
|
||
}
|
||
}
|
||
|
||
pub fn value(&self) -> i32 {
|
||
self.value
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 9-10: A <code>Guess</code> type that will only continue with
|
||
values between 1 and 100</span></p>
|
||
<p>First, we define a struct named <code>Guess</code> that has a field named <code>value</code> that
|
||
holds an <code>i32</code>. This is where the number will be stored.</p>
|
||
<p>Then we implement an associated function named <code>new</code> on <code>Guess</code> that creates
|
||
instances of <code>Guess</code> values. The <code>new</code> function is defined to have one
|
||
parameter named <code>value</code> of type <code>i32</code> and to return a <code>Guess</code>. The code in the
|
||
body of the <code>new</code> function tests <code>value</code> to make sure it’s between 1 and 100.
|
||
If <code>value</code> doesn’t pass this test, we make a <code>panic!</code> call, which will alert
|
||
the programmer who is writing the calling code that they have a bug they need
|
||
to fix, because creating a <code>Guess</code> with a <code>value</code> outside this range would
|
||
violate the contract that <code>Guess::new</code> is relying on. The conditions in which
|
||
<code>Guess::new</code> might panic should be discussed in its public-facing API
|
||
documentation; we’ll cover documentation conventions indicating the possibility
|
||
of a <code>panic!</code> in the API documentation that you create in Chapter 14. If
|
||
<code>value</code> does pass the test, we create a new <code>Guess</code> with its <code>value</code> field set
|
||
to the <code>value</code> parameter and return the <code>Guess</code>.</p>
|
||
<p>Next, we implement a method named <code>value</code> that borrows <code>self</code>, doesn’t have any
|
||
other parameters, and returns an <code>i32</code>. This kind of method is sometimes called
|
||
a <em>getter</em>, because its purpose is to get some data from its fields and return
|
||
it. This public method is necessary because the <code>value</code> field of the <code>Guess</code>
|
||
struct is private. It’s important that the <code>value</code> field be private so code
|
||
using the <code>Guess</code> struct is not allowed to set <code>value</code> directly: code outside
|
||
the module <em>must</em> use the <code>Guess::new</code> function to create an instance of
|
||
<code>Guess</code>, thereby ensuring there’s no way for a <code>Guess</code> to have a <code>value</code> that
|
||
hasn’t been checked by the conditions in the <code>Guess::new</code> function.</p>
|
||
<p>A function that has a parameter or returns only numbers between 1 and 100 could
|
||
then declare in its signature that it takes or returns a <code>Guess</code> rather than an
|
||
<code>i32</code> and wouldn’t need to do any additional checks in its body.</p>
|
||
<h2><a class="header" href="#summary-8" id="summary-8">Summary</a></h2>
|
||
<p>Rust’s error handling features are designed to help you write more robust code.
|
||
The <code>panic!</code> macro signals that your program is in a state it can’t handle and
|
||
lets you tell the process to stop instead of trying to proceed with invalid or
|
||
incorrect values. The <code>Result</code> enum uses Rust’s type system to indicate that
|
||
operations might fail in a way that your code could recover from. You can use
|
||
<code>Result</code> to tell code that calls your code that it needs to handle potential
|
||
success or failure as well. Using <code>panic!</code> and <code>Result</code> in the appropriate
|
||
situations will make your code more reliable in the face of inevitable problems.</p>
|
||
<p>Now that you’ve seen useful ways that the standard library uses generics with
|
||
the <code>Option</code> and <code>Result</code> enums, we’ll talk about how generics work and how you
|
||
can use them in your code.</p>
|
||
<h1><a class="header" href="#generic-types-traits-and-lifetimes" id="generic-types-traits-and-lifetimes">Generic Types, Traits, and Lifetimes</a></h1>
|
||
<p>Every programming language has tools for effectively handling the duplication
|
||
of concepts. In Rust, one such tool is <em>generics</em>. Generics are abstract
|
||
stand-ins for concrete types or other properties. When we’re writing code, we
|
||
can express the behavior of generics or how they relate to other generics
|
||
without knowing what will be in their place when compiling and running the code.</p>
|
||
<p>Similar to the way a function takes parameters with unknown values to run the
|
||
same code on multiple concrete values, functions can take parameters of some
|
||
generic type instead of a concrete type, like <code>i32</code> or <code>String</code>. In fact, we’ve
|
||
already used generics in Chapter 6 with <code>Option&lt;T&gt;</code>, Chapter 8 with <code>Vec&lt;T&gt;</code>
|
||
and <code>HashMap&lt;K, V&gt;</code>, and Chapter 9 with <code>Result&lt;T, E&gt;</code>. In this chapter, you’ll
|
||
explore how to define your own types, functions, and methods with generics!</p>
|
||
<p>First, we’ll review how to extract a function to reduce code duplication. Next,
|
||
we’ll use the same technique to make a generic function from two functions that
|
||
differ only in the types of their parameters. We’ll also explain how to use
|
||
generic types in struct and enum definitions.</p>
|
||
<p>Then you’ll learn how to use <em>traits</em> to define behavior in a generic way. You
|
||
can combine traits with generic types to constrain a generic type to only
|
||
those types that have a particular behavior, as opposed to just any type.</p>
|
||
<p>Finally, we’ll discuss <em>lifetimes</em>, a variety of generics that give the
|
||
compiler information about how references relate to each other. Lifetimes allow
|
||
us to borrow values in many situations while still enabling the compiler to
|
||
check that the references are valid.</p>
|
||
<h2><a class="header" href="#removing-duplication-by-extracting-a-function" id="removing-duplication-by-extracting-a-function">Removing Duplication by Extracting a Function</a></h2>
|
||
<p>Before diving into generics syntax, let’s first look at how to remove
|
||
duplication that doesn’t involve generic types by extracting a function. Then
|
||
we’ll apply this technique to extract a generic function! In the same way that
|
||
you recognize duplicated code to extract into a function, you’ll start to
|
||
recognize duplicated code that can use generics.</p>
|
||
<p>Consider a short program that finds the largest number in a list, as shown in
|
||
Listing 10-1.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let number_list = vec![34, 50, 25, 100, 65];
|
||
|
||
let mut largest = number_list[0];
|
||
|
||
for number in number_list {
|
||
if number > largest {
|
||
largest = number;
|
||
}
|
||
}
|
||
|
||
println!("The largest number is {}", largest);
|
||
<span class="boring"> assert_eq!(largest, 100);
|
||
</span>}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-1: Code to find the largest number in a list
|
||
of numbers</span></p>
|
||
<p>This code stores a list of integers in the variable <code>number_list</code> and places
|
||
the first number in the list in a variable named <code>largest</code>. Then it iterates
|
||
through all the numbers in the list, and if the current number is greater than
|
||
the number stored in <code>largest</code>, it replaces the number in that variable.
|
||
However, if the current number is less than or equal to the largest number seen
|
||
so far, the variable doesn’t change, and the code moves on to the next number
|
||
in the list. After considering all the numbers in the list, <code>largest</code> should
|
||
hold the largest number, which in this case is 100.</p>
|
||
<p>To find the largest number in two different lists of numbers, we can duplicate
|
||
the code in Listing 10-1 and use the same logic at two different places in the
|
||
program, as shown in Listing 10-2.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let number_list = vec![34, 50, 25, 100, 65];
|
||
|
||
let mut largest = number_list[0];
|
||
|
||
for number in number_list {
|
||
if number > largest {
|
||
largest = number;
|
||
}
|
||
}
|
||
|
||
println!("The largest number is {}", largest);
|
||
|
||
let number_list = vec![102, 34, 6000, 89, 54, 2, 43, 8];
|
||
|
||
let mut largest = number_list[0];
|
||
|
||
for number in number_list {
|
||
if number > largest {
|
||
largest = number;
|
||
}
|
||
}
|
||
|
||
println!("The largest number is {}", largest);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-2: Code to find the largest number in <em>two</em>
|
||
lists of numbers</span></p>
|
||
<p>Although this code works, duplicating code is tedious and error prone. We also
|
||
have to update the code in multiple places when we want to change it.</p>
|
||
<p>To eliminate this duplication, we can create an abstraction by defining a
|
||
function that operates on any list of integers given to it in a parameter. This
|
||
solution makes our code clearer and lets us express the concept of finding the
|
||
largest number in a list abstractly.</p>
|
||
<p>In Listing 10-3, we extracted the code that finds the largest number into a
|
||
function named <code>largest</code>. Unlike the code in Listing 10-1, which can find the
|
||
largest number in only one particular list, this program can find the largest
|
||
number in two different lists.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn largest(list: &[i32]) -> i32 {
|
||
let mut largest = list[0];
|
||
|
||
for &item in list.iter() {
|
||
if item > largest {
|
||
largest = item;
|
||
}
|
||
}
|
||
|
||
largest
|
||
}
|
||
|
||
fn main() {
|
||
let number_list = vec![34, 50, 25, 100, 65];
|
||
|
||
let result = largest(&number_list);
|
||
println!("The largest number is {}", result);
|
||
<span class="boring"> assert_eq!(result, 100);
|
||
</span>
|
||
let number_list = vec![102, 34, 6000, 89, 54, 2, 43, 8];
|
||
|
||
let result = largest(&number_list);
|
||
println!("The largest number is {}", result);
|
||
<span class="boring"> assert_eq!(result, 6000);
|
||
</span>}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-3: Abstracted code to find the largest number
|
||
in two lists</span></p>
|
||
<p>The <code>largest</code> function has a parameter called <code>list</code>, which represents any
|
||
concrete slice of <code>i32</code> values that we might pass into the function. As a
|
||
result, when we call the function, the code runs on the specific values that we
|
||
pass in.</p>
|
||
<p>In sum, here are the steps we took to change the code from Listing 10-2 to
|
||
Listing 10-3:</p>
|
||
<ol>
|
||
<li>Identify duplicate code.</li>
|
||
<li>Extract the duplicate code into the body of the function and specify the
|
||
inputs and return values of that code in the function signature.</li>
|
||
<li>Update the two instances of duplicated code to call the function instead.</li>
|
||
</ol>
|
||
<p>Next, we’ll use these same steps with generics to reduce code duplication in
|
||
different ways. In the same way that the function body can operate on an
|
||
abstract <code>list</code> instead of specific values, generics allow code to operate on
|
||
abstract types.</p>
|
||
<p>For example, say we had two functions: one that finds the largest item in a
|
||
slice of <code>i32</code> values and one that finds the largest item in a slice of <code>char</code>
|
||
values. How would we eliminate that duplication? Let’s find out!</p>
|
||
<h2><a class="header" href="#generic-data-types" id="generic-data-types">Generic Data Types</a></h2>
|
||
<p>We can use generics to create definitions for items like function signatures or
|
||
structs, which we can then use with many different concrete data types. Let’s
|
||
first look at how to define functions, structs, enums, and methods using
|
||
generics. Then we’ll discuss how generics affect code performance.</p>
|
||
<h3><a class="header" href="#in-function-definitions" id="in-function-definitions">In Function Definitions</a></h3>
|
||
<p>When defining a function that uses generics, we place the generics in the
|
||
signature of the function where we would usually specify the data types of the
|
||
parameters and return value. Doing so makes our code more flexible and provides
|
||
more functionality to callers of our function while preventing code duplication.</p>
|
||
<p>Continuing with our <code>largest</code> function, Listing 10-4 shows two functions that
|
||
both find the largest value in a slice.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn largest_i32(list: &[i32]) -> i32 {
|
||
let mut largest = list[0];
|
||
|
||
for &item in list.iter() {
|
||
if item > largest {
|
||
largest = item;
|
||
}
|
||
}
|
||
|
||
largest
|
||
}
|
||
|
||
fn largest_char(list: &[char]) -> char {
|
||
let mut largest = list[0];
|
||
|
||
for &item in list.iter() {
|
||
if item > largest {
|
||
largest = item;
|
||
}
|
||
}
|
||
|
||
largest
|
||
}
|
||
|
||
fn main() {
|
||
let number_list = vec![34, 50, 25, 100, 65];
|
||
|
||
let result = largest_i32(&number_list);
|
||
println!("The largest number is {}", result);
|
||
<span class="boring"> assert_eq!(result, 100);
|
||
</span>
|
||
let char_list = vec!['y', 'm', 'a', 'q'];
|
||
|
||
let result = largest_char(&char_list);
|
||
println!("The largest char is {}", result);
|
||
<span class="boring"> assert_eq!(result, 'y');
|
||
</span>}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-4: Two functions that differ only in their
|
||
names and the types in their signatures</span></p>
|
||
<p>The <code>largest_i32</code> function is the one we extracted in Listing 10-3 that finds
|
||
the largest <code>i32</code> in a slice. The <code>largest_char</code> function finds the largest
|
||
<code>char</code> in a slice. The function bodies have the same code, so let’s eliminate
|
||
the duplication by introducing a generic type parameter in a single function.</p>
|
||
<p>To parameterize the types in the new function we’ll define, we need to name the
|
||
type parameter, just as we do for the value parameters to a function. You can
|
||
use any identifier as a type parameter name. But we’ll use <code>T</code> because, by
|
||
convention, type parameter names in Rust are short, often just a letter, and Rust’s
|
||
type-naming convention is CamelCase. Short for “type,” <code>T</code> is the default
|
||
choice of most Rust programmers.</p>
|
||
<p>When we use a parameter in the body of the function, we have to declare the
|
||
parameter name in the signature so the compiler knows what that name means.
|
||
Similarly, when we use a type parameter name in a function signature, we have
|
||
to declare the type parameter name before we use it. To define the generic
|
||
<code>largest</code> function, place type name declarations inside angle brackets, <code><></code>,
|
||
between the name of the function and the parameter list, like this:</p>
|
||
<pre><code class="language-rust ignore">fn largest<T>(list: &[T]) -> T {
|
||
</code></pre>
|
||
<p>We read this definition as: the function <code>largest</code> is generic over some type
|
||
<code>T</code>. This function has one parameter named <code>list</code>, which is a slice of values
|
||
of type <code>T</code>. The <code>largest</code> function will return a value of the same type <code>T</code>.</p>
|
||
<p>Listing 10-5 shows the combined <code>largest</code> function definition using the generic
|
||
data type in its signature. The listing also shows how we can call the function
|
||
with either a slice of <code>i32</code> values or <code>char</code> values. Note that this code won’t
|
||
compile yet, but we’ll fix it later in this chapter.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn largest<T>(list: &[T]) -> T {
|
||
let mut largest = list[0];
|
||
|
||
for &item in list.iter() {
|
||
if item > largest {
|
||
largest = item;
|
||
}
|
||
}
|
||
|
||
largest
|
||
}
|
||
|
||
fn main() {
|
||
let number_list = vec![34, 50, 25, 100, 65];
|
||
|
||
let result = largest(&number_list);
|
||
println!("The largest number is {}", result);
|
||
|
||
let char_list = vec!['y', 'm', 'a', 'q'];
|
||
|
||
let result = largest(&char_list);
|
||
println!("The largest char is {}", result);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 10-5: A definition of the <code>largest</code> function that
|
||
uses generic type parameters but doesn’t compile yet</span></p>
|
||
<p>If we compile this code right now, we’ll get this error:</p>
|
||
<pre><code class="language-text">error[E0369]: binary operation `>` cannot be applied to type `T`
|
||
--> src/main.rs:5:12
|
||
|
|
||
5 | if item > largest {
|
||
| ^^^^^^^^^^^^^^
|
||
|
|
||
= note: an implementation of `std::cmp::PartialOrd` might be missing for `T`
|
||
</code></pre>
|
||
<p>The note mentions <code>std::cmp::PartialOrd</code>, which is a <em>trait</em>. We’ll talk about
|
||
traits in the next section. For now, this error states that the body of
|
||
<code>largest</code> won’t work for all possible types that <code>T</code> could be. Because we want
|
||
to compare values of type <code>T</code> in the body, we can only use types whose values
|
||
can be ordered. To enable comparisons, the standard library has the
|
||
<code>std::cmp::PartialOrd</code> trait that you can implement on types (see Appendix C
|
||
for more on this trait). You’ll learn how to specify that a generic type has a
|
||
particular trait in the <a href="ch10-02-traits.html#traits-as-parameters">“Traits as Parameters”</a><!--
|
||
ignore --> section, but let’s first explore other ways of using generic type
|
||
parameters.</p>
|
||
<h3><a class="header" href="#in-struct-definitions" id="in-struct-definitions">In Struct Definitions</a></h3>
|
||
<p>We can also define structs to use a generic type parameter in one or more
|
||
fields using the <code><></code> syntax. Listing 10-6 shows how to define a <code>Point<T></code>
|
||
struct to hold <code>x</code> and <code>y</code> coordinate values of any type.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Point<T> {
|
||
x: T,
|
||
y: T,
|
||
}
|
||
|
||
fn main() {
|
||
let integer = Point { x: 5, y: 10 };
|
||
let float = Point { x: 1.0, y: 4.0 };
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-6: A <code>Point<T></code> struct that holds <code>x</code> and <code>y</code>
|
||
values of type <code>T</code></span></p>
|
||
<p>The syntax for using generics in struct definitions is similar to that used in
|
||
function definitions. First, we declare the name of the type parameter inside
|
||
angle brackets just after the name of the struct. Then we can use the generic
|
||
type in the struct definition where we would otherwise specify concrete data
|
||
types.</p>
|
||
<p>Note that because we’ve used only one generic type to define <code>Point<T></code>, this
|
||
definition says that the <code>Point<T></code> struct is generic over some type <code>T</code>, and
|
||
the fields <code>x</code> and <code>y</code> are <em>both</em> that same type, whatever that type may be. If
|
||
we create an instance of a <code>Point<T></code> that has values of different types, as in
|
||
Listing 10-7, our code won’t compile.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">struct Point<T> {
|
||
x: T,
|
||
y: T,
|
||
}
|
||
|
||
fn main() {
|
||
let wont_work = Point { x: 5, y: 4.0 };
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 10-7: The fields <code>x</code> and <code>y</code> must be the same
|
||
type because both have the same generic data type <code>T</code>.</span></p>
|
||
<p>In this example, when we assign the integer value 5 to <code>x</code>, we let the
|
||
compiler know that the generic type <code>T</code> will be an integer for this instance of
|
||
<code>Point<T></code>. Then when we specify 4.0 for <code>y</code>, which we’ve defined to have the
|
||
same type as <code>x</code>, we’ll get a type mismatch error like this:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
|
||
--> src/main.rs:7:38
|
||
|
|
||
7 | let wont_work = Point { x: 5, y: 4.0 };
|
||
| ^^^ expected integer, found
|
||
floating-point number
|
||
|
|
||
= note: expected type `{integer}`
|
||
found type `{float}`
|
||
</code></pre>
|
||
<p>To define a <code>Point</code> struct where <code>x</code> and <code>y</code> are both generics but could have
|
||
different types, we can use multiple generic type parameters. For example, in
|
||
Listing 10-8, we can change the definition of <code>Point</code> to be generic over types
|
||
<code>T</code> and <code>U</code> where <code>x</code> is of type <code>T</code> and <code>y</code> is of type <code>U</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Point<T, U> {
|
||
x: T,
|
||
y: U,
|
||
}
|
||
|
||
fn main() {
|
||
let both_integer = Point { x: 5, y: 10 };
|
||
let both_float = Point { x: 1.0, y: 4.0 };
|
||
let integer_and_float = Point { x: 5, y: 4.0 };
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-8: A <code>Point<T, U></code> generic over two types so
|
||
that <code>x</code> and <code>y</code> can be values of different types</span></p>
|
||
<p>Now all the instances of <code>Point</code> shown are allowed! You can use as many generic
|
||
type parameters in a definition as you want, but using more than a few makes
|
||
your code hard to read. When you need lots of generic types in your code, it
|
||
could indicate that your code needs restructuring into smaller pieces.</p>
|
||
<h3><a class="header" href="#in-enum-definitions" id="in-enum-definitions">In Enum Definitions</a></h3>
|
||
<p>As we did with structs, we can define enums to hold generic data types in their
|
||
variants. Let’s take another look at the <code>Option<T></code> enum that the standard
|
||
library provides, which we used in Chapter 6:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Option<T> {
|
||
Some(T),
|
||
None,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This definition should now make more sense to you. As you can see, <code>Option<T></code>
|
||
is an enum that is generic over type <code>T</code> and has two variants: <code>Some</code>, which
|
||
holds one value of type <code>T</code>, and a <code>None</code> variant that doesn’t hold any value.
|
||
By using the <code>Option<T></code> enum, we can express the abstract concept of having an
|
||
optional value, and because <code>Option<T></code> is generic, we can use this abstraction
|
||
no matter what the type of the optional value is.</p>
|
||
<p>Enums can use multiple generic types as well. The definition of the <code>Result</code>
|
||
enum that we used in Chapter 9 is one example:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Result<T, E> {
|
||
Ok(T),
|
||
Err(E),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>Result</code> enum is generic over two types, <code>T</code> and <code>E</code>, and has two variants:
|
||
<code>Ok</code>, which holds a value of type <code>T</code>, and <code>Err</code>, which holds a value of type
|
||
<code>E</code>. This definition makes it convenient to use the <code>Result</code> enum anywhere we
|
||
have an operation that might succeed (return a value of some type <code>T</code>) or fail
|
||
(return an error of some type <code>E</code>). In fact, this is what we used to open a
|
||
file in Listing 9-3, where <code>T</code> was filled in with the type <code>std::fs::File</code> when
|
||
the file was opened successfully and <code>E</code> was filled in with the type
|
||
<code>std::io::Error</code> when there were problems opening the file.</p>
|
||
<p>When you recognize situations in your code with multiple struct or enum
|
||
definitions that differ only in the types of the values they hold, you can
|
||
avoid duplication by using generic types instead.</p>
|
||
<h3><a class="header" href="#in-method-definitions" id="in-method-definitions">In Method Definitions</a></h3>
|
||
<p>We can implement methods on structs and enums (as we did in Chapter 5) and use
|
||
generic types in their definitions, too. Listing 10-9 shows the <code>Point<T></code>
|
||
struct we defined in Listing 10-6 with a method named <code>x</code> implemented on it.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Point<T> {
|
||
x: T,
|
||
y: T,
|
||
}
|
||
|
||
impl<T> Point<T> {
|
||
fn x(&self) -> &T {
|
||
&self.x
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
let p = Point { x: 5, y: 10 };
|
||
|
||
println!("p.x = {}", p.x());
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-9: Implementing a method named <code>x</code> on the
|
||
<code>Point<T></code> struct that will return a reference to the <code>x</code> field of type
|
||
<code>T</code></span></p>
|
||
<p>Here, we’ve defined a method named <code>x</code> on <code>Point<T></code> that returns a reference
|
||
to the data in the field <code>x</code>.</p>
|
||
<p>Note that we have to declare <code>T</code> just after <code>impl</code> so we can use it to specify
|
||
that we’re implementing methods on the type <code>Point<T></code>. Because we declare <code>T</code> as a
|
||
generic type after <code>impl</code>, Rust can identify that the type in the angle
|
||
brackets in <code>Point</code> is a generic type rather than a concrete type.</p>
|
||
<p>We could, for example, implement methods only on <code>Point<f32></code> instances rather
|
||
than on <code>Point<T></code> instances with any generic type. In Listing 10-10 we use the
|
||
concrete type <code>f32</code>, meaning we don’t declare any types after <code>impl</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Point<T> {
|
||
</span><span class="boring"> x: T,
|
||
</span><span class="boring"> y: T,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Point<f32> {
|
||
fn distance_from_origin(&self) -> f32 {
|
||
(self.x.powi(2) + self.y.powi(2)).sqrt()
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-10: An <code>impl</code> block that only applies to a
|
||
struct with a particular concrete type for the generic type parameter <code>T</code></span></p>
|
||
<p>This code means the type <code>Point<f32></code> will have a method named
|
||
<code>distance_from_origin</code>, and other instances of <code>Point<T></code> where <code>T</code> is not of
|
||
type <code>f32</code> will not have this method defined. The method measures how far our
|
||
point is from the point at coordinates (0.0, 0.0) and uses mathematical
|
||
operations that are available only for floating-point types.</p>
|
||
<p>Generic type parameters in a struct definition aren’t always the same as those
|
||
you use in that struct’s method signatures. For example, Listing 10-11 defines
|
||
the method <code>mixup</code> on the <code>Point<T, U></code> struct from Listing 10-8. The method
|
||
takes another <code>Point</code> as a parameter, which might have different types from the
|
||
<code>self</code> <code>Point</code> we’re calling <code>mixup</code> on. The method creates a new <code>Point</code>
|
||
instance with the <code>x</code> value from the <code>self</code> <code>Point</code> (of type <code>T</code>) and the <code>y</code>
|
||
value from the passed-in <code>Point</code> (of type <code>W</code>).</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Point<T, U> {
|
||
x: T,
|
||
y: U,
|
||
}
|
||
|
||
impl<T, U> Point<T, U> {
|
||
fn mixup<V, W>(self, other: Point<V, W>) -> Point<T, W> {
|
||
Point {
|
||
x: self.x,
|
||
y: other.y,
|
||
}
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
let p1 = Point { x: 5, y: 10.4 };
|
||
let p2 = Point { x: "Hello", y: 'c'};
|
||
|
||
let p3 = p1.mixup(p2);
|
||
|
||
println!("p3.x = {}, p3.y = {}", p3.x, p3.y);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-11: A method that uses different generic types
|
||
from its struct’s definition</span></p>
|
||
<p>In <code>main</code>, we’ve defined a <code>Point</code> that has an <code>i32</code> for <code>x</code> (with value <code>5</code>)
|
||
and an <code>f64</code> for <code>y</code> (with value <code>10.4</code>). The <code>p2</code> variable is a <code>Point</code> struct
|
||
that has a string slice for <code>x</code> (with value <code>"Hello"</code>) and a <code>char</code> for <code>y</code>
|
||
(with value <code>c</code>). Calling <code>mixup</code> on <code>p1</code> with the argument <code>p2</code> gives us <code>p3</code>,
|
||
which will have an <code>i32</code> for <code>x</code>, because <code>x</code> came from <code>p1</code>. The <code>p3</code> variable
|
||
will have a <code>char</code> for <code>y</code>, because <code>y</code> came from <code>p2</code>. The <code>println!</code> macro
|
||
call will print <code>p3.x = 5, p3.y = c</code>.</p>
|
||
<p>The purpose of this example is to demonstrate a situation in which some generic
|
||
parameters are declared with <code>impl</code> and some are declared with the method
|
||
definition. Here, the generic parameters <code>T</code> and <code>U</code> are declared after <code>impl</code>,
|
||
because they go with the struct definition. The generic parameters <code>V</code> and <code>W</code>
|
||
are declared after <code>fn mixup</code>, because they’re only relevant to the method.</p>
|
||
<h3><a class="header" href="#performance-of-code-using-generics" id="performance-of-code-using-generics">Performance of Code Using Generics</a></h3>
|
||
<p>You might be wondering whether there is a runtime cost when you’re using
|
||
generic type parameters. The good news is that Rust implements generics in such
|
||
a way that your code doesn’t run any slower using generic types than it would
|
||
with concrete types.</p>
|
||
<p>Rust accomplishes this by performing monomorphization of the code that is using
|
||
generics at compile time. <em>Monomorphization</em> is the process of turning generic
|
||
code into specific code by filling in the concrete types that are used when
|
||
compiled.</p>
|
||
<p>In this process, the compiler does the opposite of the steps we used to create
|
||
the generic function in Listing 10-5: the compiler looks at all the places
|
||
where generic code is called and generates code for the concrete types the
|
||
generic code is called with.</p>
|
||
<p>Let’s look at how this works with an example that uses the standard library’s
|
||
<code>Option<T></code> enum:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let integer = Some(5);
|
||
let float = Some(5.0);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>When Rust compiles this code, it performs monomorphization. During that
|
||
process, the compiler reads the values that have been used in <code>Option<T></code>
|
||
instances and identifies two kinds of <code>Option<T></code>: one holds an <code>i32</code> and the other
|
||
holds an <code>f64</code>. As such, it expands the generic definition of <code>Option<T></code> into
|
||
<code>Option_i32</code> and <code>Option_f64</code>, thereby replacing the generic definition with
|
||
the specific ones.</p>
|
||
<p>The monomorphized version of the code looks like the following. The generic
|
||
<code>Option<T></code> is replaced with the specific definitions created by the compiler:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">enum Option_i32 {
|
||
Some(i32),
|
||
None,
|
||
}
|
||
|
||
enum Option_f64 {
|
||
Some(f64),
|
||
None,
|
||
}
|
||
|
||
fn main() {
|
||
let integer = Option_i32::Some(5);
|
||
let float = Option_f64::Some(5.0);
|
||
}
|
||
</code></pre></pre>
|
||
<p>Because Rust compiles generic code into code that specifies the type in each
|
||
instance, we pay no runtime cost for using generics. When the code runs, it
|
||
performs just as it would if we had duplicated each definition by hand. The
|
||
process of monomorphization makes Rust’s generics extremely efficient at
|
||
runtime.</p>
|
||
<h2><a class="header" href="#traits-defining-shared-behavior" id="traits-defining-shared-behavior">Traits: Defining Shared Behavior</a></h2>
|
||
<p>A <em>trait</em> tells the Rust compiler about functionality a particular type has and
|
||
can share with other types. We can use traits to define shared behavior in an
|
||
abstract way. We can use trait bounds to specify that a generic can be any type
|
||
that has certain behavior.</p>
|
||
<blockquote>
|
||
<p>Note: Traits are similar to a feature often called <em>interfaces</em> in other
|
||
languages, although with some differences.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#defining-a-trait" id="defining-a-trait">Defining a Trait</a></h3>
|
||
<p>A type’s behavior consists of the methods we can call on that type. Different
|
||
types share the same behavior if we can call the same methods on all of those
|
||
types. Trait definitions are a way to group method signatures together to
|
||
define a set of behaviors necessary to accomplish some purpose.</p>
|
||
<p>For example, let’s say we have multiple structs that hold various kinds and
|
||
amounts of text: a <code>NewsArticle</code> struct that holds a news story filed in a
|
||
particular location and a <code>Tweet</code> that can have at most 280 characters along
|
||
with metadata that indicates whether it was a new tweet, a retweet, or a reply
|
||
to another tweet.</p>
|
||
<p>We want to make a media aggregator library that can display summaries of data
|
||
that might be stored in a <code>NewsArticle</code> or <code>Tweet</code> instance. To do this, we
|
||
need a summary from each type, and we need to request that summary by calling a
|
||
<code>summarize</code> method on an instance. Listing 10-12 shows the definition of a
|
||
<code>Summary</code> trait that expresses this behavior.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Summary {
|
||
fn summarize(&self) -> String;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-12: A <code>Summary</code> trait that consists of the
|
||
behavior provided by a <code>summarize</code> method</span></p>
|
||
<p>Here, we declare a trait using the <code>trait</code> keyword and then the trait’s name,
|
||
which is <code>Summary</code> in this case. Inside the curly brackets, we declare the
|
||
method signatures that describe the behaviors of the types that implement this
|
||
trait, which in this case is <code>fn summarize(&self) -> String</code>.</p>
|
||
<p>After the method signature, instead of providing an implementation within curly
|
||
brackets, we use a semicolon. Each type implementing this trait must provide
|
||
its own custom behavior for the body of the method. The compiler will enforce
|
||
that any type that has the <code>Summary</code> trait will have the method <code>summarize</code>
|
||
defined with this signature exactly.</p>
|
||
<p>A trait can have multiple methods in its body: the method signatures are listed
|
||
one per line, and each line ends in a semicolon.</p>
|
||
<h3><a class="header" href="#implementing-a-trait-on-a-type" id="implementing-a-trait-on-a-type">Implementing a Trait on a Type</a></h3>
|
||
<p>Now that we’ve defined the desired behavior using the <code>Summary</code> trait, we can
|
||
implement it on the types in our media aggregator. Listing 10-13 shows an
|
||
implementation of the <code>Summary</code> trait on the <code>NewsArticle</code> struct that uses the
|
||
headline, the author, and the location to create the return value of
|
||
<code>summarize</code>. For the <code>Tweet</code> struct, we define <code>summarize</code> as the username
|
||
followed by the entire text of the tweet, assuming that tweet content is
|
||
already limited to 280 characters.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub trait Summary {
|
||
</span><span class="boring"> fn summarize(&self) -> String;
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>pub struct NewsArticle {
|
||
pub headline: String,
|
||
pub location: String,
|
||
pub author: String,
|
||
pub content: String,
|
||
}
|
||
|
||
impl Summary for NewsArticle {
|
||
fn summarize(&self) -> String {
|
||
format!("{}, by {} ({})", self.headline, self.author, self.location)
|
||
}
|
||
}
|
||
|
||
pub struct Tweet {
|
||
pub username: String,
|
||
pub content: String,
|
||
pub reply: bool,
|
||
pub retweet: bool,
|
||
}
|
||
|
||
impl Summary for Tweet {
|
||
fn summarize(&self) -> String {
|
||
format!("{}: {}", self.username, self.content)
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-13: Implementing the <code>Summary</code> trait on the
|
||
<code>NewsArticle</code> and <code>Tweet</code> types</span></p>
|
||
<p>Implementing a trait on a type is similar to implementing regular methods. The
|
||
difference is that after <code>impl</code>, we put the trait name that we want to
|
||
implement, then use the <code>for</code> keyword, and then specify the name of the type we
|
||
want to implement the trait for. Within the <code>impl</code> block, we put the method
|
||
signatures that the trait definition has defined. Instead of adding a semicolon
|
||
after each signature, we use curly brackets and fill in the method body with
|
||
the specific behavior that we want the methods of the trait to have for the
|
||
particular type.</p>
|
||
<p>After implementing the trait, we can call the methods on instances of
|
||
<code>NewsArticle</code> and <code>Tweet</code> in the same way we call regular methods, like this:</p>
|
||
<pre><code class="language-rust ignore">let tweet = Tweet {
|
||
username: String::from("horse_ebooks"),
|
||
content: String::from("of course, as you probably already know, people"),
|
||
reply: false,
|
||
retweet: false,
|
||
};
|
||
|
||
println!("1 new tweet: {}", tweet.summarize());
|
||
</code></pre>
|
||
<p>This code prints <code>1 new tweet: horse_ebooks: of course, as you probably already know, people</code>.</p>
|
||
<p>Note that because we defined the <code>Summary</code> trait and the <code>NewsArticle</code> and
|
||
<code>Tweet</code> types in the same <em>lib.rs</em> in Listing 10-13, they’re all in the same
|
||
scope. Let’s say this <em>lib.rs</em> is for a crate we’ve called <code>aggregator</code> and
|
||
someone else wants to use our crate’s functionality to implement the <code>Summary</code>
|
||
trait on a struct defined within their library’s scope. They would need to
|
||
bring the trait into their scope first. They would do so by specifying <code>use aggregator::Summary;</code>, which then would enable them to implement <code>Summary</code> for
|
||
their type. The <code>Summary</code> trait would also need to be a public trait for
|
||
another crate to implement it, which it is because we put the <code>pub</code> keyword
|
||
before <code>trait</code> in Listing 10-12.</p>
|
||
<p>One restriction to note with trait implementations is that we can implement a
|
||
trait on a type only if either the trait or the type is local to our crate.
|
||
For example, we can implement standard library traits like <code>Display</code> on a
|
||
custom type like <code>Tweet</code> as part of our <code>aggregator</code> crate functionality,
|
||
because the type <code>Tweet</code> is local to our <code>aggregator</code> crate. We can also
|
||
implement <code>Summary</code> on <code>Vec&lt;T&gt;</code> in our <code>aggregator</code> crate, because the
|
||
trait <code>Summary</code> is local to our <code>aggregator</code> crate.</p>
|
||
<p>But we can’t implement external traits on external types. For example, we can’t
|
||
implement the <code>Display</code> trait on <code>Vec&lt;T&gt;</code> within our <code>aggregator</code> crate,
|
||
because <code>Display</code> and <code>Vec&lt;T&gt;</code> are defined in the standard library and aren’t
|
||
local to our <code>aggregator</code> crate. This restriction is part of a property of
|
||
programs called <em>coherence</em>, and more specifically the <em>orphan rule</em>, so named
|
||
because the parent type is not present. This rule ensures that other people’s
|
||
code can’t break your code and vice versa. Without the rule, two crates could
|
||
implement the same trait for the same type, and Rust wouldn’t know which
|
||
implementation to use.</p>
|
||
<h3><a class="header" href="#default-implementations" id="default-implementations">Default Implementations</a></h3>
|
||
<p>Sometimes it’s useful to have default behavior for some or all of the methods
|
||
in a trait instead of requiring implementations for all methods on every type.
|
||
Then, as we implement the trait on a particular type, we can keep or override
|
||
each method’s default behavior.</p>
|
||
<p>Listing 10-14 shows how to specify a default string for the <code>summarize</code> method
|
||
of the <code>Summary</code> trait instead of only defining the method signature, as we did
|
||
in Listing 10-12.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Summary {
|
||
fn summarize(&self) -> String {
|
||
String::from("(Read more...)")
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-14: Definition of a <code>Summary</code> trait with a
|
||
default implementation of the <code>summarize</code> method</span></p>
|
||
<p>To use a default implementation to summarize instances of <code>NewsArticle</code> instead
|
||
of defining a custom implementation, we specify an empty <code>impl</code> block with
|
||
<code>impl Summary for NewsArticle {}</code>.</p>
|
||
<p>Even though we’re no longer defining the <code>summarize</code> method on <code>NewsArticle</code>
|
||
directly, we’ve provided a default implementation and specified that
|
||
<code>NewsArticle</code> implements the <code>Summary</code> trait. As a result, we can still call
|
||
the <code>summarize</code> method on an instance of <code>NewsArticle</code>, like this:</p>
|
||
<pre><code class="language-rust ignore">let article = NewsArticle {
|
||
headline: String::from("Penguins win the Stanley Cup Championship!"),
|
||
location: String::from("Pittsburgh, PA, USA"),
|
||
author: String::from("Iceburgh"),
|
||
content: String::from("The Pittsburgh Penguins once again are the best
|
||
hockey team in the NHL."),
|
||
};
|
||
|
||
println!("New article available! {}", article.summarize());
|
||
</code></pre>
|
||
<p>This code prints <code>New article available! (Read more...)</code>.</p>
|
||
<p>Creating a default implementation for <code>summarize</code> doesn’t require us to change
|
||
anything about the implementation of <code>Summary</code> on <code>Tweet</code> in Listing 10-13. The
|
||
reason is that the syntax for overriding a default implementation is the same
|
||
as the syntax for implementing a trait method that doesn’t have a default
|
||
implementation.</p>
|
||
<p>Default implementations can call other methods in the same trait, even if those
|
||
other methods don’t have a default implementation. In this way, a trait can
|
||
provide a lot of useful functionality and only require implementors to specify
|
||
a small part of it. For example, we could define the <code>Summary</code> trait to have a
|
||
<code>summarize_author</code> method whose implementation is required, and then define a
|
||
<code>summarize</code> method that has a default implementation that calls the
|
||
<code>summarize_author</code> method:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Summary {
|
||
fn summarize_author(&self) -> String;
|
||
|
||
fn summarize(&self) -> String {
|
||
format!("(Read more from {}...)", self.summarize_author())
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>To use this version of <code>Summary</code>, we only need to define <code>summarize_author</code>
|
||
when we implement the trait on a type:</p>
|
||
<pre><code class="language-rust ignore">impl Summary for Tweet {
|
||
fn summarize_author(&self) -> String {
|
||
format!("@{}", self.username)
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>After we define <code>summarize_author</code>, we can call <code>summarize</code> on instances of the
|
||
<code>Tweet</code> struct, and the default implementation of <code>summarize</code> will call the
|
||
definition of <code>summarize_author</code> that we’ve provided. Because we’ve implemented
|
||
<code>summarize_author</code>, the <code>Summary</code> trait has given us the behavior of the
|
||
<code>summarize</code> method without requiring us to write any more code.</p>
|
||
<pre><code class="language-rust ignore">let tweet = Tweet {
|
||
username: String::from("horse_ebooks"),
|
||
content: String::from("of course, as you probably already know, people"),
|
||
reply: false,
|
||
retweet: false,
|
||
};
|
||
|
||
println!("1 new tweet: {}", tweet.summarize());
|
||
</code></pre>
|
||
<p>This code prints <code>1 new tweet: (Read more from @horse_ebooks...)</code>.</p>
|
||
<p>Note that it isn’t possible to call the default implementation from an
|
||
overriding implementation of that same method.</p>
|
||
<h3><a class="header" href="#traits-as-parameters" id="traits-as-parameters">Traits as Parameters</a></h3>
|
||
<p>Now that you know how to define and implement traits, we can explore how to use
|
||
traits to define functions that accept many different types.</p>
|
||
<p>For example, in Listing 10-13, we implemented the <code>Summary</code> trait on the
|
||
<code>NewsArticle</code> and <code>Tweet</code> types. We can define a <code>notify</code> function that calls
|
||
the <code>summarize</code> method on its <code>item</code> parameter, which is of some type that
|
||
implements the <code>Summary</code> trait. To do this, we can use the <code>impl Trait</code>
|
||
syntax, like this:</p>
|
||
<pre><code class="language-rust ignore">pub fn notify(item: impl Summary) {
|
||
println!("Breaking news! {}", item.summarize());
|
||
}
|
||
</code></pre>
|
||
<p>Instead of a concrete type for the <code>item</code> parameter, we specify the <code>impl</code>
|
||
keyword and the trait name. This parameter accepts any type that implements the
|
||
specified trait. In the body of <code>notify</code>, we can call any methods on <code>item</code>
|
||
that come from the <code>Summary</code> trait, such as <code>summarize</code>. We can call <code>notify</code>
|
||
and pass in any instance of <code>NewsArticle</code> or <code>Tweet</code>. Code that calls the
|
||
function with any other type, such as a <code>String</code> or an <code>i32</code>, won’t compile
|
||
because those types don’t implement <code>Summary</code>.</p>
|
||
<h4><a class="header" href="#trait-bound-syntax" id="trait-bound-syntax">Trait Bound Syntax</a></h4>
|
||
<p>The <code>impl Trait</code> syntax works for straightforward cases but is actually
|
||
syntactic sugar for a longer form, which is called a <em>trait bound</em>; it looks like
|
||
this:</p>
|
||
<pre><code class="language-rust ignore">pub fn notify<T: Summary>(item: T) {
|
||
println!("Breaking news! {}", item.summarize());
|
||
}
|
||
</code></pre>
|
||
<p>This longer form is equivalent to the example in the previous section but is
|
||
more verbose. We place trait bounds with the declaration of the generic type
|
||
parameter after a colon and inside angle brackets.</p>
|
||
<p>The <code>impl Trait</code> syntax is convenient and makes for more concise code in simple
|
||
cases. The trait bound syntax can express more complexity in other cases. For
|
||
example, we can have two parameters that implement <code>Summary</code>. Using the <code>impl Trait</code> syntax looks like this:</p>
|
||
<pre><code class="language-rust ignore">pub fn notify(item1: impl Summary, item2: impl Summary) {
|
||
</code></pre>
|
||
<p>If we wanted this function to allow <code>item1</code> and <code>item2</code> to have different
|
||
types, using <code>impl Trait</code> would be appropriate (as long as both types implement
|
||
<code>Summary</code>). If we wanted to force both parameters to have the same type, that’s
|
||
only possible to express using a trait bound, like this:</p>
|
||
<pre><code class="language-rust ignore">pub fn notify<T: Summary>(item1: T, item2: T) {
|
||
</code></pre>
|
||
<p>The generic type <code>T</code> specified as the type of the <code>item1</code> and <code>item2</code>
|
||
parameters constrains the function such that the concrete type of the value
|
||
passed as an argument for <code>item1</code> and <code>item2</code> must be the same.</p>
|
||
<h4><a class="header" href="#specifying-multiple-trait-bounds-with-the--syntax" id="specifying-multiple-trait-bounds-with-the--syntax">Specifying Multiple Trait Bounds with the <code>+</code> Syntax</a></h4>
|
||
<p>We can also specify more than one trait bound. Say we wanted <code>notify</code> to use
|
||
display formatting on <code>item</code> as well as the <code>summarize</code> method: we specify in
|
||
the <code>notify</code> definition that <code>item</code> must implement both <code>Display</code> and
|
||
<code>Summary</code>. We can do so using the <code>+</code> syntax:</p>
|
||
<pre><code class="language-rust ignore">pub fn notify(item: impl Summary + Display) {
|
||
</code></pre>
|
||
<p>The <code>+</code> syntax is also valid with trait bounds on generic types:</p>
|
||
<pre><code class="language-rust ignore">pub fn notify<T: Summary + Display>(item: T) {
|
||
</code></pre>
|
||
<p>With the two trait bounds specified, the body of <code>notify</code> can call <code>summarize</code>
|
||
and use <code>{}</code> to format <code>item</code>.</p>
|
||
<h4><a class="header" href="#clearer-trait-bounds-with-where-clauses" id="clearer-trait-bounds-with-where-clauses">Clearer Trait Bounds with <code>where</code> Clauses</a></h4>
|
||
<p>Using too many trait bounds has its downsides. Each generic has its own trait
|
||
bounds, so functions with multiple generic type parameters can contain lots of
|
||
trait bound information between the function’s name and its parameter list,
|
||
making the function signature hard to read. For this reason, Rust has alternate
|
||
syntax for specifying trait bounds inside a <code>where</code> clause after the function
|
||
signature. So instead of writing this:</p>
|
||
<pre><code class="language-rust ignore">fn some_function<T: Display + Clone, U: Clone + Debug>(t: T, u: U) -> i32 {
|
||
</code></pre>
|
||
<p>we can use a <code>where</code> clause, like this:</p>
|
||
<pre><code class="language-rust ignore">fn some_function<T, U>(t: T, u: U) -> i32
|
||
where T: Display + Clone,
|
||
U: Clone + Debug
|
||
{
|
||
</code></pre>
|
||
<p>This function’s signature is less cluttered: the function name, parameter list,
|
||
and return type are close together, similar to a function without lots of trait
|
||
bounds.</p>
|
||
<h3><a class="header" href="#returning-types-that-implement-traits" id="returning-types-that-implement-traits">Returning Types that Implement Traits</a></h3>
|
||
<p>We can also use the <code>impl Trait</code> syntax in the return position to return a
|
||
value of some type that implements a trait, as shown here:</p>
|
||
<pre><code class="language-rust ignore">fn returns_summarizable() -> impl Summary {
|
||
Tweet {
|
||
username: String::from("horse_ebooks"),
|
||
content: String::from("of course, as you probably already know, people"),
|
||
reply: false,
|
||
retweet: false,
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>By using <code>impl Summary</code> for the return type, we specify that the
|
||
<code>returns_summarizable</code> function returns some type that implements the <code>Summary</code>
|
||
trait without naming the concrete type. In this case, <code>returns_summarizable</code>
|
||
returns a <code>Tweet</code>, but the code calling this function doesn’t know that.</p>
|
||
<p>The ability to return a type that is only specified by the trait it implements
|
||
is especially useful in the context of closures and iterators, which we cover
|
||
in Chapter 13. Closures and iterators create types that only the compiler knows
|
||
or types that are very long to specify. The <code>impl Trait</code> syntax lets you
|
||
concisely specify that a function returns some type that implements the
|
||
<code>Iterator</code> trait without needing to write out a very long type.</p>
|
||
<p>However, you can only use <code>impl Trait</code> if you’re returning a single type. For
|
||
example, this code that returns either a <code>NewsArticle</code> or a <code>Tweet</code> with the
|
||
return type specified as <code>impl Summary</code> wouldn’t work:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn returns_summarizable(switch: bool) -> impl Summary {
|
||
if switch {
|
||
NewsArticle {
|
||
headline: String::from("Penguins win the Stanley Cup Championship!"),
|
||
location: String::from("Pittsburgh, PA, USA"),
|
||
author: String::from("Iceburgh"),
|
||
content: String::from("The Pittsburgh Penguins once again are the best
|
||
hockey team in the NHL."),
|
||
}
|
||
} else {
|
||
Tweet {
|
||
username: String::from("horse_ebooks"),
|
||
content: String::from("of course, as you probably already know, people"),
|
||
reply: false,
|
||
retweet: false,
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>Returning either a <code>NewsArticle</code> or a <code>Tweet</code> isn’t allowed due to restrictions
|
||
around how the <code>impl Trait</code> syntax is implemented in the compiler. We’ll cover
|
||
how to write a function with this behavior in the <a href="ch17-02-trait-objects.html#using-trait-objects-that-allow-for-values-of-different-types">“Using Trait Objects That
|
||
Allow for Values of Different
|
||
Types”</a><!--
|
||
ignore --> section of Chapter 17.</p>
|
||
<h3><a class="header" href="#fixing-the-largest-function-with-trait-bounds" id="fixing-the-largest-function-with-trait-bounds">Fixing the <code>largest</code> Function with Trait Bounds</a></h3>
|
||
<p>Now that you know how to specify the behavior you want by using the generic
|
||
type parameter’s bounds, let’s return to Listing 10-5 to fix the definition of
|
||
the <code>largest</code> function that uses a generic type parameter! Last time we tried
|
||
to run that code, we received this error:</p>
|
||
<pre><code class="language-text">error[E0369]: binary operation `>` cannot be applied to type `T`
|
||
--> src/main.rs:5:12
|
||
|
|
||
5 | if item > largest {
|
||
| ^^^^^^^^^^^^^^
|
||
|
|
||
= note: an implementation of `std::cmp::PartialOrd` might be missing for `T`
|
||
</code></pre>
|
||
<p>In the body of <code>largest</code> we wanted to compare two values of type <code>T</code> using the
|
||
greater than (<code>></code>) operator. Because that operator is defined as a default
|
||
method on the standard library trait <code>std::cmp::PartialOrd</code>, we need to specify
|
||
<code>PartialOrd</code> in the trait bounds for <code>T</code> so the <code>largest</code> function can work on
|
||
slices of any type that we can compare. We don’t need to bring <code>PartialOrd</code>
|
||
into scope because it’s in the prelude. Change the signature of <code>largest</code> to
|
||
look like this:</p>
|
||
<pre><code class="language-rust ignore">fn largest<T: PartialOrd>(list: &[T]) -> T {
|
||
</code></pre>
|
||
<p>This time when we compile the code, we get a different set of errors:</p>
|
||
<pre><code class="language-text">error[E0508]: cannot move out of type `[T]`, a non-copy slice
|
||
--> src/main.rs:2:23
|
||
|
|
||
2 | let mut largest = list[0];
|
||
| ^^^^^^^
|
||
| |
|
||
| cannot move out of here
|
||
| help: consider using a reference instead: `&list[0]`
|
||
|
||
error[E0507]: cannot move out of borrowed content
|
||
--> src/main.rs:4:9
|
||
|
|
||
4 | for &item in list.iter() {
|
||
| ^----
|
||
| ||
|
||
| |hint: to prevent move, use `ref item` or `ref mut item`
|
||
| cannot move out of borrowed content
|
||
</code></pre>
|
||
<p>The key line in this error is <code>cannot move out of type [T], a non-copy slice</code>.
|
||
With our non-generic versions of the <code>largest</code> function, we were only trying to
|
||
find the largest <code>i32</code> or <code>char</code>. As discussed in the <a href="ch04-01-what-is-ownership.html#stack-only-data-copy">“Stack-Only Data:
|
||
Copy”</a><!-- ignore --> section in Chapter 4, types like
|
||
<code>i32</code> and <code>char</code> that have a known size can be stored on the stack, so they
|
||
implement the <code>Copy</code> trait. But when we made the <code>largest</code> function generic,
|
||
it became possible for the <code>list</code> parameter to have types in it that don’t
|
||
implement the <code>Copy</code> trait. Consequently, we wouldn’t be able to move the
|
||
value out of <code>list[0]</code> and into the <code>largest</code> variable, resulting in this
|
||
error.</p>
|
||
<p>To call this code with only those types that implement the <code>Copy</code> trait, we can
|
||
add <code>Copy</code> to the trait bounds of <code>T</code>! Listing 10-15 shows the complete code of
|
||
a generic <code>largest</code> function that will compile as long as the types of the
|
||
values in the slice that we pass into the function implement the <code>PartialOrd</code>
|
||
<em>and</em> <code>Copy</code> traits, like <code>i32</code> and <code>char</code> do.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn largest<T: PartialOrd + Copy>(list: &[T]) -> T {
|
||
let mut largest = list[0];
|
||
|
||
for &item in list.iter() {
|
||
if item > largest {
|
||
largest = item;
|
||
}
|
||
}
|
||
|
||
largest
|
||
}
|
||
|
||
fn main() {
|
||
let number_list = vec![34, 50, 25, 100, 65];
|
||
|
||
let result = largest(&number_list);
|
||
println!("The largest number is {}", result);
|
||
|
||
let char_list = vec!['y', 'm', 'a', 'q'];
|
||
|
||
let result = largest(&char_list);
|
||
println!("The largest char is {}", result);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-15: A working definition of the <code>largest</code>
|
||
function that works on any generic type that implements the <code>PartialOrd</code> and
|
||
<code>Copy</code> traits</span></p>
|
||
<p>If we don’t want to restrict the <code>largest</code> function to the types that implement
|
||
the <code>Copy</code> trait, we could specify that <code>T</code> has the trait bound <code>Clone</code> instead
|
||
of <code>Copy</code>. Then we could clone each value in the slice when we want the
|
||
<code>largest</code> function to have ownership. Using the <code>clone</code> function means we’re
|
||
potentially making more heap allocations in the case of types that own heap
|
||
data like <code>String</code>, and heap allocations can be slow if we’re working with
|
||
large amounts of data.</p>
|
||
<p>Another way we could implement <code>largest</code> is for the function to return a
|
||
reference to a <code>T</code> value in the slice. If we change the return type to <code>&T</code>
|
||
instead of <code>T</code>, thereby changing the body of the function to return a
|
||
reference, we wouldn’t need the <code>Clone</code> or <code>Copy</code> trait bounds and we could
|
||
avoid heap allocations. Try implementing these alternate solutions on your own!</p>
|
||
<h3><a class="header" href="#using-trait-bounds-to-conditionally-implement-methods" id="using-trait-bounds-to-conditionally-implement-methods">Using Trait Bounds to Conditionally Implement Methods</a></h3>
|
||
<p>By using a trait bound with an <code>impl</code> block that uses generic type parameters,
|
||
we can implement methods conditionally for types that implement the specified
|
||
traits. For example, the type <code>Pair&lt;T&gt;</code> in Listing 10-16 always implements the
|
||
<code>new</code> function. But <code>Pair&lt;T&gt;</code> only implements the <code>cmp_display</code> method if its
|
||
inner type <code>T</code> implements the <code>PartialOrd</code> trait that enables comparison <em>and</em>
|
||
the <code>Display</code> trait that enables printing.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::fmt::Display;
|
||
|
||
struct Pair<T> {
|
||
x: T,
|
||
y: T,
|
||
}
|
||
|
||
impl<T> Pair<T> {
|
||
fn new(x: T, y: T) -> Self {
|
||
Self {
|
||
x,
|
||
y,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<T: Display + PartialOrd> Pair<T> {
|
||
fn cmp_display(&self) {
|
||
if self.x >= self.y {
|
||
println!("The largest member is x = {}", self.x);
|
||
} else {
|
||
println!("The largest member is y = {}", self.y);
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-16: Conditionally implementing methods on a
|
||
generic type depending on trait bounds</span></p>
|
||
<p>We can also conditionally implement a trait for any type that implements
|
||
another trait. Implementations of a trait on any type that satisfies the trait
|
||
bounds are called <em>blanket implementations</em> and are extensively used in the
|
||
Rust standard library. For example, the standard library implements the
|
||
<code>ToString</code> trait on any type that implements the <code>Display</code> trait. The <code>impl</code>
|
||
block in the standard library looks similar to this code:</p>
|
||
<pre><code class="language-rust ignore">impl<T: Display> ToString for T {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>Because the standard library has this blanket implementation, we can call the
|
||
<code>to_string</code> method defined by the <code>ToString</code> trait on any type that implements
|
||
the <code>Display</code> trait. For example, we can turn integers into their corresponding
|
||
<code>String</code> values like this because integers implement <code>Display</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = 3.to_string();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Blanket implementations appear in the documentation for the trait in the
|
||
“Implementors” section.</p>
|
||
<p>Traits and trait bounds let us write code that uses generic type parameters to
|
||
reduce duplication but also specify to the compiler that we want the generic
|
||
type to have particular behavior. The compiler can then use the trait bound
|
||
information to check that all the concrete types used with our code provide the
|
||
correct behavior. In dynamically typed languages, we would get an error at
|
||
runtime if we called a method on a type which didn’t define the method. But
|
||
Rust moves these errors to compile time so we’re forced
|
||
to fix the problems before our code is even able to run. Additionally, we don’t
|
||
have to write code that checks for behavior at runtime because we’ve already
|
||
checked at compile time. Doing so improves performance without having to give
|
||
up the flexibility of generics.</p>
|
||
<p>Another kind of generic that we’ve already been using is called <em>lifetimes</em>.
|
||
Rather than ensuring that a type has the behavior we want, lifetimes ensure
|
||
that references are valid as long as we need them to be. Let’s look at how
|
||
lifetimes do that.</p>
|
||
<h2><a class="header" href="#validating-references-with-lifetimes" id="validating-references-with-lifetimes">Validating References with Lifetimes</a></h2>
|
||
<p>One detail we didn’t discuss in the <a href="ch04-02-references-and-borrowing.html#references-and-borrowing">“References and
|
||
Borrowing”</a><!-- ignore --> section in Chapter 4 is
|
||
that every reference in Rust has a <em>lifetime</em>, which is the scope for which
|
||
that reference is valid. Most of the time, lifetimes are implicit and
|
||
inferred, just like most of the time, types are inferred. We must annotate
|
||
types when multiple types are possible. In a similar way, we must annotate
|
||
lifetimes when the lifetimes of references could be related in a few different
|
||
ways. Rust requires us to annotate the relationships using generic lifetime
|
||
parameters to ensure the actual references used at runtime will definitely be
|
||
valid.</p>
|
||
<p>The concept of lifetimes is somewhat different from tools in other programming
|
||
languages, arguably making lifetimes Rust’s most distinctive feature. Although
|
||
we won’t cover lifetimes in their entirety in this chapter, we’ll discuss
|
||
common ways you might encounter lifetime syntax so you can become familiar with
|
||
the concepts.</p>
|
||
<h3><a class="header" href="#preventing-dangling-references-with-lifetimes" id="preventing-dangling-references-with-lifetimes">Preventing Dangling References with Lifetimes</a></h3>
|
||
<p>The main aim of lifetimes is to prevent dangling references, which cause a
|
||
program to reference data other than the data it’s intended to reference.
|
||
Consider the program in Listing 10-17, which has an outer scope and an inner
|
||
scope.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">{
|
||
let r;
|
||
|
||
{
|
||
let x = 5;
|
||
r = &x;
|
||
}
|
||
|
||
println!("r: {}", r);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 10-17: An attempt to use a reference whose value
|
||
has gone out of scope</span></p>
|
||
<blockquote>
|
||
<p>Note: The examples in Listings 10-17, 10-18, and 10-24 declare variables
|
||
without giving them an initial value, so the variable name exists in the
|
||
outer scope. At first glance, this might appear to be in conflict with Rust’s
|
||
having no null values. However, if we try to use a variable before giving it
|
||
a value, we’ll get a compile-time error, which shows that Rust indeed does
|
||
not allow null values.</p>
|
||
</blockquote>
|
||
<p>The outer scope declares a variable named <code>r</code> with no initial value, and the
|
||
inner scope declares a variable named <code>x</code> with the initial value of 5. Inside
|
||
the inner scope, we attempt to set the value of <code>r</code> as a reference to <code>x</code>. Then
|
||
the inner scope ends, and we attempt to print the value in <code>r</code>. This code won’t
|
||
compile because the value that <code>r</code> refers to has gone out of scope before we
|
||
try to use it. Here is the error message:</p>
|
||
<pre><code class="language-text">error[E0597]: `x` does not live long enough
|
||
--> src/main.rs:7:5
|
||
|
|
||
6 | r = &x;
|
||
| - borrow occurs here
|
||
7 | }
|
||
| ^ `x` dropped here while still borrowed
|
||
...
|
||
10 | }
|
||
| - borrowed value needs to live until here
|
||
</code></pre>
|
||
<p>The variable <code>x</code> doesn’t “live long enough.” The reason is that <code>x</code> will be out
|
||
of scope when the inner scope ends on line 7. But <code>r</code> is still valid for the
|
||
outer scope; because its scope is larger, we say that it “lives longer.” If
|
||
Rust allowed this code to work, <code>r</code> would be referencing memory that was
|
||
deallocated when <code>x</code> went out of scope, and anything we tried to do with <code>r</code>
|
||
wouldn’t work correctly. So how does Rust determine that this code is invalid?
|
||
It uses a borrow checker.</p>
|
||
<h3><a class="header" href="#the-borrow-checker" id="the-borrow-checker">The Borrow Checker</a></h3>
|
||
<p>The Rust compiler has a <em>borrow checker</em> that compares scopes to determine
|
||
whether all borrows are valid. Listing 10-18 shows the same code as Listing
|
||
10-17 but with annotations showing the lifetimes of the variables.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">{
    let r;                // ---------+-- 'a
                          //          |
    {                     //          |
        let x = 5;        // -+-- 'b  |
        r = &x;           //  |       |
    }                     // -+       |
                          //          |
    println!("r: {}", r); //          |
}                         // ---------+
</code></pre>
|
||
<p><span class="caption">Listing 10-18: Annotations of the lifetimes of <code>r</code> and
|
||
<code>x</code>, named <code>'a</code> and <code>'b</code>, respectively</span></p>
|
||
<p>Here, we’ve annotated the lifetime of <code>r</code> with <code>'a</code> and the lifetime of <code>x</code>
|
||
with <code>'b</code>. As you can see, the inner <code>'b</code> block is much smaller than the outer
|
||
<code>'a</code> lifetime block. At compile time, Rust compares the size of the two
|
||
lifetimes and sees that <code>r</code> has a lifetime of <code>'a</code> but that it refers to memory
|
||
with a lifetime of <code>'b</code>. The program is rejected because <code>'b</code> is shorter than
|
||
<code>'a</code>: the subject of the reference doesn’t live as long as the reference.</p>
|
||
<p>Listing 10-19 fixes the code so it doesn’t have a dangling reference and
|
||
compiles without any errors.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
<span class="boring">#![allow(unused_variables)]
</span><span class="boring">fn main() {
</span>{
    let x = 5;            // ----------+-- 'b
                          //           |
    let r = &x;           // --+-- 'a  |
                          //   |       |
    println!("r: {}", r); //   |       |
                          // --+       |
}                         // ----------+
<span class="boring">}
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-19: A valid reference because the data has a
|
||
longer lifetime than the reference</span></p>
|
||
<p>Here, <code>x</code> has the lifetime <code>'b</code>, which in this case is larger than <code>'a</code>. This
|
||
means <code>r</code> can reference <code>x</code> because Rust knows that the reference in <code>r</code> will
|
||
always be valid while <code>x</code> is valid.</p>
|
||
<p>Now that you know where the lifetimes of references are and how Rust analyzes
|
||
lifetimes to ensure references will always be valid, let’s explore generic
|
||
lifetimes of parameters and return values in the context of functions.</p>
|
||
<h3><a class="header" href="#generic-lifetimes-in-functions" id="generic-lifetimes-in-functions">Generic Lifetimes in Functions</a></h3>
|
||
<p>Let’s write a function that returns the longer of two string slices. This
|
||
function will take two string slices and return a string slice. After we’ve
|
||
implemented the <code>longest</code> function, the code in Listing 10-20 should print <code>The longest string is abcd</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let string1 = String::from("abcd");
|
||
let string2 = "xyz";
|
||
|
||
let result = longest(string1.as_str(), string2);
|
||
println!("The longest string is {}", result);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 10-20: A <code>main</code> function that calls the <code>longest</code>
|
||
function to find the longer of two string slices</span></p>
|
||
<p>Note that we want the function to take string slices, which are references,
|
||
because we don’t want the <code>longest</code> function to take ownership of its
|
||
parameters. We want to allow the function to accept slices of a <code>String</code> (the
|
||
type stored in the variable <code>string1</code>) as well as string literals (which is
|
||
what variable <code>string2</code> contains).</p>
|
||
<p>Refer to the <a href="ch04-03-slices.html#string-slices-as-parameters">“String Slices as Parameters”</a><!--
|
||
ignore --> section in Chapter 4 for more discussion about why the parameters we
|
||
use in Listing 10-20 are the ones we want.</p>
|
||
<p>If we try to implement the <code>longest</code> function as shown in Listing 10-21, it
|
||
won’t compile.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn longest(x: &str, y: &str) -> &str {
|
||
if x.len() > y.len() {
|
||
x
|
||
} else {
|
||
y
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 10-21: An implementation of the <code>longest</code>
|
||
function that returns the longer of two string slices but does not yet
|
||
compile</span></p>
|
||
<p>Instead, we get the following error that talks about lifetimes:</p>
|
||
<pre><code class="language-text">error[E0106]: missing lifetime specifier
 --> src/main.rs:1:33
  |
1 | fn longest(x: &str, y: &str) -> &str {
  |                                 ^ expected lifetime parameter
  |
  = help: this function's return type contains a borrowed value, but the
signature does not say whether it is borrowed from `x` or `y`
</code></pre>
|
||
<p>The help text reveals that the return type needs a generic lifetime parameter
|
||
on it because Rust can’t tell whether the reference being returned refers to
|
||
<code>x</code> or <code>y</code>. Actually, we don’t know either, because the <code>if</code> block in the body
|
||
of this function returns a reference to <code>x</code> and the <code>else</code> block returns a
|
||
reference to <code>y</code>!</p>
|
||
<p>When we’re defining this function, we don’t know the concrete values that will
|
||
be passed into this function, so we don’t know whether the <code>if</code> case or the
|
||
<code>else</code> case will execute. We also don’t know the concrete lifetimes of the
|
||
references that will be passed in, so we can’t look at the scopes as we did in
|
||
Listings 10-18 and 10-19 to determine whether the reference we return will
|
||
always be valid. The borrow checker can’t determine this either, because it
|
||
doesn’t know how the lifetimes of <code>x</code> and <code>y</code> relate to the lifetime of the
|
||
return value. To fix this error, we’ll add generic lifetime parameters that
|
||
define the relationship between the references so the borrow checker can
|
||
perform its analysis.</p>
|
||
<h3><a class="header" href="#lifetime-annotation-syntax" id="lifetime-annotation-syntax">Lifetime Annotation Syntax</a></h3>
|
||
<p>Lifetime annotations don’t change how long any of the references live. Just
|
||
as functions can accept any type when the signature specifies a generic type
|
||
parameter, functions can accept references with any lifetime by specifying a
|
||
generic lifetime parameter. Lifetime annotations describe the relationships of
|
||
the lifetimes of multiple references to each other without affecting the
|
||
lifetimes.</p>
|
||
<p>Lifetime annotations have a slightly unusual syntax: the names of lifetime
|
||
parameters must start with an apostrophe (<code>'</code>) and are usually all lowercase and
|
||
very short, like generic types. Most people use the name <code>'a</code>. We place
|
||
lifetime parameter annotations after the <code>&</code> of a reference, using a space to
|
||
separate the annotation from the reference’s type.</p>
|
||
<p>Here are some examples: a reference to an <code>i32</code> without a lifetime parameter, a
|
||
reference to an <code>i32</code> that has a lifetime parameter named <code>'a</code>, and a mutable
|
||
reference to an <code>i32</code> that also has the lifetime <code>'a</code>.</p>
|
||
<pre><code class="language-rust ignore">&i32 // a reference
|
||
&'a i32 // a reference with an explicit lifetime
|
||
&'a mut i32 // a mutable reference with an explicit lifetime
|
||
</code></pre>
|
||
<p>One lifetime annotation by itself doesn’t have much meaning, because the
|
||
annotations are meant to tell Rust how generic lifetime parameters of multiple
|
||
references relate to each other. For example, let’s say we have a function with
|
||
the parameter <code>first</code> that is a reference to an <code>i32</code> with lifetime <code>'a</code>. The
|
||
function also has another parameter named <code>second</code> that is another reference to
|
||
an <code>i32</code> that also has the lifetime <code>'a</code>. The lifetime annotations indicate
|
||
that the references <code>first</code> and <code>second</code> must both live as long as that generic
|
||
lifetime.</p>
|
||
<h3><a class="header" href="#lifetime-annotations-in-function-signatures" id="lifetime-annotations-in-function-signatures">Lifetime Annotations in Function Signatures</a></h3>
|
||
<p>Now let’s examine lifetime annotations in the context of the <code>longest</code>
|
||
function. As with generic type parameters, we need to declare generic lifetime
|
||
parameters inside angle brackets between the function name and the parameter
|
||
list. The constraint we want to express in this signature is that all the
|
||
references in the parameters and the return value must have the same lifetime.
|
||
We’ll name the lifetime <code>'a</code> and then add it to each reference, as shown in
|
||
Listing 10-22.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn longest<'a>(x: &'a str, y: &'a str) -> &'a str {
|
||
if x.len() > y.len() {
|
||
x
|
||
} else {
|
||
y
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-22: The <code>longest</code> function definition
|
||
specifying that all the references in the signature must have the same lifetime
|
||
<code>'a</code></span></p>
|
||
<p>This code should compile and produce the result we want when we use it with the
|
||
<code>main</code> function in Listing 10-20.</p>
|
||
<p>The function signature now tells Rust that for some lifetime <code>'a</code>, the function
|
||
takes two parameters, both of which are string slices that live at least as
|
||
long as lifetime <code>'a</code>. The function signature also tells Rust that the string
|
||
slice returned from the function will live at least as long as lifetime <code>'a</code>.
|
||
In practice, it means that the lifetime of the reference returned by the
|
||
<code>longest</code> function is the same as the smaller of the lifetimes of the
|
||
references passed in. These constraints are what we want Rust to enforce.
|
||
Remember, when we specify the lifetime parameters in this function signature,
|
||
we’re not changing the lifetimes of any values passed in or returned. Rather,
|
||
we’re specifying that the borrow checker should reject any values that don’t
|
||
adhere to these constraints. Note that the <code>longest</code> function doesn’t need to
|
||
know exactly how long <code>x</code> and <code>y</code> will live, only that some scope can be
|
||
substituted for <code>'a</code> that will satisfy this signature.</p>
|
||
<p>When annotating lifetimes in functions, the annotations go in the function
|
||
signature, not in the function body. Rust can analyze the code within the
|
||
function without any help. However, when a function has references to or from
|
||
code outside that function, it becomes almost impossible for Rust to figure out
|
||
the lifetimes of the parameters or return values on its own. The lifetimes
|
||
might be different each time the function is called. This is why we need to
|
||
annotate the lifetimes manually.</p>
|
||
<p>When we pass concrete references to <code>longest</code>, the concrete lifetime that is
|
||
substituted for <code>'a</code> is the part of the scope of <code>x</code> that overlaps with the
|
||
scope of <code>y</code>. In other words, the generic lifetime <code>'a</code> will get the concrete
|
||
lifetime that is equal to the smaller of the lifetimes of <code>x</code> and <code>y</code>. Because
|
||
we’ve annotated the returned reference with the same lifetime parameter <code>'a</code>,
|
||
the returned reference will also be valid for the length of the smaller of the
|
||
lifetimes of <code>x</code> and <code>y</code>.</p>
|
||
<p>Let’s look at how the lifetime annotations restrict the <code>longest</code> function by
|
||
passing in references that have different concrete lifetimes. Listing 10-23 is
|
||
a straightforward example.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn longest<'a>(x: &'a str, y: &'a str) -> &'a str {
|
||
</span><span class="boring"> if x.len() > y.len() {
|
||
</span><span class="boring"> x
|
||
</span><span class="boring"> } else {
|
||
</span><span class="boring"> y
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let string1 = String::from("long string is long");
|
||
|
||
{
|
||
let string2 = String::from("xyz");
|
||
let result = longest(string1.as_str(), string2.as_str());
|
||
println!("The longest string is {}", result);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-23: Using the <code>longest</code> function with
|
||
references to <code>String</code> values that have different concrete lifetimes</span></p>
|
||
<p>In this example, <code>string1</code> is valid until the end of the outer scope, <code>string2</code>
|
||
is valid until the end of the inner scope, and <code>result</code> references something
|
||
that is valid until the end of the inner scope. Run this code, and you’ll see
|
||
that the borrow checker approves of this code; it will compile and print <code>The longest string is long string is long</code>.</p>
|
||
<p>Next, let’s try an example that shows that the lifetime of the reference in
|
||
<code>result</code> must be the smaller lifetime of the two arguments. We’ll move the
|
||
declaration of the <code>result</code> variable outside the inner scope but leave the
|
||
assignment of the value to the <code>result</code> variable inside the scope with
|
||
<code>string2</code>. Then we’ll move the <code>println!</code> that uses <code>result</code> outside the inner
|
||
scope, after the inner scope has ended. The code in Listing 10-24 will not
|
||
compile.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let string1 = String::from("long string is long");
|
||
let result;
|
||
{
|
||
let string2 = String::from("xyz");
|
||
result = longest(string1.as_str(), string2.as_str());
|
||
}
|
||
println!("The longest string is {}", result);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 10-24: Attempting to use <code>result</code> after <code>string2</code>
|
||
has gone out of scope</span></p>
|
||
<p>When we try to compile this code, we’ll get this error:</p>
|
||
<pre><code class="language-text">error[E0597]: `string2` does not live long enough
  --> src/main.rs:15:5
   |
14 |         result = longest(string1.as_str(), string2.as_str());
   |                                            ------- borrow occurs here
15 |     }
   |     ^ `string2` dropped here while still borrowed
16 |     println!("The longest string is {}", result);
17 | }
   | - borrowed value needs to live until here
</code></pre>
|
||
<p>The error shows that for <code>result</code> to be valid for the <code>println!</code> statement,
|
||
<code>string2</code> would need to be valid until the end of the outer scope. Rust knows
|
||
this because we annotated the lifetimes of the function parameters and return
|
||
values using the same lifetime parameter <code>'a</code>.</p>
|
||
<p>As humans, we can look at this code and see that <code>string1</code> is longer than
|
||
<code>string2</code> and therefore <code>result</code> will contain a reference to <code>string1</code>.
|
||
Because <code>string1</code> has not gone out of scope yet, a reference to <code>string1</code> will
|
||
still be valid for the <code>println!</code> statement. However, the compiler can’t see
|
||
that the reference is valid in this case. We’ve told Rust that the lifetime of
|
||
the reference returned by the <code>longest</code> function is the same as the smaller of
|
||
the lifetimes of the references passed in. Therefore, the borrow checker
|
||
disallows the code in Listing 10-24 as possibly having an invalid reference.</p>
|
||
<p>Try designing more experiments that vary the values and lifetimes of the
|
||
references passed in to the <code>longest</code> function and how the returned reference
|
||
is used. Make hypotheses about whether or not your experiments will pass the
|
||
borrow checker before you compile; then check to see if you’re right!</p>
|
||
<h3><a class="header" href="#thinking-in-terms-of-lifetimes" id="thinking-in-terms-of-lifetimes">Thinking in Terms of Lifetimes</a></h3>
|
||
<p>The way in which you need to specify lifetime parameters depends on what your
|
||
function is doing. For example, if we changed the implementation of the
|
||
<code>longest</code> function to always return the first parameter rather than the longest
|
||
string slice, we wouldn’t need to specify a lifetime on the <code>y</code> parameter. The
|
||
following code will compile:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn longest<'a>(x: &'a str, y: &str) -> &'a str {
|
||
x
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>In this example, we’ve specified a lifetime parameter <code>'a</code> for the parameter
|
||
<code>x</code> and the return type, but not for the parameter <code>y</code>, because the lifetime of
|
||
<code>y</code> does not have any relationship with the lifetime of <code>x</code> or the return value.</p>
|
||
<p>When returning a reference from a function, the lifetime parameter for the
|
||
return type needs to match the lifetime parameter for one of the parameters. If
|
||
the reference returned does <em>not</em> refer to one of the parameters, it must refer
|
||
to a value created within this function, which would be a dangling reference
|
||
because the value will go out of scope at the end of the function. Consider
|
||
this attempted implementation of the <code>longest</code> function that won’t compile:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn longest<'a>(x: &str, y: &str) -> &'a str {
|
||
let result = String::from("really long string");
|
||
result.as_str()
|
||
}
|
||
</code></pre>
|
||
<p>Here, even though we’ve specified a lifetime parameter <code>'a</code> for the return
|
||
type, this implementation will fail to compile because the return value
|
||
lifetime is not related to the lifetime of the parameters at all. Here is the
|
||
error message we get:</p>
|
||
<pre><code class="language-text">error[E0597]: `result` does not live long enough
 --> src/main.rs:3:5
  |
3 |     result.as_str()
  |     ^^^^^^ does not live long enough
4 | }
  | - borrowed value only lives until here
  |
note: borrowed value must be valid for the lifetime 'a as defined on the
function body at 1:1...
 --> src/main.rs:1:1
  |
1 | / fn longest<'a>(x: &str, y: &str) -> &'a str {
2 | |     let result = String::from("really long string");
3 | |     result.as_str()
4 | | }
  | |_^
</code></pre>
|
||
<p>The problem is that <code>result</code> goes out of scope and gets cleaned up at the end
|
||
of the <code>longest</code> function. We’re also trying to return a reference to <code>result</code>
|
||
from the function. There is no way we can specify lifetime parameters that
|
||
would change the dangling reference, and Rust won’t let us create a dangling
|
||
reference. In this case, the best fix would be to return an owned data type
|
||
rather than a reference so the calling function is then responsible for
|
||
cleaning up the value.</p>
|
||
<p>Ultimately, lifetime syntax is about connecting the lifetimes of various
|
||
parameters and return values of functions. Once they’re connected, Rust has
|
||
enough information to allow memory-safe operations and disallow operations that
|
||
would create dangling pointers or otherwise violate memory safety.</p>
|
||
<h3><a class="header" href="#lifetime-annotations-in-struct-definitions" id="lifetime-annotations-in-struct-definitions">Lifetime Annotations in Struct Definitions</a></h3>
|
||
<p>So far, we’ve only defined structs to hold owned types. It’s possible for
|
||
structs to hold references, but in that case we would need to add a lifetime
|
||
annotation on every reference in the struct’s definition. Listing 10-25 has a
|
||
struct named <code>ImportantExcerpt</code> that holds a string slice.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct ImportantExcerpt<'a> {
|
||
part: &'a str,
|
||
}
|
||
|
||
fn main() {
|
||
let novel = String::from("Call me Ishmael. Some years ago...");
|
||
let first_sentence = novel.split('.')
|
||
.next()
|
||
.expect("Could not find a '.'");
|
||
let i = ImportantExcerpt { part: first_sentence };
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 10-25: A struct that holds a reference, so its
|
||
definition needs a lifetime annotation</span></p>
|
||
<p>This struct has one field, <code>part</code>, that holds a string slice, which is a
|
||
reference. As with generic data types, we declare the name of the generic
|
||
lifetime parameter inside angle brackets after the name of the struct so we can
|
||
use the lifetime parameter in the body of the struct definition. This
|
||
annotation means an instance of <code>ImportantExcerpt</code> can’t outlive the reference
|
||
it holds in its <code>part</code> field.</p>
|
||
<p>The <code>main</code> function here creates an instance of the <code>ImportantExcerpt</code> struct
|
||
that holds a reference to the first sentence of the <code>String</code> owned by the
|
||
variable <code>novel</code>. The data in <code>novel</code> exists before the <code>ImportantExcerpt</code>
|
||
instance is created. In addition, <code>novel</code> doesn’t go out of scope until after
|
||
the <code>ImportantExcerpt</code> goes out of scope, so the reference in the
|
||
<code>ImportantExcerpt</code> instance is valid.</p>
|
||
<h3><a class="header" href="#lifetime-elision" id="lifetime-elision">Lifetime Elision</a></h3>
|
||
<p>You’ve learned that every reference has a lifetime and that you need to specify
|
||
lifetime parameters for functions or structs that use references. However, in
|
||
Chapter 4 we had a function in Listing 4-9, which is shown again in Listing
|
||
10-26, that compiled without lifetime annotations.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn first_word(s: &str) -> &str {
|
||
let bytes = s.as_bytes();
|
||
|
||
for (i, &item) in bytes.iter().enumerate() {
|
||
if item == b' ' {
|
||
return &s[0..i];
|
||
}
|
||
}
|
||
|
||
&s[..]
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 10-26: A function we defined in Listing 4-9 that
|
||
compiled without lifetime annotations, even though the parameter and return
|
||
type are references</span></p>
|
||
<p>The reason this function compiles without lifetime annotations is historical:
|
||
in early versions (pre-1.0) of Rust, this code wouldn’t have compiled because
|
||
every reference needed an explicit lifetime. At that time, the function
|
||
signature would have been written like this:</p>
|
||
<pre><code class="language-rust ignore">fn first_word<'a>(s: &'a str) -> &'a str {
|
||
</code></pre>
|
||
<p>After writing a lot of Rust code, the Rust team found that Rust programmers
|
||
were entering the same lifetime annotations over and over in particular
|
||
situations. These situations were predictable and followed a few deterministic
|
||
patterns. The developers programmed these patterns into the compiler’s code so
|
||
the borrow checker could infer the lifetimes in these situations and wouldn’t
|
||
need explicit annotations.</p>
|
||
<p>This piece of Rust history is relevant because it’s possible that more
|
||
deterministic patterns will emerge and be added to the compiler. In the future,
|
||
even fewer lifetime annotations might be required.</p>
|
||
<p>The patterns programmed into Rust’s analysis of references are called the
|
||
<em>lifetime elision rules</em>. These aren’t rules for programmers to follow; they’re
|
||
a set of particular cases that the compiler will consider, and if your code
|
||
fits these cases, you don’t need to write the lifetimes explicitly.</p>
|
||
<p>The elision rules don’t provide full inference. If Rust deterministically
|
||
applies the rules but there is still ambiguity as to what lifetimes the
|
||
references have, the compiler won’t guess what the lifetime of the remaining
|
||
references should be. In this case, instead of guessing, the compiler will give
|
||
you an error that you can resolve by adding the lifetime annotations that
|
||
specify how the references relate to each other.</p>
|
||
<p>Lifetimes on function or method parameters are called <em>input lifetimes</em>, and
|
||
lifetimes on return values are called <em>output lifetimes</em>.</p>
|
||
<p>The compiler uses three rules to figure out what lifetimes references have when
|
||
there aren’t explicit annotations. The first rule applies to input lifetimes,
|
||
and the second and third rules apply to output lifetimes. If the compiler gets
|
||
to the end of the three rules and there are still references for which it can’t
|
||
figure out lifetimes, the compiler will stop with an error. These rules apply
|
||
to <code>fn</code> definitions as well as <code>impl</code> blocks.</p>
|
||
<p>The first rule is that each parameter that is a reference gets its own lifetime
|
||
parameter. In other words, a function with one parameter gets one lifetime
|
||
parameter: <code>fn foo<'a>(x: &'a i32)</code>; a function with two parameters gets two
|
||
separate lifetime parameters: <code>fn foo<'a, 'b>(x: &'a i32, y: &'b i32)</code>; and so
|
||
on.</p>
|
||
<p>The second rule is if there is exactly one input lifetime parameter, that
|
||
lifetime is assigned to all output lifetime parameters: <code>fn foo<'a>(x: &'a i32) -> &'a i32</code>.</p>
|
||
<p>The third rule is if there are multiple input lifetime parameters, but one of
|
||
them is <code>&self</code> or <code>&mut self</code> because this is a method, the lifetime of <code>self</code>
|
||
is assigned to all output lifetime parameters. This third rule makes methods
|
||
much nicer to read and write because fewer symbols are necessary.</p>
|
||
<p>Let’s pretend we’re the compiler. We’ll apply these rules to figure out what
|
||
the lifetimes of the references in the signature of the <code>first_word</code> function
|
||
in Listing 10-26 are. The signature starts without any lifetimes associated
|
||
with the references:</p>
|
||
<pre><code class="language-rust ignore">fn first_word(s: &str) -> &str {
|
||
</code></pre>
|
||
<p>Then the compiler applies the first rule, which specifies that each parameter
|
||
gets its own lifetime. We’ll call it <code>'a</code> as usual, so now the signature is
|
||
this:</p>
|
||
<pre><code class="language-rust ignore">fn first_word<'a>(s: &'a str) -> &str {
|
||
</code></pre>
|
||
<p>The second rule applies because there is exactly one input lifetime. The second
|
||
rule specifies that the lifetime of the one input parameter gets assigned to
|
||
the output lifetime, so the signature is now this:</p>
|
||
<pre><code class="language-rust ignore">fn first_word<'a>(s: &'a str) -> &'a str {
|
||
</code></pre>
|
||
<p>Now all the references in this function signature have lifetimes, and the
|
||
compiler can continue its analysis without needing the programmer to annotate
|
||
the lifetimes in this function signature.</p>
|
||
<p>Let’s look at another example, this time using the <code>longest</code> function that had
|
||
no lifetime parameters when we started working with it in Listing 10-21:</p>
|
||
<pre><code class="language-rust ignore">fn longest(x: &str, y: &str) -> &str {
|
||
</code></pre>
|
||
<p>Let’s apply the first rule: each parameter gets its own lifetime. This time we
|
||
have two parameters instead of one, so we have two lifetimes:</p>
|
||
<pre><code class="language-rust ignore">fn longest<'a, 'b>(x: &'a str, y: &'b str) -> &str {
|
||
</code></pre>
|
||
<p>You can see that the second rule doesn’t apply because there is more than one
|
||
input lifetime. The third rule doesn’t apply either, because <code>longest</code> is a
|
||
function rather than a method, so none of the parameters are <code>self</code>. After
|
||
working through all three rules, we still haven’t figured out what the return
|
||
type’s lifetime is. This is why we got an error trying to compile the code in
|
||
Listing 10-21: the compiler worked through the lifetime elision rules but still
|
||
couldn’t figure out all the lifetimes of the references in the signature.</p>
|
||
<p>Because the third rule really only applies in method signatures, we’ll look at
|
||
lifetimes in that context next to see why the third rule means we don’t have to
|
||
annotate lifetimes in method signatures very often.</p>
|
||
<h3><a class="header" href="#lifetime-annotations-in-method-definitions" id="lifetime-annotations-in-method-definitions">Lifetime Annotations in Method Definitions</a></h3>
|
||
<p>When we implement methods on a struct with lifetimes, we use the same syntax as
|
||
that of generic type parameters shown in Listing 10-11. Where we declare and
|
||
use the lifetime parameters depends on whether they’re related to the struct
|
||
fields or the method parameters and return values.</p>
|
||
<p>Lifetime names for struct fields always need to be declared after the <code>impl</code>
|
||
keyword and then used after the struct’s name, because those lifetimes are part
|
||
of the struct’s type.</p>
|
||
<p>In method signatures inside the <code>impl</code> block, references might be tied to the
|
||
lifetime of references in the struct’s fields, or they might be independent. In
|
||
addition, the lifetime elision rules often make it so that lifetime annotations
|
||
aren’t necessary in method signatures. Let’s look at some examples using the
|
||
struct named <code>ImportantExcerpt</code> that we defined in Listing 10-25.</p>
|
||
<p>First, we’ll use a method named <code>level</code> whose only parameter is a reference to
|
||
<code>self</code> and whose return value is an <code>i32</code>, which is not a reference to anything:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct ImportantExcerpt<'a> {
|
||
</span><span class="boring"> part: &'a str,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl<'a> ImportantExcerpt<'a> {
|
||
fn level(&self) -> i32 {
|
||
3
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The lifetime parameter declaration after <code>impl</code> and its use after the type name
|
||
are required, but we’re not required to annotate the lifetime of the reference
|
||
to <code>self</code> because of the first elision rule.</p>
|
||
<p>Here is an example where the third lifetime elision rule applies:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct ImportantExcerpt<'a> {
|
||
</span><span class="boring"> part: &'a str,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl<'a> ImportantExcerpt<'a> {
|
||
fn announce_and_return_part(&self, announcement: &str) -> &str {
|
||
println!("Attention please: {}", announcement);
|
||
self.part
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>There are two input lifetimes, so Rust applies the first lifetime elision rule
|
||
and gives both <code>&self</code> and <code>announcement</code> their own lifetimes. Then, because
|
||
one of the parameters is <code>&self</code>, the return type gets the lifetime of <code>&self</code>,
|
||
and all lifetimes have been accounted for.</p>
|
||
<h3><a class="header" href="#the-static-lifetime" id="the-static-lifetime">The Static Lifetime</a></h3>
|
||
<p>One special lifetime we need to discuss is <code>'static</code>, which means that this
|
||
reference <em>can</em> live for the entire duration of the program. All string
|
||
literals have the <code>'static</code> lifetime, which we can annotate as follows:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s: &'static str = "I have a static lifetime.";
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The text of this string is stored directly in the program’s binary, which
|
||
is always available. Therefore, the lifetime of all string literals is
|
||
<code>'static</code>.</p>
|
||
<p>You might see suggestions to use the <code>'static</code> lifetime in error messages. But
|
||
before specifying <code>'static</code> as the lifetime for a reference, think about
|
||
whether the reference you have actually lives the entire lifetime of your
|
||
program or not. You might consider whether you want it to live that long, even
|
||
if it could. Most of the time, the problem results from attempting to create a
|
||
dangling reference or a mismatch of the available lifetimes. In such cases, the
|
||
solution is fixing those problems, not specifying the <code>'static</code> lifetime.</p>
|
||
<h2><a class="header" href="#generic-type-parameters-trait-bounds-and-lifetimes-together" id="generic-type-parameters-trait-bounds-and-lifetimes-together">Generic Type Parameters, Trait Bounds, and Lifetimes Together</a></h2>
|
||
<p>Let’s briefly look at the syntax of specifying generic type parameters, trait
|
||
bounds, and lifetimes all in one function!</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::fmt::Display;
|
||
|
||
fn longest_with_an_announcement&lt;'a, T&gt;(x: &amp;'a str, y: &amp;'a str, ann: T) -&gt; &amp;'a str
|
||
where T: Display
|
||
{
|
||
println!("Announcement! {}", ann);
|
||
if x.len() > y.len() {
|
||
x
|
||
} else {
|
||
y
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This is the <code>longest</code> function from Listing 10-22 that returns the longer of
|
||
two string slices. But now it has an extra parameter named <code>ann</code> of the generic
|
||
type <code>T</code>, which can be filled in by any type that implements the <code>Display</code>
|
||
trait as specified by the <code>where</code> clause. This extra parameter will be printed
|
||
using <code>{}</code> before the function compares the lengths of the string slices, which is why the
|
||
<code>Display</code> trait bound is necessary. Because lifetimes are a type of generic,
|
||
the declarations of the lifetime parameter <code>'a</code> and the generic type parameter
|
||
<code>T</code> go in the same list inside the angle brackets after the function name.</p>
|
||
<h2><a class="header" href="#summary-9" id="summary-9">Summary</a></h2>
|
||
<p>We covered a lot in this chapter! Now that you know about generic type
|
||
parameters, traits and trait bounds, and generic lifetime parameters, you’re
|
||
ready to write code without repetition that works in many different situations.
|
||
Generic type parameters let you apply the code to different types. Traits and
|
||
trait bounds ensure that even though the types are generic, they’ll have the
|
||
behavior the code needs. You learned how to use lifetime annotations to ensure
|
||
that this flexible code won’t have any dangling references. And all of this
|
||
analysis happens at compile time, which doesn’t affect runtime performance!</p>
|
||
<p>Believe it or not, there is much more to learn on the topics we discussed in
|
||
this chapter: Chapter 17 discusses trait objects, which are another way to use
|
||
traits. Chapter 19 covers more complex scenarios involving lifetime annotations
|
||
as well as some advanced type system features. But next, you’ll learn how to
|
||
write tests in Rust so you can make sure your code is working the way it should.</p>
|
||
<h1><a class="header" href="#writing-automated-tests" id="writing-automated-tests">Writing Automated Tests</a></h1>
|
||
<p>In his 1972 essay “The Humble Programmer,” Edsger W. Dijkstra said that
|
||
“Program testing can be a very effective way to show the presence of bugs, but
|
||
it is hopelessly inadequate for showing their absence.” That doesn’t mean we
|
||
shouldn’t try to test as much as we can!</p>
|
||
<p>Correctness in our programs is the extent to which our code does what we intend
|
||
it to do. Rust is designed with a high degree of concern about the correctness
|
||
of programs, but correctness is complex and not easy to prove. Rust’s type
|
||
system shoulders a huge part of this burden, but the type system cannot catch
|
||
every kind of incorrectness. As such, Rust includes support for writing
|
||
automated software tests within the language.</p>
|
||
<p>As an example, say we write a function called <code>add_two</code> that adds 2 to whatever
|
||
number is passed to it. This function’s signature accepts an integer as a
|
||
parameter and returns an integer as a result. When we implement and compile
|
||
that function, Rust does all the type checking and borrow checking that you’ve
|
||
learned so far to ensure that, for instance, we aren’t passing a <code>String</code> value
|
||
or an invalid reference to this function. But Rust <em>can’t</em> check that this
|
||
function will do precisely what we intend, which is return the parameter plus 2
|
||
rather than, say, the parameter plus 10 or the parameter minus 50! That’s where
|
||
tests come in.</p>
|
||
<p>We can write tests that assert, for example, that when we pass <code>3</code> to the
|
||
<code>add_two</code> function, the returned value is <code>5</code>. We can run these tests whenever
|
||
we make changes to our code to make sure any existing correct behavior has not
|
||
changed.</p>
|
||
<p>Testing is a complex skill: although we can’t cover every detail about how to
|
||
write good tests in one chapter, we’ll discuss the mechanics of Rust’s testing
|
||
facilities. We’ll talk about the annotations and macros available to you when
|
||
writing your tests, the default behavior and options provided for running your
|
||
tests, and how to organize tests into unit tests and integration tests.</p>
|
||
<h2><a class="header" href="#how-to-write-tests" id="how-to-write-tests">How to Write Tests</a></h2>
|
||
<p>Tests are Rust functions that verify that the non-test code is functioning in
|
||
the expected manner. The bodies of test functions typically perform these three
|
||
actions:</p>
|
||
<ol>
|
||
<li>Set up any needed data or state.</li>
|
||
<li>Run the code you want to test.</li>
|
||
<li>Assert the results are what you expect.</li>
|
||
</ol>
|
||
<p>Let’s look at the features Rust provides specifically for writing tests that
|
||
take these actions, which include the <code>test</code> attribute, a few macros, and the
|
||
<code>should_panic</code> attribute.</p>
|
||
<h3><a class="header" href="#the-anatomy-of-a-test-function" id="the-anatomy-of-a-test-function">The Anatomy of a Test Function</a></h3>
|
||
<p>At its simplest, a test in Rust is a function that’s annotated with the <code>test</code>
|
||
attribute. Attributes are metadata about pieces of Rust code; one example is
|
||
the <code>derive</code> attribute we used with structs in Chapter 5. To change a function
|
||
into a test function, add <code>#[test]</code> on the line before <code>fn</code>. When you run your
|
||
tests with the <code>cargo test</code> command, Rust builds a test runner binary that runs
|
||
the functions annotated with the <code>test</code> attribute and reports on whether each
|
||
test function passes or fails.</p>
|
||
<p>When we make a new library project with Cargo, a test module with a test
|
||
function in it is automatically generated for us. This module helps you start
|
||
writing your tests so you don’t have to look up the exact structure and syntax
|
||
of test functions every time you start a new project. You can add as many
|
||
additional test functions and as many test modules as you want!</p>
|
||
<p>We’ll explore some aspects of how tests work by experimenting with the template
|
||
test generated for us without actually testing any code. Then we’ll write some
|
||
real-world tests that call some code that we’ve written and assert that its
|
||
behavior is correct.</p>
|
||
<p>Let’s create a new library project called <code>adder</code>:</p>
|
||
<pre><code class="language-text">$ cargo new adder --lib
|
||
Created library `adder` project
|
||
$ cd adder
|
||
</code></pre>
|
||
<p>The contents of the <em>src/lib.rs</em> file in your <code>adder</code> library should look like
|
||
Listing 11-1.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
#[test]
|
||
fn it_works() {
|
||
assert_eq!(2 + 2, 4);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-1: The test module and function generated
|
||
automatically by <code>cargo new</code></span></p>
|
||
<p>For now, let’s ignore the top two lines and focus on the function to see how it
|
||
works. Note the <code>#[test]</code> annotation before the <code>fn</code> line: this attribute
|
||
indicates this is a test function, so the test runner knows to treat this
|
||
function as a test. We could also have non-test functions in the <code>tests</code> module
|
||
to help set up common scenarios or perform common operations, so we need to
|
||
indicate which functions are tests by using the <code>#[test]</code> attribute.</p>
|
||
<p>The function body uses the <code>assert_eq!</code> macro to assert that 2 + 2 equals 4.
|
||
This assertion serves as an example of the format for a typical test. Let’s run
|
||
it to see that this test passes.</p>
|
||
<p>The <code>cargo test</code> command runs all tests in our project, as shown in Listing
|
||
11-2.</p>
|
||
<pre><code class="language-text">$ cargo test
|
||
Compiling adder v0.1.0 (file:///projects/adder)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.22 secs
|
||
Running target/debug/deps/adder-ce99bcc2479f4607
|
||
|
||
running 1 test
|
||
test tests::it_works ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Doc-tests adder
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p><span class="caption">Listing 11-2: The output from running the automatically
|
||
generated test</span></p>
|
||
<p>Cargo compiled and ran the test. After the <code>Compiling</code>, <code>Finished</code>, and
|
||
<code>Running</code> lines is the line <code>running 1 test</code>. The next line shows the name
|
||
of the generated test function, called <code>it_works</code>, and the result of running
|
||
that test, <code>ok</code>. The overall summary of running the tests appears next. The
|
||
text <code>test result: ok.</code> means that all the tests passed, and the portion that
|
||
reads <code>1 passed; 0 failed</code> totals the number of tests that passed or failed.</p>
|
||
<p>Because we don’t have any tests we’ve marked as ignored, the summary shows <code>0 ignored</code>. We also haven’t filtered the tests being run, so the end of the
|
||
summary shows <code>0 filtered out</code>. We’ll talk about ignoring and filtering out
|
||
tests in the next section, <a href="ch11-02-running-tests.html#controlling-how-tests-are-run">“Controlling How Tests Are
|
||
Run.”</a><!-- ignore --></p>
|
||
<p>The <code>0 measured</code> statistic is for benchmark tests that measure performance.
|
||
Benchmark tests are, as of this writing, only available in nightly Rust. See
|
||
<a href="../unstable-book/library-features/test.html">the documentation about benchmark tests</a> to learn more.</p>
|
||
<p>The next part of the test output, which starts with <code>Doc-tests adder</code>, is for
|
||
the results of any documentation tests. We don’t have any documentation tests
|
||
yet, but Rust can compile any code examples that appear in our API
|
||
documentation. This feature helps us keep our docs and our code in sync! We’ll
|
||
discuss how to write documentation tests in the <a href="ch14-02-publishing-to-crates-io.html#documentation-comments-as-tests">“Documentation Comments as
|
||
Tests”</a><!-- ignore --> section of Chapter 14. For now, we’ll
|
||
ignore the <code>Doc-tests</code> output.</p>
|
||
<p>Let’s change the name of our test to see how that changes the test output.
|
||
Change the <code>it_works</code> function to a different name, such as <code>exploration</code>, like
|
||
so:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
#[test]
|
||
fn exploration() {
|
||
assert_eq!(2 + 2, 4);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>Then run <code>cargo test</code> again. The output now shows <code>exploration</code> instead of
|
||
<code>it_works</code>:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::exploration ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Let’s add another test, but this time we’ll make a test that fails! Tests fail
|
||
when something in the test function panics. Each test is run in a new thread,
|
||
and when the main thread sees that a test thread has died, the test is marked
|
||
as failed. We talked about the simplest way to cause a panic in Chapter 9,
|
||
which is to call the <code>panic!</code> macro. Enter the new test, <code>another</code>, so your
|
||
<em>src/lib.rs</em> file looks like Listing 11-3.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust panics"><span class="boring">fn main() {}
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
#[test]
|
||
fn exploration() {
|
||
assert_eq!(2 + 2, 4);
|
||
}
|
||
|
||
#[test]
|
||
fn another() {
|
||
panic!("Make this test fail");
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-3: Adding a second test that will fail because
|
||
we call the <code>panic!</code> macro</span></p>
|
||
<p>Run the tests again using <code>cargo test</code>. The output should look like Listing
|
||
11-4, which shows that our <code>exploration</code> test passed and <code>another</code> failed.</p>
|
||
<pre><code class="language-text">running 2 tests
|
||
test tests::exploration ... ok
|
||
test tests::another ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::another stdout ----
|
||
thread 'tests::another' panicked at 'Make this test fail', src/lib.rs:10:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
|
||
failures:
|
||
tests::another
|
||
|
||
test result: FAILED. 1 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
error: test failed
|
||
</code></pre>
|
||
<p><span class="caption">Listing 11-4: Test results when one test passes and one
|
||
test fails</span></p>
|
||
<p>Instead of <code>ok</code>, the line <code>test tests::another</code> shows <code>FAILED</code>. Two new
|
||
sections appear between the individual results and the summary: the first
|
||
section displays the detailed reason for each test failure. In this case,
|
||
<code>another</code> failed because it <code>panicked at 'Make this test fail'</code>, which happened
|
||
on line 10 in the <em>src/lib.rs</em> file. The next section lists just the names of
|
||
all the failing tests, which is useful when there are lots of tests and lots of
|
||
detailed failing test output. We can use the name of a failing test to run just
|
||
that test to more easily debug it; we’ll talk more about ways to run tests in
|
||
the <a href="ch11-02-running-tests.html#controlling-how-tests-are-run">“Controlling How Tests Are Run”</a><!-- ignore
|
||
--> section.</p>
|
||
<p>The summary line displays at the end: overall, our test result is <code>FAILED</code>.
|
||
We had one test pass and one test fail.</p>
|
||
<p>Now that you’ve seen what the test results look like in different scenarios,
|
||
let’s look at some macros other than <code>panic!</code> that are useful in tests.</p>
|
||
<h3><a class="header" href="#checking-results-with-the-assert-macro" id="checking-results-with-the-assert-macro">Checking Results with the <code>assert!</code> Macro</a></h3>
|
||
<p>The <code>assert!</code> macro, provided by the standard library, is useful when you want
|
||
to ensure that some condition in a test evaluates to <code>true</code>. We give the
|
||
<code>assert!</code> macro an argument that evaluates to a Boolean. If the value is
|
||
<code>true</code>, <code>assert!</code> does nothing and the test passes. If the value is <code>false</code>,
|
||
the <code>assert!</code> macro calls the <code>panic!</code> macro, which causes the test to fail.
|
||
Using the <code>assert!</code> macro helps us check that our code is functioning in the
|
||
way we intend.</p>
|
||
<p>In Chapter 5, Listing 5-15, we used a <code>Rectangle</code> struct and a <code>can_hold</code>
|
||
method, which are repeated here in Listing 11-5. Let’s put this code in the
|
||
<em>src/lib.rs</em> file and write some tests for it using the <code>assert!</code> macro.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>#[derive(Debug)]
|
||
struct Rectangle {
|
||
width: u32,
|
||
height: u32,
|
||
}
|
||
|
||
impl Rectangle {
|
||
fn can_hold(&self, other: &Rectangle) -> bool {
|
||
self.width > other.width && self.height > other.height
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-5: Using the <code>Rectangle</code> struct and its
|
||
<code>can_hold</code> method from Chapter 5</span></p>
|
||
<p>The <code>can_hold</code> method returns a Boolean, which means it’s a perfect use case
|
||
for the <code>assert!</code> macro. In Listing 11-6, we write a test that exercises the
|
||
<code>can_hold</code> method by creating a <code>Rectangle</code> instance that has a width of 8 and
|
||
a height of 7 and asserting that it can hold another <code>Rectangle</code> instance that
|
||
has a width of 5 and a height of 1.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn larger_can_hold_smaller() {
|
||
let larger = Rectangle { width: 8, height: 7 };
|
||
let smaller = Rectangle { width: 5, height: 1 };
|
||
|
||
assert!(larger.can_hold(&smaller));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-6: A test for <code>can_hold</code> that checks whether a
|
||
larger rectangle can indeed hold a smaller rectangle</span></p>
|
||
<p>Note that we’ve added a new line inside the <code>tests</code> module: <code>use super::*;</code>.
|
||
The <code>tests</code> module is a regular module that follows the usual visibility rules
|
||
we covered in Chapter 7 in the <a href="ch07-03-paths-for-referring-to-an-item-in-the-module-tree.html">“Paths for Referring to an Item in the Module
|
||
Tree”</a><!-- ignore -->
|
||
section. Because the <code>tests</code> module is an inner module, we need to bring the
|
||
code under test in the outer module into the scope of the inner module. We use
|
||
a glob here so anything we define in the outer module is available to this
|
||
<code>tests</code> module.</p>
|
||
<p>We’ve named our test <code>larger_can_hold_smaller</code>, and we’ve created the two
|
||
<code>Rectangle</code> instances that we need. Then we called the <code>assert!</code> macro and
|
||
passed it the result of calling <code>larger.can_hold(&smaller)</code>. This expression
|
||
is supposed to return <code>true</code>, so our test should pass. Let’s find out!</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::larger_can_hold_smaller ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>It does pass! Let’s add another test, this time asserting that a smaller
|
||
rectangle cannot hold a larger rectangle:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn larger_can_hold_smaller() {
|
||
// --snip--
|
||
}
|
||
|
||
#[test]
|
||
fn smaller_cannot_hold_larger() {
|
||
let larger = Rectangle { width: 8, height: 7 };
|
||
let smaller = Rectangle { width: 5, height: 1 };
|
||
|
||
assert!(!smaller.can_hold(&larger));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>Because the correct result of the <code>can_hold</code> method in this case is <code>false</code>,
|
||
we need to negate that result before we pass it to the <code>assert!</code> macro. As a
|
||
result, our test will pass if <code>can_hold</code> returns <code>false</code>:</p>
|
||
<pre><code class="language-text">running 2 tests
|
||
test tests::smaller_cannot_hold_larger ... ok
|
||
test tests::larger_can_hold_smaller ... ok
|
||
|
||
test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Two tests that pass! Now let’s see what happens to our test results when we
|
||
introduce a bug in our code. Let’s change the implementation of the <code>can_hold</code>
|
||
method by replacing the greater than sign with a less than sign when it
|
||
compares the widths:</p>
|
||
<pre><pre class="playpen"><code class="language-rust not_desired_behavior"><span class="boring">fn main() {}
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Rectangle {
|
||
</span><span class="boring"> width: u32,
|
||
</span><span class="boring"> height: u32,
|
||
</span><span class="boring">}
|
||
</span>// --snip--
|
||
|
||
impl Rectangle {
|
||
fn can_hold(&self, other: &Rectangle) -> bool {
|
||
self.width < other.width && self.height > other.height
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>Running the tests now produces the following:</p>
|
||
<pre><code class="language-text">running 2 tests
|
||
test tests::smaller_cannot_hold_larger ... ok
|
||
test tests::larger_can_hold_smaller ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::larger_can_hold_smaller stdout ----
|
||
thread 'tests::larger_can_hold_smaller' panicked at 'assertion failed:
|
||
larger.can_hold(&smaller)', src/lib.rs:22:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
|
||
failures:
|
||
tests::larger_can_hold_smaller
|
||
|
||
test result: FAILED. 1 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Our tests caught the bug! Because <code>larger.width</code> is 8 and <code>smaller.width</code> is
|
||
5, the comparison of the widths in <code>can_hold</code> now returns <code>false</code>: 8 is not
|
||
less than 5.</p>
|
||
<h3><a class="header" href="#testing-equality-with-the-assert_eq-and-assert_ne-macros" id="testing-equality-with-the-assert_eq-and-assert_ne-macros">Testing Equality with the <code>assert_eq!</code> and <code>assert_ne!</code> Macros</a></h3>
|
||
<p>A common way to test functionality is to compare the result of the code under
|
||
test to the value you expect the code to return to make sure they’re equal. You
|
||
could do this using the <code>assert!</code> macro and passing it an expression using the
|
||
<code>==</code> operator. However, this is such a common test that the standard library
|
||
provides a pair of macros—<code>assert_eq!</code> and <code>assert_ne!</code>—to perform this test
|
||
more conveniently. These macros compare two arguments for equality or
|
||
inequality, respectively. They’ll also print the two values if the assertion
|
||
fails, which makes it easier to see <em>why</em> the test failed; conversely, the
|
||
<code>assert!</code> macro only indicates that it got a <code>false</code> value for the <code>==</code>
|
||
expression, not the values that led to the <code>false</code> value.</p>
|
||
<p>In Listing 11-7, we write a function named <code>add_two</code> that adds <code>2</code> to its
|
||
parameter and returns the result. Then we test this function using the
|
||
<code>assert_eq!</code> macro.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>pub fn add_two(a: i32) -> i32 {
|
||
a + 2
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn it_adds_two() {
|
||
assert_eq!(4, add_two(2));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-7: Testing the function <code>add_two</code> using the
|
||
<code>assert_eq!</code> macro</span></p>
|
||
<p>Let’s check that it passes!</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::it_adds_two ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>The first argument we gave to the <code>assert_eq!</code> macro, <code>4</code>, is equal to the
|
||
result of calling <code>add_two(2)</code>. The line for this test is <code>test tests::it_adds_two ... ok</code>, and the <code>ok</code> text indicates that our test passed!</p>
|
||
<p>Let’s introduce a bug into our code to see what it looks like when a test that
|
||
uses <code>assert_eq!</code> fails. Change the implementation of the <code>add_two</code> function to
|
||
instead add <code>3</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust not_desired_behavior"><span class="boring">fn main() {}
|
||
</span>pub fn add_two(a: i32) -> i32 {
|
||
a + 3
|
||
}
|
||
</code></pre></pre>
|
||
<p>Run the tests again:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::it_adds_two ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::it_adds_two stdout ----
|
||
thread 'tests::it_adds_two' panicked at 'assertion failed: `(left == right)`
|
||
left: `4`,
|
||
right: `5`', src/lib.rs:11:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
|
||
failures:
|
||
tests::it_adds_two
|
||
|
||
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Our test caught the bug! The <code>it_adds_two</code> test failed, displaying the message
|
||
<code>assertion failed: `(left == right)`</code> and showing that <code>left</code> was <code>4</code> and
|
||
<code>right</code> was <code>5</code>. This message is useful and helps us start debugging: it means
|
||
the <code>left</code> argument to <code>assert_eq!</code> was <code>4</code> but the <code>right</code> argument, where we
|
||
had <code>add_two(2)</code>, was <code>5</code>.</p>
|
||
<p>Note that in some languages and test frameworks, the parameters to the
|
||
functions that assert two values are equal are called <code>expected</code> and <code>actual</code>,
|
||
and the order in which we specify the arguments matters. However, in Rust,
|
||
they’re called <code>left</code> and <code>right</code>, and the order in which we specify the value
|
||
we expect and the value that the code under test produces doesn’t matter. We
|
||
could write the assertion in this test as <code>assert_eq!(add_two(2), 4)</code>, which
|
||
would result in a failure message that displays <code>assertion failed: `(left == right)`</code> and that <code>left</code> was <code>5</code> and <code>right</code> was <code>4</code>.</p>
|
||
<p>The <code>assert_ne!</code> macro will pass if the two values we give it are not equal and
|
||
fail if they’re equal. This macro is most useful for cases when we’re not sure
|
||
what a value <em>will</em> be, but we know what the value definitely <em>won’t</em> be if our
|
||
code is functioning as we intend. For example, if we’re testing a function that
|
||
is guaranteed to change its input in some way, but the way in which the input
|
||
is changed depends on the day of the week that we run our tests, the best thing
|
||
to assert might be that the output of the function is not equal to the input.</p>
|
||
<p>Under the surface, the <code>assert_eq!</code> and <code>assert_ne!</code> macros use the operators
|
||
<code>==</code> and <code>!=</code>, respectively. When the assertions fail, these macros print their
|
||
arguments using debug formatting, which means the values being compared must
|
||
implement the <code>PartialEq</code> and <code>Debug</code> traits. All the primitive types and most
|
||
of the standard library types implement these traits. For structs and enums
|
||
that you define, you’ll need to implement <code>PartialEq</code> to assert that values of
|
||
those types are equal or not equal. You’ll need to implement <code>Debug</code> to print
|
||
the values when the assertion fails. Because both traits are derivable traits,
|
||
as mentioned in Listing 5-12 in Chapter 5, this is usually as straightforward
|
||
as adding the <code>#[derive(PartialEq, Debug)]</code> annotation to your struct or enum
|
||
definition. See Appendix C, <a href="appendix-03-derivable-traits.html">“Derivable Traits,”</a><!-- ignore
|
||
--> for more details about these and other derivable traits.</p>
|
||
<h3><a class="header" href="#adding-custom-failure-messages" id="adding-custom-failure-messages">Adding Custom Failure Messages</a></h3>
|
||
<p>You can also add a custom message to be printed with the failure message as
|
||
optional arguments to the <code>assert!</code>, <code>assert_eq!</code>, and <code>assert_ne!</code> macros. Any
|
||
arguments specified after the one required argument to <code>assert!</code> or the two
|
||
required arguments to <code>assert_eq!</code> and <code>assert_ne!</code> are passed along to the
|
||
<code>format!</code> macro (discussed in Chapter 8 in the <a href="ch08-02-strings.html#concatenation-with-the--operator-or-the-format-macro">“Concatenation with the <code>+</code>
|
||
Operator or the <code>format!</code>
|
||
Macro”</a><!-- ignore -->
|
||
section), so you can pass a format string that contains <code>{}</code> placeholders and
|
||
values to go in those placeholders. Custom messages are useful to document
|
||
what an assertion means; when a test fails, you’ll have a better idea of what
|
||
the problem is with the code.</p>
|
||
<p>For example, let’s say we have a function that greets people by name and we
|
||
want to test that the name we pass into the function appears in the output:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>pub fn greeting(name: &str) -> String {
|
||
format!("Hello {}!", name)
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn greeting_contains_name() {
|
||
let result = greeting("Carol");
|
||
assert!(result.contains("Carol"));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>The requirements for this program haven’t been agreed upon yet, and we’re
|
||
pretty sure the <code>Hello</code> text at the beginning of the greeting will change. We
|
||
decided we don’t want to have to update the test when the requirements change,
|
||
so instead of checking for exact equality to the value returned from the
|
||
<code>greeting</code> function, we’ll just assert that the output contains the text of the
|
||
input parameter.</p>
|
||
<p>Let’s introduce a bug into this code by changing <code>greeting</code> to not include
|
||
<code>name</code> to see what this test failure looks like:</p>
|
||
<pre><pre class="playpen"><code class="language-rust not_desired_behavior"><span class="boring">fn main() {}
|
||
</span>pub fn greeting(name: &str) -> String {
|
||
String::from("Hello!")
|
||
}
|
||
</code></pre></pre>
|
||
<p>Running this test produces the following:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::greeting_contains_name ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::greeting_contains_name stdout ----
|
||
thread 'tests::greeting_contains_name' panicked at 'assertion failed:
|
||
result.contains("Carol")', src/lib.rs:12:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
|
||
failures:
|
||
tests::greeting_contains_name
|
||
</code></pre>
|
||
<p>This result just indicates that the assertion failed and which line the
|
||
assertion is on. A more useful failure message in this case would print the
|
||
value we got from the <code>greeting</code> function. Let’s change the test function,
|
||
giving it a custom failure message made from a format string with a placeholder
|
||
filled in with the actual value we got from the <code>greeting</code> function:</p>
|
||
<pre><code class="language-rust ignore">#[test]
|
||
fn greeting_contains_name() {
|
||
let result = greeting("Carol");
|
||
assert!(
|
||
result.contains("Carol"),
|
||
"Greeting did not contain name, value was `{}`", result
|
||
);
|
||
}
|
||
</code></pre>
|
||
<p>Now when we run the test, we’ll get a more informative error message:</p>
|
||
<pre><code class="language-text">---- tests::greeting_contains_name stdout ----
|
||
thread 'tests::greeting_contains_name' panicked at 'Greeting did not
|
||
contain name, value was `Hello!`', src/lib.rs:12:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>We can see the value we actually got in the test output, which would help us
|
||
debug what happened instead of what we were expecting to happen.</p>
|
||
<h3><a class="header" href="#checking-for-panics-with-should_panic" id="checking-for-panics-with-should_panic">Checking for Panics with <code>should_panic</code></a></h3>
|
||
<p>In addition to checking that our code returns the correct values we expect,
|
||
it’s also important to check that our code handles error conditions as we
|
||
expect. For example, consider the <code>Guess</code> type that we created in Chapter 9,
|
||
Listing 9-10. Other code that uses <code>Guess</code> depends on the guarantee that <code>Guess</code>
|
||
instances will contain only values between 1 and 100. We can write a test that
|
||
ensures that attempting to create a <code>Guess</code> instance with a value outside that
|
||
range panics.</p>
|
||
<p>We do this by adding another attribute, <code>should_panic</code>, to our test function.
|
||
This attribute makes a test pass if the code inside the function panics; the
|
||
test will fail if the code inside the function doesn’t panic.</p>
|
||
<p>Listing 11-8 shows a test that checks that the error conditions of <code>Guess::new</code>
|
||
happen when we expect them to.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>pub struct Guess {
|
||
value: i32,
|
||
}
|
||
|
||
impl Guess {
|
||
pub fn new(value: i32) -> Guess {
|
||
if value < 1 || value > 100 {
|
||
panic!("Guess value must be between 1 and 100, got {}.", value);
|
||
}
|
||
|
||
Guess {
|
||
value
|
||
}
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
#[should_panic]
|
||
fn greater_than_100() {
|
||
Guess::new(200);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-8: Testing that a condition will cause a
|
||
<code>panic!</code></span></p>
|
||
<p>We place the <code>#[should_panic]</code> attribute after the <code>#[test]</code> attribute and
|
||
before the test function it applies to. Let’s look at the result when this test
|
||
passes:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::greater_than_100 ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Looks good! Now let’s introduce a bug in our code by removing the condition
|
||
that the <code>new</code> function will panic if the value is greater than 100:</p>
|
||
<pre><pre class="playpen"><code class="language-rust not_desired_behavior"><span class="boring">fn main() {}
|
||
</span><span class="boring">pub struct Guess {
|
||
</span><span class="boring"> value: i32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>// --snip--
|
||
|
||
impl Guess {
|
||
pub fn new(value: i32) -> Guess {
|
||
if value < 1 {
|
||
panic!("Guess value must be between 1 and 100, got {}.", value);
|
||
}
|
||
|
||
Guess {
|
||
value
|
||
}
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>When we run the test in Listing 11-8, it will fail:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::greater_than_100 ... FAILED
|
||
|
||
failures:
|
||
|
||
failures:
|
||
tests::greater_than_100
|
||
|
||
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>We don’t get a very helpful message in this case, but when we look at the test
|
||
function, we see that it’s annotated with <code>#[should_panic]</code>. The failure we got
|
||
means that the code in the test function did not cause a panic.</p>
|
||
<p>Tests that use <code>should_panic</code> can be imprecise because they only indicate that
|
||
the code has caused some panic. A <code>should_panic</code> test would pass even if the
|
||
test panics for a different reason from the one we were expecting to happen. To
|
||
make <code>should_panic</code> tests more precise, we can add an optional <code>expected</code>
|
||
parameter to the <code>should_panic</code> attribute. The test harness will make sure that
|
||
the failure message contains the provided text. For example, consider the
|
||
modified code for <code>Guess</code> in Listing 11-9 where the <code>new</code> function panics with
|
||
different messages depending on whether the value is too small or too large.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span><span class="boring">pub struct Guess {
|
||
</span><span class="boring"> value: i32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>// --snip--
|
||
|
||
impl Guess {
|
||
pub fn new(value: i32) -> Guess {
|
||
if value < 1 {
|
||
panic!("Guess value must be greater than or equal to 1, got {}.",
|
||
value);
|
||
} else if value > 100 {
|
||
panic!("Guess value must be less than or equal to 100, got {}.",
|
||
value);
|
||
}
|
||
|
||
Guess {
|
||
value
|
||
}
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
#[should_panic(expected = "Guess value must be less than or equal to 100")]
|
||
fn greater_than_100() {
|
||
Guess::new(200);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-9: Testing that a condition will cause a
|
||
<code>panic!</code> with a particular panic message</span></p>
|
||
<p>This test will pass because the value we put in the <code>should_panic</code> attribute’s
|
||
<code>expected</code> parameter is a substring of the message that the <code>Guess::new</code>
|
||
function panics with. We could have specified the entire panic message that we
|
||
expect, which in this case would be <code>Guess value must be less than or equal to 100, got 200.</code> What you choose to specify in the expected parameter for
|
||
<code>should_panic</code> depends on how much of the panic message is unique or dynamic
|
||
and how precise you want your test to be. In this case, a substring of the
|
||
panic message is enough to ensure that the code in the test function executes
|
||
the <code>else if value > 100</code> case.</p>
|
||
<p>To see what happens when a <code>should_panic</code> test with an <code>expected</code> message
|
||
fails, let’s again introduce a bug into our code by swapping the bodies of the
|
||
<code>if value &lt; 1</code> and the <code>else if value &gt; 100</code> blocks:</p>
|
||
<pre><code class="language-rust ignore not_desired_behavior">if value < 1 {
|
||
panic!("Guess value must be less than or equal to 100, got {}.", value);
|
||
} else if value > 100 {
|
||
panic!("Guess value must be greater than or equal to 1, got {}.", value);
|
||
}
|
||
</code></pre>
|
||
<p>This time when we run the <code>should_panic</code> test, it will fail:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::greater_than_100 ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::greater_than_100 stdout ----
|
||
thread 'tests::greater_than_100' panicked at 'Guess value must be
|
||
greater than or equal to 1, got 200.', src/lib.rs:11:13
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
note: Panic did not include expected string 'Guess value must be less than or
|
||
equal to 100'
|
||
|
||
failures:
|
||
tests::greater_than_100
|
||
|
||
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>The failure message indicates that this test did indeed panic as we expected,
|
||
but the panic message did not include the expected string <code>'Guess value must be less than or equal to 100'</code>. The panic message that we did get in this case was
|
||
<code>Guess value must be greater than or equal to 1, got 200.</code> Now we can start
|
||
figuring out where our bug is!</p>
|
||
<h3><a class="header" href="#using-resultt-e-in-tests" id="using-resultt-e-in-tests">Using <code>Result&lt;T, E&gt;</code> in Tests</a></h3>
|
||
<p>So far, we’ve written tests that panic when they fail. We can also write tests
|
||
that use <code>Result&lt;T, E&gt;</code>! Here’s the test from Listing 11-1, rewritten to use
|
||
<code>Result&lt;T, E&gt;</code> and return an <code>Err</code> instead of panicking:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
#[test]
|
||
fn it_works() -> Result<(), String> {
|
||
if 2 + 2 == 4 {
|
||
Ok(())
|
||
} else {
|
||
Err(String::from("two plus two does not equal four"))
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>it_works</code> function now has a return type, <code>Result&lt;(), String&gt;</code>. In the
|
||
body of the function, rather than calling the <code>assert_eq!</code> macro, we return
|
||
<code>Ok(())</code> when the test passes and an <code>Err</code> with a <code>String</code> inside when the test
|
||
fails.</p>
|
||
<p>Writing tests so they return a <code>Result&lt;T, E&gt;</code> enables you to use the question
|
||
mark operator in the body of tests, which can be a convenient way to write
|
||
tests that should fail if any operation within them returns an <code>Err</code> variant.</p>
|
||
<p>You can’t use the <code>#[should_panic]</code> annotation on tests that use <code>Result&lt;T, E&gt;</code>. Instead, you should return an <code>Err</code> value directly when the test should
|
||
fail.</p>
|
||
<p>Now that you know several ways to write tests, let’s look at what is happening
|
||
when we run our tests and explore the different options we can use with <code>cargo test</code>.</p>
|
||
<h2><a class="header" href="#controlling-how-tests-are-run" id="controlling-how-tests-are-run">Controlling How Tests Are Run</a></h2>
|
||
<p>Just as <code>cargo run</code> compiles your code and then runs the resulting binary,
|
||
<code>cargo test</code> compiles your code in test mode and runs the resulting test
|
||
binary. You can specify command line options to change the default behavior of
|
||
<code>cargo test</code>. For example, the default behavior of the binary produced by
|
||
<code>cargo test</code> is to run all the tests in parallel and capture output generated
|
||
during test runs, preventing the output from being displayed and making it
|
||
easier to read the output related to the test results.</p>
|
||
<p>Some command line options go to <code>cargo test</code>, and some go to the resulting test
|
||
binary. To separate these two types of arguments, you list the arguments that
|
||
go to <code>cargo test</code> followed by the separator <code>--</code> and then the ones that go to
|
||
the test binary. Running <code>cargo test --help</code> displays the options you can use
|
||
with <code>cargo test</code>, and running <code>cargo test -- --help</code> displays the options you
|
||
can use after the separator <code>--</code>.</p>
|
||
<h3><a class="header" href="#running-tests-in-parallel-or-consecutively" id="running-tests-in-parallel-or-consecutively">Running Tests in Parallel or Consecutively</a></h3>
|
||
<p>When you run multiple tests, by default they run in parallel using threads.
|
||
This means the tests will finish running faster so you can get feedback quicker
|
||
on whether or not your code is working. Because the tests are running at the
|
||
same time, make sure your tests don’t depend on each other or on any shared
|
||
state, including a shared environment, such as the current working directory or
|
||
environment variables.</p>
|
||
<p>For example, say each of your tests runs some code that creates a file on disk
|
||
named <em>test-output.txt</em> and writes some data to that file. Then each test reads
|
||
the data in that file and asserts that the file contains a particular value,
|
||
which is different in each test. Because the tests run at the same time, one
|
||
test might overwrite the file between when another test writes and reads the
|
||
file. The second test will then fail, not because the code is incorrect but
|
||
because the tests have interfered with each other while running in parallel.
|
||
One solution is to make sure each test writes to a different file; another
|
||
solution is to run the tests one at a time.</p>
|
||
<p>If you don’t want to run the tests in parallel or if you want more fine-grained
|
||
control over the number of threads used, you can send the <code>--test-threads</code> flag
|
||
and the number of threads you want to use to the test binary. Take a look at
|
||
the following example:</p>
|
||
<pre><code class="language-text">$ cargo test -- --test-threads=1
|
||
</code></pre>
|
||
<p>We set the number of test threads to <code>1</code>, telling the program not to use any
|
||
parallelism. Running the tests using one thread will take longer than running
|
||
them in parallel, but the tests won’t interfere with each other if they share
|
||
state.</p>
|
||
<h3><a class="header" href="#showing-function-output" id="showing-function-output">Showing Function Output</a></h3>
|
||
<p>By default, if a test passes, Rust’s test library captures anything printed to
|
||
standard output. For example, if we call <code>println!</code> in a test and the test
|
||
passes, we won’t see the <code>println!</code> output in the terminal; we’ll see only the
|
||
line that indicates the test passed. If a test fails, we’ll see whatever was
|
||
printed to standard output with the rest of the failure message.</p>
|
||
<p>As an example, Listing 11-10 has a silly function that prints the value of its
|
||
parameter and returns 10, as well as a test that passes and a test that fails.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust panics">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn prints_and_returns_10(a: i32) -> i32 {
|
||
println!("I got the value {}", a);
|
||
10
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn this_test_will_pass() {
|
||
let value = prints_and_returns_10(4);
|
||
assert_eq!(10, value);
|
||
}
|
||
|
||
#[test]
|
||
fn this_test_will_fail() {
|
||
let value = prints_and_returns_10(8);
|
||
assert_eq!(5, value);
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 11-10: Tests for a function that calls
|
||
<code>println!</code></span></p>
|
||
<p>When we run these tests with <code>cargo test</code>, we’ll see the following output:</p>
|
||
<pre><code class="language-text">running 2 tests
|
||
test tests::this_test_will_pass ... ok
|
||
test tests::this_test_will_fail ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::this_test_will_fail stdout ----
|
||
I got the value 8
|
||
thread 'tests::this_test_will_fail' panicked at 'assertion failed: `(left == right)`
|
||
left: `5`,
|
||
right: `10`', src/lib.rs:19:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
|
||
failures:
|
||
tests::this_test_will_fail
|
||
|
||
test result: FAILED. 1 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Note that nowhere in this output do we see <code>I got the value 4</code>, which is what
|
||
is printed when the test that passes runs. That output has been captured. The
|
||
output from the test that failed, <code>I got the value 8</code>, appears in the section
|
||
of the test summary output, which also shows the cause of the test failure.</p>
|
||
<p>If we want to see printed values for passing tests as well, we can disable the
|
||
output capture behavior by using the <code>--nocapture</code> flag:</p>
|
||
<pre><code class="language-text">$ cargo test -- --nocapture
|
||
</code></pre>
|
||
<p>When we run the tests in Listing 11-10 again with the <code>--nocapture</code> flag, we
|
||
see the following output:</p>
|
||
<pre><code class="language-text">running 2 tests
|
||
I got the value 4
|
||
I got the value 8
|
||
test tests::this_test_will_pass ... ok
|
||
thread 'tests::this_test_will_fail' panicked at 'assertion failed: `(left == right)`
|
||
left: `5`,
|
||
right: `10`', src/lib.rs:19:9
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
test tests::this_test_will_fail ... FAILED
|
||
|
||
failures:
|
||
|
||
failures:
|
||
tests::this_test_will_fail
|
||
|
||
test result: FAILED. 1 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Note that the output for the tests and the test results are interleaved; the
|
||
reason is that the tests are running in parallel, as we talked about in the
|
||
previous section. Try using the <code>--test-threads=1</code> option and the <code>--nocapture</code>
|
||
flag, and see what the output looks like then!</p>
|
||
<h3><a class="header" href="#running-a-subset-of-tests-by-name" id="running-a-subset-of-tests-by-name">Running a Subset of Tests by Name</a></h3>
|
||
<p>Sometimes, running a full test suite can take a long time. If you’re working on
|
||
code in a particular area, you might want to run only the tests pertaining to
|
||
that code. You can choose which tests to run by passing <code>cargo test</code> the name
|
||
or names of the test(s) you want to run as an argument.</p>
|
||
<p>To demonstrate how to run a subset of tests, we’ll create three tests for our
|
||
<code>add_two</code> function, as shown in Listing 11-11, and choose which ones to run.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub fn add_two(a: i32) -> i32 {
|
||
a + 2
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn add_two_and_two() {
|
||
assert_eq!(4, add_two(2));
|
||
}
|
||
|
||
#[test]
|
||
fn add_three_and_two() {
|
||
assert_eq!(5, add_two(3));
|
||
}
|
||
|
||
#[test]
|
||
fn one_hundred() {
|
||
assert_eq!(102, add_two(100));
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 11-11: Three tests with three different
|
||
names</span></p>
|
||
<p>If we run the tests without passing any arguments, as we saw earlier, all the
|
||
tests will run in parallel:</p>
|
||
<pre><code class="language-text">running 3 tests
|
||
test tests::add_two_and_two ... ok
|
||
test tests::add_three_and_two ... ok
|
||
test tests::one_hundred ... ok
|
||
|
||
test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<h4><a class="header" href="#running-single-tests" id="running-single-tests">Running Single Tests</a></h4>
|
||
<p>We can pass the name of any test function to <code>cargo test</code> to run only that test:</p>
|
||
<pre><code class="language-text">$ cargo test one_hundred
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running target/debug/deps/adder-06a75b4a1f2515e9
|
||
|
||
running 1 test
|
||
test tests::one_hundred ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 2 filtered out
|
||
</code></pre>
|
||
<p>Only the test with the name <code>one_hundred</code> ran; the other two tests didn’t match
|
||
that name. The test output lets us know we had more tests than what this
|
||
command ran by displaying <code>2 filtered out</code> at the end of the summary line.</p>
|
||
<p>We can’t specify the names of multiple tests in this way; only the first value
|
||
given to <code>cargo test</code> will be used. But there is a way to run multiple tests.</p>
|
||
<h4><a class="header" href="#filtering-to-run-multiple-tests" id="filtering-to-run-multiple-tests">Filtering to Run Multiple Tests</a></h4>
|
||
<p>We can specify part of a test name, and any test whose name matches that value
|
||
will be run. For example, because two of our tests’ names contain <code>add</code>, we can
|
||
run those two by running <code>cargo test add</code>:</p>
|
||
<pre><code class="language-text">$ cargo test add
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running target/debug/deps/adder-06a75b4a1f2515e9
|
||
|
||
running 2 tests
|
||
test tests::add_two_and_two ... ok
|
||
test tests::add_three_and_two ... ok
|
||
|
||
test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out
|
||
</code></pre>
|
||
<p>This command ran all tests with <code>add</code> in the name and filtered out the test
|
||
named <code>one_hundred</code>. Also note that the module in which a test appears becomes
|
||
part of the test’s name, so we can run all the tests in a module by filtering
|
||
on the module’s name.</p>
|
||
<h3><a class="header" href="#ignoring-some-tests-unless-specifically-requested" id="ignoring-some-tests-unless-specifically-requested">Ignoring Some Tests Unless Specifically Requested</a></h3>
|
||
<p>Sometimes a few specific tests can be very time-consuming to execute, so you
|
||
might want to exclude them during most runs of <code>cargo test</code>. Rather than
|
||
listing as arguments all tests you do want to run, you can instead annotate the
|
||
time-consuming tests using the <code>ignore</code> attribute to exclude them, as shown
|
||
here:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[test]
|
||
fn it_works() {
|
||
assert_eq!(2 + 2, 4);
|
||
}
|
||
|
||
#[test]
|
||
#[ignore]
|
||
fn expensive_test() {
|
||
// code that takes an hour to run
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>After <code>#[test]</code> we add the <code>#[ignore]</code> line to the test we want to exclude. Now
|
||
when we run our tests, <code>it_works</code> runs, but <code>expensive_test</code> doesn’t:</p>
|
||
<pre><code class="language-text">$ cargo test
|
||
Compiling adder v0.1.0 (file:///projects/adder)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.24 secs
|
||
Running target/debug/deps/adder-ce99bcc2479f4607
|
||
|
||
running 2 tests
|
||
test expensive_test ... ignored
|
||
test it_works ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 1 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>The <code>expensive_test</code> function is listed as <code>ignored</code>. If we want to run only
|
||
the ignored tests, we can use <code>cargo test -- --ignored</code>:</p>
|
||
<pre><code class="language-text">$ cargo test -- --ignored
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running target/debug/deps/adder-ce99bcc2479f4607
|
||
|
||
running 1 test
|
||
test expensive_test ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out
|
||
</code></pre>
|
||
<p>By controlling which tests run, you can make sure your <code>cargo test</code> results
|
||
will be fast. When you’re at a point where it makes sense to check the results
|
||
of the <code>ignored</code> tests and you have time to wait for the results, you can run
|
||
<code>cargo test -- --ignored</code> instead.</p>
|
||
<h2><a class="header" href="#test-organization" id="test-organization">Test Organization</a></h2>
|
||
<p>As mentioned at the start of the chapter, testing is a complex discipline, and
|
||
different people use different terminology and organization. The Rust community
|
||
thinks about tests in terms of two main categories: <em>unit tests</em> and
|
||
<em>integration tests</em>. Unit tests are small and more focused, testing one module
|
||
in isolation at a time, and can test private interfaces. Integration tests are
|
||
entirely external to your library and use your code in the same way any other
|
||
external code would, using only the public interface and potentially exercising
|
||
multiple modules per test.</p>
|
||
<p>Writing both kinds of tests is important to ensure that the pieces of your
|
||
library are doing what you expect them to, separately and together.</p>
|
||
<h3><a class="header" href="#unit-tests" id="unit-tests">Unit Tests</a></h3>
|
||
<p>The purpose of unit tests is to test each unit of code in isolation from the
|
||
rest of the code to quickly pinpoint where code is and isn’t working as
|
||
expected. You’ll put unit tests in the <em>src</em> directory in each file with the
|
||
code that they’re testing. The convention is to create a module named <code>tests</code>
|
||
in each file to contain the test functions and to annotate the module with
|
||
<code>cfg(test)</code>.</p>
|
||
<h4><a class="header" href="#the-tests-module-and-cfgtest" id="the-tests-module-and-cfgtest">The Tests Module and <code>#[cfg(test)]</code></a></h4>
|
||
<p>The <code>#[cfg(test)]</code> annotation on the tests module tells Rust to compile and run
|
||
the test code only when you run <code>cargo test</code>, not when you run <code>cargo build</code>.
|
||
This saves compile time when you only want to build the library and saves space
|
||
in the resulting compiled artifact because the tests are not included. You’ll
|
||
see that because integration tests go in a different directory, they don’t need
|
||
the <code>#[cfg(test)]</code> annotation. However, because unit tests go in the same files
|
||
as the code, you’ll use <code>#[cfg(test)]</code> to specify that they shouldn’t be
|
||
included in the compiled result.</p>
|
||
<p>Recall that when we generated the new <code>adder</code> project in the first section of
|
||
this chapter, Cargo generated this code for us:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
#[test]
|
||
fn it_works() {
|
||
assert_eq!(2 + 2, 4);
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code is the automatically generated test module. The attribute <code>cfg</code>
|
||
stands for <em>configuration</em> and tells Rust that the following item should only
|
||
be included given a certain configuration option. In this case, the
|
||
configuration option is <code>test</code>, which is provided by Rust for compiling and
|
||
running tests. By using the <code>cfg</code> attribute, Cargo compiles our test code only
|
||
if we actively run the tests with <code>cargo test</code>. This includes any helper
|
||
functions that might be within this module, in addition to the functions
|
||
annotated with <code>#[test]</code>.</p>
|
||
<h4><a class="header" href="#testing-private-functions" id="testing-private-functions">Testing Private Functions</a></h4>
|
||
<p>There’s debate within the testing community about whether or not private
|
||
functions should be tested directly, and other languages make it difficult or
|
||
impossible to test private functions. Regardless of which testing ideology you
|
||
adhere to, Rust’s privacy rules do allow you to test private functions.
|
||
Consider the code in Listing 11-12 with the private function <code>internal_adder</code>.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>
|
||
pub fn add_two(a: i32) -> i32 {
|
||
internal_adder(a, 2)
|
||
}
|
||
|
||
fn internal_adder(a: i32, b: i32) -> i32 {
|
||
a + b
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn internal() {
|
||
assert_eq!(4, internal_adder(2, 2));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 11-12: Testing a private function</span></p>
|
||
<p>Note that the <code>internal_adder</code> function is not marked as <code>pub</code>, but because
|
||
tests are just Rust code and the <code>tests</code> module is just another module, you can
|
||
bring <code>internal_adder</code> into a test’s scope and call it. If you don’t think
|
||
private functions should be tested, there’s nothing in Rust that will compel
|
||
you to do so.</p>
|
||
<h3><a class="header" href="#integration-tests" id="integration-tests">Integration Tests</a></h3>
|
||
<p>In Rust, integration tests are entirely external to your library. They use your
|
||
library in the same way any other code would, which means they can only call
|
||
functions that are part of your library’s public API. Their purpose is to test
|
||
whether many parts of your library work together correctly. Units of code that
|
||
work correctly on their own could have problems when integrated, so test
|
||
coverage of the integrated code is important as well. To create integration
|
||
tests, you first need a <em>tests</em> directory.</p>
|
||
<h4><a class="header" href="#the-tests-directory" id="the-tests-directory">The <em>tests</em> Directory</a></h4>
|
||
<p>We create a <em>tests</em> directory at the top level of our project directory, next
|
||
to <em>src</em>. Cargo knows to look for integration test files in this directory. We
|
||
can then make as many test files as we want to in this directory, and Cargo
|
||
will compile each of the files as an individual crate.</p>
|
||
<p>Let’s create an integration test. With the code in Listing 11-12 still in the
|
||
<em>src/lib.rs</em> file, make a <em>tests</em> directory, create a new file named
|
||
<em>tests/integration_test.rs</em>, and enter the code in Listing 11-13.</p>
|
||
<p><span class="filename">Filename: tests/integration_test.rs</span></p>
|
||
<pre><code class="language-rust ignore">use adder;
|
||
|
||
#[test]
|
||
fn it_adds_two() {
|
||
assert_eq!(4, adder::add_two(2));
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 11-13: An integration test of a function in the
|
||
<code>adder</code> crate</span></p>
|
||
<p>We’ve added <code>use adder</code> at the top of the code, which we didn’t need in the
|
||
unit tests. The reason is that each file in the <code>tests</code> directory is a separate
|
||
crate, so we need to bring our library into each test crate’s scope.</p>
|
||
<p>We don’t need to annotate any code in <em>tests/integration_test.rs</em> with
|
||
<code>#[cfg(test)]</code>. Cargo treats the <code>tests</code> directory specially and compiles files
|
||
in this directory only when we run <code>cargo test</code>. Run <code>cargo test</code> now:</p>
|
||
<pre><code class="language-text">$ cargo test
|
||
Compiling adder v0.1.0 (file:///projects/adder)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.31 secs
|
||
Running target/debug/deps/adder-abcabcabc
|
||
|
||
running 1 test
|
||
test tests::internal ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Running target/debug/deps/integration_test-ce99bcc2479f4607
|
||
|
||
running 1 test
|
||
test it_adds_two ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Doc-tests adder
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>The three sections of output include the unit tests, the integration test, and
|
||
the doc tests. The first section for the unit tests is the same as we’ve been
|
||
seeing: one line for each unit test (one named <code>internal</code> that we added in
|
||
Listing 11-12) and then a summary line for the unit tests.</p>
|
||
<p>The integration tests section starts with the line <code>Running target/debug/deps/integration_test-ce99bcc2479f4607</code> (the hash at the end of
|
||
your output will be different). Next, there is a line for each test function in
|
||
that integration test and a summary line for the results of the integration
|
||
test just before the <code>Doc-tests adder</code> section starts.</p>
|
||
<p>Similarly to how adding more unit test functions adds more result lines to the
|
||
unit tests section, adding more test functions to the integration test file
|
||
adds more result lines to this integration test file’s section. Each
|
||
integration test file has its own section, so if we add more files in the
|
||
<em>tests</em> directory, there will be more integration test sections.</p>
|
||
<p>We can still run a particular integration test function by specifying the test
|
||
function’s name as an argument to <code>cargo test</code>. To run all the tests in a
|
||
particular integration test file, use the <code>--test</code> argument of <code>cargo test</code>
|
||
followed by the name of the file:</p>
|
||
<pre><code class="language-text">$ cargo test --test integration_test
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running target/debug/deps/integration_test-952a27e0126bb565
|
||
|
||
running 1 test
|
||
test it_adds_two ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>This command runs only the tests in the <em>tests/integration_test.rs</em> file.</p>
|
||
<h4><a class="header" href="#submodules-in-integration-tests" id="submodules-in-integration-tests">Submodules in Integration Tests</a></h4>
|
||
<p>As you add more integration tests, you might want to make more than one file in
|
||
the <em>tests</em> directory to help organize them; for example, you can group the
|
||
test functions by the functionality they’re testing. As mentioned earlier, each
|
||
file in the <em>tests</em> directory is compiled as its own separate crate.</p>
|
||
<p>Treating each integration test file as its own crate is useful to create
|
||
separate scopes that are more like the way end users will be using your crate.
|
||
However, this means files in the <em>tests</em> directory don’t share the same
|
||
behavior as files in <em>src</em> do, as you learned in Chapter 7 regarding how to
|
||
separate code into modules and files.</p>
|
||
<p>The different behavior of files in the <em>tests</em> directory is most noticeable
|
||
when you have a set of helper functions that would be useful in multiple
|
||
integration test files and you try to follow the steps in the <a href="ch07-05-separating-modules-into-different-files.html">“Separating
|
||
Modules into Different Files”</a><!-- ignore -->
|
||
section of Chapter 7 to extract them into a common module. For example, if we
|
||
create <em>tests/common.rs</em> and place a function named <code>setup</code> in it, we can add
|
||
some code to <code>setup</code> that we want to call from multiple test functions in
|
||
multiple test files:</p>
|
||
<p><span class="filename">Filename: tests/common.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub fn setup() {
|
||
// setup code specific to your library's tests would go here
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>When we run the tests again, we’ll see a new section in the test output for the
|
||
<em>common.rs</em> file, even though this file doesn’t contain any test functions and
|
||
we didn’t call the <code>setup</code> function from anywhere:</p>
|
||
<pre><code class="language-text">running 1 test
|
||
test tests::internal ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Running target/debug/deps/common-b8b07b6f1be2db70
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Running target/debug/deps/integration_test-d993c68b431d39df
|
||
|
||
running 1 test
|
||
test it_adds_two ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Doc-tests adder
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Having <code>common</code> appear in the test results with <code>running 0 tests</code> displayed for
|
||
it is not what we wanted. We just wanted to share some code with the other
|
||
integration test files.</p>
|
||
<p>To avoid having <code>common</code> appear in the test output, instead of creating
|
||
<em>tests/common.rs</em>, we’ll create <em>tests/common/mod.rs</em>. This is an alternate
|
||
naming convention that Rust also understands. Naming the file this way tells
|
||
Rust not to treat the <code>common</code> module as an integration test file. When we move
|
||
the <code>setup</code> function code into <em>tests/common/mod.rs</em> and delete the
|
||
<em>tests/common.rs</em> file, the section in the test output will no longer appear.
|
||
Files in subdirectories of the <em>tests</em> directory don’t get compiled as separate
|
||
crates or have sections in the test output.</p>
|
||
<p>After we’ve created <em>tests/common/mod.rs</em>, we can use it from any of the
|
||
integration test files as a module. Here’s an example of calling the <code>setup</code>
|
||
function from the <code>it_adds_two</code> test in <em>tests/integration_test.rs</em>:</p>
|
||
<p><span class="filename">Filename: tests/integration_test.rs</span></p>
|
||
<pre><code class="language-rust ignore">use adder;
|
||
|
||
mod common;
|
||
|
||
#[test]
|
||
fn it_adds_two() {
|
||
common::setup();
|
||
assert_eq!(4, adder::add_two(2));
|
||
}
|
||
</code></pre>
|
||
<p>Note that the <code>mod common;</code> declaration is the same as the module declaration
|
||
we demonstrated in Listing 7-21. Then in the test function, we can call the
|
||
<code>common::setup()</code> function.</p>
|
||
<h4><a class="header" href="#integration-tests-for-binary-crates" id="integration-tests-for-binary-crates">Integration Tests for Binary Crates</a></h4>
|
||
<p>If our project is a binary crate that only contains a <em>src/main.rs</em> file and
|
||
doesn’t have a <em>src/lib.rs</em> file, we can’t create integration tests in the
|
||
<em>tests</em> directory and bring functions defined in the <em>src/main.rs</em> file into
|
||
scope with a <code>use</code> statement. Only library crates expose functions that other
|
||
crates can use; binary crates are meant to be run on their own.</p>
|
||
<p>This is one of the reasons Rust projects that provide a binary have a
|
||
straightforward <em>src/main.rs</em> file that calls logic that lives in the
|
||
<em>src/lib.rs</em> file. Using that structure, integration tests <em>can</em> test the
|
||
library crate with <code>use</code> to make the important functionality available.
|
||
If the important functionality works, the small amount of code in the
|
||
<em>src/main.rs</em> file will work as well, and that small amount of code doesn’t
|
||
need to be tested.</p>
|
||
<h2><a class="header" href="#summary-10" id="summary-10">Summary</a></h2>
|
||
<p>Rust’s testing features provide a way to specify how code should function to
|
||
ensure it continues to work as you expect, even as you make changes. Unit tests
|
||
exercise different parts of a library separately and can test private
|
||
implementation details. Integration tests check that many parts of the library
|
||
work together correctly, and they use the library’s public API to test the code
|
||
in the same way external code will use it. Even though Rust’s type system and
|
||
ownership rules help prevent some kinds of bugs, tests are still important to
|
||
reduce logic bugs having to do with how your code is expected to behave.</p>
|
||
<p>Let’s combine the knowledge you learned in this chapter and in previous
|
||
chapters to work on a project!</p>
|
||
<h1><a class="header" href="#an-io-project-building-a-command-line-program" id="an-io-project-building-a-command-line-program">An I/O Project: Building a Command Line Program</a></h1>
|
||
<p>This chapter is a recap of the many skills you’ve learned so far and an
|
||
exploration of a few more standard library features. We’ll build a command line
|
||
tool that interacts with file and command line input/output to practice some of
|
||
the Rust concepts you now have under your belt.</p>
|
||
<p>Rust’s speed, safety, single binary output, and cross-platform support make it
|
||
an ideal language for creating command line tools, so for our project, we’ll
|
||
make our own version of the classic command line tool <code>grep</code> (<strong>g</strong>lobally
|
||
search a <strong>r</strong>egular <strong>e</strong>xpression and <strong>p</strong>rint). In the simplest use case,
|
||
<code>grep</code> searches a specified file for a specified string. To do so, <code>grep</code> takes
|
||
as its arguments a filename and a string. Then it reads the file, finds lines
|
||
in that file that contain the string argument, and prints those lines.</p>
|
||
<p>Along the way, we’ll show how to make our command line tool use features of the
|
||
terminal that many command line tools use. We’ll read the value of an
|
||
environment variable to allow the user to configure the behavior of our tool.
|
||
We’ll also print error messages to the standard error console stream (<code>stderr</code>)
|
||
instead of standard output (<code>stdout</code>), so, for example, the user can redirect
|
||
successful output to a file while still seeing error messages onscreen.</p>
|
||
<p>One Rust community member, Andrew Gallant, has already created a fully
|
||
featured, very fast version of <code>grep</code>, called <code>ripgrep</code>. By comparison, our
|
||
version of <code>grep</code> will be fairly simple, but this chapter will give you some of
|
||
the background knowledge you need to understand a real-world project such as
|
||
<code>ripgrep</code>.</p>
|
||
<p>Our <code>grep</code> project will combine a number of concepts you’ve learned so far:</p>
|
||
<ul>
|
||
<li>Organizing code (using what you learned about modules in <a href="ch07-00-managing-growing-projects-with-packages-crates-and-modules.html">Chapter 7</a><!--
|
||
ignore -->)</li>
|
||
<li>Using vectors and strings (collections, <a href="ch08-00-common-collections.html">Chapter 8</a><!-- ignore -->)</li>
|
||
<li>Handling errors (<a href="ch09-00-error-handling.html">Chapter 9</a><!-- ignore -->)</li>
|
||
<li>Using traits and lifetimes where appropriate (<a href="ch10-00-generics.html">Chapter 10</a><!-- ignore
|
||
-->)</li>
|
||
<li>Writing tests (<a href="ch11-00-testing.html">Chapter 11</a><!-- ignore -->)</li>
|
||
</ul>
|
||
<p>We’ll also briefly introduce closures, iterators, and trait objects, which
|
||
Chapters <a href="ch13-00-functional-features.html">13</a><!-- ignore --> and <a href="ch17-00-oop.html">17</a><!-- ignore --> will cover in
|
||
detail.</p>
|
||
<h2><a class="header" href="#accepting-command-line-arguments" id="accepting-command-line-arguments">Accepting Command Line Arguments</a></h2>
|
||
<p>Let’s create a new project with, as always, <code>cargo new</code>. We’ll call our project
|
||
<code>minigrep</code> to distinguish it from the <code>grep</code> tool that you might already have
|
||
on your system.</p>
|
||
<pre><code class="language-text">$ cargo new minigrep
|
||
Created binary (application) `minigrep` project
|
||
$ cd minigrep
|
||
</code></pre>
|
||
<p>The first task is to make <code>minigrep</code> accept its two command line arguments: the
|
||
filename and a string to search for. That is, we want to be able to run our
|
||
program with <code>cargo run</code>, a string to search for, and a path to a file to
|
||
search in, like so:</p>
|
||
<pre><code class="language-text">$ cargo run searchstring example-filename.txt
|
||
</code></pre>
|
||
<p>Right now, the program generated by <code>cargo new</code> cannot process arguments we
|
||
give it. Some existing libraries on <a href="https://crates.io/">crates.io</a> can help
|
||
with writing a program that accepts command line arguments, but because you’re
|
||
just learning this concept, let’s implement this capability ourselves.</p>
|
||
<h3><a class="header" href="#reading-the-argument-values" id="reading-the-argument-values">Reading the Argument Values</a></h3>
|
||
<p>To enable <code>minigrep</code> to read the values of command line arguments we pass to
|
||
it, we’ll need a function provided in Rust’s standard library, which is
|
||
<code>std::env::args</code>. This function returns an iterator of the command line
|
||
arguments that were given to <code>minigrep</code>. We’ll cover iterators fully in
|
||
<a href="ch13-00-functional-features.html">Chapter 13</a><!-- ignore -->. For now, you only need to know two details
|
||
about iterators: iterators produce a series of values, and we can call the
|
||
<code>collect</code> method on an iterator to turn it into a collection, such as a vector,
|
||
containing all the elements the iterator produces.</p>
|
||
<p>Use the code in Listing 12-1 to allow your <code>minigrep</code> program to read any
|
||
command line arguments passed to it and then collect the values into a vector.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::env;
|
||
|
||
fn main() {
|
||
let args: Vec&lt;String&gt; = env::args().collect();
|
||
println!("{:?}", args);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 12-1: Collecting the command line arguments into
|
||
a vector and printing them</span></p>
|
||
<p>First, we bring the <code>std::env</code> module into scope with a <code>use</code> statement so we
|
||
can use its <code>args</code> function. Notice that the <code>std::env::args</code> function is
|
||
nested in two levels of modules. As we discussed in <a href="ch07-04-bringing-paths-into-scope-with-the-use-keyword.html#creating-idiomatic-use-paths">Chapter
|
||
7</a><!-- ignore -->, in cases where the desired function is
|
||
nested in more than one module, it’s conventional to bring the parent module
|
||
into scope rather than the function. By doing so, we can easily use other
|
||
functions from <code>std::env</code>. It’s also less ambiguous than adding <code>use std::env::args</code> and then calling the function with just <code>args</code>, because <code>args</code>
|
||
might easily be mistaken for a function that’s defined in the current module.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#the-args-function-and-invalid-unicode" id="the-args-function-and-invalid-unicode">The <code>args</code> Function and Invalid Unicode</a></h3>
|
||
<p>Note that <code>std::env::args</code> will panic if any argument contains invalid
|
||
Unicode. If your program needs to accept arguments containing invalid
|
||
Unicode, use <code>std::env::args_os</code> instead. That function returns an iterator
|
||
that produces <code>OsString</code> values instead of <code>String</code> values. We’ve chosen to
|
||
use <code>std::env::args</code> here for simplicity, because <code>OsString</code> values differ
|
||
per platform and are more complex to work with than <code>String</code> values.</p>
|
||
</blockquote>
|
||
<p>On the first line of <code>main</code>, we call <code>env::args</code>, and we immediately use
|
||
<code>collect</code> to turn the iterator into a vector containing all the values produced
|
||
by the iterator. We can use the <code>collect</code> function to create many kinds of
|
||
collections, so we explicitly annotate the type of <code>args</code> to specify that we
|
||
want a vector of strings. Although we very rarely need to annotate types in
|
||
Rust, <code>collect</code> is one function you do often need to annotate because Rust
|
||
isn’t able to infer the kind of collection you want.</p>
|
||
<p>Finally, we print the vector using the debug formatter, <code>:?</code>. Let’s try running
|
||
the code first with no arguments and then with two arguments:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
--snip--
|
||
["target/debug/minigrep"]
|
||
|
||
$ cargo run needle haystack
|
||
--snip--
|
||
["target/debug/minigrep", "needle", "haystack"]
|
||
</code></pre>
|
||
<p>Notice that the first value in the vector is <code>"target/debug/minigrep"</code>, which
|
||
is the name of our binary. This matches the behavior of the arguments list in
|
||
C, letting programs use the name by which they were invoked in their execution.
|
||
It’s often convenient to have access to the program name in case you want to
|
||
print it in messages or change behavior of the program based on what command
|
||
line alias was used to invoke the program. But for the purposes of this
|
||
chapter, we’ll ignore it and save only the two arguments we need.</p>
|
||
<h3><a class="header" href="#saving-the-argument-values-in-variables" id="saving-the-argument-values-in-variables">Saving the Argument Values in Variables</a></h3>
|
||
<p>Printing the value of the vector of arguments illustrated that the program is
|
||
able to access the values specified as command line arguments. Now we need to
|
||
save the values of the two arguments in variables so we can use the values
|
||
throughout the rest of the program. We do that in Listing 12-2.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic">use std::env;
|
||
|
||
fn main() {
|
||
let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let query = &args[1];
|
||
let filename = &args[2];
|
||
|
||
println!("Searching for {}", query);
|
||
println!("In file {}", filename);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 12-2: Creating variables to hold the query
|
||
argument and filename argument</span></p>
|
||
<p>As we saw when we printed the vector, the program’s name takes up the first
|
||
value in the vector at <code>args[0]</code>, so we’re starting at index <code>1</code>. The first
|
||
argument <code>minigrep</code> takes is the string we’re searching for, so we put a
|
||
reference to the first argument in the variable <code>query</code>. The second argument
|
||
will be the filename, so we put a reference to the second argument in the
|
||
variable <code>filename</code>.</p>
|
||
<p>We temporarily print the values of these variables to prove that the code is
|
||
working as we intend. Let’s run this program again with the arguments <code>test</code>
|
||
and <code>sample.txt</code>:</p>
|
||
<pre><code class="language-text">$ cargo run test sample.txt
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep test sample.txt`
|
||
Searching for test
|
||
In file sample.txt
|
||
</code></pre>
|
||
<p>Great, the program is working! The values of the arguments we need are being
|
||
saved into the right variables. Later we’ll add some error handling to deal
|
||
with certain potential erroneous situations, such as when the user provides no
|
||
arguments; for now, we’ll ignore that situation and work on adding file-reading
|
||
capabilities instead.</p>
|
||
<h2><a class="header" href="#reading-a-file" id="reading-a-file">Reading a File</a></h2>
|
||
<p>Now we’ll add functionality to read the file that is specified in the
|
||
<code>filename</code> command line argument. First, we need a sample file to test it with:
|
||
the best kind of file to use to make sure <code>minigrep</code> is working is one with a
|
||
small amount of text over multiple lines with some repeated words. Listing 12-3
|
||
has an Emily Dickinson poem that will work well! Create a file called
|
||
<em>poem.txt</em> at the root level of your project, and enter the poem “I’m Nobody!
|
||
Who are you?”</p>
|
||
<p><span class="filename">Filename: poem.txt</span></p>
|
||
<pre><code class="language-text">I'm nobody! Who are you?
|
||
Are you nobody, too?
|
||
Then there's a pair of us - don't tell!
|
||
They'd banish us, you know.
|
||
|
||
How dreary to be somebody!
|
||
How public, like a frog
|
||
To tell your name the livelong day
|
||
To an admiring bog!
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-3: A poem by Emily Dickinson makes a good test
|
||
case</span></p>
|
||
<p>With the text in place, edit <em>src/main.rs</em> and add code to read the file, as
|
||
shown in Listing 12-4.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic">use std::env;
|
||
use std::fs;
|
||
|
||
fn main() {
|
||
<span class="boring"> let args: Vec&lt;String&gt; = env::args().collect();
|
||
</span><span class="boring">
|
||
</span><span class="boring"> let query = &args[1];
|
||
</span><span class="boring"> let filename = &args[2];
|
||
</span><span class="boring">
|
||
</span><span class="boring"> println!("Searching for {}", query);
|
||
</span> // --snip--
|
||
println!("In file {}", filename);
|
||
|
||
let contents = fs::read_to_string(filename)
|
||
.expect("Something went wrong reading the file");
|
||
|
||
println!("With text:\n{}", contents);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 12-4: Reading the contents of the file specified
|
||
by the second argument</span></p>
|
||
<p>First, we add another <code>use</code> statement to bring in a relevant part of the
|
||
standard library: we need <code>std::fs</code> to handle files.</p>
|
||
<p>In <code>main</code>, we’ve added a new statement: <code>fs::read_to_string</code> takes the
|
||
<code>filename</code>, opens that file, and returns a <code>Result&lt;String&gt;</code> of the file’s
|
||
contents.</p>
|
||
<p>After that statement, we’ve again added a temporary <code>println!</code> statement that
|
||
prints the value of <code>contents</code> after the file is read, so we can check that the
|
||
program is working so far.</p>
|
||
<p>Let’s run this code with any string as the first command line argument (because
|
||
we haven’t implemented the searching part yet) and the <em>poem.txt</em> file as the
|
||
second argument:</p>
|
||
<pre><code class="language-text">$ cargo run the poem.txt
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep the poem.txt`
|
||
Searching for the
|
||
In file poem.txt
|
||
With text:
|
||
I'm nobody! Who are you?
|
||
Are you nobody, too?
|
||
Then there's a pair of us - don't tell!
|
||
They'd banish us, you know.
|
||
|
||
How dreary to be somebody!
|
||
How public, like a frog
|
||
To tell your name the livelong day
|
||
To an admiring bog!
|
||
</code></pre>
|
||
<p>Great! The code read and then printed the contents of the file. But the code
|
||
has a few flaws. The <code>main</code> function has multiple responsibilities: generally,
|
||
functions are clearer and easier to maintain if each function is responsible
|
||
for only one idea. The other problem is that we’re not handling errors as well
|
||
as we could. The program is still small, so these flaws aren’t a big problem,
|
||
but as the program grows, it will be harder to fix them cleanly. It’s good
|
||
practice to begin refactoring early on when developing a program, because it’s
|
||
much easier to refactor smaller amounts of code. We’ll do that next.</p>
|
||
<h2><a class="header" href="#refactoring-to-improve-modularity-and-error-handling" id="refactoring-to-improve-modularity-and-error-handling">Refactoring to Improve Modularity and Error Handling</a></h2>
|
||
<p>To improve our program, we’ll fix four problems that have to do with the
|
||
program’s structure and how it’s handling potential errors.</p>
|
||
<p>First, our <code>main</code> function now performs two tasks: it parses arguments and
|
||
reads files. For such a small function, this isn’t a major problem. However, if
|
||
we continue to grow our program inside <code>main</code>, the number of separate tasks the
|
||
<code>main</code> function handles will increase. As a function gains responsibilities, it
|
||
becomes more difficult to reason about, harder to test, and harder to change
|
||
without breaking one of its parts. It’s best to separate functionality so each
|
||
function is responsible for one task.</p>
|
||
<p>This issue also ties into the second problem: although <code>query</code> and <code>filename</code>
|
||
are configuration variables to our program, variables like <code>contents</code> are used
|
||
to perform the program’s logic. The longer <code>main</code> becomes, the more variables
|
||
we’ll need to bring into scope; the more variables we have in scope, the harder
|
||
it will be to keep track of the purpose of each. It’s best to group the
|
||
configuration variables into one structure to make their purpose clear.</p>
|
||
<p>The third problem is that we’ve used <code>expect</code> to print an error message when
|
||
reading the file fails, but the error message just prints <code>Something went wrong reading the file</code>. Reading a file can fail in a number of ways: for example,
|
||
the file could be missing, or we might not have permission to open it. Right
|
||
now, regardless of the situation, we’d print the <code>Something went wrong reading the file</code> error message, which wouldn’t give the user any information!</p>
|
||
<p>Fourth, we use <code>expect</code> repeatedly to handle different errors, and if the user
|
||
runs our program without specifying enough arguments, they’ll get an <code>index out of bounds</code> error from Rust that doesn’t clearly explain the problem. It would
|
||
be best if all the error-handling code were in one place so future maintainers
|
||
had only one place to consult in the code if the error-handling logic needed to
|
||
change. Having all the error-handling code in one place will also ensure that
|
||
we’re printing messages that will be meaningful to our end users.</p>
|
||
<p>Let’s address these four problems by refactoring our project.</p>
|
||
<h3><a class="header" href="#separation-of-concerns-for-binary-projects" id="separation-of-concerns-for-binary-projects">Separation of Concerns for Binary Projects</a></h3>
|
||
<p>The organizational problem of allocating responsibility for multiple tasks to
|
||
the <code>main</code> function is common to many binary projects. As a result, the Rust
|
||
community has developed a process to use as a guideline for splitting the
|
||
separate concerns of a binary program when <code>main</code> starts getting large. The
|
||
process has the following steps:</p>
|
||
<ul>
|
||
<li>Split your program into a <em>main.rs</em> and a <em>lib.rs</em> and move your program’s
|
||
logic to <em>lib.rs</em>.</li>
|
||
<li>As long as your command line parsing logic is small, it can remain in
|
||
<em>main.rs</em>.</li>
|
||
<li>When the command line parsing logic starts getting complicated, extract it
|
||
from <em>main.rs</em> and move it to <em>lib.rs</em>.</li>
|
||
</ul>
|
||
<p>The responsibilities that remain in the <code>main</code> function after this process
|
||
should be limited to the following:</p>
|
||
<ul>
|
||
<li>Calling the command line parsing logic with the argument values</li>
|
||
<li>Setting up any other configuration</li>
|
||
<li>Calling a <code>run</code> function in <em>lib.rs</em></li>
|
||
<li>Handling the error if <code>run</code> returns an error</li>
|
||
</ul>
|
||
<p>This pattern is about separating concerns: <em>main.rs</em> handles running the
|
||
program, and <em>lib.rs</em> handles all the logic of the task at hand. Because you
|
||
can’t test the <code>main</code> function directly, this structure lets you test all of
|
||
your program’s logic by moving it into functions in <em>lib.rs</em>. The only code
|
||
that remains in <em>main.rs</em> will be small enough to verify its correctness by
|
||
reading it. Let’s rework our program by following this process.</p>
|
||
<h4><a class="header" href="#extracting-the-argument-parser" id="extracting-the-argument-parser">Extracting the Argument Parser</a></h4>
|
||
<p>We’ll extract the functionality for parsing arguments into a function that
|
||
<code>main</code> will call to prepare for moving the command line parsing logic to
|
||
<em>src/lib.rs</em>. Listing 12-5 shows the new start of <code>main</code> that calls a new
|
||
function <code>parse_config</code>, which we’ll define in <em>src/main.rs</em> for the moment.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let (query, filename) = parse_config(&args);
|
||
|
||
// --snip--
|
||
}
|
||
|
||
fn parse_config(args: &[String]) -> (&str, &str) {
|
||
let query = &args[1];
|
||
let filename = &args[2];
|
||
|
||
(query, filename)
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-5: Extracting a <code>parse_config</code> function from
|
||
<code>main</code></span></p>
|
||
<p>We’re still collecting the command line arguments into a vector, but instead of
|
||
assigning the argument value at index 1 to the variable <code>query</code> and the
|
||
argument value at index 2 to the variable <code>filename</code> within the <code>main</code>
|
||
function, we pass the whole vector to the <code>parse_config</code> function. The
|
||
<code>parse_config</code> function then holds the logic that determines which argument
|
||
goes in which variable and passes the values back to <code>main</code>. We still create
|
||
the <code>query</code> and <code>filename</code> variables in <code>main</code>, but <code>main</code> no longer has the
|
||
responsibility of determining how the command line arguments and variables
|
||
correspond.</p>
|
||
<p>This rework may seem like overkill for our small program, but we’re refactoring
|
||
in small, incremental steps. After making this change, run the program again to
|
||
verify that the argument parsing still works. It’s good to check your progress
|
||
often, to help identify the cause of problems when they occur.</p>
|
||
<h4><a class="header" href="#grouping-configuration-values" id="grouping-configuration-values">Grouping Configuration Values</a></h4>
|
||
<p>We can take another small step to improve the <code>parse_config</code> function further.
|
||
At the moment, we’re returning a tuple, but then we immediately break that
|
||
tuple into individual parts again. This is a sign that perhaps we don’t have
|
||
the right abstraction yet.</p>
|
||
<p>Another indicator that shows there’s room for improvement is the <code>config</code> part
|
||
of <code>parse_config</code>, which implies that the two values we return are related and
|
||
are both part of one configuration value. We’re not currently conveying this
|
||
meaning in the structure of the data other than by grouping the two values into
|
||
a tuple; we could put the two values into one struct and give each of the
|
||
struct fields a meaningful name. Doing so will make it easier for future
|
||
maintainers of this code to understand how the different values relate to each
|
||
other and what their purpose is.</p>
|
||
<blockquote>
|
||
<p>Note: Using primitive values when a complex type would be more appropriate is
|
||
an anti-pattern known as <em>primitive obsession</em>.</p>
|
||
</blockquote>
|
||
<p>Listing 12-6 shows the improvements to the <code>parse_config</code> function.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic"><span class="boring">use std::env;
|
||
</span><span class="boring">use std::fs;
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
    let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let config = parse_config(&args);
|
||
|
||
println!("Searching for {}", config.query);
|
||
println!("In file {}", config.filename);
|
||
|
||
let contents = fs::read_to_string(config.filename)
|
||
.expect("Something went wrong reading the file");
|
||
|
||
// --snip--
|
||
}
|
||
|
||
struct Config {
|
||
query: String,
|
||
filename: String,
|
||
}
|
||
|
||
fn parse_config(args: &[String]) -> Config {
|
||
let query = args[1].clone();
|
||
let filename = args[2].clone();
|
||
|
||
Config { query, filename }
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 12-6: Refactoring <code>parse_config</code> to return an
|
||
instance of a <code>Config</code> struct</span></p>
|
||
<p>We’ve added a struct named <code>Config</code> defined to have fields named <code>query</code> and
|
||
<code>filename</code>. The signature of <code>parse_config</code> now indicates that it returns a
|
||
<code>Config</code> value. In the body of <code>parse_config</code>, where we used to return string
|
||
slices that reference <code>String</code> values in <code>args</code>, we now define <code>Config</code> to
|
||
contain owned <code>String</code> values. The <code>args</code> variable in <code>main</code> is the owner of
|
||
the argument values and is only letting the <code>parse_config</code> function borrow
|
||
them, which means we’d violate Rust’s borrowing rules if <code>Config</code> tried to take
|
||
ownership of the values in <code>args</code>.</p>
|
||
<p>We could manage the <code>String</code> data in a number of different ways, but the
|
||
easiest, though somewhat inefficient, route is to call the <code>clone</code> method on
|
||
the values. This will make a full copy of the data for the <code>Config</code> instance to
|
||
own, which takes more time and memory than storing a reference to the string
|
||
data. However, cloning the data also makes our code very straightforward
|
||
because we don’t have to manage the lifetimes of the references; in this
|
||
circumstance, giving up a little performance to gain simplicity is a worthwhile
|
||
trade-off.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#the-trade-offs-of-using-clone" id="the-trade-offs-of-using-clone">The Trade-Offs of Using <code>clone</code></a></h3>
|
||
<p>There’s a tendency among many Rustaceans to avoid using <code>clone</code> to fix
|
||
ownership problems because of its runtime cost. In
|
||
<a href="ch13-00-functional-features.html">Chapter 13</a><!-- ignore -->, you’ll learn how to use more efficient
|
||
methods in this type of situation. But for now, it’s okay to copy a few
|
||
strings to continue making progress because you’ll make these copies only
|
||
once and your filename and query string are very small. It’s better to have
|
||
a working program that’s a bit inefficient than to try to hyperoptimize code
|
||
on your first pass. As you become more experienced with Rust, it’ll be
|
||
easier to start with the most efficient solution, but for now, it’s
|
||
perfectly acceptable to call <code>clone</code>.</p>
|
||
</blockquote>
|
||
<p>We’ve updated <code>main</code> so it places the instance of <code>Config</code> returned by
|
||
<code>parse_config</code> into a variable named <code>config</code>, and we updated the code that
|
||
previously used the separate <code>query</code> and <code>filename</code> variables so it now uses
|
||
the fields on the <code>Config</code> struct instead.</p>
|
||
<p>Now our code more clearly conveys that <code>query</code> and <code>filename</code> are related and
|
||
that their purpose is to configure how the program will work. Any code that
|
||
uses these values knows to find them in the <code>config</code> instance in the fields
|
||
named for their purpose.</p>
|
||
<h4><a class="header" href="#creating-a-constructor-for-config" id="creating-a-constructor-for-config">Creating a Constructor for <code>Config</code></a></h4>
|
||
<p>So far, we’ve extracted the logic responsible for parsing the command line
|
||
arguments from <code>main</code> and placed it in the <code>parse_config</code> function. Doing so
|
||
helped us to see that the <code>query</code> and <code>filename</code> values were related and that
|
||
relationship should be conveyed in our code. We then added a <code>Config</code> struct to
|
||
name the related purpose of <code>query</code> and <code>filename</code> and to be able to return the
|
||
values’ names as struct field names from the <code>parse_config</code> function.</p>
|
||
<p>So now that the purpose of the <code>parse_config</code> function is to create a <code>Config</code>
|
||
instance, we can change <code>parse_config</code> from a plain function to a function
|
||
named <code>new</code> that is associated with the <code>Config</code> struct. Making this change
|
||
will make the code more idiomatic. We can create instances of types in the
|
||
standard library, such as <code>String</code>, by calling <code>String::new</code>. Similarly, by
|
||
changing <code>parse_config</code> into a <code>new</code> function associated with <code>Config</code>, we’ll
|
||
be able to create instances of <code>Config</code> by calling <code>Config::new</code>. Listing 12-7
|
||
shows the changes we need to make.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust should_panic"><span class="boring">use std::env;
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
    let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let config = Config::new(&args);
|
||
|
||
// --snip--
|
||
}
|
||
|
||
<span class="boring">struct Config {
|
||
</span><span class="boring"> query: String,
|
||
</span><span class="boring"> filename: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>// --snip--
|
||
|
||
impl Config {
|
||
fn new(args: &[String]) -> Config {
|
||
let query = args[1].clone();
|
||
let filename = args[2].clone();
|
||
|
||
Config { query, filename }
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 12-7: Changing <code>parse_config</code> into
|
||
<code>Config::new</code></span></p>
|
||
<p>We’ve updated <code>main</code> where we were calling <code>parse_config</code> to instead call
|
||
<code>Config::new</code>. We’ve changed the name of <code>parse_config</code> to <code>new</code> and moved it
|
||
within an <code>impl</code> block, which associates the <code>new</code> function with <code>Config</code>. Try
|
||
compiling this code again to make sure it works.</p>
|
||
<h3><a class="header" href="#fixing-the-error-handling" id="fixing-the-error-handling">Fixing the Error Handling</a></h3>
|
||
<p>Now we’ll work on fixing our error handling. Recall that attempting to access
|
||
the values in the <code>args</code> vector at index 1 or index 2 will cause the program to
|
||
panic if the vector contains fewer than three items. Try running the program
|
||
without any arguments; it will look like this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep`
|
||
thread 'main' panicked at 'index out of bounds: the len is 1
|
||
but the index is 1', src/main.rs:25:21
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>The line <code>index out of bounds: the len is 1 but the index is 1</code> is an error
|
||
message intended for programmers. It won’t help our end users understand what
|
||
happened and what they should do instead. Let’s fix that now.</p>
|
||
<h4><a class="header" href="#improving-the-error-message" id="improving-the-error-message">Improving the Error Message</a></h4>
|
||
<p>In Listing 12-8, we add a check in the <code>new</code> function that will verify that the
|
||
slice is long enough before accessing index 1 and 2. If the slice isn’t long
|
||
enough, the program panics and displays a better error message than the <code>index out of bounds</code> message.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">// --snip--
|
||
fn new(args: &[String]) -> Config {
|
||
if args.len() < 3 {
|
||
panic!("not enough arguments");
|
||
}
|
||
// --snip--
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-8: Adding a check for the number of
|
||
arguments</span></p>
|
||
<p>This code is similar to <a href="ch09-03-to-panic-or-not-to-panic.html#creating-custom-types-for-validation">the <code>Guess::new</code> function we wrote in Listing
|
||
9-10</a><!-- ignore -->, where we called <code>panic!</code> when the
|
||
<code>value</code> argument was out of the range of valid values. Instead of checking for
|
||
a range of values here, we’re checking that the length of <code>args</code> is at least 3
|
||
and the rest of the function can operate under the assumption that this
|
||
condition has been met. If <code>args</code> has fewer than three items, this condition
|
||
will be true, and we call the <code>panic!</code> macro to end the program immediately.</p>
|
||
<p>With these extra few lines of code in <code>new</code>, let’s run the program without any
|
||
arguments again to see what the error looks like now:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep`
|
||
thread 'main' panicked at 'not enough arguments', src/main.rs:26:13
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>This output is better: we now have a reasonable error message. However, we also
|
||
have extraneous information we don’t want to give to our users. Perhaps using
|
||
the technique we used in Listing 9-10 isn’t the best to use here: a call to
|
||
<code>panic!</code> is more appropriate for a programming problem than a usage problem,
|
||
<a href="ch09-03-to-panic-or-not-to-panic.html#guidelines-for-error-handling">as discussed in Chapter 9</a><!-- ignore -->. Instead, we
|
||
can use the other technique you learned about in Chapter 9—<a href="ch09-02-recoverable-errors-with-result.html">returning a
|
||
<code>Result</code></a><!-- ignore --> that indicates either success or an error.</p>
|
||
<h4><a class="header" href="#returning-a-result-from-new-instead-of-calling-panic" id="returning-a-result-from-new-instead-of-calling-panic">Returning a <code>Result</code> from <code>new</code> Instead of Calling <code>panic!</code></a></h4>
|
||
<p>We can instead return a <code>Result</code> value that will contain a <code>Config</code> instance in
|
||
the successful case and will describe the problem in the error case. When
|
||
<code>Config::new</code> is communicating to <code>main</code>, we can use the <code>Result</code> type to
|
||
signal there was a problem. Then we can change <code>main</code> to convert an <code>Err</code>
|
||
variant into a more practical error for our users without the surrounding text
|
||
about <code>thread 'main'</code> and <code>RUST_BACKTRACE</code> that a call to <code>panic!</code> causes.</p>
|
||
<p>Listing 12-9 shows the changes we need to make to the return value of
|
||
<code>Config::new</code> and the body of the function needed to return a <code>Result</code>. Note
|
||
that this won’t compile until we update <code>main</code> as well, which we’ll do in the
|
||
next listing.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Config {
|
||
    fn new(args: &amp;[String]) -&gt; Result&lt;Config, &amp;'static str&gt; {
|
||
if args.len() < 3 {
|
||
return Err("not enough arguments");
|
||
}
|
||
|
||
let query = args[1].clone();
|
||
let filename = args[2].clone();
|
||
|
||
Ok(Config { query, filename })
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-9: Returning a <code>Result</code> from
|
||
<code>Config::new</code></span></p>
|
||
<p>Our <code>new</code> function now returns a <code>Result</code> with a <code>Config</code> instance in the
|
||
success case and a <code>&'static str</code> in the error case. Recall from <a href="ch10-03-lifetime-syntax.html#the-static-lifetime">“The Static
|
||
Lifetime”</a><!-- ignore --> section in Chapter 10 that
|
||
<code>&'static str</code> is the type of string literals, which is our error message type
|
||
for now.</p>
|
||
<p>We’ve made two changes in the body of the <code>new</code> function: instead of calling
|
||
<code>panic!</code> when the user doesn’t pass enough arguments, we now return an <code>Err</code>
|
||
value, and we’ve wrapped the <code>Config</code> return value in an <code>Ok</code>. These changes
|
||
make the function conform to its new type signature.</p>
|
||
<p>Returning an <code>Err</code> value from <code>Config::new</code> allows the <code>main</code> function to
|
||
handle the <code>Result</code> value returned from the <code>new</code> function and exit the process
|
||
more cleanly in the error case.</p>
|
||
<h4><a class="header" href="#calling-confignew-and-handling-errors" id="calling-confignew-and-handling-errors">Calling <code>Config::new</code> and Handling Errors</a></h4>
|
||
<p>To handle the error case and print a user-friendly message, we need to update
|
||
<code>main</code> to handle the <code>Result</code> being returned by <code>Config::new</code>, as shown in
|
||
Listing 12-10. We’ll also take the responsibility of exiting the command line
|
||
tool with a nonzero error code away from <code>panic!</code> and instead implement it by hand. A
|
||
nonzero exit status is a convention to signal to the process that called our
|
||
program that the program exited with an error state.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::process;
|
||
|
||
fn main() {
|
||
    let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let config = Config::new(&args).unwrap_or_else(|err| {
|
||
println!("Problem parsing arguments: {}", err);
|
||
process::exit(1);
|
||
});
|
||
|
||
// --snip--
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-10: Exiting with an error code if creating a
|
||
new <code>Config</code> fails</span></p>
|
||
<p>In this listing, we’ve used a method we haven’t covered before:
|
||
<code>unwrap_or_else</code>, which is defined on <code>Result&lt;T, E&gt;</code> by the standard library.
|
||
Using <code>unwrap_or_else</code> allows us to define some custom, non-<code>panic!</code> error
|
||
handling. If the <code>Result</code> is an <code>Ok</code> value, this method’s behavior is similar
|
||
to <code>unwrap</code>: it returns the inner value <code>Ok</code> is wrapping. However, if the value
|
||
is an <code>Err</code> value, this method calls the code in the <em>closure</em>, which is an
|
||
anonymous function we define and pass as an argument to <code>unwrap_or_else</code>. We’ll
|
||
cover closures in more detail in <a href="ch13-00-functional-features.html">Chapter 13</a><!-- ignore -->. For now,
|
||
you just need to know that <code>unwrap_or_else</code> will pass the inner value of the
|
||
<code>Err</code>, which in this case is the static string <code>not enough arguments</code> that we
|
||
added in Listing 12-9, to our closure in the argument <code>err</code> that appears
|
||
between the vertical pipes. The code in the closure can then use the <code>err</code>
|
||
value when it runs.</p>
|
||
<p>We’ve added a new <code>use</code> line to bring <code>process</code> from the standard library into
|
||
scope. The code in the closure that will be run in the error case is only two
|
||
lines: we print the <code>err</code> value and then call <code>process::exit</code>. The
|
||
<code>process::exit</code> function will stop the program immediately and return the
|
||
number that was passed as the exit status code. This is similar to the
|
||
<code>panic!</code>-based handling we used in Listing 12-8, but we no longer get all the
|
||
extra output. Let’s try it:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.48 secs
|
||
Running `target/debug/minigrep`
|
||
Problem parsing arguments: not enough arguments
|
||
</code></pre>
|
||
<p>Great! This output is much friendlier for our users.</p>
|
||
<h3><a class="header" href="#extracting-logic-from-main" id="extracting-logic-from-main">Extracting Logic from <code>main</code></a></h3>
|
||
<p>Now that we’ve finished refactoring the configuration parsing, let’s turn to
|
||
the program’s logic. As we stated in <a href="ch12-03-improving-error-handling-and-modularity.html#separation-of-concerns-for-binary-projects">“Separation of Concerns for Binary
|
||
Projects”</a><!-- ignore -->, we’ll
|
||
extract a function named <code>run</code> that will hold all the logic currently in the
|
||
<code>main</code> function that isn’t involved with setting up configuration or handling
|
||
errors. When we’re done, <code>main</code> will be concise and easy to verify by
|
||
inspection, and we’ll be able to write tests for all the other logic.</p>
|
||
<p>Listing 12-11 shows the extracted <code>run</code> function. For now, we’re just making
|
||
the small, incremental improvement of extracting the function. We’re still
|
||
defining the function in <em>src/main.rs</em>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
// --snip--
|
||
|
||
println!("Searching for {}", config.query);
|
||
println!("In file {}", config.filename);
|
||
|
||
run(config);
|
||
}
|
||
|
||
fn run(config: Config) {
|
||
let contents = fs::read_to_string(config.filename)
|
||
.expect("Something went wrong reading the file");
|
||
|
||
println!("With text:\n{}", contents);
|
||
}
|
||
|
||
// --snip--
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-11: Extracting a <code>run</code> function containing the
|
||
rest of the program logic</span></p>
|
||
<p>The <code>run</code> function now contains all the remaining logic from <code>main</code>, starting
|
||
from reading the file. The <code>run</code> function takes the <code>Config</code> instance as an
|
||
argument.</p>
|
||
<h4><a class="header" href="#returning-errors-from-the-run-function" id="returning-errors-from-the-run-function">Returning Errors from the <code>run</code> Function</a></h4>
|
||
<p>With the remaining program logic separated into the <code>run</code> function, we can
|
||
improve the error handling, as we did with <code>Config::new</code> in Listing 12-9.
|
||
Instead of allowing the program to panic by calling <code>expect</code>, the <code>run</code>
|
||
function will return a <code>Result&lt;T, E&gt;</code> when something goes wrong. This will let
|
||
us further consolidate into <code>main</code> the logic around handling errors in a
|
||
user-friendly way. Listing 12-12 shows the changes we need to make to the
|
||
signature and body of <code>run</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::error::Error;
|
||
|
||
// --snip--
|
||
|
||
fn run(config: Config) -&gt; Result&lt;(), Box&lt;dyn Error&gt;&gt; {
|
||
let contents = fs::read_to_string(config.filename)?;
|
||
|
||
println!("With text:\n{}", contents);
|
||
|
||
Ok(())
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-12: Changing the <code>run</code> function to return
|
||
<code>Result</code></span></p>
|
||
<p>We’ve made three significant changes here. First, we changed the return type of
|
||
the <code>run</code> function to <code>Result&lt;(), Box&lt;dyn Error&gt;&gt;</code>. This function previously
|
||
returned the unit type, <code>()</code>, and we keep that as the value returned in the
|
||
<code>Ok</code> case.</p>
|
||
<p>For the error type, we used the <em>trait object</em> <code>Box&lt;dyn Error&gt;</code> (and we’ve
|
||
brought <code>std::error::Error</code> into scope with a <code>use</code> statement at the top).
|
||
We’ll cover trait objects in <a href="ch17-00-oop.html">Chapter 17</a><!-- ignore -->. For now, just
|
||
know that <code>Box&lt;dyn Error&gt;</code> means the function will return a type that
|
||
implements the <code>Error</code> trait, but we don’t have to specify what particular type
|
||
the return value will be. This gives us flexibility to return error values that
|
||
may be of different types in different error cases. The <code>dyn</code> keyword is short
|
||
for “dynamic.”</p>
|
||
<p>Second, we’ve removed the call to <code>expect</code> in favor of the <code>?</code> operator, as we
|
||
talked about in <a href="ch09-02-recoverable-errors-with-result.html#a-shortcut-for-propagating-errors-the--operator">Chapter 9</a><!-- ignore -->. Rather than
|
||
<code>panic!</code> on an error, <code>?</code> will return the error value from the current function
|
||
for the caller to handle.</p>
|
||
<p>Third, the <code>run</code> function now returns an <code>Ok</code> value in the success case. We’ve
|
||
declared the <code>run</code> function’s success type as <code>()</code> in the signature, which
|
||
means we need to wrap the unit type value in the <code>Ok</code> value. This <code>Ok(())</code>
|
||
syntax might look a bit strange at first, but using <code>()</code> like this is the
|
||
idiomatic way to indicate that we’re calling <code>run</code> for its side effects only;
|
||
it doesn’t return a value we need.</p>
|
||
<p>When you run this code, it will compile but will display a warning:</p>
|
||
<pre><code class="language-text">warning: unused `std::result::Result` that must be used
|
||
--> src/main.rs:17:5
|
||
|
|
||
17 | run(config);
|
||
| ^^^^^^^^^^^^
|
||
|
|
||
= note: #[warn(unused_must_use)] on by default
|
||
= note: this `Result` may be an `Err` variant, which should be handled
|
||
</code></pre>
|
||
<p>Rust tells us that our code ignored the <code>Result</code> value and the <code>Result</code> value
|
||
might indicate that an error occurred. But we’re not checking to see whether or
|
||
not there was an error, and the compiler reminds us that we probably meant to
|
||
have some error-handling code here! Let’s rectify that problem now.</p>
|
||
<h4><a class="header" href="#handling-errors-returned-from-run-in-main" id="handling-errors-returned-from-run-in-main">Handling Errors Returned from <code>run</code> in <code>main</code></a></h4>
|
||
<p>We’ll check for errors and handle them using a technique similar to one we used
|
||
with <code>Config::new</code> in Listing 12-10, but with a slight difference:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
// --snip--
|
||
|
||
println!("Searching for {}", config.query);
|
||
println!("In file {}", config.filename);
|
||
|
||
if let Err(e) = run(config) {
|
||
println!("Application error: {}", e);
|
||
|
||
process::exit(1);
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>We use <code>if let</code> rather than <code>unwrap_or_else</code> to check whether <code>run</code> returns an
|
||
<code>Err</code> value and call <code>process::exit(1)</code> if it does. The <code>run</code> function doesn’t
|
||
return a value that we want to <code>unwrap</code> in the same way that <code>Config::new</code>
|
||
returns the <code>Config</code> instance. Because <code>run</code> returns <code>()</code> in the success case,
|
||
we only care about detecting an error, so we don’t need <code>unwrap_or_else</code> to
|
||
return the unwrapped value because it would only be <code>()</code>.</p>
|
||
<p>The bodies of the <code>if let</code> block and the <code>unwrap_or_else</code> closure are the same in
|
||
both cases: we print the error and exit.</p>
|
||
<h3><a class="header" href="#splitting-code-into-a-library-crate" id="splitting-code-into-a-library-crate">Splitting Code into a Library Crate</a></h3>
|
||
<p>Our <code>minigrep</code> project is looking good so far! Now we’ll split the
|
||
<em>src/main.rs</em> file and put some code into the <em>src/lib.rs</em> file so we can test
|
||
it and have a <em>src/main.rs</em> file with fewer responsibilities.</p>
|
||
<p>Let’s move all the code that isn’t the <code>main</code> function from <em>src/main.rs</em> to
|
||
<em>src/lib.rs</em>:</p>
|
||
<ul>
|
||
<li>The <code>run</code> function definition</li>
|
||
<li>The relevant <code>use</code> statements</li>
|
||
<li>The definition of <code>Config</code></li>
|
||
<li>The <code>Config::new</code> function definition</li>
|
||
</ul>
|
||
<p>The contents of <em>src/lib.rs</em> should have the signatures shown in Listing 12-13
|
||
(we’ve omitted the bodies of the functions for brevity). Note that this won’t
|
||
compile until we modify <em>src/main.rs</em> in Listing 12-14.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::error::Error;
|
||
use std::fs;
|
||
|
||
pub struct Config {
|
||
pub query: String,
|
||
pub filename: String,
|
||
}
|
||
|
||
impl Config {
|
||
    pub fn new(args: &amp;[String]) -&gt; Result&lt;Config, &amp;'static str&gt; {
|
||
// --snip--
|
||
}
|
||
}
|
||
|
||
pub fn run(config: Config) -&gt; Result&lt;(), Box&lt;dyn Error&gt;&gt; {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-13: Moving <code>Config</code> and <code>run</code> into
|
||
<em>src/lib.rs</em></span></p>
|
||
<p>We’ve made liberal use of the <code>pub</code> keyword: on <code>Config</code>, on its fields and its
|
||
<code>new</code> function, and on the <code>run</code> function. We now have a library crate that has a
|
||
public API that we can test!</p>
|
||
<p>Now we need to bring the code we moved to <em>src/lib.rs</em> into the scope of the
|
||
binary crate in <em>src/main.rs</em>, as shown in Listing 12-14.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use std::env;
|
||
use std::process;
|
||
|
||
use minigrep::Config;
|
||
|
||
fn main() {
|
||
// --snip--
|
||
if let Err(e) = minigrep::run(config) {
|
||
// --snip--
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-14: Using the <code>minigrep</code> library crate in
|
||
<em>src/main.rs</em></span></p>
|
||
<p>We add a <code>use minigrep::Config</code> line to bring the <code>Config</code> type from the
|
||
library crate into the binary crate’s scope, and we prefix the <code>run</code> function
|
||
with our crate name. Now all the functionality should be connected and should
|
||
work. Run the program with <code>cargo run</code> and make sure everything works
|
||
correctly.</p>
|
||
<p>Whew! That was a lot of work, but we’ve set ourselves up for success in the
|
||
future. Now it’s much easier to handle errors, and we’ve made the code more
|
||
modular. Almost all of our work will be done in <em>src/lib.rs</em> from here on out.</p>
|
||
<p>Let’s take advantage of this newfound modularity by doing something that would
|
||
have been difficult with the old code but is easy with the new code: we’ll
|
||
write some tests!</p>
|
||
<h2><a class="header" href="#developing-the-librarys-functionality-with-test-driven-development" id="developing-the-librarys-functionality-with-test-driven-development">Developing the Library’s Functionality with Test-Driven Development</a></h2>
|
||
<p>Now that we’ve extracted the logic into <em>src/lib.rs</em> and left the argument
|
||
collecting and error handling in <em>src/main.rs</em>, it’s much easier to write tests
|
||
for the core functionality of our code. We can call functions directly with
|
||
various arguments and check return values without having to call our binary
|
||
from the command line. Feel free to write some tests for the functionality in
|
||
the <code>Config::new</code> and <code>run</code> functions on your own.</p>
|
||
<p>In this section, we’ll add the searching logic to the <code>minigrep</code> program by
|
||
using the test-driven development (TDD) process. This software development
|
||
technique follows these steps:</p>
|
||
<ol>
|
||
<li>Write a test that fails and run it to make sure it fails for the reason you
|
||
expect.</li>
|
||
<li>Write or modify just enough code to make the new test pass.</li>
|
||
<li>Refactor the code you just added or changed and make sure the tests
|
||
continue to pass.</li>
|
||
<li>Repeat from step 1!</li>
|
||
</ol>
|
||
<p>This process is just one of many ways to write software, but TDD can help drive
|
||
code design as well. Writing the test before you write the code that makes the
|
||
test pass helps to maintain high test coverage throughout the process.</p>
|
||
<p>We’ll test drive the implementation of the functionality that will actually do
|
||
the searching for the query string in the file contents and produce a list of
|
||
lines that match the query. We’ll add this functionality in a function called
|
||
<code>search</code>.</p>
|
||
<h3><a class="header" href="#writing-a-failing-test" id="writing-a-failing-test">Writing a Failing Test</a></h3>
|
||
<p>Because we don’t need them anymore, let’s remove the <code>println!</code> statements from
|
||
<em>src/lib.rs</em> and <em>src/main.rs</em> that we used to check the program’s behavior.
|
||
Then, in <em>src/lib.rs</em>, we’ll add a <code>tests</code> module with a test function, as we
|
||
did in <a href="ch11-01-writing-tests.html#the-anatomy-of-a-test-function">Chapter 11</a><!-- ignore -->. The test function specifies
|
||
the behavior we want the <code>search</code> function to have: it will take a query and
|
||
the text to search for the query in, and it will return only the lines from the
|
||
text that contain the query. Listing 12-15 shows this test, which won’t compile
|
||
yet.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
</span><span class="boring"> vec![]
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn one_result() {
|
||
let query = "duct";
|
||
let contents = "\
|
||
Rust:
|
||
safe, fast, productive.
|
||
Pick three.";
|
||
|
||
assert_eq!(
|
||
vec!["safe, fast, productive."],
|
||
search(query, contents)
|
||
);
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 12-15: Creating a failing test for the <code>search</code>
|
||
function we wish we had</span></p>
|
||
<p>This test searches for the string <code>"duct"</code>. The text we’re searching is three
|
||
lines, only one of which contains <code>"duct"</code>. We assert that the value returned
|
||
from the <code>search</code> function contains only the line we expect.</p>
|
||
<p>We aren’t able to run this test and watch it fail because the test doesn’t even
|
||
compile: the <code>search</code> function doesn’t exist yet! So now we’ll add just enough
|
||
code to get the test to compile and run by adding a definition of the <code>search</code>
|
||
function that always returns an empty vector, as shown in Listing 12-16. Then
|
||
the test should compile and fail because an empty vector doesn’t match a vector
|
||
containing the line <code>"safe, fast, productive."</code></p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
vec![]
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 12-16: Defining just enough of the <code>search</code>
|
||
function so our test will compile</span></p>
|
||
<p>Notice that we need an explicit lifetime <code>'a</code> defined in the signature of
|
||
<code>search</code> and used with the <code>contents</code> argument and the return value. Recall in
|
||
<a href="ch10-03-lifetime-syntax.html">Chapter 10</a><!-- ignore --> that the lifetime parameters
|
||
specify which argument lifetime is connected to the lifetime of the return
|
||
value. In this case, we indicate that the returned vector should contain string
|
||
slices that reference slices of the argument <code>contents</code> (rather than the
|
||
argument <code>query</code>).</p>
|
||
<p>In other words, we tell Rust that the data returned by the <code>search</code> function
|
||
will live as long as the data passed into the <code>search</code> function in the
|
||
<code>contents</code> argument. This is important! The data referenced <em>by</em> a slice needs
|
||
to be valid for the reference to be valid; if the compiler assumes we’re making
|
||
string slices of <code>query</code> rather than <code>contents</code>, it will do its safety checking
|
||
incorrectly.</p>
|
||
<p>If we forget the lifetime annotations and try to compile this function, we’ll
|
||
get this error:</p>
|
||
<pre><code class="language-text">error[E0106]: missing lifetime specifier
|
||
--> src/lib.rs:5:51
|
||
|
|
||
5 | pub fn search(query: &str, contents: &str) -> Vec<&str> {
|
||
| ^ expected lifetime
|
||
parameter
|
||
|
|
||
= help: this function's return type contains a borrowed value, but the
|
||
signature does not say whether it is borrowed from `query` or `contents`
|
||
</code></pre>
|
||
<p>Rust can’t possibly know which of the two arguments we need, so we need to tell
|
||
it. Because <code>contents</code> is the argument that contains all of our text and we
|
||
want to return the parts of that text that match, we know <code>contents</code> is the
|
||
argument that should be connected to the return value using the lifetime syntax.</p>
|
||
<p>Other programming languages don’t require you to connect arguments to return
|
||
values in the signature. Although this might seem strange, it will get easier
|
||
over time. You might want to compare this example with the <a href="ch10-03-lifetime-syntax.html#validating-references-with-lifetimes">“Validating
|
||
References with Lifetimes”</a><!-- ignore
|
||
--> section in Chapter 10.</p>
|
||
<p>Now let’s run the test:</p>
|
||
<pre><code class="language-text">$ cargo test
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
--warnings--
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.43 secs
|
||
Running target/debug/deps/minigrep-abcabcabc
|
||
|
||
running 1 test
|
||
test tests::one_result ... FAILED
|
||
|
||
failures:
|
||
|
||
---- tests::one_result stdout ----
|
||
thread 'tests::one_result' panicked at 'assertion failed: `(left ==
|
||
right)`
|
||
left: `["safe, fast, productive."]`,
|
||
right: `[]`)', src/lib.rs:48:8
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
|
||
|
||
failures:
|
||
tests::one_result
|
||
|
||
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
error: test failed, to rerun pass '--lib'
|
||
</code></pre>
|
||
<p>Great, the test fails, exactly as we expected. Let’s get the test to pass!</p>
|
||
<h3><a class="header" href="#writing-code-to-pass-the-test" id="writing-code-to-pass-the-test">Writing Code to Pass the Test</a></h3>
|
||
<p>Currently, our test is failing because we always return an empty vector. To fix
|
||
that and implement <code>search</code>, our program needs to follow these steps:</p>
|
||
<ul>
|
||
<li>Iterate through each line of the contents.</li>
|
||
<li>Check whether the line contains our query string.</li>
|
||
<li>If it does, add it to the list of values we’re returning.</li>
|
||
<li>If it doesn’t, do nothing.</li>
|
||
<li>Return the list of results that match.</li>
|
||
</ul>
|
||
<p>Let’s work through each step, starting with iterating through lines.</p>
|
||
<h4><a class="header" href="#iterating-through-lines-with-the-lines-method" id="iterating-through-lines-with-the-lines-method">Iterating Through Lines with the <code>lines</code> Method</a></h4>
|
||
<p>Rust has a helpful method to handle line-by-line iteration of strings,
|
||
conveniently named <code>lines</code>, that works as shown in Listing 12-17. Note this
|
||
won’t compile yet.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
for line in contents.lines() {
|
||
// do something with line
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-17: Iterating through each line in <code>contents</code>
|
||
</span></p>
|
||
<p>The <code>lines</code> method returns an iterator. We’ll talk about iterators in depth in
|
||
<a href="ch13-02-iterators.html">Chapter 13</a><!-- ignore -->, but recall that you saw this way of using an
|
||
iterator in <a href="ch03-05-control-flow.html#looping-through-a-collection-with-for">Listing 3-5</a><!-- ignore -->, where we used a <code>for</code> loop
|
||
with an iterator to run some code on each item in a collection.</p>
|
||
<h4><a class="header" href="#searching-each-line-for-the-query" id="searching-each-line-for-the-query">Searching Each Line for the Query</a></h4>
|
||
<p>Next, we’ll check whether the current line contains our query string.
|
||
Fortunately, strings have a helpful method named <code>contains</code> that does this for
|
||
us! Add a call to the <code>contains</code> method in the <code>search</code> function, as shown in
|
||
Listing 12-18. Note this still won’t compile yet.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
for line in contents.lines() {
|
||
if line.contains(query) {
|
||
// do something with line
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-18: Adding functionality to see whether the
|
||
line contains the string in <code>query</code></span></p>
|
||
<h4><a class="header" href="#storing-matching-lines" id="storing-matching-lines">Storing Matching Lines</a></h4>
|
||
<p>We also need a way to store the lines that contain our query string. For that,
|
||
we can make a mutable vector before the <code>for</code> loop and call the <code>push</code> method
|
||
to store a <code>line</code> in the vector. After the <code>for</code> loop, we return the vector, as
|
||
shown in Listing 12-19.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
let mut results = Vec::new();
|
||
|
||
for line in contents.lines() {
|
||
if line.contains(query) {
|
||
results.push(line);
|
||
}
|
||
}
|
||
|
||
results
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-19: Storing the lines that match so we can
|
||
return them</span></p>
|
||
<p>Now the <code>search</code> function should return only the lines that contain <code>query</code>,
|
||
and our test should pass. Let’s run the test:</p>
|
||
<pre><code class="language-text">$ cargo test
|
||
--snip--
|
||
running 1 test
|
||
test tests::one_result ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Our test passed, so we know it works!</p>
|
||
<p>At this point, we could consider opportunities for refactoring the
|
||
implementation of the <code>search</code> function while keeping the tests passing to
|
||
maintain the same functionality. The code in the <code>search</code> function isn’t too bad,
|
||
but it doesn’t take advantage of some useful features of iterators. We’ll
|
||
return to this example in <a href="ch13-02-iterators.html">Chapter 13</a><!-- ignore -->, where we’ll
|
||
explore iterators in detail, and look at how to improve it.</p>
|
||
<h4><a class="header" href="#using-the-search-function-in-the-run-function" id="using-the-search-function-in-the-run-function">Using the <code>search</code> Function in the <code>run</code> Function</a></h4>
|
||
<p>Now that the <code>search</code> function is working and tested, we need to call <code>search</code>
|
||
from our <code>run</code> function. We need to pass the <code>config.query</code> value and the
|
||
<code>contents</code> that <code>run</code> reads from the file to the <code>search</code> function. Then <code>run</code>
|
||
will print each line returned from <code>search</code>:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub fn run(config: Config) -&gt; Result&lt;(), Box&lt;dyn Error&gt;&gt; {
|
||
let contents = fs::read_to_string(config.filename)?;
|
||
|
||
for line in search(&config.query, &contents) {
|
||
println!("{}", line);
|
||
}
|
||
|
||
Ok(())
|
||
}
|
||
</code></pre>
|
||
<p>We’re still using a <code>for</code> loop to take each line returned from <code>search</code> and print it.</p>
|
||
<p>Now the entire program should work! Let’s try it out, first with a word that
|
||
should return exactly one line from the Emily Dickinson poem, “frog”:</p>
|
||
<pre><code class="language-text">$ cargo run frog poem.txt
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.38 secs
|
||
Running `target/debug/minigrep frog poem.txt`
|
||
How public, like a frog
|
||
</code></pre>
|
||
<p>Cool! Now let’s try a word that will match multiple lines, like “body”:</p>
|
||
<pre><code class="language-text">$ cargo run body poem.txt
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep body poem.txt`
|
||
I’m nobody! Who are you?
|
||
Are you nobody, too?
|
||
How dreary to be somebody!
|
||
</code></pre>
|
||
<p>And finally, let’s make sure that we don’t get any lines when we search for a
|
||
word that isn’t anywhere in the poem, such as “monomorphization”:</p>
|
||
<pre><code class="language-text">$ cargo run monomorphization poem.txt
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep monomorphization poem.txt`
|
||
</code></pre>
|
||
<p>Excellent! We’ve built our own mini version of a classic tool and learned a lot
|
||
about how to structure applications. We’ve also learned a bit about file input
|
||
and output, lifetimes, testing, and command line parsing.</p>
|
||
<p>To round out this project, we’ll briefly demonstrate how to work with
|
||
environment variables and how to print to standard error, both of which are
|
||
useful when you’re writing command line programs.</p>
|
||
<h2><a class="header" href="#working-with-environment-variables" id="working-with-environment-variables">Working with Environment Variables</a></h2>
|
||
<p>We’ll improve <code>minigrep</code> by adding an extra feature: an option for
|
||
case-insensitive searching that the user can turn on via an environment
|
||
variable. We could make this feature a command line option and require that
|
||
users enter it each time they want it to apply, but instead we’ll use an
|
||
environment variable. Doing so allows our users to set the environment variable
|
||
once and have all their searches be case insensitive in that terminal session.</p>
|
||
<h3><a class="header" href="#writing-a-failing-test-for-the-case-insensitive-search-function" id="writing-a-failing-test-for-the-case-insensitive-search-function">Writing a Failing Test for the Case-Insensitive <code>search</code> Function</a></h3>
|
||
<p>We want to add a new <code>search_case_insensitive</code> function that we’ll call when
|
||
the environment variable is on. We’ll continue to follow the TDD process, so
|
||
the first step is again to write a failing test. We’ll add a new test for the
|
||
new <code>search_case_insensitive</code> function and rename our old test from
|
||
<code>one_result</code> to <code>case_sensitive</code> to clarify the differences between the two
|
||
tests, as shown in Listing 12-20.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn case_sensitive() {
|
||
let query = "duct";
|
||
let contents = "\
|
||
Rust:
|
||
safe, fast, productive.
|
||
Pick three.
|
||
Duct tape.";
|
||
|
||
assert_eq!(
|
||
vec!["safe, fast, productive."],
|
||
search(query, contents)
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn case_insensitive() {
|
||
let query = "rUsT";
|
||
let contents = "\
|
||
Rust:
|
||
safe, fast, productive.
|
||
Pick three.
|
||
Trust me.";
|
||
|
||
assert_eq!(
|
||
vec!["Rust:", "Trust me."],
|
||
search_case_insensitive(query, contents)
|
||
);
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 12-20: Adding a new failing test for the
|
||
case-insensitive function we’re about to add</span></p>
|
||
<p>Note that we’ve edited the old test’s <code>contents</code> too. We’ve added a new line
|
||
with the text <code>"Duct tape."</code> using a capital D that shouldn’t match the query
|
||
<code>"duct"</code> when we’re searching in a case-sensitive manner. Changing the old test
|
||
in this way helps ensure that we don’t accidentally break the case-sensitive
|
||
search functionality that we’ve already implemented. This test should pass now
|
||
and should continue to pass as we work on the case-insensitive search.</p>
|
||
<p>The new test for the case-<em>insensitive</em> search uses <code>"rUsT"</code> as its query. In
|
||
the <code>search_case_insensitive</code> function we’re about to add, the query <code>"rUsT"</code>
|
||
should match the line containing <code>"Rust:"</code> with a capital R and match the line
|
||
<code>"Trust me."</code> even though both have different casing from the query. This is
|
||
our failing test, and it will fail to compile because we haven’t yet defined
|
||
the <code>search_case_insensitive</code> function. Feel free to add a skeleton
|
||
implementation that always returns an empty vector, similar to the way we did
|
||
for the <code>search</code> function in Listing 12-16 to see the test compile and fail.</p>
|
||
<h3><a class="header" href="#implementing-the-search_case_insensitive-function" id="implementing-the-search_case_insensitive-function">Implementing the <code>search_case_insensitive</code> Function</a></h3>
|
||
<p>The <code>search_case_insensitive</code> function, shown in Listing 12-21, will be almost
|
||
the same as the <code>search</code> function. The only difference is that we’ll lowercase
|
||
the <code>query</code> and each <code>line</code> so whatever the case of the input arguments,
|
||
they’ll be the same case when we check whether the line contains the query.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
let query = query.to_lowercase();
|
||
let mut results = Vec::new();
|
||
|
||
for line in contents.lines() {
|
||
if line.to_lowercase().contains(&query) {
|
||
results.push(line);
|
||
}
|
||
}
|
||
|
||
results
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 12-21: Defining the <code>search_case_insensitive</code>
|
||
function to lowercase the query and the line before comparing them</span></p>
|
||
<p>First, we lowercase the <code>query</code> string and store it in a shadowed variable with
|
||
the same name. Calling <code>to_lowercase</code> on the query is necessary so no matter
|
||
whether the user’s query is <code>"rust"</code>, <code>"RUST"</code>, <code>"Rust"</code>, or <code>"rUsT"</code>, we’ll
|
||
treat the query as if it were <code>"rust"</code> and be insensitive to the case.</p>
|
||
<p>Note that <code>query</code> is now a <code>String</code> rather than a string slice, because calling
|
||
<code>to_lowercase</code> creates new data rather than referencing existing data. Say the
|
||
query is <code>"rUsT"</code>, as an example: that string slice doesn’t contain a lowercase
|
||
<code>u</code> or <code>t</code> for us to use, so we have to allocate a new <code>String</code> containing
|
||
<code>"rust"</code>. When we pass <code>query</code> as an argument to the <code>contains</code> method now, we
|
||
need to add an ampersand because the signature of <code>contains</code> is defined to take
|
||
a string slice.</p>
|
||
<p>Next, we add a call to <code>to_lowercase</code> on each <code>line</code> to lowercase all of its characters
|
||
before we check whether it contains <code>query</code>. Now that we’ve converted <code>line</code>
|
||
and <code>query</code> to lowercase, we’ll find matches no matter what the case of the
|
||
query is.</p>
|
||
<p>Let’s see if this implementation passes the tests:</p>
|
||
<pre><code class="language-text">running 2 tests
|
||
test tests::case_insensitive ... ok
|
||
test tests::case_sensitive ... ok
|
||
|
||
test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Great! They passed. Now, let’s call the new <code>search_case_insensitive</code> function
|
||
from the <code>run</code> function. First, we’ll add a configuration option to the
|
||
<code>Config</code> struct to switch between case-sensitive and case-insensitive search.
|
||
Adding this field will cause compiler errors because we aren’t initializing
|
||
this field anywhere yet:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct Config {
|
||
pub query: String,
|
||
pub filename: String,
|
||
pub case_sensitive: bool,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Note that we added the <code>case_sensitive</code> field that holds a Boolean. Next, we
|
||
need the <code>run</code> function to check the <code>case_sensitive</code> field’s value and use
|
||
that to decide whether to call the <code>search</code> function or the
|
||
<code>search_case_insensitive</code> function, as shown in Listing 12-22. Note this still
|
||
won’t compile yet.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::error::Error;
|
||
</span><span class="boring">use std::fs::{self, File};
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">
|
||
</span><span class="boring">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
</span><span class="boring"> vec![]
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
</span><span class="boring"> vec![]
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">pub struct Config {
|
||
</span><span class="boring"> query: String,
|
||
</span><span class="boring"> filename: String,
|
||
</span><span class="boring"> case_sensitive: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>pub fn run(config: Config) -&gt; Result&lt;(), Box&lt;dyn Error&gt;&gt; {
|
||
let contents = fs::read_to_string(config.filename)?;
|
||
|
||
let results = if config.case_sensitive {
|
||
search(&config.query, &contents)
|
||
} else {
|
||
search_case_insensitive(&config.query, &contents)
|
||
};
|
||
|
||
for line in results {
|
||
println!("{}", line);
|
||
}
|
||
|
||
Ok(())
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 12-22: Calling either <code>search</code> or
|
||
<code>search_case_insensitive</code> based on the value in <code>config.case_sensitive</code></span></p>
|
||
<p>Finally, we need to check for the environment variable. The functions for
|
||
working with environment variables are in the <code>env</code> module in the standard
|
||
library, so we want to bring that module into scope with a <code>use std::env;</code> line
|
||
at the top of <em>src/lib.rs</em>. Then we’ll use the <code>var</code> function from the <code>env</code>
|
||
module to check for an environment variable named <code>CASE_INSENSITIVE</code>, as shown
|
||
in Listing 12-23.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::env;
|
||
<span class="boring">struct Config {
|
||
</span><span class="boring"> query: String,
|
||
</span><span class="boring"> filename: String,
|
||
</span><span class="boring"> case_sensitive: bool,
|
||
</span><span class="boring">}
|
||
</span>
|
||
// --snip--
|
||
|
||
impl Config {
|
||
pub fn new(args: &amp;[String]) -&gt; Result&lt;Config, &amp;'static str&gt; {
|
||
if args.len() < 3 {
|
||
return Err("not enough arguments");
|
||
}
|
||
|
||
let query = args[1].clone();
|
||
let filename = args[2].clone();
|
||
|
||
let case_sensitive = env::var("CASE_INSENSITIVE").is_err();
|
||
|
||
Ok(Config { query, filename, case_sensitive })
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 12-23: Checking for an environment variable named
|
||
<code>CASE_INSENSITIVE</code></span></p>
|
||
<p>Here, we create a new variable <code>case_sensitive</code>. To set its value, we call the
|
||
<code>env::var</code> function and pass it the name of the <code>CASE_INSENSITIVE</code> environment
|
||
variable. The <code>env::var</code> function returns a <code>Result</code> that will be the successful
|
||
<code>Ok</code> variant that contains the value of the environment variable if the
|
||
environment variable is set. It will return the <code>Err</code> variant if the
|
||
environment variable is not set.</p>
|
||
<p>We’re using the <code>is_err</code> method on the <code>Result</code> to check whether it’s an error
|
||
and the variable is therefore unset, which means the program <em>should</em> do a case-sensitive search. If the
|
||
<code>CASE_INSENSITIVE</code> environment variable is set to anything, <code>is_err</code> will
|
||
return false and the program will perform a case-insensitive search. We don’t
|
||
care about the <em>value</em> of the environment variable, just whether it’s set or
|
||
unset, so we’re checking <code>is_err</code> rather than using <code>unwrap</code>, <code>expect</code>, or any
|
||
of the other methods we’ve seen on <code>Result</code>.</p>
|
||
<p>We pass the value in the <code>case_sensitive</code> variable to the <code>Config</code> instance so
|
||
the <code>run</code> function can read that value and decide whether to call <code>search</code> or
|
||
<code>search_case_insensitive</code>, as we implemented in Listing 12-22.</p>
|
||
<p>Let’s give it a try! First, we’ll run our program without the environment
|
||
variable set and with the query <code>to</code>, which should match any line that contains
|
||
the word “to” in all lowercase:</p>
|
||
<pre><code class="language-text">$ cargo run to poem.txt
|
||
Compiling minigrep v0.1.0 (file:///projects/minigrep)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep to poem.txt`
|
||
Are you nobody, too?
|
||
How dreary to be somebody!
|
||
</code></pre>
|
||
<p>Looks like that still works! Now, let’s run the program with <code>CASE_INSENSITIVE</code>
|
||
set to <code>1</code> but with the same query <code>to</code>.</p>
|
||
<p>If you’re using PowerShell, you will need to set the environment variable and
|
||
run the program in two commands rather than one:</p>
|
||
<pre><code class="language-text">$ $env:CASE_INSENSITIVE=1
|
||
$ cargo run to poem.txt
|
||
</code></pre>
|
||
<p>We should get lines that contain “to” that might have uppercase letters:</p>
|
||
<pre><code class="language-text">$ CASE_INSENSITIVE=1 cargo run to poem.txt
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/minigrep to poem.txt`
|
||
Are you nobody, too?
|
||
How dreary to be somebody!
|
||
To tell your name the livelong day
|
||
To an admiring bog!
|
||
</code></pre>
|
||
<p>Excellent, we also got lines containing “To”! Our <code>minigrep</code> program can now do
|
||
case-insensitive searching controlled by an environment variable. Now you know
|
||
how to manage options set using either command line arguments or environment
|
||
variables.</p>
|
||
<p>Some programs allow arguments <em>and</em> environment variables for the same
|
||
configuration. In those cases, the programs decide that one or the other takes
|
||
precedence. For another exercise on your own, try controlling case
|
||
insensitivity through either a command line argument or an environment
|
||
variable. Decide whether the command line argument or the environment variable
|
||
should take precedence if the program is run with one set to case sensitive and
|
||
one set to case insensitive.</p>
|
||
<p>The <code>std::env</code> module contains many more useful features for dealing with
|
||
environment variables: check out its documentation to see what is available.</p>
|
||
<h2><a class="header" href="#writing-error-messages-to-standard-error-instead-of-standard-output" id="writing-error-messages-to-standard-error-instead-of-standard-output">Writing Error Messages to Standard Error Instead of Standard Output</a></h2>
|
||
<p>At the moment, we’re writing all of our output to the terminal using the
|
||
<code>println!</code> macro. Most terminals provide two kinds of output: <em>standard
|
||
output</em> (<code>stdout</code>) for general information and <em>standard error</em> (<code>stderr</code>)
|
||
for error messages. This distinction enables users to choose to direct the
|
||
successful output of a program to a file but still print error messages to the
|
||
screen.</p>
|
||
<p>The <code>println!</code> macro is only capable of printing to standard output, so we
|
||
have to use something else to print to standard error.</p>
|
||
<h3><a class="header" href="#checking-where-errors-are-written" id="checking-where-errors-are-written">Checking Where Errors Are Written</a></h3>
|
||
<p>First, let’s observe how the content printed by <code>minigrep</code> is currently being
|
||
written to standard output, including any error messages we want to write to
|
||
standard error instead. We’ll do that by redirecting the standard output stream
|
||
to a file while also intentionally causing an error. We won’t redirect the
|
||
standard error stream, so any content sent to standard error will continue to
|
||
display on the screen.</p>
|
||
<p>Command line programs are expected to send error messages to the standard error
|
||
stream so we can still see error messages on the screen even if we redirect the
|
||
standard output stream to a file. Our program is not currently well-behaved:
|
||
we’re about to see that it saves the error message output to a file instead!</p>
|
||
<p>The way to demonstrate this behavior is by running the program with <code>></code> and the
|
||
filename, <em>output.txt</em>, that we want to redirect the standard output stream to.
|
||
We won’t pass any arguments, which should cause an error:</p>
|
||
<pre><code class="language-text">$ cargo run > output.txt
|
||
</code></pre>
|
||
<p>The <code>></code> syntax tells the shell to write the contents of standard output to
|
||
<em>output.txt</em> instead of the screen. We didn’t see the error message we were
|
||
expecting printed to the screen, so that means it must have ended up in the
|
||
file. This is what <em>output.txt</em> contains:</p>
|
||
<pre><code class="language-text">Problem parsing arguments: not enough arguments
|
||
</code></pre>
|
||
<p>Yup, our error message is being printed to standard output. It’s much more
|
||
useful for error messages like this to be printed to standard error so only
|
||
data from a successful run ends up in the file. We’ll change that.</p>
|
||
<h3><a class="header" href="#printing-errors-to-standard-error" id="printing-errors-to-standard-error">Printing Errors to Standard Error</a></h3>
|
||
<p>We’ll use the code in Listing 12-24 to change how error messages are printed.
|
||
Because of the refactoring we did earlier in this chapter, all the code that
|
||
prints error messages is in one function, <code>main</code>. The standard library provides
|
||
the <code>eprintln!</code> macro that prints to the standard error stream, so let’s change
|
||
the two places we were calling <code>println!</code> to print errors to use <code>eprintln!</code>
|
||
instead.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let config = Config::new(&args).unwrap_or_else(|err| {
|
||
eprintln!("Problem parsing arguments: {}", err);
|
||
process::exit(1);
|
||
});
|
||
|
||
if let Err(e) = minigrep::run(config) {
|
||
eprintln!("Application error: {}", e);
|
||
|
||
process::exit(1);
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 12-24: Writing error messages to standard error
|
||
instead of standard output using <code>eprintln!</code></span></p>
|
||
<p>After changing <code>println!</code> to <code>eprintln!</code>, let’s run the program again in the
|
||
same way, without any arguments and redirecting standard output with <code>></code>:</p>
|
||
<pre><code class="language-text">$ cargo run > output.txt
|
||
Problem parsing arguments: not enough arguments
|
||
</code></pre>
|
||
<p>Now we see the error onscreen and <em>output.txt</em> contains nothing, which is the
|
||
behavior we expect of command line programs.</p>
|
||
<p>Let’s run the program again with arguments that don’t cause an error but still
|
||
redirect standard output to a file, like so:</p>
|
||
<pre><code class="language-text">$ cargo run to poem.txt > output.txt
|
||
</code></pre>
|
||
<p>We won’t see any output to the terminal, and <em>output.txt</em> will contain our
|
||
results:</p>
|
||
<p><span class="filename">Filename: output.txt</span></p>
|
||
<pre><code class="language-text">Are you nobody, too?
|
||
How dreary to be somebody!
|
||
</code></pre>
|
||
<p>This demonstrates that we’re now using standard output for successful output
|
||
and standard error for error output as appropriate.</p>
|
||
<h2><a class="header" href="#summary-11" id="summary-11">Summary</a></h2>
|
||
<p>This chapter recapped some of the major concepts you’ve learned so far and
|
||
covered how to perform common I/O operations in Rust. By using command line
|
||
arguments, files, environment variables, and the <code>eprintln!</code> macro for printing
|
||
errors, you’re now prepared to write command line applications. By using the
|
||
concepts in previous chapters, your code will be well organized, store data
|
||
effectively in the appropriate data structures, handle errors nicely, and be
|
||
well tested.</p>
|
||
<p>Next, we’ll explore some Rust features that were influenced by functional
|
||
languages: closures and iterators.</p>
|
||
<h1><a class="header" href="#functional-language-features-iterators-and-closures" id="functional-language-features-iterators-and-closures">Functional Language Features: Iterators and Closures</a></h1>
|
||
<p>Rust’s design has taken inspiration from many existing languages and
|
||
techniques, and one significant influence is <em>functional programming</em>.
|
||
Programming in a functional style often includes using functions as values by
|
||
passing them in arguments, returning them from other functions, assigning them
|
||
to variables for later execution, and so forth.</p>
|
||
<p>In this chapter, we won’t debate the issue of what functional programming is or
|
||
isn’t but will instead discuss some features of Rust that are similar to
|
||
features in many languages often referred to as functional.</p>
|
||
<p>More specifically, we’ll cover:</p>
|
||
<ul>
|
||
<li><em>Closures</em>, a function-like construct you can store in a variable</li>
|
||
<li><em>Iterators</em>, a way of processing a series of elements</li>
|
||
<li>How to use these two features to improve the I/O project in Chapter 12</li>
|
||
<li>The performance of these two features (Spoiler alert: they’re faster than you
|
||
might think!)</li>
|
||
</ul>
|
||
<p>Other Rust features, such as pattern matching and enums, which we’ve covered in
|
||
other chapters, are influenced by the functional style as well. Mastering
|
||
closures and iterators is an important part of writing idiomatic, fast Rust
|
||
code, so we’ll devote this entire chapter to them.</p>
|
||
<h2><a class="header" href="#closures-anonymous-functions-that-can-capture-their-environment" id="closures-anonymous-functions-that-can-capture-their-environment">Closures: Anonymous Functions that Can Capture Their Environment</a></h2>
|
||
<p>Rust’s closures are anonymous functions you can save in a variable or pass as
|
||
arguments to other functions. You can create the closure in one place and then
|
||
call the closure to evaluate it in a different context. Unlike functions,
|
||
closures can capture values from the scope in which they’re defined. We’ll
|
||
demonstrate how these closure features allow for code reuse and behavior
|
||
customization.</p>
|
||
<h3><a class="header" href="#creating-an-abstraction-of-behavior-with-closures" id="creating-an-abstraction-of-behavior-with-closures">Creating an Abstraction of Behavior with Closures</a></h3>
|
||
<p>Let’s work on an example of a situation in which it’s useful to store a closure
|
||
to be executed later. Along the way, we’ll talk about the syntax of closures,
|
||
type inference, and traits.</p>
|
||
<p>Consider this hypothetical situation: we work at a startup that’s making an app
|
||
to generate custom exercise workout plans. The backend is written in Rust, and
|
||
the algorithm that generates the workout plan takes into account many factors,
|
||
such as the app user’s age, body mass index, exercise preferences, recent
|
||
workouts, and an intensity number they specify. The actual algorithm used isn’t
|
||
important in this example; what’s important is that this calculation takes a
|
||
few seconds. We want to call this algorithm only when we need to and only call
|
||
it once so we don’t make the user wait more than necessary.</p>
|
||
<p>We’ll simulate calling this hypothetical algorithm with the function
|
||
<code>simulated_expensive_calculation</code> shown in Listing 13-1, which will print
|
||
<code>calculating slowly...</code>, wait for two seconds, and then return whatever number
|
||
we passed in.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::thread;
|
||
use std::time::Duration;
|
||
|
||
fn simulated_expensive_calculation(intensity: u32) -> u32 {
|
||
println!("calculating slowly...");
|
||
thread::sleep(Duration::from_secs(2));
|
||
intensity
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-1: A function to stand in for a hypothetical
|
||
calculation that takes about 2 seconds to run</span></p>
|
||
<p>Next is the <code>main</code> function, which contains the parts of the workout app
|
||
important for this example. This function represents the code that the app will
|
||
call when a user asks for a workout plan. Because the interaction with the
|
||
app’s frontend isn’t relevant to the use of closures, we’ll hardcode values
|
||
representing inputs to our program and print the outputs.</p>
|
||
<p>The required inputs are these:</p>
|
||
<ul>
|
||
<li>An intensity number from the user, which is specified when they request
|
||
a workout to indicate whether they want a low-intensity workout or a
|
||
high-intensity workout</li>
|
||
<li>A random number that will generate some variety in the workout plans</li>
|
||
</ul>
|
||
<p>The output will be the recommended workout plan. Listing 13-2 shows the <code>main</code>
|
||
function we’ll use.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let simulated_user_specified_value = 10;
|
||
let simulated_random_number = 7;
|
||
|
||
generate_workout(
|
||
simulated_user_specified_value,
|
||
simulated_random_number
|
||
);
|
||
}
|
||
<span class="boring">fn generate_workout(intensity: u32, random_number: u32) {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-2: A <code>main</code> function with hardcoded values to
|
||
simulate user input and random number generation</span></p>
|
||
<p>We’ve hardcoded the variable <code>simulated_user_specified_value</code> as 10 and the
|
||
variable <code>simulated_random_number</code> as 7 for simplicity’s sake; in an actual
|
||
program, we’d get the intensity number from the app frontend, and we’d use the
|
||
<code>rand</code> crate to generate a random number, as we did in the Guessing Game
|
||
example in Chapter 2. The <code>main</code> function calls a <code>generate_workout</code> function
|
||
with the simulated input values.</p>
|
||
<p>Now that we have the context, let’s get to the algorithm. The function
|
||
<code>generate_workout</code> in Listing 13-3 contains the business logic of the
|
||
app that we’re most concerned with in this example. The rest of the code
|
||
changes in this example will be made to this function.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span><span class="boring">fn simulated_expensive_calculation(num: u32) -> u32 {
|
||
</span><span class="boring"> println!("calculating slowly...");
|
||
</span><span class="boring"> thread::sleep(Duration::from_secs(2));
|
||
</span><span class="boring"> num
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn generate_workout(intensity: u32, random_number: u32) {
|
||
if intensity < 25 {
|
||
println!(
|
||
"Today, do {} pushups!",
|
||
simulated_expensive_calculation(intensity)
|
||
);
|
||
println!(
|
||
"Next, do {} situps!",
|
||
simulated_expensive_calculation(intensity)
|
||
);
|
||
} else {
|
||
if random_number == 3 {
|
||
println!("Take a break today! Remember to stay hydrated!");
|
||
} else {
|
||
println!(
|
||
"Today, run for {} minutes!",
|
||
simulated_expensive_calculation(intensity)
|
||
);
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-3: The business logic that prints the workout
|
||
plans based on the inputs and calls to the <code>simulated_expensive_calculation</code>
|
||
function</span></p>
|
||
<p>The code in Listing 13-3 has multiple calls to the slow calculation function.
|
||
The first <code>if</code> block calls <code>simulated_expensive_calculation</code> twice, the <code>if</code>
|
||
inside the outer <code>else</code> doesn’t call it at all, and the code inside the
|
||
second <code>else</code> case calls it once.</p>
|
||
<p>The desired behavior of the <code>generate_workout</code> function is to first check
|
||
whether the user wants a low-intensity workout (indicated by a number less than
|
||
25) or a high-intensity workout (a number of 25 or greater).</p>
|
||
<p>Low-intensity workout plans will recommend a number of push-ups and sit-ups
|
||
based on the complex algorithm we’re simulating.</p>
|
||
<p>If the user wants a high-intensity workout, there’s some additional logic: if
|
||
the value of the random number generated by the app happens to be 3, the app
|
||
will recommend a break and hydration. If not, the user will get a number of
|
||
minutes of running based on the complex algorithm.</p>
|
||
<p>This code works the way the business wants it to now, but let’s say the data
|
||
science team decides that we need to make some changes to the way we call the
|
||
<code>simulated_expensive_calculation</code> function in the future. To simplify the
|
||
update when those changes happen, we want to refactor this code so it calls the
|
||
<code>simulated_expensive_calculation</code> function only once. We also want to cut the
|
||
place where we’re currently unnecessarily calling the function twice without
|
||
adding any other calls to that function in the process. That is, we don’t want
|
||
to call it if the result isn’t needed, and we still want to call it only once.</p>
|
||
<h4><a class="header" href="#refactoring-using-functions" id="refactoring-using-functions">Refactoring Using Functions</a></h4>
|
||
<p>We could restructure the workout program in many ways. First, we’ll try
|
||
extracting the duplicated call to the <code>simulated_expensive_calculation</code>
|
||
function into a variable, as shown in Listing 13-4.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span><span class="boring">fn simulated_expensive_calculation(num: u32) -> u32 {
|
||
</span><span class="boring"> println!("calculating slowly...");
|
||
</span><span class="boring"> thread::sleep(Duration::from_secs(2));
|
||
</span><span class="boring"> num
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn generate_workout(intensity: u32, random_number: u32) {
|
||
let expensive_result =
|
||
simulated_expensive_calculation(intensity);
|
||
|
||
if intensity < 25 {
|
||
println!(
|
||
"Today, do {} pushups!",
|
||
expensive_result
|
||
);
|
||
println!(
|
||
"Next, do {} situps!",
|
||
expensive_result
|
||
);
|
||
} else {
|
||
if random_number == 3 {
|
||
println!("Take a break today! Remember to stay hydrated!");
|
||
} else {
|
||
println!(
|
||
"Today, run for {} minutes!",
|
||
expensive_result
|
||
);
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-4: Extracting the calls to
|
||
<code>simulated_expensive_calculation</code> to one place and storing the result in the
|
||
<code>expensive_result</code> variable</span></p>
|
||
<p>This change unifies all the calls to <code>simulated_expensive_calculation</code> and
|
||
solves the problem of the first <code>if</code> block unnecessarily calling the function
|
||
twice. Unfortunately, we’re now calling this function and waiting for the
|
||
result in all cases, which includes the inner <code>if</code> block that doesn’t use the
|
||
result value at all.</p>
|
||
<p>We want to define code in one place in our program, but only <em>execute</em> that
|
||
code where we actually need the result. This is a use case for closures!</p>
|
||
<h4><a class="header" href="#refactoring-with-closures-to-store-code" id="refactoring-with-closures-to-store-code">Refactoring with Closures to Store Code</a></h4>
|
||
<p>Instead of always calling the <code>simulated_expensive_calculation</code> function before
|
||
the <code>if</code> blocks, we can define a closure and store the <em>closure</em> in a variable
|
||
rather than storing the result of the function call, as shown in Listing 13-5.
|
||
We can actually move the whole body of <code>simulated_expensive_calculation</code> within
|
||
the closure we’re introducing here.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span>let expensive_closure = |num| {
|
||
println!("calculating slowly...");
|
||
thread::sleep(Duration::from_secs(2));
|
||
num
|
||
};
|
||
<span class="boring">expensive_closure(5);
|
||
</span><span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-5: Defining a closure and storing it in the
|
||
<code>expensive_closure</code> variable</span></p>
|
||
<p>The closure definition comes after the <code>=</code> to assign it to the variable
|
||
<code>expensive_closure</code>. To define a closure, we start with a pair of vertical
|
||
pipes (<code>|</code>), inside which we specify the parameters to the closure; this syntax
|
||
was chosen because of its similarity to closure definitions in Smalltalk and
|
||
Ruby. This closure has one parameter named <code>num</code>: if we had more than one
|
||
parameter, we would separate them with commas, like <code>|param1, param2|</code>.</p>
|
||
<p>After the parameters, we place curly brackets that hold the body of the
|
||
closure—these are optional if the closure body is a single expression. The end
|
||
of the closure, after the curly brackets, needs a semicolon to complete the
|
||
<code>let</code> statement. The value returned from the last line in the closure body
|
||
(<code>num</code>) will be the value returned from the closure when it’s called, because
|
||
that line doesn’t end in a semicolon, just as in function bodies.</p>
|
||
<p>Note that this <code>let</code> statement means <code>expensive_closure</code> contains the
|
||
<em>definition</em> of an anonymous function, not the <em>resulting value</em> of calling the
|
||
anonymous function. Recall that we’re using a closure because we want to define
|
||
the code to call at one point, store that code, and call it at a later point;
|
||
the code we want to call is now stored in <code>expensive_closure</code>.</p>
|
||
<p>With the closure defined, we can change the code in the <code>if</code> blocks to call the
|
||
closure to execute the code and get the resulting value. We call a closure like
|
||
we do a function: we specify the variable name that holds the closure
|
||
definition and follow it with parentheses containing the argument values we
|
||
want to use, as shown in Listing 13-6.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span>fn generate_workout(intensity: u32, random_number: u32) {
|
||
let expensive_closure = |num| {
|
||
println!("calculating slowly...");
|
||
thread::sleep(Duration::from_secs(2));
|
||
num
|
||
};
|
||
|
||
if intensity < 25 {
|
||
println!(
|
||
"Today, do {} pushups!",
|
||
expensive_closure(intensity)
|
||
);
|
||
println!(
|
||
"Next, do {} situps!",
|
||
expensive_closure(intensity)
|
||
);
|
||
} else {
|
||
if random_number == 3 {
|
||
println!("Take a break today! Remember to stay hydrated!");
|
||
} else {
|
||
println!(
|
||
"Today, run for {} minutes!",
|
||
expensive_closure(intensity)
|
||
);
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-6: Calling the <code>expensive_closure</code> we’ve
|
||
defined</span></p>
|
||
<p>Now the expensive calculation is called in only one place, and we’re only
|
||
executing that code where we need the results.</p>
|
||
<p>However, we’ve reintroduced one of the problems from Listing 13-3: we’re still
|
||
calling the closure twice in the first <code>if</code> block, which will call the
|
||
expensive code twice and make the user wait twice as long as they need to. We
|
||
could fix this problem by creating a variable local to that <code>if</code> block to hold
|
||
the result of calling the closure, but closures provide us with another
|
||
solution. We’ll talk about that solution in a bit. But first let’s talk about
|
||
why there aren’t type annotations in the closure definition and the traits
|
||
involved with closures.</p>
|
||
<h3><a class="header" href="#closure-type-inference-and-annotation" id="closure-type-inference-and-annotation">Closure Type Inference and Annotation</a></h3>
|
||
<p>Closures don’t require you to annotate the types of the parameters or the
|
||
return value like <code>fn</code> functions do. Type annotations are required on functions
|
||
because they’re part of an explicit interface exposed to your users. Defining
|
||
this interface rigidly is important for ensuring that everyone agrees on what
|
||
types of values a function uses and returns. But closures aren’t used in an
|
||
exposed interface like this: they’re stored in variables and used without
|
||
naming them and exposing them to users of our library.</p>
|
||
<p>Closures are usually short and relevant only within a narrow context rather
|
||
than in any arbitrary scenario. Within these limited contexts, the compiler is
|
||
reliably able to infer the types of the parameters and the return type, similar
|
||
to how it’s able to infer the types of most variables.</p>
|
||
<p>Making programmers annotate the types in these small, anonymous functions would
|
||
be annoying and largely redundant with the information the compiler already has
|
||
available.</p>
|
||
<p>As with variables, we can add type annotations if we want to increase
|
||
explicitness and clarity at the cost of being more verbose than is strictly
|
||
necessary. Annotating the types for the closure we defined in Listing 13-5
|
||
would look like the definition shown in Listing 13-7.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span>let expensive_closure = |num: u32| -> u32 {
|
||
println!("calculating slowly...");
|
||
thread::sleep(Duration::from_secs(2));
|
||
num
|
||
};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-7: Adding optional type annotations of the
|
||
parameter and return value types in the closure</span></p>
|
||
<p>With type annotations added, the syntax of closures looks more similar to the
|
||
syntax of functions. The following is a vertical comparison of the syntax for
|
||
the definition of a function that adds 1 to its parameter and a closure that
|
||
has the same behavior. We’ve added some spaces to line up the relevant parts.
|
||
This illustrates how closure syntax is similar to function syntax except for
|
||
the use of pipes and the amount of syntax that is optional:</p>
|
||
<pre><code class="language-rust ignore">fn add_one_v1 (x: u32) -> u32 { x + 1 }
|
||
let add_one_v2 = |x: u32| -> u32 { x + 1 };
|
||
let add_one_v3 = |x| { x + 1 };
|
||
let add_one_v4 = |x| x + 1 ;
|
||
</code></pre>
|
||
<p>The first line shows a function definition, and the second line shows a fully
|
||
annotated closure definition. The third line removes the type annotations from
|
||
the closure definition, and the fourth line removes the brackets, which are
|
||
optional because the closure body has only one expression. These are all valid
|
||
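definitions that will produce the same behavior when they’re called. Note that the third and fourth closures carry no type annotations, so they compile only once they are called somewhere: the compiler infers their parameter and return types from that usage.</p>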
|
||
<p>Closure definitions will have one concrete type inferred for each of their
|
||
parameters and for their return value. For instance, Listing 13-8 shows the
|
||
definition of a short closure that just returns the value it receives as a
|
||
parameter. This closure isn’t very useful except for the purposes of this
|
||
example. Note that we haven’t added any type annotations to the definition: if
|
||
we then try to call the closure twice, using a <code>String</code> as an argument the
|
||
first time and a <code>u32</code> the second time, we’ll get an error.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">let example_closure = |x| x;
|
||
|
||
let s = example_closure(String::from("hello"));
|
||
let n = example_closure(5);
|
||
</code></pre>
|
||
<p><span class="caption">Listing 13-8: Attempting to call a closure whose types
|
||
are inferred with two different types</span></p>
|
||
<p>The compiler gives us this error:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
|
||
--> src/main.rs
|
||
|
|
||
| let n = example_closure(5);
|
||
| ^ expected struct `std::string::String`, found
|
||
integer
|
||
|
|
||
= note: expected type `std::string::String`
|
||
found type `{integer}`
|
||
</code></pre>
|
||
<p>The first time we call <code>example_closure</code> with the <code>String</code> value, the compiler
|
||
infers the type of <code>x</code> and the return type of the closure to be <code>String</code>. Those
|
||
types are then locked in to the closure in <code>example_closure</code>, and we get a type
|
||
error if we try to use a different type with the same closure.</p>
|
||
<h3><a class="header" href="#storing-closures-using-generic-parameters-and-the-fn-traits" id="storing-closures-using-generic-parameters-and-the-fn-traits">Storing Closures Using Generic Parameters and the <code>Fn</code> Traits</a></h3>
|
||
<p>Let’s return to our workout generation app. In Listing 13-6, our code was still
|
||
calling the expensive calculation closure more times than it needed to. One
|
||
option to solve this issue is to save the result of the expensive closure in a
|
||
variable for reuse and use the variable in each place we need the result,
|
||
instead of calling the closure again. However, this method could result in a
|
||
lot of repeated code.</p>
|
||
<p>Fortunately, another solution is available to us. We can create a struct that
|
||
will hold the closure and the resulting value of calling the closure. The
|
||
struct will execute the closure only if we need the resulting value, and it
|
||
will cache the resulting value so the rest of our code doesn’t have to be
|
||
responsible for saving and reusing the result. You may know this pattern as
|
||
<em>memoization</em> or <em>lazy evaluation</em>.</p>
|
||
<p>To make a struct that holds a closure, we need to specify the type of the
|
||
closure, because a struct definition needs to know the types of each of its
|
||
fields. Each closure instance has its own unique anonymous type: that is, even
|
||
if two closures have the same signature, their types are still considered
|
||
different. To define structs, enums, or function parameters that use closures,
|
||
we use generics and trait bounds, as we discussed in Chapter 10.</p>
|
||
<p>The <code>Fn</code> traits are provided by the standard library. All closures implement at
|
||
least one of the traits: <code>Fn</code>, <code>FnMut</code>, or <code>FnOnce</code>. We’ll discuss the
|
||
difference between these traits in the <a href="ch13-01-closures.html#capturing-the-environment-with-closures">“Capturing the Environment with
|
||
Closures”</a><!-- ignore --> section; in
|
||
this example, we can use the <code>Fn</code> trait.</p>
|
||
<p>We add types to the <code>Fn</code> trait bound to represent the types of the parameters
|
||
and return values the closures must have to match this trait bound. In this
|
||
case, our closure has a parameter of type <code>u32</code> and returns a <code>u32</code>, so the
|
||
trait bound we specify is <code>Fn(u32) -> u32</code>.</p>
|
||
<p>Listing 13-9 shows the definition of the <code>Cacher</code> struct that holds a closure
|
||
and an optional result value.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct Cacher&lt;T&gt;
|
||
where T: Fn(u32) -> u32
|
||
{
|
||
calculation: T,
|
||
value: Option&lt;u32&gt;,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-9: Defining a <code>Cacher</code> struct that holds a
|
||
closure in <code>calculation</code> and an optional result in <code>value</code></span></p>
|
||
<p>The <code>Cacher</code> struct has a <code>calculation</code> field of the generic type <code>T</code>. The
|
||
trait bounds on <code>T</code> specify that it’s a closure by using the <code>Fn</code> trait. Any
|
||
closure we want to store in the <code>calculation</code> field must have one <code>u32</code>
|
||
parameter (specified within the parentheses after <code>Fn</code>) and must return a
|
||
<code>u32</code> (specified after the <code>-></code>).</p>
|
||
<blockquote>
|
||
<p>Note: Functions can implement all three of the <code>Fn</code> traits too. If what we
|
||
want to do doesn’t require capturing a value from the environment, we can use
|
||
a function rather than a closure where we need something that implements an
|
||
<code>Fn</code> trait.</p>
|
||
</blockquote>
|
||
<p>The <code>value</code> field is of type <code>Option&lt;u32&gt;</code>. Before we execute the closure,
|
||
<code>value</code> will be <code>None</code>. When code using a <code>Cacher</code> asks for the <em>result</em> of the
|
||
closure, the <code>Cacher</code> will execute the closure at that time and store the
|
||
result within a <code>Some</code> variant in the <code>value</code> field. Then if the code asks for
|
||
the result of the closure again, instead of executing the closure again, the
|
||
<code>Cacher</code> will return the result held in the <code>Some</code> variant.</p>
|
||
<p>The logic around the <code>value</code> field we’ve just described is defined in Listing
|
||
13-10.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Cacher&lt;T&gt;
|
||
</span><span class="boring"> where T: Fn(u32) -> u32
|
||
</span><span class="boring">{
|
||
</span><span class="boring"> calculation: T,
|
||
</span><span class="boring"> value: Option&lt;u32&gt;,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl&lt;T&gt; Cacher&lt;T&gt;
|
||
where T: Fn(u32) -> u32
|
||
{
|
||
fn new(calculation: T) -> Cacher&lt;T&gt; {
|
||
Cacher {
|
||
calculation,
|
||
value: None,
|
||
}
|
||
}
|
||
|
||
fn value(&mut self, arg: u32) -> u32 {
|
||
match self.value {
|
||
Some(v) => v,
|
||
None => {
|
||
let v = (self.calculation)(arg);
|
||
self.value = Some(v);
|
||
v
|
||
},
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-10: The caching logic of <code>Cacher</code></span></p>
|
||
<p>We want <code>Cacher</code> to manage the struct fields’ values rather than letting the
|
||
calling code potentially change the values in these fields directly, so these
|
||
fields are private.</p>
|
||
<p>The <code>Cacher::new</code> function takes a generic parameter <code>T</code>, which we’ve defined
|
||
as having the same trait bound as the <code>Cacher</code> struct. Then <code>Cacher::new</code>
|
||
returns a <code>Cacher</code> instance that holds the closure specified in the
|
||
<code>calculation</code> field and a <code>None</code> value in the <code>value</code> field, because we haven’t
|
||
executed the closure yet.</p>
|
||
<p>When the calling code needs the result of evaluating the closure, instead of
|
||
calling the closure directly, it will call the <code>value</code> method. This method
|
||
checks whether we already have a resulting value in <code>self.value</code> in a <code>Some</code>;
|
||
if we do, it returns the value within the <code>Some</code> without executing the closure
|
||
again.</p>
|
||
<p>If <code>self.value</code> is <code>None</code>, the code calls the closure stored in
|
||
<code>self.calculation</code>, saves the result in <code>self.value</code> for future use, and
|
||
returns the value as well.</p>
|
||
<p>Listing 13-11 shows how we can use this <code>Cacher</code> struct in the function
|
||
<code>generate_workout</code> from Listing 13-6.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct Cacher<T>
|
||
</span><span class="boring"> where T: Fn(u32) -> u32
|
||
</span><span class="boring">{
|
||
</span><span class="boring"> calculation: T,
|
||
</span><span class="boring"> value: Option<u32>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl<T> Cacher<T>
|
||
</span><span class="boring"> where T: Fn(u32) -> u32
|
||
</span><span class="boring">{
|
||
</span><span class="boring"> fn new(calculation: T) -> Cacher<T> {
|
||
</span><span class="boring"> Cacher {
|
||
</span><span class="boring"> calculation,
|
||
</span><span class="boring"> value: None,
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">
|
||
</span><span class="boring"> fn value(&mut self, arg: u32) -> u32 {
|
||
</span><span class="boring"> match self.value {
|
||
</span><span class="boring"> Some(v) => v,
|
||
</span><span class="boring"> None => {
|
||
</span><span class="boring"> let v = (self.calculation)(arg);
|
||
</span><span class="boring"> self.value = Some(v);
|
||
</span><span class="boring"> v
|
||
</span><span class="boring"> },
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn generate_workout(intensity: u32, random_number: u32) {
|
||
let mut expensive_result = Cacher::new(|num| {
|
||
println!("calculating slowly...");
|
||
thread::sleep(Duration::from_secs(2));
|
||
num
|
||
});
|
||
|
||
if intensity < 25 {
|
||
println!(
|
||
"Today, do {} pushups!",
|
||
expensive_result.value(intensity)
|
||
);
|
||
println!(
|
||
"Next, do {} situps!",
|
||
expensive_result.value(intensity)
|
||
);
|
||
} else {
|
||
if random_number == 3 {
|
||
println!("Take a break today! Remember to stay hydrated!");
|
||
} else {
|
||
println!(
|
||
"Today, run for {} minutes!",
|
||
expensive_result.value(intensity)
|
||
);
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-11: Using <code>Cacher</code> in the <code>generate_workout</code>
|
||
function to abstract away the caching logic</span></p>
|
||
<p>Instead of saving the closure in a variable directly, we save a new instance of
|
||
<code>Cacher</code> that holds the closure. Then, in each place we want the result, we
|
||
call the <code>value</code> method on the <code>Cacher</code> instance. We can call the <code>value</code>
|
||
method as many times as we want, or not call it at all, and the expensive
|
||
calculation will be run a maximum of once.</p>
|
||
<p>Try running this program with the <code>main</code> function from Listing 13-2. Change the
|
||
values in the <code>simulated_user_specified_value</code> and <code>simulated_random_number</code>
|
||
variables to verify that in all the cases in the various <code>if</code> and <code>else</code>
|
||
blocks, <code>calculating slowly...</code> appears only once and only when needed. The
|
||
<code>Cacher</code> takes care of the logic necessary to ensure we aren’t calling the
|
||
expensive calculation more than we need to, so <code>generate_workout</code> can focus on
|
||
the business logic.</p>
|
||
<h3><a class="header" href="#limitations-of-the-cacher-implementation" id="limitations-of-the-cacher-implementation">Limitations of the <code>Cacher</code> Implementation</a></h3>
|
||
<p>Caching values is a generally useful behavior that we might want to use in
|
||
other parts of our code with different closures. However, there are two
|
||
problems with the current implementation of <code>Cacher</code> that would make reusing it
|
||
in different contexts difficult.</p>
|
||
<p>The first problem is that a <code>Cacher</code> instance assumes it will always get the
|
||
same value for the parameter <code>arg</code> to the <code>value</code> method. That is, this test of
|
||
<code>Cacher</code> will fail:</p>
|
||
<pre><code class="language-rust ignore panics">#[test]
|
||
fn call_with_different_values() {
|
||
let mut c = Cacher::new(|a| a);
|
||
|
||
let v1 = c.value(1);
|
||
let v2 = c.value(2);
|
||
|
||
assert_eq!(v2, 2);
|
||
}
|
||
</code></pre>
|
||
<p>This test creates a new <code>Cacher</code> instance with a closure that returns the value
|
||
passed into it. We call the <code>value</code> method on this <code>Cacher</code> instance with an
|
||
<code>arg</code> value of 1 and then an <code>arg</code> value of 2, and we expect the call to
|
||
<code>value</code> with the <code>arg</code> value of 2 to return 2.</p>
|
||
<p>Run this test with the <code>Cacher</code> implementation in Listing 13-9 and Listing
|
||
13-10, and the test will fail on the <code>assert_eq!</code> with this message:</p>
|
||
<pre><code class="language-text">thread 'call_with_different_values' panicked at 'assertion failed: `(left == right)`
|
||
left: `1`,
|
||
right: `2`', src/main.rs
|
||
</code></pre>
|
||
<p>The problem is that the first time we called <code>c.value</code> with 1, the <code>Cacher</code>
|
||
instance saved <code>Some(1)</code> in <code>self.value</code>. Thereafter, no matter what we pass in
|
||
to the <code>value</code> method, it will always return 1.</p>
|
||
<p>Try modifying <code>Cacher</code> to hold a hash map rather than a single value. The keys
|
||
of the hash map will be the <code>arg</code> values that are passed in, and the values of
|
||
the hash map will be the result of calling the closure on that key. Instead of
|
||
looking at whether <code>self.value</code> directly has a <code>Some</code> or a <code>None</code> value, the
|
||
<code>value</code> function will look up the <code>arg</code> in the hash map and return the value if
|
||
it’s present. If it’s not present, the <code>Cacher</code> will call the closure and save
|
||
the resulting value in the hash map associated with its <code>arg</code> value.</p>
|
||
<p>The second problem with the current <code>Cacher</code> implementation is that it only
|
||
accepts closures that take one parameter of type <code>u32</code> and return a <code>u32</code>. We
|
||
might want to cache the results of closures that take a string slice and return
|
||
<code>usize</code> values, for example. To fix this issue, try introducing more generic
|
||
parameters to increase the flexibility of the <code>Cacher</code> functionality.</p>
|
||
<h3><a class="header" href="#capturing-the-environment-with-closures" id="capturing-the-environment-with-closures">Capturing the Environment with Closures</a></h3>
|
||
<p>In the workout generator example, we only used closures as inline anonymous
|
||
functions. However, closures have an additional capability that functions don’t
|
||
have: they can capture their environment and access variables from the scope in
|
||
which they’re defined.</p>
|
||
<p>Listing 13-12 has an example of a closure stored in the <code>equal_to_x</code> variable
|
||
that uses the <code>x</code> variable from the closure’s surrounding environment.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 4;
|
||
|
||
let equal_to_x = |z| z == x;
|
||
|
||
let y = 4;
|
||
|
||
assert!(equal_to_x(y));
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 13-12: Example of a closure that refers to a
|
||
variable in its enclosing scope</span></p>
|
||
<p>Here, even though <code>x</code> is not one of the parameters of <code>equal_to_x</code>, the
|
||
<code>equal_to_x</code> closure is allowed to use the <code>x</code> variable that’s defined in the
|
||
same scope that <code>equal_to_x</code> is defined in.</p>
|
||
<p>We can’t do the same with functions; if we try with the following example, our
|
||
code won’t compile:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = 4;
|
||
|
||
fn equal_to_x(z: i32) -> bool { z == x }
|
||
|
||
let y = 4;
|
||
|
||
assert!(equal_to_x(y));
|
||
}
|
||
</code></pre>
|
||
<p>We get an error:</p>
|
||
<pre><code class="language-text">error[E0434]: can't capture dynamic environment in a fn item; use the || { ...
|
||
} closure form instead
|
||
--> src/main.rs
|
||
|
|
||
4 | fn equal_to_x(z: i32) -> bool { z == x }
|
||
| ^
|
||
</code></pre>
|
||
<p>The compiler even reminds us that this only works with closures!</p>
|
||
<p>When a closure captures a value from its environment, it uses memory to store
|
||
the values for use in the closure body. This use of memory is overhead that we
|
||
don’t want to pay in more common cases where we want to execute code that
|
||
doesn’t capture its environment. Because functions are never allowed to capture
|
||
their environment, defining and using functions will never incur this overhead.</p>
|
||
<p>Closures can capture values from their environment in three ways, which
|
||
directly map to the three ways a function can take a parameter: taking
|
||
ownership, borrowing mutably, and borrowing immutably. These are encoded in the
|
||
three <code>Fn</code> traits as follows:</p>
|
||
<ul>
|
||
<li><code>FnOnce</code> consumes the variables it captures from its enclosing scope, known
|
||
as the closure’s <em>environment</em>. To consume the captured variables, the
|
||
closure must take ownership of these variables and move them into the closure
|
||
when it is defined. The <code>Once</code> part of the name represents the fact that the
|
||
closure can’t take ownership of the same variables more than once, so it can
|
||
be called only once.</li>
|
||
<li><code>FnMut</code> can change the environment because it mutably borrows values.</li>
|
||
<li><code>Fn</code> borrows values from the environment immutably.</li>
|
||
</ul>
|
||
<p>When you create a closure, Rust infers which trait to use based on how the
|
||
closure uses the values from the environment. All closures implement <code>FnOnce</code>
|
||
because they can all be called at least once. Closures that don’t move the
|
||
captured variables also implement <code>FnMut</code>, and closures that don’t need mutable
|
||
access to the captured variables also implement <code>Fn</code>. In Listing 13-12, the
|
||
<code>equal_to_x</code> closure borrows <code>x</code> immutably (so <code>equal_to_x</code> has the <code>Fn</code> trait)
|
||
because the body of the closure only needs to read the value in <code>x</code>.</p>
|
||
<p>If you want to force the closure to take ownership of the values it uses in the
|
||
environment, you can use the <code>move</code> keyword before the parameter list. This
|
||
technique is mostly useful when passing a closure to a new thread to move the
|
||
data so it’s owned by the new thread.</p>
|
||
<p>We’ll have more examples of <code>move</code> closures in Chapter 16 when we talk about
|
||
concurrency. For now, here’s the code from Listing 13-12 with the <code>move</code>
|
||
keyword added to the closure definition and using vectors instead of integers,
|
||
because integers can be copied rather than moved; note that this code will not
|
||
yet compile.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = vec![1, 2, 3];
|
||
|
||
let equal_to_x = move |z| z == x;
|
||
|
||
println!("can't use x here: {:?}", x);
|
||
|
||
let y = vec![1, 2, 3];
|
||
|
||
assert!(equal_to_x(y));
|
||
}
|
||
</code></pre>
|
||
<p>We receive the following error:</p>
|
||
<pre><code class="language-text">error[E0382]: use of moved value: `x`
|
||
--> src/main.rs:6:40
|
||
|
|
||
4 | let equal_to_x = move |z| z == x;
|
||
| -------- value moved (into closure) here
|
||
5 |
|
||
6 | println!("can't use x here: {:?}", x);
|
||
| ^ value used here after move
|
||
|
|
||
= note: move occurs because `x` has type `std::vec::Vec<i32>`, which does not
|
||
implement the `Copy` trait
|
||
</code></pre>
|
||
<p>The <code>x</code> value is moved into the closure when the closure is defined, because we
|
||
added the <code>move</code> keyword. The closure then has ownership of <code>x</code>, and <code>main</code>
|
||
isn’t allowed to use <code>x</code> anymore in the <code>println!</code> statement. Removing
|
||
<code>println!</code> will fix this example.</p>
|
||
<p>Most of the time when specifying one of the <code>Fn</code> trait bounds, you can start
|
||
with <code>Fn</code> and the compiler will tell you if you need <code>FnMut</code> or <code>FnOnce</code> based
|
||
on what happens in the closure body.</p>
|
||
<p>To illustrate situations where closures that can capture their environment are
|
||
useful as function parameters, let’s move on to our next topic: iterators.</p>
|
||
<h2><a class="header" href="#processing-a-series-of-items-with-iterators" id="processing-a-series-of-items-with-iterators">Processing a Series of Items with Iterators</a></h2>
|
||
<p>The iterator pattern allows you to perform some task on a sequence of items in
|
||
turn. An iterator is responsible for the logic of iterating over each item and
|
||
determining when the sequence has finished. When you use iterators, you don’t
|
||
have to reimplement that logic yourself.</p>
|
||
<p>In Rust, iterators are <em>lazy</em>, meaning they have no effect until you call
|
||
methods that consume the iterator to use it up. For example, the code in
|
||
Listing 13-13 creates an iterator over the items in the vector <code>v1</code> by calling
|
||
the <code>iter</code> method defined on <code>Vec&lt;T&gt;</code>. This code by itself doesn’t do anything
|
||
useful.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v1 = vec![1, 2, 3];
|
||
|
||
let v1_iter = v1.iter();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-13: Creating an iterator</span></p>
|
||
<p>Once we’ve created an iterator, we can use it in a variety of ways. In Listing
|
||
3-5 in Chapter 3, we used iterators with <code>for</code> loops to execute some code on
|
||
each item, although we glossed over what the call to <code>iter</code> did until now.</p>
|
||
<p>The example in Listing 13-14 separates the creation of the iterator from the
|
||
use of the iterator in the <code>for</code> loop. The iterator is stored in the <code>v1_iter</code>
|
||
variable, and no iteration takes place at that time. When the <code>for</code> loop is
|
||
called using the iterator in <code>v1_iter</code>, each element in the iterator is used in
|
||
one iteration of the loop, which prints out each value.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v1 = vec![1, 2, 3];
|
||
|
||
let v1_iter = v1.iter();
|
||
|
||
for val in v1_iter {
|
||
println!("Got: {}", val);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-14: Using an iterator in a <code>for</code> loop</span></p>
|
||
<p>In languages that don’t have iterators provided by their standard libraries,
|
||
you would likely write this same functionality by starting a variable at index
|
||
0, using that variable to index into the vector to get a value, and
|
||
incrementing the variable value in a loop until it reached the total number of
|
||
items in the vector.</p>
|
||
<p>Iterators handle all that logic for you, cutting down on repetitive code you
|
||
could potentially mess up. Iterators give you more flexibility to use the same
|
||
logic with many different kinds of sequences, not just data structures you can
|
||
index into, like vectors. Let’s examine how iterators do that.</p>
|
||
<h3><a class="header" href="#the-iterator-trait-and-the-next-method" id="the-iterator-trait-and-the-next-method">The <code>Iterator</code> Trait and the <code>next</code> Method</a></h3>
|
||
<p>All iterators implement a trait named <code>Iterator</code> that is defined in the
|
||
standard library. The definition of the trait looks like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Iterator {
|
||
type Item;
|
||
|
||
fn next(&mut self) -> Option<Self::Item>;
|
||
|
||
// methods with default implementations elided
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Notice this definition uses some new syntax: <code>type Item</code> and <code>Self::Item</code>,
|
||
which are defining an <em>associated type</em> with this trait. We’ll talk about
|
||
associated types in depth in Chapter 19. For now, all you need to know is that
|
||
this code says implementing the <code>Iterator</code> trait requires that you also define
|
||
an <code>Item</code> type, and this <code>Item</code> type is used in the return type of the <code>next</code>
|
||
method. In other words, the <code>Item</code> type will be the type returned from the
|
||
iterator.</p>
|
||
<p>The <code>Iterator</code> trait only requires implementors to define one method: the
|
||
<code>next</code> method, which returns one item of the iterator at a time wrapped in
|
||
<code>Some</code> and, when iteration is over, returns <code>None</code>.</p>
|
||
<p>We can call the <code>next</code> method on iterators directly; Listing 13-15 demonstrates
|
||
what values are returned from repeated calls to <code>next</code> on the iterator created
|
||
from the vector.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[test]
|
||
fn iterator_demonstration() {
|
||
let v1 = vec![1, 2, 3];
|
||
|
||
let mut v1_iter = v1.iter();
|
||
|
||
assert_eq!(v1_iter.next(), Some(&1));
|
||
assert_eq!(v1_iter.next(), Some(&2));
|
||
assert_eq!(v1_iter.next(), Some(&3));
|
||
assert_eq!(v1_iter.next(), None);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-15: Calling the <code>next</code> method on an
|
||
iterator</span></p>
|
||
<p>Note that we needed to make <code>v1_iter</code> mutable: calling the <code>next</code> method on an
|
||
iterator changes internal state that the iterator uses to keep track of where
|
||
it is in the sequence. In other words, this code <em>consumes</em>, or uses up, the
|
||
iterator. Each call to <code>next</code> eats up an item from the iterator. We didn’t need
|
||
to make <code>v1_iter</code> mutable when we used a <code>for</code> loop because the loop took
|
||
ownership of <code>v1_iter</code> and made it mutable behind the scenes.</p>
|
||
<p>Also note that the values we get from the calls to <code>next</code> are immutable
|
||
references to the values in the vector. The <code>iter</code> method produces an iterator
|
||
over immutable references. If we want to create an iterator that takes
|
||
ownership of <code>v1</code> and returns owned values, we can call <code>into_iter</code> instead of
|
||
<code>iter</code>. Similarly, if we want to iterate over mutable references, we can call
|
||
<code>iter_mut</code> instead of <code>iter</code>.</p>
|
||
<h3><a class="header" href="#methods-that-consume-the-iterator" id="methods-that-consume-the-iterator">Methods that Consume the Iterator</a></h3>
|
||
<p>The <code>Iterator</code> trait has a number of different methods with default
|
||
implementations provided by the standard library; you can find out about these
|
||
methods by looking in the standard library API documentation for the <code>Iterator</code>
|
||
trait. Some of these methods call the <code>next</code> method in their definition, which
|
||
is why you’re required to implement the <code>next</code> method when implementing the
|
||
<code>Iterator</code> trait.</p>
|
||
<p>Methods that call <code>next</code> are called <em>consuming adaptors</em>, because calling them
|
||
uses up the iterator. One example is the <code>sum</code> method, which takes ownership of
|
||
the iterator and iterates through the items by repeatedly calling <code>next</code>, thus
|
||
consuming the iterator. As it iterates through, it adds each item to a running
|
||
total and returns the total when iteration is complete. Listing 13-16 has a
|
||
test illustrating a use of the <code>sum</code> method:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[test]
|
||
fn iterator_sum() {
|
||
let v1 = vec![1, 2, 3];
|
||
|
||
let v1_iter = v1.iter();
|
||
|
||
let total: i32 = v1_iter.sum();
|
||
|
||
assert_eq!(total, 6);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-16: Calling the <code>sum</code> method to get the total
|
||
of all items in the iterator</span></p>
|
||
<p>We aren’t allowed to use <code>v1_iter</code> after the call to <code>sum</code> because <code>sum</code> takes
|
||
ownership of the iterator we call it on.</p>
|
||
<h3><a class="header" href="#methods-that-produce-other-iterators" id="methods-that-produce-other-iterators">Methods that Produce Other Iterators</a></h3>
|
||
<p>Other methods defined on the <code>Iterator</code> trait, known as <em>iterator adaptors</em>,
|
||
allow you to change iterators into different kinds of iterators. You can chain
|
||
multiple calls to iterator adaptors to perform complex actions in a readable
|
||
way. But because all iterators are lazy, you have to call one of the consuming
|
||
adaptor methods to get results from calls to iterator adaptors.</p>
|
||
<p>Listing 13-17 shows an example of calling the iterator adaptor method <code>map</code>,
|
||
which takes a closure to call on each item to produce a new iterator. The
|
||
closure here creates a new iterator in which each item from the vector has been
|
||
incremented by 1. However, this code produces a warning:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust not_desired_behavior">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v1: Vec<i32> = vec![1, 2, 3];
|
||
|
||
v1.iter().map(|x| x + 1);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-17: Calling the iterator adaptor <code>map</code> to
|
||
create a new iterator</span></p>
|
||
<p>The warning we get is this:</p>
|
||
<pre><code class="language-text">warning: unused `std::iter::Map` which must be used: iterator adaptors are lazy
|
||
and do nothing unless consumed
|
||
--> src/main.rs:4:5
|
||
|
|
||
4 | v1.iter().map(|x| x + 1);
|
||
| ^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
|
|
||
= note: #[warn(unused_must_use)] on by default
|
||
</code></pre>
|
||
<p>The code in Listing 13-17 doesn’t do anything; the closure we’ve specified
|
||
never gets called. The warning reminds us why: iterator adaptors are lazy, and
|
||
we need to consume the iterator here.</p>
|
||
<p>To fix this and consume the iterator, we’ll use the <code>collect</code> method, which we
|
||
used in Chapter 12 with <code>env::args</code> in Listing 12-1. This method consumes the
|
||
iterator and collects the resulting values into a collection data type.</p>
|
||
<p>In Listing 13-18, we collect the results of iterating over the iterator that’s
|
||
returned from the call to <code>map</code> into a vector. This vector will end up
|
||
containing each item from the original vector incremented by 1.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v1: Vec<i32> = vec![1, 2, 3];
|
||
|
||
let v2: Vec<_> = v1.iter().map(|x| x + 1).collect();
|
||
|
||
assert_eq!(v2, vec![2, 3, 4]);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-18: Calling the <code>map</code> method to create a new
|
||
iterator and then calling the <code>collect</code> method to consume the new iterator and
|
||
create a vector</span></p>
|
||
<p>Because <code>map</code> takes a closure, we can specify any operation we want to perform
|
||
on each item. This is a great example of how closures let you customize some
|
||
behavior while reusing the iteration behavior that the <code>Iterator</code> trait
|
||
provides.</p>
|
||
<h3><a class="header" href="#using-closures-that-capture-their-environment" id="using-closures-that-capture-their-environment">Using Closures that Capture Their Environment</a></h3>
|
||
<p>Now that we’ve introduced iterators, we can demonstrate a common use of
|
||
closures that capture their environment by using the <code>filter</code> iterator adaptor.
|
||
The <code>filter</code> method on an iterator takes a closure that takes each item from
|
||
the iterator and returns a Boolean. If the closure returns <code>true</code>, the value
|
||
will be included in the iterator produced by <code>filter</code>. If the closure returns
|
||
<code>false</code>, the value won’t be included in the resulting iterator.</p>
|
||
<p>In Listing 13-19, we use <code>filter</code> with a closure that captures the <code>shoe_size</code>
|
||
variable from its environment to iterate over a collection of <code>Shoe</code> struct
|
||
instances. It will return only shoes that are the specified size.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[derive(PartialEq, Debug)]
|
||
struct Shoe {
|
||
size: u32,
|
||
style: String,
|
||
}
|
||
|
||
fn shoes_in_my_size(shoes: Vec<Shoe>, shoe_size: u32) -> Vec<Shoe> {
|
||
shoes.into_iter()
|
||
.filter(|s| s.size == shoe_size)
|
||
.collect()
|
||
}
|
||
|
||
#[test]
|
||
fn filters_by_size() {
|
||
let shoes = vec![
|
||
Shoe { size: 10, style: String::from("sneaker") },
|
||
Shoe { size: 13, style: String::from("sandal") },
|
||
Shoe { size: 10, style: String::from("boot") },
|
||
];
|
||
|
||
let in_my_size = shoes_in_my_size(shoes, 10);
|
||
|
||
assert_eq!(
|
||
in_my_size,
|
||
vec![
|
||
Shoe { size: 10, style: String::from("sneaker") },
|
||
Shoe { size: 10, style: String::from("boot") },
|
||
]
|
||
);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-19: Using the <code>filter</code> method with a closure
|
||
that captures <code>shoe_size</code></span></p>
|
||
<p>The <code>shoes_in_my_size</code> function takes ownership of a vector of shoes and a shoe
|
||
size as parameters. It returns a vector containing only shoes of the specified
|
||
size.</p>
|
||
<p>In the body of <code>shoes_in_my_size</code>, we call <code>into_iter</code> to create an iterator
|
||
that takes ownership of the vector. Then we call <code>filter</code> to adapt that
|
||
iterator into a new iterator that only contains elements for which the closure
|
||
returns <code>true</code>.</p>
|
||
<p>The closure captures the <code>shoe_size</code> parameter from the environment and
|
||
compares the value with each shoe’s size, keeping only shoes of the size
|
||
specified. Finally, calling <code>collect</code> gathers the values returned by the
|
||
adapted iterator into a vector that’s returned by the function.</p>
|
||
<p>The test shows that when we call <code>shoes_in_my_size</code>, we get back only shoes
|
||
that have the same size as the value we specified.</p>
|
||
<h3><a class="header" href="#creating-our-own-iterators-with-the-iterator-trait" id="creating-our-own-iterators-with-the-iterator-trait">Creating Our Own Iterators with the <code>Iterator</code> Trait</a></h3>
|
||
<p>We’ve shown that you can create an iterator by calling <code>iter</code>, <code>into_iter</code>, or
|
||
<code>iter_mut</code> on a vector. You can create iterators from the other collection
|
||
types in the standard library, such as hash maps. You can also create iterators
|
||
that do anything you want by implementing the <code>Iterator</code> trait on your own
|
||
types. As previously mentioned, the only method you’re required to provide a
|
||
definition for is the <code>next</code> method. Once you’ve done that, you can use all
|
||
other methods that have default implementations provided by the <code>Iterator</code>
|
||
trait!</p>
|
||
<p>To demonstrate, let’s create an iterator that will only ever count from 1 to 5.
|
||
First, we’ll create a struct to hold some values. Then we’ll make this struct
|
||
into an iterator by implementing the <code>Iterator</code> trait and using the values in
|
||
that implementation.</p>
|
||
<p>Listing 13-20 has the definition of the <code>Counter</code> struct and an associated
|
||
<code>new</code> function to create instances of <code>Counter</code>:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct Counter {
|
||
count: u32,
|
||
}
|
||
|
||
impl Counter {
|
||
fn new() -> Counter {
|
||
Counter { count: 0 }
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-20: Defining the <code>Counter</code> struct and a <code>new</code>
|
||
function that creates instances of <code>Counter</code> with an initial value of 0 for
|
||
<code>count</code></span></p>
|
||
<p>The <code>Counter</code> struct has one field named <code>count</code>. This field holds a <code>u32</code>
|
||
value that will keep track of where we are in the process of iterating from 1
|
||
to 5. The <code>count</code> field is private because we want the implementation of
|
||
<code>Counter</code> to manage its value. The <code>new</code> function enforces the behavior of
|
||
always starting new instances with a value of 0 in the <code>count</code> field.</p>
|
||
<p>Next, we’ll implement the <code>Iterator</code> trait for our <code>Counter</code> type by defining
|
||
the body of the <code>next</code> method to specify what we want to happen when this
|
||
iterator is used, as shown in Listing 13-21:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Counter {
|
||
</span><span class="boring"> count: u32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Iterator for Counter {
|
||
type Item = u32;
|
||
|
||
fn next(&amp;mut self) -&gt; Option&lt;Self::Item&gt; {
|
||
self.count += 1;
|
||
|
||
if self.count < 6 {
|
||
Some(self.count)
|
||
} else {
|
||
None
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-21: Implementing the <code>Iterator</code> trait on our
|
||
<code>Counter</code> struct</span></p>
|
||
<p>We set the associated <code>Item</code> type for our iterator to <code>u32</code>, meaning the
|
||
iterator will return <code>u32</code> values. Again, don’t worry about associated types
|
||
yet; we’ll cover them in Chapter 19.</p>
|
||
<p>We want our iterator to add 1 to the current state, so we initialized <code>count</code>
|
||
to 0 so it would return 1 first. If the value of <code>count</code> is less than 6, <code>next</code>
|
||
will return the current value wrapped in <code>Some</code>, but if <code>count</code> is 6 or higher,
|
||
our iterator will return <code>None</code>.</p>
|
||
<h4><a class="header" href="#using-our-counter-iterators-next-method" id="using-our-counter-iterators-next-method">Using Our <code>Counter</code> Iterator’s <code>next</code> Method</a></h4>
|
||
<p>Once we’ve implemented the <code>Iterator</code> trait, we have an iterator! Listing 13-22
|
||
shows a test demonstrating that we can use the iterator functionality of our
|
||
<code>Counter</code> struct by calling the <code>next</code> method on it directly, just as we did
|
||
with the iterator created from a vector in Listing 13-15.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Counter {
|
||
</span><span class="boring"> count: u32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Iterator for Counter {
|
||
</span><span class="boring"> type Item = u32;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> fn next(&amp;mut self) -&gt; Option&lt;Self::Item&gt; {
|
||
</span><span class="boring"> self.count += 1;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> if self.count < 6 {
|
||
</span><span class="boring"> Some(self.count)
|
||
</span><span class="boring"> } else {
|
||
</span><span class="boring"> None
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>#[test]
|
||
fn calling_next_directly() {
|
||
let mut counter = Counter::new();
|
||
|
||
assert_eq!(counter.next(), Some(1));
|
||
assert_eq!(counter.next(), Some(2));
|
||
assert_eq!(counter.next(), Some(3));
|
||
assert_eq!(counter.next(), Some(4));
|
||
assert_eq!(counter.next(), Some(5));
|
||
assert_eq!(counter.next(), None);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-22: Testing the functionality of the <code>next</code>
|
||
method implementation</span></p>
|
||
<p>This test creates a new <code>Counter</code> instance in the <code>counter</code> variable and then
|
||
calls <code>next</code> repeatedly, verifying that we have implemented the behavior we
|
||
want this iterator to have: returning the values from 1 to 5.</p>
|
||
<h4><a class="header" href="#using-other-iterator-trait-methods" id="using-other-iterator-trait-methods">Using Other <code>Iterator</code> Trait Methods</a></h4>
|
||
<p>We implemented the <code>Iterator</code> trait by defining the <code>next</code> method, so we
|
||
can now use the default implementations of any <code>Iterator</code> trait method as defined in
|
||
the standard library, because they all use the <code>next</code> method’s functionality.</p>
|
||
<p>For example, if for some reason we wanted to take the values produced by an
|
||
instance of <code>Counter</code>, pair them with values produced by another <code>Counter</code>
|
||
instance after skipping the first value, multiply each pair together, keep only
|
||
those results that are divisible by 3, and add all the resulting values
|
||
together, we could do so, as shown in the test in Listing 13-23:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Counter {
|
||
</span><span class="boring"> count: u32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Counter {
|
||
</span><span class="boring"> fn new() -> Counter {
|
||
</span><span class="boring"> Counter { count: 0 }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Iterator for Counter {
|
||
</span><span class="boring"> // Our iterator will produce u32s
|
||
</span><span class="boring"> type Item = u32;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> fn next(&amp;mut self) -&gt; Option&lt;Self::Item&gt; {
|
||
</span><span class="boring"> // increment our count. This is why we started at zero.
|
||
</span><span class="boring"> self.count += 1;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> // check to see if we've finished counting or not.
|
||
</span><span class="boring"> if self.count < 6 {
|
||
</span><span class="boring"> Some(self.count)
|
||
</span><span class="boring"> } else {
|
||
</span><span class="boring"> None
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>#[test]
|
||
fn using_other_iterator_trait_methods() {
|
||
let sum: u32 = Counter::new().zip(Counter::new().skip(1))
|
||
.map(|(a, b)| a * b)
|
||
.filter(|x| x % 3 == 0)
|
||
.sum();
|
||
assert_eq!(18, sum);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 13-23: Using a variety of <code>Iterator</code> trait
|
||
methods on our <code>Counter</code> iterator</span></p>
|
||
<p>Note that <code>zip</code> produces only four pairs; the theoretical fifth pair <code>(5, None)</code> is never produced because <code>zip</code> returns <code>None</code> when either of its input
|
||
iterators returns <code>None</code>.</p>
|
||
<p>All of these method calls are possible because we specified how the <code>next</code>
|
||
method works, and the standard library provides default implementations for
|
||
other methods that call <code>next</code>.</p>
|
||
<h2><a class="header" href="#improving-our-io-project" id="improving-our-io-project">Improving Our I/O Project</a></h2>
|
||
<p>With this new knowledge about iterators, we can improve the I/O project in
|
||
Chapter 12 by using iterators to make places in the code clearer and more
|
||
concise. Let’s look at how iterators can improve our implementation of the
|
||
<code>Config::new</code> function and the <code>search</code> function.</p>
|
||
<h3><a class="header" href="#removing-a-clone-using-an-iterator" id="removing-a-clone-using-an-iterator">Removing a <code>clone</code> Using an Iterator</a></h3>
|
||
<p>In Listing 12-6, we added code that took a slice of <code>String</code> values and created
|
||
an instance of the <code>Config</code> struct by indexing into the slice and cloning the
|
||
values, allowing the <code>Config</code> struct to own those values. In Listing 13-24,
|
||
we’ve reproduced the implementation of the <code>Config::new</code> function as it was in
|
||
Listing 12-23:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Config {
|
||
pub fn new(args: &amp;[String]) -&gt; Result&lt;Config, &amp;'static str&gt; {
|
||
if args.len() < 3 {
|
||
return Err("not enough arguments");
|
||
}
|
||
|
||
let query = args[1].clone();
|
||
let filename = args[2].clone();
|
||
|
||
let case_sensitive = env::var("CASE_INSENSITIVE").is_err();
|
||
|
||
Ok(Config { query, filename, case_sensitive })
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 13-24: Reproduction of the <code>Config::new</code> function
|
||
from Listing 12-23</span></p>
|
||
<p>At the time, we said not to worry about the inefficient <code>clone</code> calls because
|
||
we would remove them in the future. Well, that time is now!</p>
|
||
<p>We needed <code>clone</code> here because we have a slice with <code>String</code> elements in the
|
||
parameter <code>args</code>, but the <code>new</code> function doesn’t own <code>args</code>. To return
|
||
ownership of a <code>Config</code> instance, we had to clone the values for the <code>query</code>
|
||
and <code>filename</code> fields of <code>Config</code> so the <code>Config</code> instance can own its values.</p>
|
||
<p>With our new knowledge about iterators, we can change the <code>new</code> function to
|
||
take ownership of an iterator as its argument instead of borrowing a slice.
|
||
We’ll use the iterator functionality instead of the code that checks the length
|
||
of the slice and indexes into specific locations. This will clarify what the
|
||
<code>Config::new</code> function is doing because the iterator will access the values.</p>
|
||
<p>Once <code>Config::new</code> takes ownership of the iterator and stops using indexing
|
||
operations that borrow, we can move the <code>String</code> values from the iterator into
|
||
<code>Config</code> rather than calling <code>clone</code> and making a new allocation.</p>
|
||
<h4><a class="header" href="#using-the-returned-iterator-directly" id="using-the-returned-iterator-directly">Using the Returned Iterator Directly</a></h4>
|
||
<p>Open your I/O project’s <em>src/main.rs</em> file, which should look like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let args: Vec&lt;String&gt; = env::args().collect();
|
||
|
||
let config = Config::new(&args).unwrap_or_else(|err| {
|
||
eprintln!("Problem parsing arguments: {}", err);
|
||
process::exit(1);
|
||
});
|
||
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>We’ll change the start of the <code>main</code> function that we had in Listing 12-24 to
|
||
the code in Listing 13-25. This won’t compile until we update <code>Config::new</code> as
|
||
well.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let config = Config::new(env::args()).unwrap_or_else(|err| {
|
||
eprintln!("Problem parsing arguments: {}", err);
|
||
process::exit(1);
|
||
});
|
||
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 13-25: Passing the return value of <code>env::args</code> to
|
||
<code>Config::new</code></span></p>
|
||
<p>The <code>env::args</code> function returns an iterator! Rather than collecting the
|
||
iterator values into a vector and then passing a slice to <code>Config::new</code>, now
|
||
we’re passing ownership of the iterator returned from <code>env::args</code> to
|
||
<code>Config::new</code> directly.</p>
|
||
<p>Next, we need to update the definition of <code>Config::new</code>. In your I/O project’s
|
||
<em>src/lib.rs</em> file, let’s change the signature of <code>Config::new</code> to look like
|
||
Listing 13-26. This still won’t compile because we need to update the function
|
||
body.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Config {
|
||
pub fn new(mut args: std::env::Args) -&gt; Result&lt;Config, &amp;'static str&gt; {
|
||
// --snip--
|
||
</code></pre>
|
||
<p><span class="caption">Listing 13-26: Updating the signature of <code>Config::new</code> to
|
||
expect an iterator</span></p>
|
||
<p>The standard library documentation for the <code>env::args</code> function shows that the
|
||
type of the iterator it returns is <code>std::env::Args</code>. We’ve updated the
|
||
signature of the <code>Config::new</code> function so the parameter <code>args</code> has the type
|
||
<code>std::env::Args</code> instead of <code>&[String]</code>. Because we’re taking ownership of
|
||
<code>args</code> and we’ll be mutating <code>args</code> by iterating over it, we can add the <code>mut</code>
|
||
keyword into the specification of the <code>args</code> parameter to make it mutable.</p>
|
||
<h4><a class="header" href="#using-iterator-trait-methods-instead-of-indexing" id="using-iterator-trait-methods-instead-of-indexing">Using <code>Iterator</code> Trait Methods Instead of Indexing</a></h4>
|
||
<p>Next, we’ll fix the body of <code>Config::new</code>. The standard library documentation
|
||
also mentions that <code>std::env::Args</code> implements the <code>Iterator</code> trait, so we know
|
||
we can call the <code>next</code> method on it! Listing 13-27 updates the code from
|
||
Listing 12-23 to use the <code>next</code> method:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span><span class="boring">use std::env;
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct Config {
|
||
</span><span class="boring"> query: String,
|
||
</span><span class="boring"> filename: String,
|
||
</span><span class="boring"> case_sensitive: bool,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Config {
|
||
pub fn new(mut args: std::env::Args) -&gt; Result&lt;Config, &amp;'static str&gt; {
|
||
args.next();
|
||
|
||
let query = match args.next() {
|
||
Some(arg) => arg,
|
||
None => return Err("Didn't get a query string"),
|
||
};
|
||
|
||
let filename = match args.next() {
|
||
Some(arg) => arg,
|
||
None => return Err("Didn't get a file name"),
|
||
};
|
||
|
||
let case_sensitive = env::var("CASE_INSENSITIVE").is_err();
|
||
|
||
Ok(Config { query, filename, case_sensitive })
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 13-27: Changing the body of <code>Config::new</code> to use
|
||
iterator methods</span></p>
|
||
<p>Remember that the first value in the return value of <code>env::args</code> is the name of
|
||
the program. We want to ignore that and get to the next value, so first we call
|
||
<code>next</code> and do nothing with the return value. Second, we call <code>next</code> to get the
|
||
value we want to put in the <code>query</code> field of <code>Config</code>. If <code>next</code> returns a
|
||
<code>Some</code>, we use a <code>match</code> to extract the value. If it returns <code>None</code>, it means
|
||
not enough arguments were given and we return early with an <code>Err</code> value. We do
|
||
the same thing for the <code>filename</code> value.</p>
|
||
<h3><a class="header" href="#making-code-clearer-with-iterator-adaptors" id="making-code-clearer-with-iterator-adaptors">Making Code Clearer with Iterator Adaptors</a></h3>
|
||
<p>We can also take advantage of iterators in the <code>search</code> function in our I/O
|
||
project, which is reproduced here in Listing 13-28 as it was in Listing 12-19:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
let mut results = Vec::new();
|
||
|
||
for line in contents.lines() {
|
||
if line.contains(query) {
|
||
results.push(line);
|
||
}
|
||
}
|
||
|
||
results
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 13-28: The implementation of the <code>search</code>
|
||
function from Listing 12-19</span></p>
|
||
<p>We can write this code in a more concise way using iterator adaptor methods.
|
||
Doing so also lets us avoid having a mutable intermediate <code>results</code> vector. The
|
||
functional programming style prefers to minimize the amount of mutable state to
|
||
make code clearer. Removing the mutable state might enable a future enhancement
|
||
to make searching happen in parallel, because we wouldn’t have to manage
|
||
concurrent access to the <code>results</code> vector. Listing 13-29 shows this change:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
|
||
contents.lines()
|
||
.filter(|line| line.contains(query))
|
||
.collect()
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 13-29: Using iterator adaptor methods in the
|
||
implementation of the <code>search</code> function</span></p>
|
||
<p>Recall that the purpose of the <code>search</code> function is to return all lines in
|
||
<code>contents</code> that contain the <code>query</code>. Similar to the <code>filter</code> example in Listing
|
||
13-19, this code uses the <code>filter</code> adaptor to keep only the lines for which
|
||
<code>line.contains(query)</code> returns <code>true</code>. We then collect the matching lines
|
||
into another vector with <code>collect</code>. Much simpler! Feel free to make the same
|
||
change to use iterator methods in the <code>search_case_insensitive</code> function as
|
||
well.</p>
|
||
<p>The next logical question is which style you should choose in your own code and
|
||
why: the original implementation in Listing 13-28 or the version using
|
||
iterators in Listing 13-29. Most Rust programmers prefer to use the iterator
|
||
style. It’s a bit tougher to get the hang of at first, but once you get a feel
|
||
for the various iterator adaptors and what they do, iterators can be easier to
|
||
understand. Instead of fiddling with the various bits of looping and building
|
||
new vectors, the code focuses on the high-level objective of the loop. This
|
||
abstracts away some of the commonplace code so it’s easier to see the concepts
|
||
that are unique to this code, such as the filtering condition each element in
|
||
the iterator must pass.</p>
|
||
<p>But are the two implementations truly equivalent? The intuitive assumption
|
||
might be that the more low-level loop will be faster. Let’s talk about
|
||
performance.</p>
|
||
<h2><a class="header" href="#comparing-performance-loops-vs-iterators" id="comparing-performance-loops-vs-iterators">Comparing Performance: Loops vs. Iterators</a></h2>
|
||
<p>To determine whether to use loops or iterators, you need to know which version
|
||
of our <code>search</code> function is faster: the version with an explicit <code>for</code> loop or
|
||
the version with iterators.</p>
|
||
<p>We ran a benchmark by loading the entire contents of <em>The Adventures of
|
||
Sherlock Holmes</em> by Sir Arthur Conan Doyle into a <code>String</code> and looking for the
|
||
word <em>the</em> in the contents. Here are the results of the benchmark on the
|
||
version of <code>search</code> using the <code>for</code> loop and the version using iterators:</p>
|
||
<pre><code class="language-text">test bench_search_for ... bench: 19,620,300 ns/iter (+/- 915,700)
|
||
test bench_search_iter ... bench: 19,234,900 ns/iter (+/- 657,200)
|
||
</code></pre>
|
||
<p>The iterator version was slightly faster! We won’t explain the benchmark code
|
||
here, because the point is not to prove that the two versions are equivalent
|
||
but to get a general sense of how these two implementations compare
|
||
performance-wise.</p>
|
||
<p>For a more comprehensive benchmark, you should check using various texts of
|
||
various sizes as the <code>contents</code>, different words and words of different lengths
|
||
as the <code>query</code>, and all kinds of other variations. The point is this:
|
||
iterators, although a high-level abstraction, get compiled down to roughly the
|
||
same code as if you’d written the lower-level code yourself. Iterators are one
|
||
of Rust’s <em>zero-cost abstractions</em>, by which we mean using the abstraction
|
||
imposes no additional runtime overhead. This is analogous to how Bjarne
|
||
Stroustrup, the original designer and implementor of C++, defines
|
||
<em>zero-overhead</em> in “Foundations of C++” (2012):</p>
|
||
<blockquote>
|
||
<p>In general, C++ implementations obey the zero-overhead principle: What you
|
||
don’t use, you don’t pay for. And further: What you do use, you couldn’t hand
|
||
code any better.</p>
|
||
</blockquote>
|
||
<p>As another example, the following code is taken from an audio decoder. The
|
||
decoding algorithm uses the linear prediction mathematical operation to
|
||
estimate future values based on a linear function of the previous samples. This
|
||
code uses an iterator chain to do some math on three variables in scope: a
|
||
<code>buffer</code> slice of data, an array of 12 <code>coefficients</code>, and an amount by which
|
||
to shift data in <code>qlp_shift</code>. We’ve declared the variables within this example
|
||
but not given them any values; although this code doesn’t have much meaning
|
||
outside of its context, it’s still a concise, real-world example of how Rust
|
||
translates high-level ideas to low-level code.</p>
|
||
<pre><code class="language-rust ignore">let buffer: &mut [i32];
|
||
let coefficients: [i64; 12];
|
||
let qlp_shift: i16;
|
||
|
||
for i in 12..buffer.len() {
|
||
let prediction = coefficients.iter()
|
||
.zip(&buffer[i - 12..i])
|
||
.map(|(&c, &s)| c * s as i64)
|
||
.sum::&lt;i64&gt;() &gt;&gt; qlp_shift;
|
||
let delta = buffer[i];
|
||
buffer[i] = prediction as i32 + delta;
|
||
}
|
||
</code></pre>
|
||
<p>To calculate the value of <code>prediction</code>, this code iterates through each of the
|
||
12 values in <code>coefficients</code> and uses the <code>zip</code> method to pair the coefficient
|
||
values with the previous 12 values in <code>buffer</code>. Then, for each pair, we
|
||
multiply the values together, sum all the results, and shift the bits in the
|
||
sum <code>qlp_shift</code> bits to the right.</p>
|
||
<p>Calculations in applications like audio decoders often prioritize performance
|
||
most highly. Here, we’re creating an iterator, using two adaptors, and then
|
||
consuming the value. What assembly code would this Rust code compile to? Well,
|
||
as of this writing, it compiles down to the same assembly you’d write by hand.
|
||
There’s no loop at all corresponding to the iteration over the values in
|
||
<code>coefficients</code>: Rust knows that there are 12 iterations, so it “unrolls” the
|
||
loop. <em>Unrolling</em> is an optimization that removes the overhead of the loop
|
||
controlling code and instead generates repetitive code for each iteration of
|
||
the loop.</p>
|
||
<p>All of the coefficients get stored in registers, which means accessing the
|
||
values is very fast. There are no bounds checks on the array access at runtime.
|
||
All these optimizations that Rust is able to apply make the resulting code
|
||
extremely efficient. Now that you know this, you can use iterators and closures
|
||
without fear! They make code seem like it’s higher level but don’t impose a
|
||
runtime performance penalty for doing so.</p>
|
||
<h2><a class="header" href="#summary-12" id="summary-12">Summary</a></h2>
|
||
<p>Closures and iterators are Rust features inspired by functional programming
|
||
language ideas. They contribute to Rust’s capability to clearly express
|
||
high-level ideas at low-level performance. The implementations of closures and
|
||
iterators are such that runtime performance is not affected. This is part of
|
||
Rust’s goal to strive to provide zero-cost abstractions.</p>
|
||
<p>Now that we’ve improved the expressiveness of our I/O project, let’s look at
|
||
some more features of <code>cargo</code> that will help us share the project with the
|
||
world.</p>
|
||
<h1><a class="header" href="#more-about-cargo-and-cratesio" id="more-about-cargo-and-cratesio">More About Cargo and Crates.io</a></h1>
|
||
<p>So far we’ve used only the most basic features of Cargo to build, run, and test
|
||
our code, but it can do a lot more. In this chapter, we’ll discuss some of its
|
||
other, more advanced features to show you how to do the following:</p>
|
||
<ul>
|
||
<li>Customize your build through release profiles</li>
|
||
<li>Publish libraries on <a href="https://crates.io/">crates.io</a><!-- ignore --></li>
|
||
<li>Organize large projects with workspaces</li>
|
||
<li>Install binaries from <a href="https://crates.io/">crates.io</a><!-- ignore --></li>
|
||
<li>Extend Cargo using custom commands</li>
|
||
</ul>
|
||
<p>Cargo can do even more than what we cover in this chapter, so for a full
|
||
explanation of all its features, see <a href="https://doc.rust-lang.org/cargo/">its
|
||
documentation</a>.</p>
|
||
<h2><a class="header" href="#customizing-builds-with-release-profiles" id="customizing-builds-with-release-profiles">Customizing Builds with Release Profiles</a></h2>
|
||
<p>In Rust, <em>release profiles</em> are predefined and customizable profiles with
|
||
different configurations that allow a programmer to have more control over
|
||
various options for compiling code. Each profile is configured independently of
|
||
the others.</p>
|
||
<p>Cargo has two main profiles: the <code>dev</code> profile Cargo uses when you run <code>cargo build</code> and the <code>release</code> profile Cargo uses when you run <code>cargo build --release</code>. The <code>dev</code> profile is defined with good defaults for development,
|
||
and the <code>release</code> profile has good defaults for release builds.</p>
|
||
<p>These profile names might be familiar from the output of your builds:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
$ cargo build --release
|
||
Finished release [optimized] target(s) in 0.0 secs
|
||
</code></pre>
|
||
<p>The <code>dev</code> and <code>release</code> shown in this build output indicate that the compiler
|
||
is using different profiles.</p>
|
||
<p>Cargo has default settings for each of the profiles that apply when there
|
||
aren’t any <code>[profile.*]</code> sections in the project’s <em>Cargo.toml</em> file. By adding
|
||
<code>[profile.*]</code> sections for any profile you want to customize, you can override
|
||
any subset of the default settings. For example, here are the default values
|
||
for the <code>opt-level</code> setting for the <code>dev</code> and <code>release</code> profiles:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[profile.dev]
|
||
opt-level = 0
|
||
|
||
[profile.release]
|
||
opt-level = 3
|
||
</code></pre>
|
||
<p>The <code>opt-level</code> setting controls the number of optimizations Rust will apply to
|
||
your code, with a range of 0 to 3. Applying more optimizations extends
|
||
compiling time, so if you’re in development and compiling your code often,
|
||
you’ll want faster compiling even if the resulting code runs slower. That is
|
||
the reason the default <code>opt-level</code> for <code>dev</code> is <code>0</code>. When you’re ready to
|
||
release your code, it’s best to spend more time compiling. You’ll only compile
|
||
in release mode once, but you’ll run the compiled program many times, so
|
||
release mode trades longer compile time for code that runs faster. That is why
|
||
the default <code>opt-level</code> for the <code>release</code> profile is <code>3</code>.</p>
|
||
<p>You can override any default setting by adding a different value for it in
|
||
<em>Cargo.toml</em>. For example, if we want to use optimization level 1 in the
|
||
development profile, we can add these two lines to our project’s <em>Cargo.toml</em>
|
||
file:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[profile.dev]
|
||
opt-level = 1
|
||
</code></pre>
|
||
<p>This code overrides the default setting of <code>0</code>. Now when we run <code>cargo build</code>,
|
||
Cargo will use the defaults for the <code>dev</code> profile plus our customization to
|
||
<code>opt-level</code>. Because we set <code>opt-level</code> to <code>1</code>, Cargo will apply more
|
||
optimizations than the default, but not as many as in a release build.</p>
|
||
<p>For the full list of configuration options and defaults for each profile, see
|
||
<a href="https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections">Cargo’s documentation</a>.</p>
|
||
<h2><a class="header" href="#publishing-a-crate-to-cratesio" id="publishing-a-crate-to-cratesio">Publishing a Crate to Crates.io</a></h2>
|
||
<p>We’ve used packages from <a href="https://crates.io/">crates.io</a><!-- ignore --> as
|
||
dependencies of our project, but you can also share your code with other people
|
||
by publishing your own packages. The crate registry at
|
||
<a href="https://crates.io/">crates.io</a><!-- ignore --> distributes the source code of
|
||
your packages, so it primarily hosts code that is open source.</p>
|
||
<p>Rust and Cargo have features that help make your published package easier for
|
||
people to use and to find in the first place. We’ll talk about some of these
|
||
features next and then explain how to publish a package.</p>
|
||
<h3><a class="header" href="#making-useful-documentation-comments" id="making-useful-documentation-comments">Making Useful Documentation Comments</a></h3>
|
||
<p>Accurately documenting your packages will help other users know how and when to
|
||
use them, so it’s worth investing the time to write documentation. In Chapter
|
||
3, we discussed how to comment Rust code using two slashes, <code>//</code>. Rust also has
|
||
a particular kind of comment for documentation, known conveniently as a
|
||
<em>documentation comment</em>, that will generate HTML documentation. The HTML
|
||
displays the contents of documentation comments for public API items intended
|
||
for programmers interested in knowing how to <em>use</em> your crate as opposed to how
|
||
your crate is <em>implemented</em>.</p>
|
||
<p>Documentation comments use three slashes, <code>///</code>, instead of two and support
|
||
Markdown notation for formatting the text. Place documentation comments just
|
||
before the item they’re documenting. Listing 14-1 shows documentation comments
|
||
for an <code>add_one</code> function in a crate named <code>my_crate</code>:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">/// Adds one to the number given.
|
||
///
|
||
/// # Examples
|
||
///
|
||
/// ```
|
||
/// let arg = 5;
|
||
/// let answer = my_crate::add_one(arg);
|
||
///
|
||
/// assert_eq!(6, answer);
|
||
/// ```
|
||
pub fn add_one(x: i32) -> i32 {
|
||
x + 1
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 14-1: A documentation comment for a
|
||
function</span></p>
|
||
<p>Here, we give a description of what the <code>add_one</code> function does, start a
|
||
section with the heading <code>Examples</code>, and then provide code that demonstrates
|
||
how to use the <code>add_one</code> function. We can generate the HTML documentation from
|
||
this documentation comment by running <code>cargo doc</code>. This command runs the
|
||
<code>rustdoc</code> tool distributed with Rust and puts the generated HTML documentation
|
||
in the <em>target/doc</em> directory.</p>
|
||
<p>For convenience, running <code>cargo doc --open</code> will build the HTML for your
|
||
current crate’s documentation (as well as the documentation for all of your
|
||
crate’s dependencies) and open the result in a web browser. Navigate to the
|
||
<code>add_one</code> function and you’ll see how the text in the documentation comments is
|
||
rendered, as shown in Figure 14-1:</p>
|
||
<img alt="Rendered HTML documentation for the `add_one` function of `my_crate`" src="img/trpl14-01.png" class="center" />
|
||
<p><span class="caption">Figure 14-1: HTML documentation for the <code>add_one</code>
|
||
function</span></p>
|
||
<h4><a class="header" href="#commonly-used-sections" id="commonly-used-sections">Commonly Used Sections</a></h4>
|
||
<p>We used the <code># Examples</code> Markdown heading in Listing 14-1 to create a section
|
||
in the HTML with the title “Examples.” Here are some other sections that crate
|
||
authors commonly use in their documentation:</p>
|
||
<ul>
|
||
<li><strong>Panics</strong>: The scenarios in which the function being documented could
|
||
panic. Callers of the function who don’t want their programs to panic should
|
||
make sure they don’t call the function in these situations.</li>
|
||
<li><strong>Errors</strong>: If the function returns a <code>Result</code>, describing the kinds of
|
||
errors that might occur and what conditions might cause those errors to be
|
||
returned can be helpful to callers so they can write code to handle the
|
||
different kinds of errors in different ways.</li>
|
||
<li><strong>Safety</strong>: If the function is <code>unsafe</code> to call (we discuss unsafety in
|
||
Chapter 19), there should be a section explaining why the function is unsafe
|
||
and covering the invariants that the function expects callers to uphold.</li>
|
||
</ul>
|
||
<p>Most documentation comments don’t need all of these sections, but this is a
|
||
good checklist to remind you of the aspects of your code that people calling
|
||
your code will be interested in knowing about.</p>
|
||
<h4><a class="header" href="#documentation-comments-as-tests" id="documentation-comments-as-tests">Documentation Comments as Tests</a></h4>
|
||
<p>Adding example code blocks in your documentation comments can help demonstrate
|
||
how to use your library, and doing so has an additional bonus: running <code>cargo test</code> will run the code examples in your documentation as tests! Nothing is
|
||
better than documentation with examples. But nothing is worse than examples
|
||
that don’t work because the code has changed since the documentation was
|
||
written. If we run <code>cargo test</code> with the documentation for the <code>add_one</code>
|
||
function from Listing 14-1, we will see a section in the test results like this:</p>
|
||
<pre><code class="language-text"> Doc-tests my_crate
|
||
|
||
running 1 test
|
||
test src/lib.rs - add_one (line 5) ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>Now if we change either the function or the example so the <code>assert_eq!</code> in the
|
||
example panics and run <code>cargo test</code> again, we’ll see that the doc tests catch
|
||
that the example and the code are out of sync with each other!</p>
|
||
<h4><a class="header" href="#commenting-contained-items" id="commenting-contained-items">Commenting Contained Items</a></h4>
|
||
<p>Another style of doc comment, <code>//!</code>, adds documentation to the item that
|
||
contains the comments rather than adding documentation to the items following
|
||
the comments. We typically use these doc comments inside the crate root file
|
||
(<em>src/lib.rs</em> by convention) or inside a module to document the crate or the
|
||
module as a whole.</p>
|
||
<p>For example, if we want to add documentation that describes the purpose of the
|
||
<code>my_crate</code> crate that contains the <code>add_one</code> function, we can add documentation
|
||
comments that start with <code>//!</code> to the beginning of the <em>src/lib.rs</em> file, as
|
||
shown in Listing 14-2:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">//! # My Crate
|
||
//!
|
||
//! `my_crate` is a collection of utilities to make performing certain
|
||
//! calculations more convenient.
|
||
|
||
/// Adds one to the number given.
|
||
// --snip--
|
||
</code></pre>
|
||
<p><span class="caption">Listing 14-2: Documentation for the <code>my_crate</code> crate as a
|
||
whole</span></p>
|
||
<p>Notice there isn’t any code after the last line that begins with <code>//!</code>. Because
|
||
we started the comments with <code>//!</code> instead of <code>///</code>, we’re documenting the item
|
||
that contains this comment rather than an item that follows this comment. In
|
||
this case, the item that contains this comment is the <em>src/lib.rs</em> file, which
|
||
is the crate root. These comments describe the entire crate.</p>
|
||
<p>When we run <code>cargo doc --open</code>, these comments will display on the front
|
||
page of the documentation for <code>my_crate</code> above the list of public items in the
|
||
crate, as shown in Figure 14-2:</p>
|
||
<img alt="Rendered HTML documentation with a comment for the crate as a whole" src="img/trpl14-02.png" class="center" />
|
||
<p><span class="caption">Figure 14-2: Rendered documentation for <code>my_crate</code>,
|
||
including the comment describing the crate as a whole</span></p>
|
||
<p>Documentation comments within items are useful for describing crates and
|
||
modules especially. Use them to explain the overall purpose of the container to
|
||
help your users understand the crate’s organization.</p>
|
||
<h3><a class="header" href="#exporting-a-convenient-public-api-with-pub-use" id="exporting-a-convenient-public-api-with-pub-use">Exporting a Convenient Public API with <code>pub use</code></a></h3>
|
||
<p>In Chapter 7, we covered how to organize our code into modules using the <code>mod</code>
|
||
keyword, how to make items public using the <code>pub</code> keyword, and how to bring
|
||
items into a scope with the <code>use</code> keyword. However, the structure that makes
|
||
sense to you while you’re developing a crate might not be very convenient for
|
||
your users. You might want to organize your structs in a hierarchy containing
|
||
multiple levels, but then people who want to use a type you’ve defined deep in
|
||
the hierarchy might have trouble finding out that type exists. They might also
|
||
be annoyed at having to enter <code>use</code>
|
||
<code>my_crate::some_module::another_module::UsefulType;</code> rather than <code>use</code>
|
||
<code>my_crate::UsefulType;</code>.</p>
|
||
<p>The structure of your public API is a major consideration when publishing a
|
||
crate. People who use your crate are less familiar with the structure than you
|
||
are and might have difficulty finding the pieces they want to use if your crate
|
||
has a large module hierarchy.</p>
|
||
<p>The good news is that if the structure <em>isn’t</em> convenient for others to use
|
||
from another library, you don’t have to rearrange your internal organization:
|
||
instead, you can re-export items to make a public structure that’s different
|
||
from your private structure by using <code>pub use</code>. Re-exporting takes a public
|
||
item in one location and makes it public in another location, as if it were
|
||
defined in the other location instead.</p>
|
||
<p>For example, say we made a library named <code>art</code> for modeling artistic concepts.
|
||
Within this library are two modules: a <code>kinds</code> module containing two enums
|
||
named <code>PrimaryColor</code> and <code>SecondaryColor</code> and a <code>utils</code> module containing a
|
||
function named <code>mix</code>, as shown in Listing 14-3:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">//! # Art
|
||
//!
|
||
//! A library for modeling artistic concepts.
|
||
|
||
pub mod kinds {
|
||
/// The primary colors according to the RYB color model.
|
||
pub enum PrimaryColor {
|
||
Red,
|
||
Yellow,
|
||
Blue,
|
||
}
|
||
|
||
/// The secondary colors according to the RYB color model.
|
||
pub enum SecondaryColor {
|
||
Orange,
|
||
Green,
|
||
Purple,
|
||
}
|
||
}
|
||
|
||
pub mod utils {
|
||
use crate::kinds::*;
|
||
|
||
/// Combines two primary colors in equal amounts to create
|
||
/// a secondary color.
|
||
pub fn mix(c1: PrimaryColor, c2: PrimaryColor) -> SecondaryColor {
|
||
// --snip--
|
||
<span class="boring"> SecondaryColor::Orange
|
||
</span> }
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 14-3: An <code>art</code> library with items organized into
|
||
<code>kinds</code> and <code>utils</code> modules</span></p>
|
||
<p>Figure 14-3 shows what the front page of the documentation for this crate
|
||
generated by <code>cargo doc</code> would look like:</p>
|
||
<img alt="Rendered documentation for the `art` crate that lists the `kinds` and `utils` modules" src="img/trpl14-03.png" class="center" />
|
||
<p><span class="caption">Figure 14-3: Front page of the documentation for <code>art</code>
|
||
that lists the <code>kinds</code> and <code>utils</code> modules</span></p>
|
||
<p>Note that the <code>PrimaryColor</code> and <code>SecondaryColor</code> types aren’t listed on the
|
||
front page, nor is the <code>mix</code> function. We have to click <code>kinds</code> and <code>utils</code> to
|
||
see them.</p>
|
||
<p>Another crate that depends on this library would need <code>use</code> statements that
|
||
bring the items from <code>art</code> into scope, specifying the module structure that’s
|
||
currently defined. Listing 14-4 shows an example of a crate that uses the
|
||
<code>PrimaryColor</code> and <code>mix</code> items from the <code>art</code> crate:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use art::kinds::PrimaryColor;
|
||
use art::utils::mix;
|
||
|
||
fn main() {
|
||
let red = PrimaryColor::Red;
|
||
let yellow = PrimaryColor::Yellow;
|
||
mix(red, yellow);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 14-4: A crate using the <code>art</code> crate’s items with
|
||
its internal structure exported</span></p>
|
||
<p>The author of the code in Listing 14-4, which uses the <code>art</code> crate, had to
|
||
figure out that <code>PrimaryColor</code> is in the <code>kinds</code> module and <code>mix</code> is in the
|
||
<code>utils</code> module. The module structure of the <code>art</code> crate is more relevant to
|
||
developers working on the <code>art</code> crate than to developers using the <code>art</code> crate.
|
||
The internal structure that organizes parts of the crate into the <code>kinds</code>
|
||
module and the <code>utils</code> module doesn’t contain any useful information for
|
||
someone trying to understand how to use the <code>art</code> crate. Instead, the <code>art</code>
|
||
crate’s module structure causes confusion because developers have to figure out
|
||
where to look, and the structure is inconvenient because developers must
|
||
specify the module names in the <code>use</code> statements.</p>
|
||
<p>To remove the internal organization from the public API, we can modify the
|
||
<code>art</code> crate code in Listing 14-3 to add <code>pub use</code> statements to re-export the
|
||
items at the top level, as shown in Listing 14-5:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">//! # Art
|
||
//!
|
||
//! A library for modeling artistic concepts.
|
||
|
||
pub use self::kinds::PrimaryColor;
|
||
pub use self::kinds::SecondaryColor;
|
||
pub use self::utils::mix;
|
||
|
||
pub mod kinds {
|
||
// --snip--
|
||
}
|
||
|
||
pub mod utils {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 14-5: Adding <code>pub use</code> statements to re-export
|
||
items</span></p>
|
||
<p>The API documentation that <code>cargo doc</code> generates for this crate will now list
|
||
and link re-exports on the front page, as shown in Figure 14-4, making the
|
||
<code>PrimaryColor</code> and <code>SecondaryColor</code> types and the <code>mix</code> function easier to find.</p>
|
||
<img alt="Rendered documentation for the `art` crate with the re-exports on the front page" src="img/trpl14-04.png" class="center" />
|
||
<p><span class="caption">Figure 14-4: The front page of the documentation for <code>art</code>
|
||
that lists the re-exports</span></p>
|
||
<p>Users of the <code>art</code> crate can still see and use the internal structure from Listing
|
||
14-3 as demonstrated in Listing 14-4, or they can use the more convenient
|
||
structure in Listing 14-5, as shown in Listing 14-6:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use art::PrimaryColor;
|
||
use art::mix;
|
||
|
||
fn main() {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 14-6: A program using the re-exported items from
|
||
the <code>art</code> crate</span></p>
|
||
<p>In cases where there are many nested modules, re-exporting the types at the top
|
||
level with <code>pub use</code> can make a significant difference in the experience of
|
||
people who use the crate.</p>
|
||
<p>Creating a useful public API structure is more of an art than a science, and
|
||
you can iterate to find the API that works best for your users. Choosing <code>pub use</code> gives you flexibility in how you structure your crate internally and
|
||
decouples that internal structure from what you present to your users. Look at
|
||
some of the code of crates you’ve installed to see if their internal structure
|
||
differs from their public API.</p>
|
||
<h3><a class="header" href="#setting-up-a-cratesio-account" id="setting-up-a-cratesio-account">Setting Up a Crates.io Account</a></h3>
|
||
<p>Before you can publish any crates, you need to create an account on
|
||
<a href="https://crates.io/">crates.io</a><!-- ignore --> and get an API token. To do so,
|
||
visit the home page at <a href="https://crates.io/">crates.io</a><!-- ignore --> and log in
|
||
via a GitHub account. (The GitHub account is currently a requirement, but the
|
||
site might support other ways of creating an account in the future.) Once
|
||
you’re logged in, visit your account settings at
|
||
<a href="https://crates.io/me/">https://crates.io/me/</a><!-- ignore --> and retrieve your
|
||
API token. Then run the <code>cargo login</code> command with your API token, like this:</p>
|
||
<pre><code class="language-text">$ cargo login abcdefghijklmnopqrstuvwxyz012345
|
||
</code></pre>
|
||
<p>This command will inform Cargo of your API token and store it locally in
|
||
<em>~/.cargo/credentials</em>. Note that this token is a <em>secret</em>: do not share it
|
||
with anyone else. If you do share it with anyone for any reason, you should
|
||
revoke it and generate a new token on <a href="https://crates.io/">crates.io</a><!-- ignore
|
||
-->.</p>
|
||
<h3><a class="header" href="#adding-metadata-to-a-new-crate" id="adding-metadata-to-a-new-crate">Adding Metadata to a New Crate</a></h3>
|
||
<p>Now that you have an account, let’s say you have a crate you want to publish.
|
||
Before publishing, you’ll need to add some metadata to your crate by adding it
|
||
to the <code>[package]</code> section of the crate’s <em>Cargo.toml</em> file.</p>
|
||
<p>Your crate will need a unique name. While you’re working on a crate locally,
|
||
you can name a crate whatever you’d like. However, crate names on
|
||
<a href="https://crates.io/">crates.io</a><!-- ignore --> are allocated on a first-come,
|
||
first-served basis. Once a crate name is taken, no one else can publish a crate
|
||
with that name. Before attempting to publish a crate, search for the name you
|
||
want to use on the site. If the name has been used by another crate, you will
|
||
need to find another name and edit the <code>name</code> field in the <em>Cargo.toml</em> file
|
||
under the <code>[package]</code> section to use the new name for publishing, like so:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[package]
|
||
name = "guessing_game"
|
||
</code></pre>
|
||
<p>Even if you’ve chosen a unique name, when you run <code>cargo publish</code> to publish
|
||
the crate at this point, you’ll get a warning and then an error:</p>
|
||
<pre><code class="language-text">$ cargo publish
|
||
Updating registry `https://github.com/rust-lang/crates.io-index`
|
||
warning: manifest has no description, license, license-file, documentation,
|
||
homepage or repository.
|
||
--snip--
|
||
error: api errors: missing or empty metadata fields: description, license.
|
||
</code></pre>
|
||
<p>The reason is that you’re missing some crucial information: a description and
|
||
license are required so people will know what your crate does and under what
|
||
terms they can use it. To rectify this error, you need to include this
|
||
information in the <em>Cargo.toml</em> file.</p>
|
||
<p>Add a description that is just a sentence or two, because it will appear with
|
||
your crate in search results. For the <code>license</code> field, you need to give a
|
||
<em>license identifier value</em>. The <a href="https://spdx.org/licenses/">Linux Foundation’s Software Package Data
|
||
Exchange (SPDX)</a> lists the identifiers you can use for this value. For
|
||
example, to specify that you’ve licensed your crate using the MIT License, add
|
||
the <code>MIT</code> identifier:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[package]
|
||
name = "guessing_game"
|
||
license = "MIT"
|
||
</code></pre>
|
||
<p>If you want to use a license that doesn’t appear in the SPDX list, you need to place
|
||
the text of that license in a file, include the file in your project, and then
|
||
use <code>license-file</code> to specify the name of that file instead of using the
|
||
<code>license</code> key.</p>
|
||
<p>Guidance on which license is appropriate for your project is beyond the scope
|
||
of this book. Many people in the Rust community license their projects in the
|
||
same way as Rust by using a dual license of <code>MIT OR Apache-2.0</code>. This practice
|
||
demonstrates that you can also specify multiple license identifiers separated
|
||
by <code>OR</code> to have multiple licenses for your project.</p>
|
||
<p>With a unique name, the version, the author details that <code>cargo new</code> added
|
||
when you created the crate, your description, and a license added, the
|
||
<em>Cargo.toml</em> file for a project that is ready to publish might look like this:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[package]
|
||
name = "guessing_game"
|
||
version = "0.1.0"
|
||
authors = ["Your Name &lt;you@example.com&gt;"]
|
||
edition = "2018"
|
||
description = "A fun game where you guess what number the computer has chosen."
|
||
license = "MIT OR Apache-2.0"
|
||
|
||
[dependencies]
|
||
</code></pre>
|
||
<p><a href="https://doc.rust-lang.org/cargo/">Cargo’s documentation</a> describes other
|
||
metadata you can specify to ensure others can discover and use your crate more
|
||
easily.</p>
|
||
<h3><a class="header" href="#publishing-to-cratesio" id="publishing-to-cratesio">Publishing to Crates.io</a></h3>
|
||
<p>Now that you’ve created an account, saved your API token, chosen a name for
|
||
your crate, and specified the required metadata, you’re ready to publish!
|
||
Publishing a crate uploads a specific version to
|
||
<a href="https://crates.io/">crates.io</a><!-- ignore --> for others to use.</p>
|
||
<p>Be careful when publishing a crate because a publish is <em>permanent</em>. The
|
||
version can never be overwritten, and the code cannot be deleted. One major
|
||
goal of <a href="https://crates.io/">crates.io</a><!-- ignore --> is to act as a permanent
|
||
archive of code so that builds of all projects that depend on crates from
|
||
<a href="https://crates.io/">crates.io</a><!-- ignore --> will continue to work. Allowing
|
||
version deletions would make fulfilling that goal impossible. However, there is
|
||
no limit to the number of crate versions you can publish.</p>
|
||
<p>Run the <code>cargo publish</code> command again. It should succeed now:</p>
|
||
<pre><code class="language-text">$ cargo publish
|
||
Updating registry `https://github.com/rust-lang/crates.io-index`
|
||
Packaging guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Verifying guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
Compiling guessing_game v0.1.0
|
||
(file:///projects/guessing_game/target/package/guessing_game-0.1.0)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.19 secs
|
||
Uploading guessing_game v0.1.0 (file:///projects/guessing_game)
|
||
</code></pre>
|
||
<p>Congratulations! You’ve now shared your code with the Rust community, and
|
||
anyone can easily add your crate as a dependency of their project.</p>
|
||
<h3><a class="header" href="#publishing-a-new-version-of-an-existing-crate" id="publishing-a-new-version-of-an-existing-crate">Publishing a New Version of an Existing Crate</a></h3>
|
||
<p>When you’ve made changes to your crate and are ready to release a new version,
|
||
you change the <code>version</code> value specified in your <em>Cargo.toml</em> file and
|
||
republish. Use the <a href="https://semver.org/">Semantic Versioning rules</a> to decide what an
|
||
appropriate next version number is based on the kinds of changes you’ve made.
|
||
Then run <code>cargo publish</code> to upload the new version.</p>
|
||
<h3><a class="header" href="#removing-versions-from-cratesio-with-cargo-yank" id="removing-versions-from-cratesio-with-cargo-yank">Removing Versions from Crates.io with <code>cargo yank</code></a></h3>
|
||
<p>Although you can’t remove previous versions of a crate, you can prevent any
|
||
future projects from adding them as a new dependency. This is useful when a
|
||
crate version is broken for one reason or another. In such situations, Cargo
|
||
supports <em>yanking</em> a crate version.</p>
|
||
<p>Yanking a version prevents new projects from starting to depend on that version
|
||
while allowing all existing projects that depend on it to continue to download
|
||
and depend on that version. Essentially, a yank means that all projects with a
|
||
<em>Cargo.lock</em> will not break, and any future <em>Cargo.lock</em> files generated will
|
||
not use the yanked version.</p>
|
||
<p>To yank a version of a crate, run <code>cargo yank</code> and specify which version you
|
||
want to yank:</p>
|
||
<pre><code class="language-text">$ cargo yank --vers 1.0.1
|
||
</code></pre>
|
||
<p>By adding <code>--undo</code> to the command, you can also undo a yank and allow projects
|
||
to start depending on a version again:</p>
|
||
<pre><code class="language-text">$ cargo yank --vers 1.0.1 --undo
|
||
</code></pre>
|
||
<p>A yank <em>does not</em> delete any code. For example, the yank feature is not
|
||
intended for deleting accidentally uploaded secrets. If that happens, you must
|
||
reset those secrets immediately.</p>
|
||
<h2><a class="header" href="#cargo-workspaces" id="cargo-workspaces">Cargo Workspaces</a></h2>
|
||
<p>In Chapter 12, we built a package that included a binary crate and a library
|
||
crate. As your project develops, you might find that the library crate
|
||
continues to get bigger and you want to split up your package further into
|
||
multiple library crates. In this situation, Cargo offers a feature called
|
||
<em>workspaces</em> that can help manage multiple related packages that are developed
|
||
in tandem.</p>
|
||
<h3><a class="header" href="#creating-a-workspace" id="creating-a-workspace">Creating a Workspace</a></h3>
|
||
<p>A <em>workspace</em> is a set of packages that share the same <em>Cargo.lock</em> and output
|
||
directory. Let’s make a project using a workspace—we’ll use trivial code so we
|
||
can concentrate on the structure of the workspace. There are multiple ways to
|
||
structure a workspace; we’re going to show one common way. We’ll have a
|
||
workspace containing a binary and two libraries. The binary, which will provide
|
||
the main functionality, will depend on the two libraries. One library will
|
||
provide an <code>add_one</code> function, and a second library an <code>add_two</code> function.
|
||
These three crates will be part of the same workspace. We’ll start by creating
|
||
a new directory for the workspace:</p>
|
||
<pre><code class="language-text">$ mkdir add
|
||
$ cd add
|
||
</code></pre>
|
||
<p>Next, in the <em>add</em> directory, we create the <em>Cargo.toml</em> file that will
|
||
configure the entire workspace. This file won’t have a <code>[package]</code> section or
|
||
the metadata we’ve seen in other <em>Cargo.toml</em> files. Instead, it will start
|
||
with a <code>[workspace]</code> section that will allow us to add members to the workspace
|
||
by specifying the path to our binary crate; in this case, that path is <em>adder</em>:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[workspace]
|
||
|
||
members = [
|
||
"adder",
|
||
]
|
||
</code></pre>
|
||
<p>Next, we’ll create the <code>adder</code> binary crate by running <code>cargo new</code> within the
|
||
<em>add</em> directory:</p>
|
||
<pre><code class="language-text">$ cargo new adder
|
||
Created binary (application) `adder` project
|
||
</code></pre>
|
||
<p>At this point, we can build the workspace by running <code>cargo build</code>. The files
|
||
in your <em>add</em> directory should look like this:</p>
|
||
<pre><code class="language-text">├── Cargo.lock
|
||
├── Cargo.toml
|
||
├── adder
|
||
│ ├── Cargo.toml
|
||
│ └── src
|
||
│ └── main.rs
|
||
└── target
|
||
</code></pre>
|
||
<p>The workspace has one <em>target</em> directory at the top level for the compiled
|
||
artifacts to be placed into; the <code>adder</code> crate doesn’t have its own <em>target</em>
|
||
directory. Even if we were to run <code>cargo build</code> from inside the <em>adder</em>
|
||
directory, the compiled artifacts would still end up in <em>add/target</em> rather
|
||
than <em>add/adder/target</em>. Cargo structures the <em>target</em> directory in a workspace
|
||
like this because the crates in a workspace are meant to depend on each other.
|
||
If each crate had its own <em>target</em> directory, each crate would have to
|
||
recompile each of the other crates in the workspace to have the artifacts in
|
||
its own <em>target</em> directory. By sharing one <em>target</em> directory, the crates can
|
||
avoid unnecessary rebuilding.</p>
|
||
<h3><a class="header" href="#creating-the-second-crate-in-the-workspace" id="creating-the-second-crate-in-the-workspace">Creating the Second Crate in the Workspace</a></h3>
|
||
<p>Next, let’s create another member crate in the workspace and call it <code>add-one</code>.
|
||
Change the top-level <em>Cargo.toml</em> to specify the <em>add-one</em> path in the
|
||
<code>members</code> list:</p>
|
||
<p><span class="filename">Filename: Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[workspace]
|
||
|
||
members = [
|
||
"adder",
|
||
"add-one",
|
||
]
|
||
</code></pre>
|
||
<p>Then generate a new library crate named <code>add-one</code>:</p>
|
||
<pre><code class="language-text">$ cargo new add-one --lib
|
||
Created library `add-one` project
|
||
</code></pre>
|
||
<p>Your <em>add</em> directory should now have these directories and files:</p>
|
||
<pre><code class="language-text">├── Cargo.lock
|
||
├── Cargo.toml
|
||
├── add-one
|
||
│ ├── Cargo.toml
|
||
│ └── src
|
||
│ └── lib.rs
|
||
├── adder
|
||
│ ├── Cargo.toml
|
||
│ └── src
|
||
│ └── main.rs
|
||
└── target
|
||
</code></pre>
|
||
<p>In the <em>add-one/src/lib.rs</em> file, let’s add an <code>add_one</code> function:</p>
|
||
<p><span class="filename">Filename: add-one/src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub fn add_one(x: i32) -> i32 {
|
||
x + 1
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Now that we have a library crate in the workspace, we can have the binary crate
|
||
<code>adder</code> depend on the library crate <code>add-one</code>. First, we’ll need to add a path
|
||
dependency on <code>add-one</code> to <em>adder/Cargo.toml</em>.</p>
|
||
<p><span class="filename">Filename: adder/Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[dependencies]
|
||
|
||
add-one = { path = "../add-one" }
|
||
</code></pre>
|
||
<p>Cargo doesn’t assume that crates in a workspace will depend on each other, so
|
||
we need to be explicit about the dependency relationships between the crates.</p>
|
||
<p>Next, let’s use the <code>add_one</code> function from the <code>add-one</code> crate in the <code>adder</code>
|
||
crate. Open the <em>adder/src/main.rs</em> file and add a <code>use</code> line at the top to
|
||
bring the new <code>add-one</code> library crate into scope. Then change the <code>main</code>
|
||
function to call the <code>add_one</code> function, as in Listing 14-7.</p>
|
||
<p><span class="filename">Filename: adder/src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use add_one;
|
||
|
||
fn main() {
|
||
let num = 10;
|
||
println!("Hello, world! {} plus one is {}!", num, add_one::add_one(num));
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 14-7: Using the <code>add-one</code> library crate from the
|
||
<code>adder</code> crate</span></p>
|
||
<p>Let’s build the workspace by running <code>cargo build</code> in the top-level <em>add</em>
|
||
directory!</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling add-one v0.1.0 (file:///projects/add/add-one)
|
||
Compiling adder v0.1.0 (file:///projects/add/adder)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.68 secs
|
||
</code></pre>
|
||
<p>To run the binary crate from the <em>add</em> directory, we need to specify which
|
||
package in the workspace we want to use by using the <code>-p</code> argument and the
|
||
package name with <code>cargo run</code>:</p>
|
||
<pre><code class="language-text">$ cargo run -p adder
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running `target/debug/adder`
|
||
Hello, world! 10 plus one is 11!
|
||
</code></pre>
|
||
<p>This runs the code in <em>adder/src/main.rs</em>, which depends on the <code>add-one</code> crate.</p>
|
||
<h4><a class="header" href="#depending-on-an-external-crate-in-a-workspace" id="depending-on-an-external-crate-in-a-workspace">Depending on an External Crate in a Workspace</a></h4>
|
||
<p>Notice that the workspace has only one <em>Cargo.lock</em> file at the top level of
|
||
the workspace rather than having a <em>Cargo.lock</em> in each crate’s directory. This
|
||
ensures that all crates are using the same version of all dependencies. If we
|
||
add the <code>rand</code> crate to the <em>adder/Cargo.toml</em> and <em>add-one/Cargo.toml</em>
|
||
files, Cargo will resolve both of those to one version of <code>rand</code> and record
|
||
that in the one <em>Cargo.lock</em>. Making all crates in the workspace use the same
|
||
dependencies means the crates in the workspace will always be compatible with
|
||
each other. Let’s add the <code>rand</code> crate to the <code>[dependencies]</code> section in the
|
||
<em>add-one/Cargo.toml</em> file to be able to use the <code>rand</code> crate in the <code>add-one</code>
|
||
crate:</p>
|
||
<!-- When updating the version of `rand` used, also update the version of
|
||
`rand` used in these files so they all match:
|
||
* ch02-00-guessing-game-tutorial.md
|
||
* ch07-04-bringing-paths-into-scope-with-the-use-keyword.md
|
||
-->
|
||
<p><span class="filename">Filename: add-one/Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[dependencies]
|
||
rand = "0.5.5"
|
||
</code></pre>
|
||
<p>We can now add <code>use rand;</code> to the <em>add-one/src/lib.rs</em> file, and building the
|
||
whole workspace by running <code>cargo build</code> in the <em>add</em> directory will bring in
|
||
and compile the <code>rand</code> crate:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Updating crates.io index
|
||
Downloaded rand v0.5.5
|
||
--snip--
|
||
Compiling rand v0.5.5
|
||
Compiling add-one v0.1.0 (file:///projects/add/add-one)
|
||
Compiling adder v0.1.0 (file:///projects/add/adder)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 10.18 secs
|
||
</code></pre>
|
||
<p>The top-level <em>Cargo.lock</em> now contains information about the dependency of
|
||
<code>add-one</code> on <code>rand</code>. However, even though <code>rand</code> is used somewhere in the
|
||
workspace, we can’t use it in other crates in the workspace unless we add
|
||
<code>rand</code> to their <em>Cargo.toml</em> files as well. For example, if we add <code>use rand;</code>
|
||
to the <em>adder/src/main.rs</em> file for the <code>adder</code> crate, we’ll get an error:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling adder v0.1.0 (file:///projects/add/adder)
|
||
error: use of unstable library feature 'rand': use `rand` from crates.io (see
|
||
issue #27703)
|
||
--> adder/src/main.rs:1:1
|
||
|
|
||
1 | use rand;
|
||
</code></pre>
|
||
<p>To fix this, edit the <em>Cargo.toml</em> file for the <code>adder</code> crate and indicate that
|
||
<code>rand</code> is a dependency for that crate as well. Building the <code>adder</code> crate will
|
||
add <code>rand</code> to the list of dependencies for <code>adder</code> in <em>Cargo.lock</em>, but no
|
||
additional copies of <code>rand</code> will be downloaded. Cargo has ensured that every
|
||
crate in the workspace using the <code>rand</code> crate will be using the same version.
|
||
Using the same version of <code>rand</code> across the workspace saves space because we
|
||
won’t have multiple copies and ensures that the crates in the workspace will be
|
||
compatible with each other.</p>
|
||
<h4><a class="header" href="#adding-a-test-to-a-workspace" id="adding-a-test-to-a-workspace">Adding a Test to a Workspace</a></h4>
|
||
<p>For another enhancement, let’s add a test of the <code>add_one::add_one</code> function
|
||
within the <code>add-one</code> crate:</p>
|
||
<p><span class="filename">Filename: add-one/src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub fn add_one(x: i32) -> i32 {
|
||
x + 1
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn it_works() {
|
||
assert_eq!(3, add_one(2));
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Now run <code>cargo test</code> in the top-level <em>add</em> directory:</p>
|
||
<pre><code class="language-text">$ cargo test
|
||
Compiling add-one v0.1.0 (file:///projects/add/add-one)
|
||
Compiling adder v0.1.0 (file:///projects/add/adder)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.27 secs
|
||
Running target/debug/deps/add_one-f0253159197f7841
|
||
|
||
running 1 test
|
||
test tests::it_works ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Running target/debug/deps/adder-f88af9d2cc175a5e
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Doc-tests add-one
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>The first section of the output shows that the <code>it_works</code> test in the <code>add-one</code>
|
||
crate passed. The next section shows that zero tests were found in the <code>adder</code>
|
||
crate, and then the last section shows zero documentation tests were found in
|
||
the <code>add-one</code> crate. Running <code>cargo test</code> in a workspace structured like this
|
||
one will run the tests for all the crates in the workspace.</p>
|
||
<p>We can also run tests for one particular crate in a workspace from the
|
||
top-level directory by using the <code>-p</code> flag and specifying the name of the crate
|
||
we want to test:</p>
|
||
<pre><code class="language-text">$ cargo test -p add-one
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
|
||
Running target/debug/deps/add_one-b3235fea9a156f74
|
||
|
||
running 1 test
|
||
test tests::it_works ... ok
|
||
|
||
test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
|
||
Doc-tests add-one
|
||
|
||
running 0 tests
|
||
|
||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
|
||
</code></pre>
|
||
<p>This output shows <code>cargo test</code> only ran the tests for the <code>add-one</code> crate and
|
||
didn’t run the <code>adder</code> crate tests.</p>
|
||
<p>If you publish the crates in the workspace to <a href="https://crates.io/">crates.io</a>,
|
||
each crate in the workspace will need to be published separately. The <code>cargo publish</code> command does not have an <code>--all</code> flag or a <code>-p</code> flag, so you must
|
||
change to each crate’s directory and run <code>cargo publish</code> on each crate in the
|
||
workspace to publish the crates.</p>
|
||
<p>For additional practice, add an <code>add-two</code> crate to this workspace in a similar
|
||
way as the <code>add-one</code> crate!</p>
|
||
<p>As your project grows, consider using a workspace: it’s easier to understand
|
||
smaller, individual components than one big blob of code. Furthermore, keeping
|
||
the crates in a workspace can make coordination between them easier if they are
|
||
often changed at the same time.</p>
|
||
<h2><a class="header" href="#installing-binaries-from-cratesio-with-cargo-install" id="installing-binaries-from-cratesio-with-cargo-install">Installing Binaries from Crates.io with <code>cargo install</code></a></h2>
|
||
<p>The <code>cargo install</code> command allows you to install and use binary crates
|
||
locally. This isn’t intended to replace system packages; it’s meant to be a
|
||
convenient way for Rust developers to install tools that others have shared on
|
||
<a href="https://crates.io/">crates.io</a><!-- ignore -->. Note that you can only install
|
||
packages that have binary targets. A <em>binary target</em> is the runnable program
|
||
that is created if the crate has a <em>src/main.rs</em> file or another file specified
|
||
as a binary, as opposed to a library target that isn’t runnable on its own but
|
||
is suitable for including within other programs. Usually, crates have
|
||
information in the <em>README</em> file about whether a crate is a library, has a
|
||
binary target, or both.</p>
|
||
<p>All binaries installed with <code>cargo install</code> are stored in the installation
|
||
root’s <em>bin</em> folder. If you installed Rust using <em>rustup.rs</em> and don’t have any
|
||
custom configurations, this directory will be <em>$HOME/.cargo/bin</em>. Ensure that
|
||
directory is in your <code>$PATH</code> to be able to run programs you’ve installed with
|
||
<code>cargo install</code>.</p>
|
||
<p>For example, in Chapter 12 we mentioned that there’s a Rust implementation of
|
||
the <code>grep</code> tool called <code>ripgrep</code> for searching files. If we want to install
|
||
<code>ripgrep</code>, we can run the following:</p>
|
||
<pre><code class="language-text">$ cargo install ripgrep
|
||
Updating registry `https://github.com/rust-lang/crates.io-index`
|
||
Downloading ripgrep v0.3.2
|
||
--snip--
|
||
Compiling ripgrep v0.3.2
|
||
Finished release [optimized + debuginfo] target(s) in 97.91 secs
|
||
Installing ~/.cargo/bin/rg
|
||
</code></pre>
|
||
<p>The last line of the output shows the location and the name of the installed
|
||
binary, which in the case of <code>ripgrep</code> is <code>rg</code>. As long as the installation
|
||
directory is in your <code>$PATH</code>, as mentioned previously, you can then run <code>rg --help</code> and start using a faster, rustier tool for searching files!</p>
|
||
<h2><a class="header" href="#extending-cargo-with-custom-commands" id="extending-cargo-with-custom-commands">Extending Cargo with Custom Commands</a></h2>
|
||
<p>Cargo is designed so you can extend it with new subcommands without having to
|
||
modify Cargo. If a binary in your <code>$PATH</code> is named <code>cargo-something</code>, you can
|
||
run it as if it were a Cargo subcommand by running <code>cargo something</code>. Custom
|
||
commands like this are also listed when you run <code>cargo --list</code>. Being able to
|
||
use <code>cargo install</code> to install extensions and then run them just like the
|
||
built-in Cargo tools is a super convenient benefit of Cargo’s design!</p>
|
||
<h2><a class="header" href="#summary-13" id="summary-13">Summary</a></h2>
|
||
<p>Sharing code with Cargo and <a href="https://crates.io/">crates.io</a><!-- ignore --> is
|
||
part of what makes the Rust ecosystem useful for many different tasks. Rust’s
|
||
standard library is small and stable, but crates are easy to share, use, and
|
||
improve on a timeline different from that of the language. Don’t be shy about
|
||
sharing code that’s useful to you on <a href="https://crates.io/">crates.io</a><!-- ignore
|
||
-->; it’s likely that it will be useful to someone else as well!</p>
|
||
<h1><a class="header" href="#smart-pointers" id="smart-pointers">Smart Pointers</a></h1>
|
||
<p>A <em>pointer</em> is a general concept for a variable that contains an address in
|
||
memory. This address refers to, or “points at,” some other data. The most
|
||
common kind of pointer in Rust is a reference, which you learned about in
|
||
Chapter 4. References are indicated by the <code>&</code> symbol and borrow the value they
|
||
point to. They don’t have any special capabilities other than referring to
|
||
data. Also, they don’t have any overhead and are the kind of pointer we use
|
||
most often.</p>
|
||
<p><em>Smart pointers</em>, on the other hand, are data structures that not only act like
|
||
a pointer but also have additional metadata and capabilities. The concept of
|
||
smart pointers isn’t unique to Rust: smart pointers originated in C++ and exist
|
||
in other languages as well. In Rust, the different smart pointers defined in
|
||
the standard library provide functionality beyond that provided by references.
|
||
One example that we’ll explore in this chapter is the <em>reference counting</em>
|
||
smart pointer type. This pointer enables you to have multiple owners of data by
|
||
keeping track of the number of owners and, when no owners remain, cleaning up
|
||
the data.</p>
|
||
<p>In Rust, which uses the concept of ownership and borrowing, an additional
|
||
difference between references and smart pointers is that references are
|
||
pointers that only borrow data; in contrast, in many cases, smart pointers
|
||
<em>own</em> the data they point to.</p>
|
||
<p>We’ve already encountered a few smart pointers in this book, such as <code>String</code>
|
||
and <code>Vec&lt;T&gt;</code> in Chapter 8, although we didn’t call them smart pointers at the
|
||
time. Both these types count as smart pointers because they own some memory and
|
||
allow you to manipulate it. They also have metadata (such as their capacity)
|
||
and extra capabilities or guarantees (such as with <code>String</code> ensuring its data
|
||
will always be valid UTF-8).</p>
|
||
<p>Smart pointers are usually implemented using structs. The characteristic that
|
||
distinguishes a smart pointer from an ordinary struct is that smart pointers
|
||
implement the <code>Deref</code> and <code>Drop</code> traits. The <code>Deref</code> trait allows an instance
|
||
of the smart pointer struct to behave like a reference so you can write code
|
||
that works with either references or smart pointers. The <code>Drop</code> trait allows
|
||
you to customize the code that is run when an instance of the smart pointer
|
||
goes out of scope. In this chapter, we’ll discuss both traits and demonstrate
|
||
why they’re important to smart pointers.</p>
|
||
<p>Given that the smart pointer pattern is a general design pattern used
|
||
frequently in Rust, this chapter won’t cover every existing smart pointer. Many
|
||
libraries have their own smart pointers, and you can even write your own. We’ll
|
||
cover the most common smart pointers in the standard library:</p>
|
||
<ul>
|
||
<li><code>Box&lt;T&gt;</code> for allocating values on the heap</li>
|
||
<li><code>Rc&lt;T&gt;</code>, a reference counting type that enables multiple ownership</li>
|
||
<li><code>Ref&lt;T&gt;</code> and <code>RefMut&lt;T&gt;</code>, accessed through <code>RefCell&lt;T&gt;</code>, a type that enforces
|
||
the borrowing rules at runtime instead of compile time</li>
|
||
</ul>
|
||
<p>In addition, we’ll cover the <em>interior mutability</em> pattern where an immutable
|
||
type exposes an API for mutating an interior value. We’ll also discuss
|
||
<em>reference cycles</em>: how they can leak memory and how to prevent them.</p>
|
||
<p>Let’s dive in!</p>
|
||
<h2><a class="header" href="#using-boxt-to-point-to-data-on-the-heap" id="using-boxt-to-point-to-data-on-the-heap">Using <code>Box&lt;T&gt;</code> to Point to Data on the Heap</a></h2>
|
||
<p>The most straightforward smart pointer is a <em>box</em>, whose type is written
|
||
<code>Box&lt;T&gt;</code>. Boxes allow you to store data on the heap rather than the stack. What
|
||
remains on the stack is the pointer to the heap data. Refer to Chapter 4 to
|
||
review the difference between the stack and the heap.</p>
|
||
<p>Boxes don’t have performance overhead, other than storing their data on the
|
||
heap instead of on the stack. But they don’t have many extra capabilities
|
||
either. You’ll use them most often in these situations:</p>
|
||
<ul>
|
||
<li>When you have a type whose size can’t be known at compile time and you want
|
||
to use a value of that type in a context that requires an exact size</li>
|
||
<li>When you have a large amount of data and you want to transfer ownership but
|
||
ensure the data won’t be copied when you do so</li>
|
||
<li>When you want to own a value and you care only that it’s a type that
|
||
implements a particular trait rather than being of a specific type</li>
|
||
</ul>
|
||
<p>We’ll demonstrate the first situation in the <a href="ch15-01-box.html#enabling-recursive-types-with-boxes">“Enabling Recursive Types with
|
||
Boxes”</a><!-- ignore --> section. In the
|
||
second case, transferring ownership of a large amount of data can take a long
|
||
time because the data is copied around on the stack. To improve performance in
|
||
this situation, we can store the large amount of data on the heap in a box.
|
||
Then, only the small amount of pointer data is copied around on the stack,
|
||
while the data it references stays in one place on the heap. The third case is
|
||
known as a <em>trait object</em>, and Chapter 17 devotes an entire section, <a href="ch17-02-trait-objects.html#using-trait-objects-that-allow-for-values-of-different-types">“Using
|
||
Trait Objects That Allow for Values of Different Types,”</a><!--
|
||
ignore --> just to that topic. So what you learn here you’ll apply again in
|
||
Chapter 17!</p>
|
||
<h3><a class="header" href="#using-a-boxt-to-store-data-on-the-heap" id="using-a-boxt-to-store-data-on-the-heap">Using a <code>Box&lt;T&gt;</code> to Store Data on the Heap</a></h3>
|
||
<p>Before we discuss the heap storage use case for <code>Box&lt;T&gt;</code>, we’ll cover the syntax and how to
|
||
interact with values stored within a <code>Box&lt;T&gt;</code>.</p>
|
||
<p>Listing 15-1 shows how to use a box to store an <code>i32</code> value on the heap:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let b = Box::new(5);
|
||
println!("b = {}", b);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-1: Storing an <code>i32</code> value on the heap using a
|
||
box</span></p>
|
||
<p>We define the variable <code>b</code> to have the value of a <code>Box</code> that points to the
|
||
value <code>5</code>, which is allocated on the heap. This program will print <code>b = 5</code>; in
|
||
this case, we can access the data in the box similar to how we would if this
|
||
data were on the stack. Just like any owned value, when a box goes out of
|
||
scope, as <code>b</code> does at the end of <code>main</code>, it will be deallocated. The
|
||
deallocation happens for the box (stored on the stack) and the data it points
|
||
to (stored on the heap).</p>
|
||
<p>Putting a single value on the heap isn’t very useful, so you won’t use boxes by
|
||
themselves in this way very often. Having values like a single <code>i32</code> on the
|
||
stack, where they’re stored by default, is more appropriate in the majority of
|
||
situations. Let’s look at a case where boxes allow us to define types that we
|
||
wouldn’t be allowed to if we didn’t have boxes.</p>
|
||
<h3><a class="header" href="#enabling-recursive-types-with-boxes" id="enabling-recursive-types-with-boxes">Enabling Recursive Types with Boxes</a></h3>
|
||
<p>At compile time, Rust needs to know how much space a type takes up. One type
|
||
whose size can’t be known at compile time is a <em>recursive type</em>, where a value
|
||
can have as part of itself another value of the same type. Because this nesting
|
||
of values could theoretically continue infinitely, Rust doesn’t know how much
|
||
space a value of a recursive type needs. However, boxes have a known size, so
|
||
by inserting a box in a recursive type definition, you can have recursive types.</p>
|
||
<p>Let’s explore the <em>cons list</em>, which is a data type common in functional
|
||
programming languages, as an example of a recursive type. The cons list type
|
||
we’ll define is straightforward except for the recursion; therefore, the
|
||
concepts in the example we’ll work with will be useful any time you get into
|
||
more complex situations involving recursive types.</p>
|
||
<h4><a class="header" href="#more-information-about-the-cons-list" id="more-information-about-the-cons-list">More Information About the Cons List</a></h4>
|
||
<p>A <em>cons list</em> is a data structure that comes from the Lisp programming language
|
||
and its dialects. In Lisp, the <code>cons</code> function (short for “construct function”)
|
||
constructs a new pair from its two arguments, which usually are a single value
|
||
and another pair. These pairs containing pairs form a list.</p>
|
||
<p>The cons function concept has made its way into more general functional
|
||
programming jargon: “to cons <em>x</em> onto <em>y</em>” informally means to construct a new
|
||
container instance by putting the element <em>x</em> at the start of this new
|
||
container, followed by the container <em>y</em>.</p>
|
||
<p>Each item in a cons list contains two elements: the value of the current item
|
||
and the next item. The last item in the list contains only a value called <code>Nil</code>
|
||
without a next item. A cons list is produced by recursively calling the <code>cons</code>
|
||
function. The canonical name to denote the base case of the recursion is <code>Nil</code>.
|
||
Note that this is not the same as the “null” or “nil” concept in Chapter 6,
|
||
which is an invalid or absent value.</p>
|
||
<p>Although functional programming languages use cons lists frequently, the cons
|
||
list isn’t a commonly used data structure in Rust. Most of the time when you
|
||
have a list of items in Rust, <code>Vec&lt;T&gt;</code> is a better choice to use. Other, more
|
||
complex recursive data types <em>are</em> useful in various situations, but by
|
||
starting with the cons list, we can explore how boxes let us define a recursive
|
||
data type without much distraction.</p>
|
||
<p>Listing 15-2 contains an enum definition for a cons list. Note that this code
|
||
won’t compile yet because the <code>List</code> type doesn’t have a known size, which
|
||
we’ll demonstrate.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">enum List {
|
||
Cons(i32, List),
|
||
Nil,
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-2: The first attempt at defining an enum to
|
||
represent a cons list data structure of <code>i32</code> values</span></p>
|
||
<blockquote>
|
||
<p>Note: We’re implementing a cons list that holds only <code>i32</code> values for the
|
||
purposes of this example. We could have implemented it using generics, as we
|
||
discussed in Chapter 10, to define a cons list type that could store values of
|
||
any type.</p>
|
||
</blockquote>
|
||
<p>Using the <code>List</code> type to store the list <code>1, 2, 3</code> would look like the code in
|
||
Listing 15-3:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use crate::List::{Cons, Nil};
|
||
|
||
fn main() {
|
||
let list = Cons(1, Cons(2, Cons(3, Nil)));
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-3: Using the <code>List</code> enum to store the list <code>1, 2, 3</code></span></p>
|
||
<p>The first <code>Cons</code> value holds <code>1</code> and another <code>List</code> value. This <code>List</code> value is
|
||
another <code>Cons</code> value that holds <code>2</code> and another <code>List</code> value. This <code>List</code> value
|
||
is one more <code>Cons</code> value that holds <code>3</code> and a <code>List</code> value, which is finally
|
||
<code>Nil</code>, the non-recursive variant that signals the end of the list.</p>
|
||
<p>If we try to compile the code in Listing 15-3, we get the error shown in
|
||
Listing 15-4:</p>
|
||
<pre><code class="language-text">error[E0072]: recursive type `List` has infinite size
|
||
--> src/main.rs:1:1
|
||
|
|
||
1 | enum List {
|
||
| ^^^^^^^^^ recursive type has infinite size
|
||
2 | Cons(i32, List),
|
||
| ----- recursive without indirection
|
||
|
|
||
= help: insert indirection (e.g., a `Box`, `Rc`, or `&`) at some point to
|
||
make `List` representable
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-4: The error we get when attempting to define
|
||
a recursive enum</span></p>
|
||
<p>The error shows this type “has infinite size.” The reason is that we’ve defined
|
||
<code>List</code> with a variant that is recursive: it holds another value of itself
|
||
directly. As a result, Rust can’t figure out how much space it needs to store a
|
||
<code>List</code> value. Let’s break down why we get this error. First, let’s look
|
||
at how Rust decides how much space it needs to store a value of a non-recursive
|
||
type.</p>
|
||
<h4><a class="header" href="#computing-the-size-of-a-non-recursive-type" id="computing-the-size-of-a-non-recursive-type">Computing the Size of a Non-Recursive Type</a></h4>
|
||
<p>Recall the <code>Message</code> enum we defined in Listing 6-2 when we discussed enum
|
||
definitions in Chapter 6:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Message {
|
||
Quit,
|
||
Move { x: i32, y: i32 },
|
||
Write(String),
|
||
ChangeColor(i32, i32, i32),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>To determine how much space to allocate for a <code>Message</code> value, Rust goes
|
||
through each of the variants to see which variant needs the most space. Rust
|
||
sees that <code>Message::Quit</code> doesn’t need any space, <code>Message::Move</code> needs enough
|
||
space to store two <code>i32</code> values, and so forth. Because only one variant will be
|
||
used, the most space a <code>Message</code> value will need is the space it would take to
|
||
store the largest of its variants.</p>
|
||
<p>Contrast this with what happens when Rust tries to determine how much space a
|
||
recursive type like the <code>List</code> enum in Listing 15-2 needs. The compiler starts
|
||
by looking at the <code>Cons</code> variant, which holds a value of type <code>i32</code> and a value
|
||
of type <code>List</code>. Therefore, <code>Cons</code> needs an amount of space equal to the size of
|
||
an <code>i32</code> plus the size of a <code>List</code>. To figure out how much memory the <code>List</code>
|
||
type needs, the compiler looks at the variants, starting with the <code>Cons</code>
|
||
variant. The <code>Cons</code> variant holds a value of type <code>i32</code> and a value of type
|
||
<code>List</code>, and this process continues infinitely, as shown in Figure 15-1.</p>
|
||
<img alt="An infinite Cons list" src="img/trpl15-01.svg" class="center" style="width: 50%;" />
|
||
<p><span class="caption">Figure 15-1: An infinite <code>List</code> consisting of infinite
|
||
<code>Cons</code> variants</span></p>
|
||
<h4><a class="header" href="#using-boxt-to-get-a-recursive-type-with-a-known-size" id="using-boxt-to-get-a-recursive-type-with-a-known-size">Using <code>Box&lt;T&gt;</code> to Get a Recursive Type with a Known Size</a></h4>
|
||
<p>Rust can’t figure out how much space to allocate for recursively defined types,
|
||
so the compiler gives the error in Listing 15-4. But the error does include
|
||
this helpful suggestion:</p>
|
||
<pre><code class="language-text"> = help: insert indirection (e.g., a `Box`, `Rc`, or `&`) at some point to
|
||
make `List` representable
|
||
</code></pre>
|
||
<p>In this suggestion, “indirection” means that instead of storing a value
|
||
directly, we’ll change the data structure to store the value indirectly by
|
||
storing a pointer to the value instead.</p>
|
||
<p>Because a <code>Box&lt;T&gt;</code> is a pointer, Rust always knows how much space a <code>Box&lt;T&gt;</code>
|
||
needs: a pointer’s size doesn’t change based on the amount of data it’s
|
||
pointing to. This means we can put a <code>Box&lt;T&gt;</code> inside the <code>Cons</code> variant instead
|
||
of another <code>List</code> value directly. The <code>Box&lt;T&gt;</code> will point to the next <code>List</code>
|
||
value that will be on the heap rather than inside the <code>Cons</code> variant.
|
||
Conceptually, we still have a list, created with lists “holding” other lists,
|
||
but this implementation is now more like placing the items next to one another
|
||
rather than inside one another.</p>
|
||
<p>We can change the definition of the <code>List</code> enum in Listing 15-2 and the usage
|
||
of the <code>List</code> in Listing 15-3 to the code in Listing 15-5, which will compile:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">enum List {
|
||
Cons(i32, Box&lt;List&gt;),
|
||
Nil,
|
||
}
|
||
|
||
use crate::List::{Cons, Nil};
|
||
|
||
fn main() {
|
||
let list = Cons(1,
|
||
Box::new(Cons(2,
|
||
Box::new(Cons(3,
|
||
Box::new(Nil))))));
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-5: Definition of <code>List</code> that uses <code>Box&lt;T&gt;</code> in
|
||
order to have a known size</span></p>
|
||
<p>The <code>Cons</code> variant will need the size of an <code>i32</code> plus the space to store the
|
||
box’s pointer data. The <code>Nil</code> variant stores no values, so it needs less space
|
||
than the <code>Cons</code> variant. We now know that any <code>List</code> value will take up the
|
||
size of an <code>i32</code> plus the size of a box’s pointer data. By using a box, we’ve
|
||
broken the infinite, recursive chain, so the compiler can figure out the size
|
||
it needs to store a <code>List</code> value. Figure 15-2 shows what the <code>Cons</code> variant
|
||
looks like now.</p>
|
||
<img alt="A finite Cons list" src="img/trpl15-02.svg" class="center" />
|
||
<p><span class="caption">Figure 15-2: A <code>List</code> that is not infinitely sized
|
||
because <code>Cons</code> holds a <code>Box</code></span></p>
|
||
<p>Boxes provide only the indirection and heap allocation; they don’t have any
|
||
other special capabilities, like those we’ll see with the other smart pointer
|
||
types. They also don’t have any performance overhead that these special
|
||
capabilities incur, so they can be useful in cases like the cons list where the
|
||
indirection is the only feature we need. We’ll look at more use cases for boxes
|
||
in Chapter 17, too.</p>
|
||
<p>The <code>Box&lt;T&gt;</code> type is a smart pointer because it implements the <code>Deref</code> trait,
|
||
which allows <code>Box&lt;T&gt;</code> values to be treated like references. When a <code>Box&lt;T&gt;</code>
|
||
value goes out of scope, the heap data that the box is pointing to is cleaned
|
||
up as well because of the <code>Drop</code> trait implementation. Let’s explore these two
|
||
traits in more detail. These two traits will be even more important to the
|
||
functionality provided by the other smart pointer types we’ll discuss in the
|
||
rest of this chapter.</p>
|
||
<h2><a class="header" href="#treating-smart-pointers-like-regular-references-with-the-deref-trait" id="treating-smart-pointers-like-regular-references-with-the-deref-trait">Treating Smart Pointers Like Regular References with the <code>Deref</code> Trait</a></h2>
|
||
<p>Implementing the <code>Deref</code> trait allows you to customize the behavior of the
|
||
<em>dereference operator</em>, <code>*</code> (as opposed to the multiplication or glob
|
||
operator). By implementing <code>Deref</code> in such a way that a smart pointer can be
|
||
treated like a regular reference, you can write code that operates on
|
||
references and use that code with smart pointers too.</p>
|
||
<p>Let’s first look at how the dereference operator works with regular references.
|
||
Then we’ll try to define a custom type that behaves like <code>Box&lt;T&gt;</code>, and see why
|
||
the dereference operator doesn’t work like a reference on our newly defined
|
||
type. We’ll explore how implementing the <code>Deref</code> trait makes it possible for
|
||
smart pointers to work in ways similar to references. Then we’ll look at
|
||
Rust’s <em>deref coercion</em> feature and how it lets us work with either references
|
||
or smart pointers.</p>
|
||
<blockquote>
|
||
<p>Note: There’s one big difference between the <code>MyBox&lt;T&gt;</code> type we’re about to
|
||
build and the real <code>Box&lt;T&gt;</code>: our version will not store its data on the heap.
|
||
We are focusing this example on <code>Deref</code>, so where the data is actually stored
|
||
is less important than the pointer-like behavior.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#following-the-pointer-to-the-value-with-the-dereference-operator" id="following-the-pointer-to-the-value-with-the-dereference-operator">Following the Pointer to the Value with the Dereference Operator</a></h3>
|
||
<p>A regular reference is a type of pointer, and one way to think of a pointer is
|
||
as an arrow to a value stored somewhere else. In Listing 15-6, we create a
|
||
reference to an <code>i32</code> value and then use the dereference operator to follow the
|
||
reference to the data:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 5;
|
||
let y = &x;
|
||
|
||
assert_eq!(5, x);
|
||
assert_eq!(5, *y);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-6: Using the dereference operator to follow a
|
||
reference to an <code>i32</code> value</span></p>
|
||
<p>The variable <code>x</code> holds an <code>i32</code> value, <code>5</code>. We set <code>y</code> equal to a reference to
|
||
<code>x</code>. We can assert that <code>x</code> is equal to <code>5</code>. However, if we want to make an
|
||
assertion about the value in <code>y</code>, we have to use <code>*y</code> to follow the reference
|
||
to the value it’s pointing to (hence <em>dereference</em>). Once we dereference <code>y</code>,
|
||
we have access to the integer value <code>y</code> is pointing to that we can compare with
|
||
<code>5</code>.</p>
|
||
<p>If we tried to write <code>assert_eq!(5, y);</code> instead, we would get this compilation
|
||
error:</p>
|
||
<pre><code class="language-text">error[E0277]: can't compare `{integer}` with `&{integer}`
|
||
--> src/main.rs:6:5
|
||
|
|
||
6 | assert_eq!(5, y);
|
||
| ^^^^^^^^^^^^^^^^^ no implementation for `{integer} == &{integer}`
|
||
|
|
||
= help: the trait `std::cmp::PartialEq<&{integer}>` is not implemented for
|
||
`{integer}`
|
||
</code></pre>
|
||
<p>Comparing a number and a reference to a number isn’t allowed because they’re
|
||
different types. We must use the dereference operator to follow the reference
|
||
to the value it’s pointing to.</p>
|
||
<h3><a class="header" href="#using-boxt-like-a-reference" id="using-boxt-like-a-reference">Using <code>Box<T></code> Like a Reference</a></h3>
|
||
<p>We can rewrite the code in Listing 15-6 to use a <code>Box<T></code> instead of a
|
||
reference; the dereference operator will work as shown in Listing 15-7:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 5;
|
||
let y = Box::new(x);
|
||
|
||
assert_eq!(5, x);
|
||
assert_eq!(5, *y);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-7: Using the dereference operator on a
|
||
<code>Box<i32></code></span></p>
|
||
<p>The only difference between Listing 15-7 and Listing 15-6 is that here we set
|
||
<code>y</code> to be an instance of a box pointing to a copied value of <code>x</code> rather than a
|
||
reference pointing to the value of <code>x</code>. In the last assertion, we can use the
|
||
dereference operator to follow the box’s pointer in the same way that we did
|
||
when <code>y</code> was a reference. Next, we’ll explore what is special about <code>Box<T></code>
|
||
that enables us to use the dereference operator by defining our own box type.</p>
|
||
<h3><a class="header" href="#defining-our-own-smart-pointer" id="defining-our-own-smart-pointer">Defining Our Own Smart Pointer</a></h3>
|
||
<p>Let’s build a smart pointer similar to the <code>Box<T></code> type provided by the
|
||
standard library to experience how smart pointers behave differently from
|
||
references by default. Then we’ll look at how to add the ability to use the
|
||
dereference operator.</p>
|
||
<p>The <code>Box<T></code> type is ultimately defined as a tuple struct with one element, so
|
||
Listing 15-8 defines a <code>MyBox<T></code> type in the same way. We’ll also define a
|
||
<code>new</code> function to match the <code>new</code> function defined on <code>Box<T></code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct MyBox<T>(T);
|
||
|
||
impl<T> MyBox<T> {
|
||
fn new(x: T) -> MyBox<T> {
|
||
MyBox(x)
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 15-8: Defining a <code>MyBox<T></code> type</span></p>
|
||
<p>We define a struct named <code>MyBox</code> and declare a generic parameter <code>T</code>, because
|
||
we want our type to hold values of any type. The <code>MyBox</code> type is a tuple struct
|
||
with one element of type <code>T</code>. The <code>MyBox::new</code> function takes one parameter of
|
||
type <code>T</code> and returns a <code>MyBox</code> instance that holds the value passed in.</p>
|
||
<p>Let’s try adding the <code>main</code> function in Listing 15-7 to Listing 15-8 and
|
||
changing it to use the <code>MyBox<T></code> type we’ve defined instead of <code>Box<T></code>. The
|
||
code in Listing 15-9 won’t compile because Rust doesn’t know how to dereference
|
||
<code>MyBox</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = 5;
|
||
let y = MyBox::new(x);
|
||
|
||
assert_eq!(5, x);
|
||
assert_eq!(5, *y);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-9: Attempting to use <code>MyBox<T></code> in the same
|
||
way we used references and <code>Box<T></code></span></p>
|
||
<p>Here’s the resulting compilation error:</p>
|
||
<pre><code class="language-text">error[E0614]: type `MyBox<{integer}>` cannot be dereferenced
|
||
--> src/main.rs:14:19
|
||
|
|
||
14 | assert_eq!(5, *y);
|
||
| ^^
|
||
</code></pre>
|
||
<p>Our <code>MyBox<T></code> type can’t be dereferenced because we haven’t implemented that
|
||
ability on our type. To enable dereferencing with the <code>*</code> operator, we
|
||
implement the <code>Deref</code> trait.</p>
|
||
<h3><a class="header" href="#treating-a-type-like-a-reference-by-implementing-the-deref-trait" id="treating-a-type-like-a-reference-by-implementing-the-deref-trait">Treating a Type Like a Reference by Implementing the <code>Deref</code> Trait</a></h3>
|
||
<p>As discussed in Chapter 10, to implement a trait, we need to provide
|
||
implementations for the trait’s required methods. The <code>Deref</code> trait, provided
|
||
by the standard library, requires us to implement one method named <code>deref</code> that
|
||
borrows <code>self</code> and returns a reference to the inner data. Listing 15-10
|
||
contains an implementation of <code>Deref</code> to add to the definition of <code>MyBox</code>:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::ops::Deref;
|
||
|
||
<span class="boring">struct MyBox<T>(T);
|
||
</span>impl<T> Deref for MyBox<T> {
|
||
type Target = T;
|
||
|
||
fn deref(&self) -> &T {
|
||
&self.0
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 15-10: Implementing <code>Deref</code> on <code>MyBox<T></code></span></p>
|
||
<p>The <code>type Target = T;</code> syntax defines an associated type for the <code>Deref</code> trait
|
||
to use. Associated types are a slightly different way of declaring a generic
|
||
parameter, but you don’t need to worry about them for now; we’ll cover them in
|
||
more detail in Chapter 19.</p>
|
||
<p>We fill in the body of the <code>deref</code> method with <code>&self.0</code> so <code>deref</code> returns a
|
||
reference to the value we want to access with the <code>*</code> operator. The <code>main</code>
|
||
function in Listing 15-9 that calls <code>*</code> on the <code>MyBox<T></code> value now compiles,
|
||
and the assertions pass!</p>
|
||
<p>Without the <code>Deref</code> trait, the compiler can only dereference <code>&</code> references.
|
||
The <code>deref</code> method gives the compiler the ability to take a value of any type
|
||
that implements <code>Deref</code> and call the <code>deref</code> method to get a <code>&</code> reference that
|
||
it knows how to dereference.</p>
|
||
<p>When we entered <code>*y</code> in Listing 15-9, behind the scenes Rust actually ran this
|
||
code:</p>
|
||
<pre><code class="language-rust ignore">*(y.deref())
|
||
</code></pre>
|
||
<p>Rust substitutes the <code>*</code> operator with a call to the <code>deref</code> method and then a
|
||
plain dereference so we don’t have to think about whether or not we need to
|
||
call the <code>deref</code> method. This Rust feature lets us write code that functions
|
||
identically whether we have a regular reference or a type that implements
|
||
<code>Deref</code>.</p>
|
||
<p>The reason the <code>deref</code> method returns a reference to a value, and that the plain
|
||
dereference outside the parentheses in <code>*(y.deref())</code> is still necessary, is the
|
||
ownership system. If the <code>deref</code> method returned the value directly instead of
|
||
a reference to the value, the value would be moved out of <code>self</code>. We don’t want
|
||
to take ownership of the inner value inside <code>MyBox<T></code> in this case or in most
|
||
cases where we use the dereference operator.</p>
|
||
<p>Note that the <code>*</code> operator is replaced with a call to the <code>deref</code> method and
|
||
then a call to the <code>*</code> operator just once, each time we use a <code>*</code> in our code.
|
||
Because the substitution of the <code>*</code> operator does not recurse infinitely, we
|
||
end up with data of type <code>i32</code>, which matches the <code>5</code> in <code>assert_eq!</code> in
|
||
Listing 15-9.</p>
|
||
<h3><a class="header" href="#implicit-deref-coercions-with-functions-and-methods" id="implicit-deref-coercions-with-functions-and-methods">Implicit Deref Coercions with Functions and Methods</a></h3>
|
||
<p><em>Deref coercion</em> is a convenience that Rust performs on arguments to functions
|
||
and methods. Deref coercion converts a reference to a type that implements
|
||
<code>Deref</code> into a reference to a type that <code>Deref</code> can convert the original type
|
||
into. Deref coercion happens automatically when we pass a reference to a
|
||
particular type’s value as an argument to a function or method that doesn’t
|
||
match the parameter type in the function or method definition. A sequence of
|
||
calls to the <code>deref</code> method converts the type we provided into the type the
|
||
parameter needs.</p>
|
||
<p>Deref coercion was added to Rust so that programmers writing function and
|
||
method calls don’t need to add as many explicit references and dereferences
|
||
with <code>&</code> and <code>*</code>. The deref coercion feature also lets us write more code that
|
||
can work for either references or smart pointers.</p>
|
||
<p>To see deref coercion in action, let’s use the <code>MyBox<T></code> type we defined in
|
||
Listing 15-8 as well as the implementation of <code>Deref</code> that we added in Listing
|
||
15-10. Listing 15-11 shows the definition of a function that has a string slice
|
||
parameter:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn hello(name: &str) {
|
||
println!("Hello, {}!", name);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 15-11: A <code>hello</code> function that has the parameter
|
||
<code>name</code> of type <code>&str</code></span></p>
|
||
<p>We can call the <code>hello</code> function with a string slice as an argument, such as
|
||
<code>hello("Rust");</code> for example. Deref coercion makes it possible to call <code>hello</code>
|
||
with a reference to a value of type <code>MyBox<String></code>, as shown in Listing 15-12:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use std::ops::Deref;
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct MyBox<T>(T);
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl<T> MyBox<T> {
|
||
</span><span class="boring"> fn new(x: T) -> MyBox<T> {
|
||
</span><span class="boring"> MyBox(x)
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl<T> Deref for MyBox<T> {
|
||
</span><span class="boring"> type Target = T;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> fn deref(&self) -> &T {
|
||
</span><span class="boring"> &self.0
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">fn hello(name: &str) {
|
||
</span><span class="boring"> println!("Hello, {}!", name);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let m = MyBox::new(String::from("Rust"));
|
||
hello(&m);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-12: Calling <code>hello</code> with a reference to a
|
||
<code>MyBox<String></code> value, which works because of deref coercion</span></p>
|
||
<p>Here we’re calling the <code>hello</code> function with the argument <code>&m</code>, which is a
|
||
reference to a <code>MyBox<String></code> value. Because we implemented the <code>Deref</code> trait
|
||
on <code>MyBox<T></code> in Listing 15-10, Rust can turn <code>&MyBox<String></code> into <code>&String</code>
|
||
by calling <code>deref</code>. The standard library provides an implementation of <code>Deref</code>
|
||
on <code>String</code> that returns a string slice, and this is in the API documentation
|
||
for <code>Deref</code>. Rust calls <code>deref</code> again to turn the <code>&String</code> into <code>&str</code>, which
|
||
matches the <code>hello</code> function’s definition.</p>
|
||
<p>If Rust didn’t implement deref coercion, we would have to write the code in
|
||
Listing 15-13 instead of the code in Listing 15-12 to call <code>hello</code> with a value
|
||
of type <code>&MyBox<String></code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use std::ops::Deref;
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct MyBox<T>(T);
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl<T> MyBox<T> {
|
||
</span><span class="boring"> fn new(x: T) -> MyBox<T> {
|
||
</span><span class="boring"> MyBox(x)
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl<T> Deref for MyBox<T> {
|
||
</span><span class="boring"> type Target = T;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> fn deref(&self) -> &T {
|
||
</span><span class="boring"> &self.0
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">fn hello(name: &str) {
|
||
</span><span class="boring"> println!("Hello, {}!", name);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let m = MyBox::new(String::from("Rust"));
|
||
hello(&(*m)[..]);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-13: The code we would have to write if Rust
|
||
didn’t have deref coercion</span></p>
|
||
<p>The <code>(*m)</code> dereferences the <code>MyBox<String></code> into a <code>String</code>. Then the <code>&</code> and
|
||
<code>[..]</code> take a string slice of the <code>String</code> that is equal to the whole string to
|
||
match the signature of <code>hello</code>. The code without deref coercions is harder to
|
||
read, write, and understand with all of these symbols involved. Deref coercion
|
||
allows Rust to handle these conversions for us automatically.</p>
|
||
<p>When the <code>Deref</code> trait is defined for the types involved, Rust will analyze the
|
||
types and use <code>Deref::deref</code> as many times as necessary to get a reference to
|
||
match the parameter’s type. The number of times that <code>Deref::deref</code> needs to be
|
||
inserted is resolved at compile time, so there is no runtime penalty for taking
|
||
advantage of deref coercion!</p>
|
||
<h3><a class="header" href="#how-deref-coercion-interacts-with-mutability" id="how-deref-coercion-interacts-with-mutability">How Deref Coercion Interacts with Mutability</a></h3>
|
||
<p>Similar to how you use the <code>Deref</code> trait to override the <code>*</code> operator on
|
||
immutable references, you can use the <code>DerefMut</code> trait to override the <code>*</code>
|
||
operator on mutable references.</p>
|
||
<p>Rust does deref coercion when it finds types and trait implementations in three
|
||
cases:</p>
|
||
<ul>
|
||
<li>From <code>&T</code> to <code>&U</code> when <code>T: Deref<Target=U></code></li>
|
||
<li>From <code>&mut T</code> to <code>&mut U</code> when <code>T: DerefMut<Target=U></code></li>
|
||
<li>From <code>&mut T</code> to <code>&U</code> when <code>T: Deref<Target=U></code></li>
|
||
</ul>
|
||
<p>The first two cases are the same except for mutability. The first case states
|
||
that if you have a <code>&T</code>, and <code>T</code> implements <code>Deref</code> to some type <code>U</code>, you can
|
||
get a <code>&U</code> transparently. The second case states that the same deref coercion
|
||
happens for mutable references.</p>
|
||
<p>The third case is trickier: Rust will also coerce a mutable reference to an
|
||
immutable one. But the reverse is <em>not</em> possible: immutable references will
|
||
never coerce to mutable references. Because of the borrowing rules, if you have
|
||
a mutable reference, that mutable reference must be the only reference to that
|
||
data (otherwise, the program wouldn’t compile). Converting one mutable
|
||
reference to one immutable reference will never break the borrowing rules.
|
||
Converting an immutable reference to a mutable reference would require that
|
||
the initial immutable reference is the only reference to that data, and the borrowing rules
|
||
don’t guarantee that. Therefore, Rust can’t make the assumption that converting
|
||
an immutable reference to a mutable reference is possible.</p>
|
||
<h2><a class="header" href="#running-code-on-cleanup-with-the-drop-trait" id="running-code-on-cleanup-with-the-drop-trait">Running Code on Cleanup with the <code>Drop</code> Trait</a></h2>
|
||
<p>The second trait important to the smart pointer pattern is <code>Drop</code>, which lets
|
||
you customize what happens when a value is about to go out of scope. You can
|
||
provide an implementation for the <code>Drop</code> trait on any type, and the code you
|
||
specify can be used to release resources like files or network connections.
|
||
We’re introducing <code>Drop</code> in the context of smart pointers because the
|
||
functionality of the <code>Drop</code> trait is almost always used when implementing a
|
||
smart pointer. For example, <code>Box<T></code> customizes <code>Drop</code> to deallocate the space
|
||
on the heap that the box points to.</p>
|
||
<p>In some languages, the programmer must call code to free memory or resources
|
||
every time they finish using an instance of a smart pointer. If they forget,
|
||
the system might become overloaded and crash. In Rust, you can specify that a
|
||
particular bit of code be run whenever a value goes out of scope, and the
|
||
compiler will insert this code automatically. As a result, you don’t need to be
|
||
careful about placing cleanup code everywhere in a program that an instance of
|
||
a particular type is finished with—you still won’t leak resources!</p>
|
||
<p>Specify the code to run when a value goes out of scope by implementing the
|
||
<code>Drop</code> trait. The <code>Drop</code> trait requires you to implement one method named
|
||
<code>drop</code> that takes a mutable reference to <code>self</code>. To see when Rust calls <code>drop</code>,
|
||
let’s implement <code>drop</code> with <code>println!</code> statements for now.</p>
|
||
<p>Listing 15-14 shows a <code>CustomSmartPointer</code> struct whose only custom
|
||
functionality is that it will print a message beginning with <code>Dropping CustomSmartPointer</code> when the
|
||
instance goes out of scope. This example demonstrates when Rust runs the <code>drop</code>
|
||
function.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct CustomSmartPointer {
|
||
data: String,
|
||
}
|
||
|
||
impl Drop for CustomSmartPointer {
|
||
fn drop(&mut self) {
|
||
println!("Dropping CustomSmartPointer with data `{}`!", self.data);
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
let c = CustomSmartPointer { data: String::from("my stuff") };
|
||
let d = CustomSmartPointer { data: String::from("other stuff") };
|
||
println!("CustomSmartPointers created.");
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-14: A <code>CustomSmartPointer</code> struct that
|
||
implements the <code>Drop</code> trait where we would put our cleanup code</span></p>
|
||
<p>The <code>Drop</code> trait is included in the prelude, so we don’t need to bring it into
|
||
scope. We implement the <code>Drop</code> trait on <code>CustomSmartPointer</code> and provide an
|
||
implementation for the <code>drop</code> method that calls <code>println!</code>. The body of the
|
||
<code>drop</code> function is where you would place any logic that you wanted to run when
|
||
an instance of your type goes out of scope. We’re printing some text here to
|
||
demonstrate when Rust will call <code>drop</code>.</p>
|
||
<p>In <code>main</code>, we create two instances of <code>CustomSmartPointer</code> and then print
|
||
<code>CustomSmartPointers created</code>. At the end of <code>main</code>, our instances of
|
||
<code>CustomSmartPointer</code> will go out of scope, and Rust will call the code we put
|
||
in the <code>drop</code> method, printing our final messages. Note that we didn’t need to
|
||
call the <code>drop</code> method explicitly.</p>
|
||
<p>When we run this program, we’ll see the following output:</p>
|
||
<pre><code class="language-text">CustomSmartPointers created.
|
||
Dropping CustomSmartPointer with data `other stuff`!
|
||
Dropping CustomSmartPointer with data `my stuff`!
|
||
</code></pre>
|
||
<p>Rust automatically called <code>drop</code> for us when our instances went out of scope,
|
||
calling the code we specified. Variables are dropped in the reverse order of
|
||
their creation, so <code>d</code> was dropped before <code>c</code>. This example gives you a visual
|
||
guide to how the <code>drop</code> method works; usually you would specify the cleanup
|
||
code that your type needs to run rather than a print message.</p>
|
||
<h3><a class="header" href="#dropping-a-value-early-with-stdmemdrop" id="dropping-a-value-early-with-stdmemdrop">Dropping a Value Early with <code>std::mem::drop</code></a></h3>
|
||
<p>Unfortunately, it’s not straightforward to disable the automatic <code>drop</code>
|
||
functionality. Disabling <code>drop</code> isn’t usually necessary; the whole point of the
|
||
<code>Drop</code> trait is that it’s taken care of automatically. Occasionally, however,
|
||
you might want to clean up a value early. One example is when using smart
|
||
pointers that manage locks: you might want to force the <code>drop</code> method that
|
||
releases the lock to run so that other code in the same scope can acquire the lock.
|
||
Rust doesn’t let you call the <code>Drop</code> trait’s <code>drop</code> method manually; instead
|
||
you have to call the <code>std::mem::drop</code> function provided by the standard library
|
||
if you want to force a value to be dropped before the end of its scope.</p>
|
||
<p>If we try to call the <code>Drop</code> trait’s <code>drop</code> method manually by modifying the
|
||
<code>main</code> function from Listing 15-14, as shown in Listing 15-15, we’ll get a
|
||
compiler error:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let c = CustomSmartPointer { data: String::from("some data") };
|
||
println!("CustomSmartPointer created.");
|
||
c.drop();
|
||
println!("CustomSmartPointer dropped before the end of main.");
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-15: Attempting to call the <code>drop</code> method from
|
||
the <code>Drop</code> trait manually to clean up early</span></p>
|
||
<p>When we try to compile this code, we’ll get this error:</p>
|
||
<pre><code class="language-text">error[E0040]: explicit use of destructor method
|
||
--> src/main.rs:14:7
|
||
|
|
||
14 | c.drop();
|
||
| ^^^^ explicit destructor calls not allowed
|
||
</code></pre>
|
||
<p>This error message states that we’re not allowed to explicitly call <code>drop</code>. The
|
||
error message uses the term <em>destructor</em>, which is the general programming term
|
||
for a function that cleans up an instance. A <em>destructor</em> is analogous to a
|
||
<em>constructor</em>, which creates an instance. The <code>drop</code> function in Rust is one
|
||
particular destructor.</p>
|
||
<p>Rust doesn’t let us call <code>drop</code> explicitly because Rust would still
|
||
automatically call <code>drop</code> on the value at the end of <code>main</code>. This would be a
|
||
<em>double free</em> error because Rust would be trying to clean up the same value
|
||
twice.</p>
|
||
<p>We can’t disable the automatic insertion of <code>drop</code> when a value goes out of
|
||
scope, and we can’t call the <code>drop</code> method explicitly. So, if we need to force
|
||
a value to be cleaned up early, we can use the <code>std::mem::drop</code> function.</p>
|
||
<p>The <code>std::mem::drop</code> function is different from the <code>drop</code> method in the <code>Drop</code>
|
||
trait. We call it by passing the value we want to force to be dropped early as
|
||
an argument. The function is in the prelude, so we can modify <code>main</code> in Listing
|
||
15-15 to call the <code>drop</code> function, as shown in Listing 15-16:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">struct CustomSmartPointer {
|
||
</span><span class="boring"> data: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Drop for CustomSmartPointer {
|
||
</span><span class="boring"> fn drop(&mut self) {
|
||
</span><span class="boring"> println!("Dropping CustomSmartPointer with data `{}`!", self.data);
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let c = CustomSmartPointer { data: String::from("some data") };
|
||
println!("CustomSmartPointer created.");
|
||
drop(c);
|
||
println!("CustomSmartPointer dropped before the end of main.");
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-16: Calling <code>std::mem::drop</code> to explicitly
|
||
drop a value before it goes out of scope</span></p>
|
||
<p>Running this code will print the following:</p>
|
||
<pre><code class="language-text">CustomSmartPointer created.
|
||
Dropping CustomSmartPointer with data `some data`!
|
||
CustomSmartPointer dropped before the end of main.
|
||
</code></pre>
|
||
<p>The text <code>Dropping CustomSmartPointer with data `some data`!</code> is printed
|
||
between the <code>CustomSmartPointer created.</code> and <code>CustomSmartPointer dropped before the end of main.</code> text, showing that the <code>drop</code> method code is called to
|
||
drop <code>c</code> at that point.</p>
|
||
<p>You can use code specified in a <code>Drop</code> trait implementation in many ways to
|
||
make cleanup convenient and safe: for instance, you could use it to create your
|
||
own memory allocator! With the <code>Drop</code> trait and Rust’s ownership system, you
|
||
don’t have to remember to clean up because Rust does it automatically.</p>
|
||
<p>You also don’t have to worry about problems resulting from accidentally
|
||
cleaning up values still in use: the ownership system that makes sure
|
||
references are always valid also ensures that <code>drop</code> gets called only once when
|
||
the value is no longer being used.</p>
|
||
<p>Now that we’ve examined <code>Box<T></code> and some of the characteristics of smart
|
||
pointers, let’s look at a few other smart pointers defined in the standard
|
||
library.</p>
|
||
<h2><a class="header" href="#rct-the-reference-counted-smart-pointer" id="rct-the-reference-counted-smart-pointer"><code>Rc<T></code>, the Reference Counted Smart Pointer</a></h2>
|
||
<p>In the majority of cases, ownership is clear: you know exactly which variable
|
||
owns a given value. However, there are cases when a single value might have
|
||
multiple owners. For example, in graph data structures, multiple edges might
|
||
point to the same node, and that node is conceptually owned by all of the edges
|
||
that point to it. A node shouldn’t be cleaned up unless it doesn’t have any
|
||
edges pointing to it.</p>
|
||
<p>To enable multiple ownership, Rust has a type called <code>Rc<T></code>, which is an
|
||
abbreviation for <em>reference counting</em>. The <code>Rc<T></code> type keeps track of the
|
||
number of references to a value, which determines whether or not the value is
|
||
still in use. If there are zero references to a value, the value can be cleaned
|
||
up without any references becoming invalid.</p>
|
||
<p>Imagine <code>Rc<T></code> as a TV in a family room. When one person enters to watch TV,
|
||
they turn it on. Others can come into the room and watch the TV. When the last
|
||
person leaves the room, they turn off the TV because it’s no longer being used.
|
||
If someone turned off the TV while others were still watching it, there would be
|
||
uproar from the remaining TV watchers!</p>
|
||
<p>We use the <code>Rc<T></code> type when we want to allocate some data on the heap for
|
||
multiple parts of our program to read and we can’t determine at compile time
|
||
which part will finish using the data last. If we knew which part would finish
|
||
last, we could just make that part the data’s owner, and the normal ownership
|
||
rules enforced at compile time would take effect.</p>
|
||
<p>Note that <code>Rc<T></code> is only for use in single-threaded scenarios. When we discuss
|
||
concurrency in Chapter 16, we’ll cover how to do reference counting in
|
||
multithreaded programs.</p>
|
||
<h3><a class="header" href="#using-rct-to-share-data" id="using-rct-to-share-data">Using <code>Rc<T></code> to Share Data</a></h3>
|
||
<p>Let’s return to our cons list example in Listing 15-5. Recall that we defined
|
||
it using <code>Box<T></code>. This time, we’ll create two lists that both share ownership
|
||
of a third list. Conceptually, this looks similar to Figure 15-3:</p>
|
||
<img alt="Two lists that share ownership of a third list" src="img/trpl15-03.svg" class="center" />
|
||
<p><span class="caption">Figure 15-3: Two lists, <code>b</code> and <code>c</code>, sharing ownership of
|
||
a third list, <code>a</code></span></p>
|
||
<p>We’ll create list <code>a</code> that contains 5 and then 10. Then we’ll make two more
|
||
lists: <code>b</code> that starts with 3 and <code>c</code> that starts with 4. Both <code>b</code> and <code>c</code>
|
||
lists will then continue on to the first <code>a</code> list containing 5 and 10. In other
|
||
words, both lists will share the first list containing 5 and 10.</p>
|
||
<p>Trying to implement this scenario using our definition of <code>List</code> with <code>Box<T></code>
|
||
won’t work, as shown in Listing 15-17:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">enum List {
|
||
Cons(i32, Box<List>),
|
||
Nil,
|
||
}
|
||
|
||
use crate::List::{Cons, Nil};
|
||
|
||
fn main() {
|
||
let a = Cons(5,
|
||
Box::new(Cons(10,
|
||
Box::new(Nil))));
|
||
let b = Cons(3, Box::new(a));
|
||
let c = Cons(4, Box::new(a));
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-17: Demonstrating we’re not allowed to have
|
||
two lists using <code>Box<T></code> that try to share ownership of a third list</span></p>
|
||
<p>When we compile this code, we get this error:</p>
|
||
<pre><code class="language-text">error[E0382]: use of moved value: `a`
|
||
--> src/main.rs:13:30
|
||
|
|
||
12 | let b = Cons(3, Box::new(a));
|
||
| - value moved here
|
||
13 | let c = Cons(4, Box::new(a));
|
||
| ^ value used here after move
|
||
|
|
||
= note: move occurs because `a` has type `List`, which does not implement
|
||
the `Copy` trait
|
||
</code></pre>
|
||
<p>The <code>Cons</code> variants own the data they hold, so when we create the <code>b</code> list, <code>a</code>
|
||
is moved into <code>b</code> and <code>b</code> owns <code>a</code>. Then, when we try to use <code>a</code> again when
|
||
creating <code>c</code>, we’re not allowed to because <code>a</code> has been moved.</p>
|
||
<p>We could change the definition of <code>Cons</code> to hold references instead, but then
|
||
we would have to specify lifetime parameters. By specifying lifetime
|
||
parameters, we would be specifying that every element in the list will live at
|
||
least as long as the entire list. The borrow checker wouldn’t let us compile
|
||
<code>let a = Cons(10, &Nil);</code> for example, because the temporary <code>Nil</code> value would
|
||
be dropped before <code>a</code> could take a reference to it.</p>
|
||
<p>Instead, we’ll change our definition of <code>List</code> to use <code>Rc<T></code> in place of
|
||
<code>Box<T></code>, as shown in Listing 15-18. Each <code>Cons</code> variant will now hold a value
|
||
and an <code>Rc<T></code> pointing to a <code>List</code>. When we create <code>b</code>, instead of taking
|
||
ownership of <code>a</code>, we’ll clone the <code>Rc<List></code> that <code>a</code> is holding, thereby
|
||
increasing the number of references from one to two and letting <code>a</code> and <code>b</code>
|
||
share ownership of the data in that <code>Rc<List></code>. We’ll also clone <code>a</code> when
|
||
creating <code>c</code>, increasing the number of references from two to three. Every time
|
||
we call <code>Rc::clone</code>, the reference count to the data within the <code>Rc<List></code> will
|
||
increase, and the data won’t be cleaned up unless there are zero references to
|
||
it.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">enum List {
|
||
Cons(i32, Rc<List>),
|
||
Nil,
|
||
}
|
||
|
||
use crate::List::{Cons, Nil};
|
||
use std::rc::Rc;
|
||
|
||
fn main() {
|
||
let a = Rc::new(Cons(5, Rc::new(Cons(10, Rc::new(Nil)))));
|
||
let b = Cons(3, Rc::clone(&a));
|
||
let c = Cons(4, Rc::clone(&a));
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-18: A definition of <code>List</code> that uses
|
||
<code>Rc<T></code></span></p>
|
||
<p>We need to add a <code>use</code> statement to bring <code>Rc<T></code> into scope because it’s not
|
||
in the prelude. In <code>main</code>, we create the list holding 5 and 10 and store it in
|
||
a new <code>Rc<List></code> in <code>a</code>. Then when we create <code>b</code> and <code>c</code>, we call the
|
||
<code>Rc::clone</code> function and pass a reference to the <code>Rc<List></code> in <code>a</code> as an
|
||
argument.</p>
|
||
<p>We could have called <code>a.clone()</code> rather than <code>Rc::clone(&a)</code>, but Rust’s
|
||
convention is to use <code>Rc::clone</code> in this case. The implementation of
|
||
<code>Rc::clone</code> doesn’t make a deep copy of all the data like most types’
|
||
implementations of <code>clone</code> do. The call to <code>Rc::clone</code> only increments the
|
||
reference count, which doesn’t take much time. Deep copies of data can take a
|
||
lot of time. By using <code>Rc::clone</code> for reference counting, we can visually
|
||
distinguish between the deep-copy kinds of clones and the kinds of clones that
|
||
increase the reference count. When looking for performance problems in the
|
||
code, we only need to consider the deep-copy clones and can disregard calls to
|
||
<code>Rc::clone</code>.</p>
|
||
<h3><a class="header" href="#cloning-an-rct-increases-the-reference-count" id="cloning-an-rct-increases-the-reference-count">Cloning an <code>Rc<T></code> Increases the Reference Count</a></h3>
|
||
<p>Let’s change our working example in Listing 15-18 so we can see the reference
|
||
counts changing as we create and drop references to the <code>Rc<List></code> in <code>a</code>.</p>
|
||
<p>In Listing 15-19, we’ll change <code>main</code> so it has an inner scope around list <code>c</code>;
|
||
then we can see how the reference count changes when <code>c</code> goes out of scope.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">enum List {
|
||
</span><span class="boring"> Cons(i32, Rc<List>),
|
||
</span><span class="boring"> Nil,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">use crate::List::{Cons, Nil};
|
||
</span><span class="boring">use std::rc::Rc;
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let a = Rc::new(Cons(5, Rc::new(Cons(10, Rc::new(Nil)))));
|
||
println!("count after creating a = {}", Rc::strong_count(&a));
|
||
let b = Cons(3, Rc::clone(&a));
|
||
println!("count after creating b = {}", Rc::strong_count(&a));
|
||
{
|
||
let c = Cons(4, Rc::clone(&a));
|
||
println!("count after creating c = {}", Rc::strong_count(&a));
|
||
}
|
||
println!("count after c goes out of scope = {}", Rc::strong_count(&a));
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-19: Printing the reference count</span></p>
|
||
<p>At each point in the program where the reference count changes, we print the
|
||
reference count, which we can get by calling the <code>Rc::strong_count</code> function.
|
||
This function is named <code>strong_count</code> rather than <code>count</code> because the <code>Rc<T></code>
|
||
type also has a <code>weak_count</code>; we’ll see what <code>weak_count</code> is used for in the
|
||
<a href="ch15-06-reference-cycles.html#preventing-reference-cycles-turning-an-rct-into-a-weakt">“Preventing Reference Cycles: Turning an <code>Rc<T></code> into a
|
||
<code>Weak<T></code>”</a><!-- ignore --> section.</p>
|
||
<p>This code prints the following:</p>
|
||
<pre><code class="language-text">count after creating a = 1
|
||
count after creating b = 2
|
||
count after creating c = 3
|
||
count after c goes out of scope = 2
|
||
</code></pre>
|
||
<p>We can see that the <code>Rc<List></code> in <code>a</code> has an initial reference count of 1; then
|
||
each time we call <code>clone</code>, the count goes up by 1. When <code>c</code> goes out of scope,
|
||
the count goes down by 1. We don’t have to call a function to decrease the
|
||
reference count like we have to call <code>Rc::clone</code> to increase the reference
|
||
count: the implementation of the <code>Drop</code> trait decreases the reference count
|
||
automatically when an <code>Rc<T></code> value goes out of scope.</p>
|
||
<p>What we can’t see in this example is that when <code>b</code> and then <code>a</code> go out of scope
|
||
at the end of <code>main</code>, the count is then 0, and the <code>Rc<List></code> is cleaned up
|
||
completely at that point. Using <code>Rc<T></code> allows a single value to have
|
||
multiple owners, and the count ensures that the value remains valid as long as
|
||
any of the owners still exist.</p>
|
||
<p>Via immutable references, <code>Rc<T></code> allows you to share data between multiple
|
||
parts of your program for reading only. If <code>Rc<T></code> allowed you to have multiple
|
||
mutable references too, you might violate one of the borrowing rules discussed
|
||
in Chapter 4: multiple mutable borrows to the same place can cause data races
|
||
and inconsistencies. But being able to mutate data is very useful! In the next
|
||
section, we’ll discuss the interior mutability pattern and the <code>RefCell<T></code>
|
||
type that you can use in conjunction with an <code>Rc<T></code> to work with this
|
||
immutability restriction.</p>
|
||
<h2><a class="header" href="#refcellt-and-the-interior-mutability-pattern" id="refcellt-and-the-interior-mutability-pattern"><code>RefCell<T></code> and the Interior Mutability Pattern</a></h2>
|
||
<p><em>Interior mutability</em> is a design pattern in Rust that allows you to mutate
|
||
data even when there are immutable references to that data; normally, this
|
||
action is disallowed by the borrowing rules. To mutate data, the pattern uses
|
||
<code>unsafe</code> code inside a data structure to bend Rust’s usual rules that govern
|
||
mutation and borrowing. We haven’t yet covered unsafe code; we will in Chapter
|
||
19. We can use types that use the interior mutability pattern when we can
|
||
ensure that the borrowing rules will be followed at runtime, even though the
|
||
compiler can’t guarantee that. The <code>unsafe</code> code involved is then wrapped in a
|
||
safe API, and the outer type is still immutable.</p>
|
||
<p>Let’s explore this concept by looking at the <code>RefCell<T></code> type that follows the
|
||
interior mutability pattern.</p>
|
||
<h3><a class="header" href="#enforcing-borrowing-rules-at-runtime-with-refcellt" id="enforcing-borrowing-rules-at-runtime-with-refcellt">Enforcing Borrowing Rules at Runtime with <code>RefCell<T></code></a></h3>
|
||
<p>Unlike <code>Rc<T></code>, the <code>RefCell<T></code> type represents single ownership over the data
|
||
it holds. So, what makes <code>RefCell<T></code> different from a type like <code>Box<T></code>?
|
||
Recall the borrowing rules you learned in Chapter 4:</p>
|
||
<ul>
|
||
<li>At any given time, you can have <em>either</em> one mutable reference
|
||
or any number of immutable references (but not both).</li>
|
||
<li>References must always be valid.</li>
|
||
</ul>
|
||
<p>With references and <code>Box<T></code>, the borrowing rules’ invariants are enforced at
|
||
compile time. With <code>RefCell<T></code>, these invariants are enforced <em>at runtime</em>.
|
||
With references, if you break these rules, you’ll get a compiler error. With
|
||
<code>RefCell<T></code>, if you break these rules, your program will panic and exit.</p>
|
||
<p>The advantages of checking the borrowing rules at compile time are that errors
|
||
will be caught sooner in the development process, and there is no impact on
|
||
runtime performance because all the analysis is completed beforehand. For those
|
||
reasons, checking the borrowing rules at compile time is the best choice in the
|
||
majority of cases, which is why this is Rust’s default.</p>
|
||
<p>The advantage of checking the borrowing rules at runtime instead is that
|
||
certain memory-safe scenarios are then allowed, whereas they are disallowed by
|
||
the compile-time checks. Static analysis, like that performed by the Rust compiler, is inherently
|
||
conservative. Some properties of code are impossible to detect by analyzing the
|
||
code: the most famous example is the Halting Problem, which is beyond the scope
|
||
of this book but is an interesting topic to research.</p>
|
||
<p>Because some analysis is impossible, if the Rust compiler can’t be sure the
|
||
code complies with the ownership rules, it might reject a correct program; in
|
||
this way, it’s conservative. If Rust accepted an incorrect program, users
|
||
wouldn’t be able to trust in the guarantees Rust makes. However, if Rust
|
||
rejects a correct program, the programmer will be inconvenienced, but nothing
|
||
catastrophic can occur. The <code>RefCell<T></code> type is useful when you’re sure your
|
||
code follows the borrowing rules but the compiler is unable to understand and
|
||
guarantee that.</p>
|
||
<p>Similar to <code>Rc<T></code>, <code>RefCell<T></code> is only for use in single-threaded scenarios
|
||
and will give you a compile-time error if you try using it in a multithreaded
|
||
context. We’ll talk about how to get the functionality of <code>RefCell<T></code> in a
|
||
multithreaded program in Chapter 16.</p>
|
||
<p>Here is a recap of the reasons to choose <code>Box<T></code>, <code>Rc<T></code>, or <code>RefCell<T></code>:</p>
|
||
<ul>
|
||
<li><code>Rc<T></code> enables multiple owners of the same data; <code>Box<T></code> and <code>RefCell<T></code>
|
||
have single owners.</li>
|
||
<li><code>Box<T></code> allows immutable or mutable borrows checked at compile time; <code>Rc<T></code>
|
||
allows only immutable borrows checked at compile time; <code>RefCell<T></code> allows
|
||
immutable or mutable borrows checked at runtime.</li>
|
||
<li>Because <code>RefCell<T></code> allows mutable borrows checked at runtime, you can
|
||
mutate the value inside the <code>RefCell<T></code> even when the <code>RefCell<T></code> is
|
||
immutable.</li>
|
||
</ul>
|
||
<p>Mutating the value inside an immutable value is the <em>interior mutability</em>
|
||
pattern. Let’s look at a situation in which interior mutability is useful and
|
||
examine how it’s possible.</p>
|
||
<h3><a class="header" href="#interior-mutability-a-mutable-borrow-to-an-immutable-value" id="interior-mutability-a-mutable-borrow-to-an-immutable-value">Interior Mutability: A Mutable Borrow to an Immutable Value</a></h3>
|
||
<p>A consequence of the borrowing rules is that when you have an immutable value,
|
||
you can’t borrow it mutably. For example, this code won’t compile:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let x = 5;
|
||
let y = &mut x;
|
||
}
|
||
</code></pre>
|
||
<p>If you tried to compile this code, you’d get the following error:</p>
|
||
<pre><code class="language-text">error[E0596]: cannot borrow immutable local variable `x` as mutable
|
||
--> src/main.rs:3:18
|
||
|
|
||
2 | let x = 5;
|
||
| - consider changing this to `mut x`
|
||
3 | let y = &mut x;
|
||
| ^ cannot borrow mutably
|
||
</code></pre>
|
||
<p>However, there are situations in which it would be useful for a value to mutate
|
||
itself in its methods but appear immutable to other code. Code outside the
|
||
value’s methods would not be able to mutate the value. Using <code>RefCell<T></code> is
|
||
one way to get the ability to have interior mutability. But <code>RefCell<T></code>
|
||
doesn’t get around the borrowing rules completely: the borrow checker in the
|
||
compiler allows this interior mutability, and the borrowing rules are checked
|
||
at runtime instead. If you violate the rules, you’ll get a <code>panic!</code> instead of
|
||
a compiler error.</p>
|
||
<p>Let’s work through a practical example where we can use <code>RefCell<T></code> to mutate
|
||
an immutable value and see why that is useful.</p>
|
||
<h4><a class="header" href="#a-use-case-for-interior-mutability-mock-objects" id="a-use-case-for-interior-mutability-mock-objects">A Use Case for Interior Mutability: Mock Objects</a></h4>
|
||
<p>A <em>test double</em> is the general programming concept for a type used in place of
|
||
another type during testing. <em>Mock objects</em> are specific types of test doubles
|
||
that record what happens during a test so you can assert that the correct
|
||
actions took place.</p>
|
||
<p>Rust doesn’t have objects in the same sense as other languages have objects,
|
||
and Rust doesn’t have mock object functionality built into the standard library
|
||
as some other languages do. However, you can definitely create a struct that
|
||
will serve the same purposes as a mock object.</p>
|
||
<p>Here’s the scenario we’ll test: we’ll create a library that tracks a value
|
||
against a maximum value and sends messages based on how close to the maximum
|
||
value the current value is. This library could be used to keep track of a
|
||
user’s quota for the number of API calls they’re allowed to make, for example.</p>
|
||
<p>Our library will only provide the functionality of tracking how close to the
|
||
maximum a value is and what the messages should be at what times. Applications
|
||
that use our library will be expected to provide the mechanism for sending the
|
||
messages: the application could display a message in its own interface, send an
|
||
email, send a text message, or something else. The library doesn’t need to know
|
||
that detail. All it needs is something that implements a trait we’ll provide
|
||
called <code>Messenger</code>. Listing 15-20 shows the library code:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Messenger {
|
||
fn send(&self, msg: &str);
|
||
}
|
||
|
||
pub struct LimitTracker<'a, T: Messenger> {
|
||
messenger: &'a T,
|
||
value: usize,
|
||
max: usize,
|
||
}
|
||
|
||
impl<'a, T> LimitTracker<'a, T>
|
||
where T: Messenger {
|
||
pub fn new(messenger: &T, max: usize) -> LimitTracker<T> {
|
||
LimitTracker {
|
||
messenger,
|
||
value: 0,
|
||
max,
|
||
}
|
||
}
|
||
|
||
pub fn set_value(&mut self, value: usize) {
|
||
self.value = value;
|
||
|
||
let percentage_of_max = self.value as f64 / self.max as f64;
|
||
|
||
if percentage_of_max >= 1.0 {
|
||
self.messenger.send("Error: You are over your quota!");
|
||
} else if percentage_of_max >= 0.9 {
|
||
self.messenger.send("Urgent warning: You've used up over 90% of your quota!");
|
||
} else if percentage_of_max >= 0.75 {
|
||
self.messenger.send("Warning: You've used up over 75% of your quota!");
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 15-20: A library to keep track of how close a
|
||
value is to a maximum value and warn when the value is at certain levels</span></p>
|
||
<p>One important part of this code is that the <code>Messenger</code> trait has one method
|
||
called <code>send</code> that takes an immutable reference to <code>self</code> and the text of the
|
||
message. This is the interface our mock object needs to have. The other
|
||
important part is that we want to test the behavior of the <code>set_value</code> method
|
||
on the <code>LimitTracker</code>. We can change what we pass in for the <code>value</code> parameter,
|
||
but <code>set_value</code> doesn’t return anything for us to make assertions on. We want
|
||
to be able to say that if we create a <code>LimitTracker</code> with something that
|
||
implements the <code>Messenger</code> trait and a particular value for <code>max</code>, when we pass
|
||
different numbers for <code>value</code>, the messenger is told to send the appropriate
|
||
messages.</p>
|
||
<p>We need a mock object that, instead of sending an email or text message when we
|
||
call <code>send</code>, will only keep track of the messages it’s told to send. We can
|
||
create a new instance of the mock object, create a <code>LimitTracker</code> that uses the
|
||
mock object, call the <code>set_value</code> method on <code>LimitTracker</code>, and then check that
|
||
the mock object has the messages we expect. Listing 15-21 shows an attempt to
|
||
implement a mock object to do just that, but the borrow checker won’t allow it:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
struct MockMessenger {
|
||
sent_messages: Vec<String>,
|
||
}
|
||
|
||
impl MockMessenger {
|
||
fn new() -> MockMessenger {
|
||
MockMessenger { sent_messages: vec![] }
|
||
}
|
||
}
|
||
|
||
impl Messenger for MockMessenger {
|
||
fn send(&self, message: &str) {
|
||
self.sent_messages.push(String::from(message));
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn it_sends_an_over_75_percent_warning_message() {
|
||
let mock_messenger = MockMessenger::new();
|
||
let mut limit_tracker = LimitTracker::new(&mock_messenger, 100);
|
||
|
||
limit_tracker.set_value(80);
|
||
|
||
assert_eq!(mock_messenger.sent_messages.len(), 1);
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-21: An attempt to implement a <code>MockMessenger</code>
|
||
that isn’t allowed by the borrow checker</span></p>
|
||
<p>This test code defines a <code>MockMessenger</code> struct that has a <code>sent_messages</code>
|
||
field with a <code>Vec</code> of <code>String</code> values to keep track of the messages it’s told
|
||
to send. We also define an associated function <code>new</code> to make it convenient to
|
||
create new <code>MockMessenger</code> values that start with an empty list of messages. We
|
||
then implement the <code>Messenger</code> trait for <code>MockMessenger</code> so we can give a
|
||
<code>MockMessenger</code> to a <code>LimitTracker</code>. In the definition of the <code>send</code> method, we
|
||
take the message passed in as a parameter and store it in the <code>MockMessenger</code>
|
||
list of <code>sent_messages</code>.</p>
|
||
<p>In the test, we’re testing what happens when the <code>LimitTracker</code> is told to set
|
||
<code>value</code> to something that is more than 75 percent of the <code>max</code> value. First, we
|
||
create a new <code>MockMessenger</code>, which will start with an empty list of messages.
|
||
Then we create a new <code>LimitTracker</code> and give it a reference to the new
|
||
<code>MockMessenger</code> and a <code>max</code> value of 100. We call the <code>set_value</code> method on the
|
||
<code>LimitTracker</code> with a value of 80, which is more than 75 percent of 100. Then
|
||
we assert that the list of messages that the <code>MockMessenger</code> is keeping track
|
||
of should now have one message in it.</p>
|
||
<p>However, there’s one problem with this test, as shown here:</p>
|
||
<pre><code class="language-text">error[E0596]: cannot borrow immutable field `self.sent_messages` as mutable
|
||
--> src/lib.rs:52:13
|
||
|
|
||
51 | fn send(&self, message: &str) {
|
||
| ----- use `&mut self` here to make mutable
|
||
52 | self.sent_messages.push(String::from(message));
|
||
| ^^^^^^^^^^^^^^^^^^ cannot mutably borrow immutable field
|
||
</code></pre>
|
||
<p>We can’t modify the <code>MockMessenger</code> to keep track of the messages, because the
|
||
<code>send</code> method takes an immutable reference to <code>self</code>. We also can’t take the
|
||
suggestion from the error text to use <code>&mut self</code> instead, because then the
|
||
signature of <code>send</code> wouldn’t match the signature in the <code>Messenger</code> trait
|
||
definition (feel free to try and see what error message you get).</p>
|
||
<p>This is a situation in which interior mutability can help! We’ll store the
|
||
<code>sent_messages</code> within a <code>RefCell&lt;T&gt;</code>, and then the <code>send</code> method will be
|
||
able to modify <code>sent_messages</code> to store the messages we’ve seen. Listing 15-22
|
||
shows what that looks like:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">pub trait Messenger {
|
||
</span><span class="boring"> fn send(&self, msg: &str);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">pub struct LimitTracker<'a, T: Messenger> {
|
||
</span><span class="boring"> messenger: &'a T,
|
||
</span><span class="boring"> value: usize,
|
||
</span><span class="boring"> max: usize,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl<'a, T> LimitTracker<'a, T>
|
||
</span><span class="boring"> where T: Messenger {
|
||
</span><span class="boring"> pub fn new(messenger: &T, max: usize) -> LimitTracker<T> {
|
||
</span><span class="boring"> LimitTracker {
|
||
</span><span class="boring"> messenger,
|
||
</span><span class="boring"> value: 0,
|
||
</span><span class="boring"> max,
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">
|
||
</span><span class="boring"> pub fn set_value(&mut self, value: usize) {
|
||
</span><span class="boring"> self.value = value;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> let percentage_of_max = self.value as f64 / self.max as f64;
|
||
</span><span class="boring">
|
||
</span><span class="boring"> if percentage_of_max >= 1.0 {
|
||
</span><span class="boring"> self.messenger.send("Error: You are over your quota!");
|
||
</span><span class="boring"> } else if percentage_of_max >= 0.9 {
|
||
</span><span class="boring"> self.messenger.send("Urgent warning: You've used up over 90% of your quota!");
|
||
</span><span class="boring"> } else if percentage_of_max >= 0.75 {
|
||
</span><span class="boring"> self.messenger.send("Warning: You've used up over 75% of your quota!");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use std::cell::RefCell;
|
||
|
||
struct MockMessenger {
|
||
sent_messages: RefCell<Vec<String>>,
|
||
}
|
||
|
||
impl MockMessenger {
|
||
fn new() -> MockMessenger {
|
||
MockMessenger { sent_messages: RefCell::new(vec![]) }
|
||
}
|
||
}
|
||
|
||
impl Messenger for MockMessenger {
|
||
fn send(&self, message: &str) {
|
||
self.sent_messages.borrow_mut().push(String::from(message));
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn it_sends_an_over_75_percent_warning_message() {
|
||
// --snip--
|
||
<span class="boring"> let mock_messenger = MockMessenger::new();
|
||
</span><span class="boring"> let mut limit_tracker = LimitTracker::new(&mock_messenger, 100);
|
||
</span><span class="boring"> limit_tracker.set_value(75);
|
||
</span>
|
||
assert_eq!(mock_messenger.sent_messages.borrow().len(), 1);
|
||
}
|
||
}
|
||
<span class="boring">fn main() {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 15-22: Using <code>RefCell<T></code> to mutate an inner
|
||
value while the outer value is considered immutable</span></p>
|
||
<p>The <code>sent_messages</code> field is now of type <code>RefCell<Vec<String>></code> instead of
|
||
<code>Vec<String></code>. In the <code>new</code> function, we create a new <code>RefCell<Vec<String>></code>
|
||
instance around the empty vector.</p>
|
||
<p>For the implementation of the <code>send</code> method, the first parameter is still an
|
||
immutable borrow of <code>self</code>, which matches the trait definition. We call
|
||
<code>borrow_mut</code> on the <code>RefCell<Vec<String>></code> in <code>self.sent_messages</code> to get a
|
||
mutable reference to the value inside the <code>RefCell<Vec<String>></code>, which is
|
||
the vector. Then we can call <code>push</code> on the mutable reference to the vector to
|
||
keep track of the messages sent during the test.</p>
|
||
<p>The last change we have to make is in the assertion: to see how many items are
|
||
in the inner vector, we call <code>borrow</code> on the <code>RefCell<Vec<String>></code> to get an
|
||
immutable reference to the vector.</p>
|
||
<p>Now that you’ve seen how to use <code>RefCell<T></code>, let’s dig into how it works!</p>
|
||
<h4><a class="header" href="#keeping-track-of-borrows-at-runtime-with-refcellt" id="keeping-track-of-borrows-at-runtime-with-refcellt">Keeping Track of Borrows at Runtime with <code>RefCell<T></code></a></h4>
|
||
<p>When creating immutable and mutable references, we use the <code>&</code> and <code>&mut</code>
|
||
syntax, respectively. With <code>RefCell<T></code>, we use the <code>borrow</code> and <code>borrow_mut</code>
|
||
methods, which are part of the safe API that belongs to <code>RefCell<T></code>. The
|
||
<code>borrow</code> method returns the smart pointer type <code>Ref<T></code>, and <code>borrow_mut</code>
|
||
returns the smart pointer type <code>RefMut<T></code>. Both types implement <code>Deref</code>, so we
|
||
can treat them like regular references.</p>
|
||
<p>The <code>RefCell<T></code> keeps track of how many <code>Ref<T></code> and <code>RefMut<T></code> smart
|
||
pointers are currently active. Every time we call <code>borrow</code>, the <code>RefCell<T></code>
|
||
increases its count of how many immutable borrows are active. When a <code>Ref<T></code>
|
||
value goes out of scope, the count of immutable borrows goes down by one. Just
|
||
like the compile-time borrowing rules, <code>RefCell<T></code> lets us have many immutable
|
||
borrows or one mutable borrow at any point in time.</p>
|
||
<p>If we try to violate these rules, rather than getting a compiler error as we
|
||
would with references, the implementation of <code>RefCell<T></code> will panic at
|
||
runtime. Listing 15-23 shows a modification of the implementation of <code>send</code> in
|
||
Listing 15-22. We’re deliberately trying to create two mutable borrows that are
|
||
active in the same scope to illustrate that <code>RefCell&lt;T&gt;</code> prevents us from doing this
|
||
at runtime.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore panics">impl Messenger for MockMessenger {
|
||
fn send(&self, message: &str) {
|
||
let mut one_borrow = self.sent_messages.borrow_mut();
|
||
let mut two_borrow = self.sent_messages.borrow_mut();
|
||
|
||
one_borrow.push(String::from(message));
|
||
two_borrow.push(String::from(message));
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 15-23: Creating two mutable references in the
|
||
same scope to see that <code>RefCell<T></code> will panic</span></p>
|
||
<p>We create a variable <code>one_borrow</code> for the <code>RefMut<T></code> smart pointer returned
|
||
from <code>borrow_mut</code>. Then we create another mutable borrow in the same way in the
|
||
variable <code>two_borrow</code>. This makes two mutable references in the same scope,
|
||
which isn’t allowed. When we run the tests for our library, the code in Listing
|
||
15-23 will compile without any errors, but the test will fail:</p>
|
||
<pre><code class="language-text">---- tests::it_sends_an_over_75_percent_warning_message stdout ----
|
||
thread 'tests::it_sends_an_over_75_percent_warning_message' panicked at
|
||
'already borrowed: BorrowMutError', src/libcore/result.rs:906:4
|
||
note: Run with `RUST_BACKTRACE=1` for a backtrace.
|
||
</code></pre>
|
||
<p>Notice that the code panicked with the message <code>already borrowed: BorrowMutError</code>. This is how <code>RefCell<T></code> handles violations of the borrowing
|
||
rules at runtime.</p>
|
||
<p>Catching borrowing errors at runtime rather than compile time means that you
|
||
would find a mistake in your code later in the development process and possibly
|
||
not until your code was deployed to production. Also, your code would incur a
|
||
small runtime performance penalty as a result of keeping track of the borrows
|
||
at runtime rather than compile time. However, using <code>RefCell<T></code> makes it
|
||
possible to write a mock object that can modify itself to keep track of the
|
||
messages it has seen while you’re using it in a context where only immutable
|
||
values are allowed. You can use <code>RefCell<T></code> despite its trade-offs to get more
|
||
functionality than regular references provide.</p>
|
||
<h3><a class="header" href="#having-multiple-owners-of-mutable-data-by-combining-rct-and-refcellt" id="having-multiple-owners-of-mutable-data-by-combining-rct-and-refcellt">Having Multiple Owners of Mutable Data by Combining <code>Rc<T></code> and <code>RefCell<T></code></a></h3>
|
||
<p>A common way to use <code>RefCell<T></code> is in combination with <code>Rc<T></code>. Recall that
|
||
<code>Rc<T></code> lets you have multiple owners of some data, but it only gives immutable
|
||
access to that data. If you have an <code>Rc<T></code> that holds a <code>RefCell<T></code>, you can
|
||
get a value that can have multiple owners <em>and</em> that you can mutate!</p>
|
||
<p>For example, recall the cons list example in Listing 15-18 where we used
|
||
<code>Rc<T></code> to allow multiple lists to share ownership of another list. Because
|
||
<code>Rc<T></code> holds only immutable values, we can’t change any of the values in the
|
||
list once we’ve created them. Let’s add in <code>RefCell<T></code> to gain the ability to
|
||
change the values in the lists. Listing 15-24 shows that by using a
|
||
<code>RefCell<T></code> in the <code>Cons</code> definition, we can modify the value stored in all
|
||
the lists:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">#[derive(Debug)]
|
||
enum List {
|
||
Cons(Rc<RefCell<i32>>, Rc<List>),
|
||
Nil,
|
||
}
|
||
|
||
use crate::List::{Cons, Nil};
|
||
use std::rc::Rc;
|
||
use std::cell::RefCell;
|
||
|
||
fn main() {
|
||
let value = Rc::new(RefCell::new(5));
|
||
|
||
let a = Rc::new(Cons(Rc::clone(&value), Rc::new(Nil)));
|
||
|
||
let b = Cons(Rc::new(RefCell::new(6)), Rc::clone(&a));
|
||
let c = Cons(Rc::new(RefCell::new(10)), Rc::clone(&a));
|
||
|
||
*value.borrow_mut() += 10;
|
||
|
||
println!("a after = {:?}", a);
|
||
println!("b after = {:?}", b);
|
||
println!("c after = {:?}", c);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-24: Using <code>Rc<RefCell<i32>></code> to create a
|
||
<code>List</code> that we can mutate</span></p>
|
||
<p>We create a value that is an instance of <code>Rc<RefCell<i32>></code> and store it in a
|
||
variable named <code>value</code> so we can access it directly later. Then we create a
|
||
<code>List</code> in <code>a</code> with a <code>Cons</code> variant that holds <code>value</code>. We need to clone
|
||
<code>value</code> so both <code>a</code> and <code>value</code> have ownership of the inner <code>5</code> value rather
|
||
than transferring ownership from <code>value</code> to <code>a</code> or having <code>a</code> borrow from
|
||
<code>value</code>.</p>
|
||
<p>We wrap the list <code>a</code> in an <code>Rc<T></code> so when we create lists <code>b</code> and <code>c</code>, they
|
||
can both refer to <code>a</code>, which is what we did in Listing 15-18.</p>
|
||
<p>After we’ve created the lists in <code>a</code>, <code>b</code>, and <code>c</code>, we add 10 to the value in
|
||
<code>value</code>. We do this by calling <code>borrow_mut</code> on <code>value</code>, which uses the
|
||
automatic dereferencing feature we discussed in Chapter 5 (see the section
|
||
<a href="ch05-03-method-syntax.html#wheres-the---operator">“Where’s the <code>-></code> Operator?”</a><!-- ignore -->) to
|
||
dereference the <code>Rc<T></code> to the inner <code>RefCell<T></code> value. The <code>borrow_mut</code>
|
||
method returns a <code>RefMut<T></code> smart pointer, and we use the dereference operator
|
||
on it and change the inner value.</p>
|
||
<p>When we print <code>a</code>, <code>b</code>, and <code>c</code>, we can see that they all have the modified
|
||
value of 15 rather than 5:</p>
|
||
<pre><code class="language-text">a after = Cons(RefCell { value: 15 }, Nil)
|
||
b after = Cons(RefCell { value: 6 }, Cons(RefCell { value: 15 }, Nil))
|
||
c after = Cons(RefCell { value: 10 }, Cons(RefCell { value: 15 }, Nil))
|
||
</code></pre>
|
||
<p>This technique is pretty neat! By using <code>RefCell<T></code>, we have an outwardly
|
||
immutable <code>List</code> value. But we can use the methods on <code>RefCell<T></code> that provide
|
||
access to its interior mutability so we can modify our data when we need to.
|
||
The runtime checks of the borrowing rules protect us from data races, and it’s
|
||
sometimes worth trading a bit of speed for this flexibility in our data
|
||
structures.</p>
|
||
<p>The standard library has other types that provide interior mutability, such as
|
||
<code>Cell<T></code>, which is similar except that instead of giving references to the
|
||
inner value, the value is copied in and out of the <code>Cell<T></code>. There’s also
|
||
<code>Mutex<T></code>, which offers interior mutability that’s safe to use across threads;
|
||
we’ll discuss its use in Chapter 16. Check out the standard library docs for
|
||
more details on the differences between these types.</p>
|
||
<h2><a class="header" href="#reference-cycles-can-leak-memory" id="reference-cycles-can-leak-memory">Reference Cycles Can Leak Memory</a></h2>
|
||
<p>Rust’s memory safety guarantees make it difficult, but not impossible, to
|
||
accidentally create memory that is never cleaned up (known as a <em>memory leak</em>).
|
||
Preventing memory leaks entirely is not one of Rust’s guarantees in the same
|
||
way that disallowing data races at compile time is, meaning memory leaks are
|
||
memory safe in Rust. We can see that Rust allows memory leaks by using <code>Rc<T></code>
|
||
and <code>RefCell<T></code>: it’s possible to create references where items refer to each
|
||
other in a cycle. This creates memory leaks because the reference count of each
|
||
item in the cycle will never reach 0, and the values will never be dropped.</p>
|
||
<h3><a class="header" href="#creating-a-reference-cycle" id="creating-a-reference-cycle">Creating a Reference Cycle</a></h3>
|
||
<p>Let’s look at how a reference cycle might happen and how to prevent it,
|
||
starting with the definition of the <code>List</code> enum and a <code>tail</code> method in Listing
|
||
15-25:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<!-- Hidden fn main is here to disable the automatic wrapping in fn main that
|
||
doc tests do; the `use List` fails if this listing is put within a main -->
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">fn main() {}
|
||
</span>use std::rc::Rc;
|
||
use std::cell::RefCell;
|
||
use crate::List::{Cons, Nil};
|
||
|
||
#[derive(Debug)]
|
||
enum List {
|
||
Cons(i32, RefCell<Rc<List>>),
|
||
Nil,
|
||
}
|
||
|
||
impl List {
|
||
fn tail(&self) -> Option<&RefCell<Rc<List>>> {
|
||
match self {
|
||
Cons(_, item) => Some(item),
|
||
Nil => None,
|
||
}
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-25: A cons list definition that holds a
|
||
<code>RefCell<T></code> so we can modify what a <code>Cons</code> variant is referring to</span></p>
|
||
<p>We’re using another variation of the <code>List</code> definition from Listing 15-5. The
|
||
second element in the <code>Cons</code> variant is now <code>RefCell<Rc<List>></code>, meaning that
|
||
instead of having the ability to modify the <code>i32</code> value as we did in Listing
|
||
15-24, we want to modify which <code>List</code> value a <code>Cons</code> variant is pointing to.
|
||
We’re also adding a <code>tail</code> method to make it convenient for us to access the
|
||
second item if we have a <code>Cons</code> variant.</p>
|
||
<p>In Listing 15-26, we’re adding a <code>main</code> function that uses the definitions in
|
||
Listing 15-25. This code creates a list in <code>a</code> and a list in <code>b</code> that points to
|
||
the list in <code>a</code>. Then it modifies the list in <code>a</code> to point to <code>b</code>, creating a
|
||
reference cycle. There are <code>println!</code> statements along the way to show what the
|
||
reference counts are at various points in this process.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use crate::List::{Cons, Nil};
|
||
</span><span class="boring">use std::rc::Rc;
|
||
</span><span class="boring">use std::cell::RefCell;
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">enum List {
|
||
</span><span class="boring"> Cons(i32, RefCell<Rc<List>>),
|
||
</span><span class="boring"> Nil,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl List {
|
||
</span><span class="boring"> fn tail(&self) -> Option<&RefCell<Rc<List>>> {
|
||
</span><span class="boring"> match self {
|
||
</span><span class="boring"> Cons(_, item) => Some(item),
|
||
</span><span class="boring"> Nil => None,
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let a = Rc::new(Cons(5, RefCell::new(Rc::new(Nil))));
|
||
|
||
println!("a initial rc count = {}", Rc::strong_count(&a));
|
||
println!("a next item = {:?}", a.tail());
|
||
|
||
let b = Rc::new(Cons(10, RefCell::new(Rc::clone(&a))));
|
||
|
||
println!("a rc count after b creation = {}", Rc::strong_count(&a));
|
||
println!("b initial rc count = {}", Rc::strong_count(&b));
|
||
println!("b next item = {:?}", b.tail());
|
||
|
||
if let Some(link) = a.tail() {
|
||
*link.borrow_mut() = Rc::clone(&b);
|
||
}
|
||
|
||
println!("b rc count after changing a = {}", Rc::strong_count(&b));
|
||
println!("a rc count after changing a = {}", Rc::strong_count(&a));
|
||
|
||
// Uncomment the next line to see that we have a cycle;
|
||
// it will overflow the stack
|
||
// println!("a next item = {:?}", a.tail());
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-26: Creating a reference cycle of two <code>List</code>
|
||
values pointing to each other</span></p>
|
||
<p>We create an <code>Rc<List></code> instance holding a <code>List</code> value in the variable <code>a</code>
|
||
with an initial list of <code>5, Nil</code>. We then create an <code>Rc<List></code> instance
|
||
holding another <code>List</code> value in the variable <code>b</code> that contains the value 10 and
|
||
points to the list in <code>a</code>.</p>
|
||
<p>We modify <code>a</code> so it points to <code>b</code> instead of <code>Nil</code>, creating a cycle. We
|
||
do that by using the <code>tail</code> method to get a reference to the
|
||
<code>RefCell<Rc<List>></code> in <code>a</code>, which we put in the variable <code>link</code>. Then we use
|
||
the <code>borrow_mut</code> method on the <code>RefCell<Rc<List>></code> to change the value inside
|
||
from an <code>Rc<List></code> that holds a <code>Nil</code> value to the <code>Rc<List></code> in <code>b</code>.</p>
|
||
<p>When we run this code, keeping the last <code>println!</code> commented out for the
|
||
moment, we’ll get this output:</p>
|
||
<pre><code class="language-text">a initial rc count = 1
|
||
a next item = Some(RefCell { value: Nil })
|
||
a rc count after b creation = 2
|
||
b initial rc count = 1
|
||
b next item = Some(RefCell { value: Cons(5, RefCell { value: Nil }) })
|
||
b rc count after changing a = 2
|
||
a rc count after changing a = 2
|
||
</code></pre>
|
||
<p>The reference counts of the <code>Rc<List></code> instances in both <code>a</code> and <code>b</code> are 2
|
||
after we change the list in <code>a</code> to point to <code>b</code>. At the end of <code>main</code>, Rust
|
||
will try to drop <code>b</code> first, which will decrease the count of the <code>Rc<List></code>
|
||
instance in <code>b</code> by 1.</p>
|
||
<p>However, because <code>a</code> is still referencing the <code>Rc<List></code> that was in <code>b</code>, that
|
||
<code>Rc<List></code> has a count of 1 rather than 0, so the memory the <code>Rc<List></code> has on
|
||
the heap won’t be dropped. The memory will just sit there with a count of 1,
|
||
forever. To visualize this reference cycle, we’ve created a diagram in Figure
|
||
15-4.</p>
|
||
<img alt="Reference cycle of lists" src="img/trpl15-04.svg" class="center" />
|
||
<p><span class="caption">Figure 15-4: A reference cycle of lists <code>a</code> and <code>b</code>
|
||
pointing to each other</span></p>
|
||
<p>If you uncomment the last <code>println!</code> and run the program, Rust will try to
|
||
print this cycle with <code>a</code> pointing to <code>b</code> pointing to <code>a</code> and so forth until it
|
||
overflows the stack.</p>
|
||
<p>In this case, right after we create the reference cycle, the program ends. The
|
||
consequences of this cycle aren’t very dire. However, if a more complex program
|
||
allocated lots of memory in a cycle and held onto it for a long time, the
|
||
program would use more memory than it needed and might overwhelm the system,
|
||
causing it to run out of available memory.</p>
|
||
<p>Creating reference cycles is not easily done, but it’s not impossible either.
|
||
If you have <code>RefCell<T></code> values that contain <code>Rc<T></code> values or similar nested
|
||
combinations of types with interior mutability and reference counting, you must
|
||
ensure that you don’t create cycles; you can’t rely on Rust to catch them.
|
||
Creating a reference cycle would be a logic bug in your program that you should
|
||
use automated tests, code reviews, and other software development practices to
|
||
minimize.</p>
|
||
<p>Another solution for avoiding reference cycles is reorganizing your data
|
||
structures so that some references express ownership and some references don’t.
|
||
As a result, you can have cycles made up of some ownership relationships and
|
||
some non-ownership relationships, and only the ownership relationships affect
|
||
whether or not a value can be dropped. In Listing 15-25, we always want <code>Cons</code>
|
||
variants to own their list, so reorganizing the data structure isn’t possible.
|
||
Let’s look at an example using graphs made up of parent nodes and child nodes
|
||
to see when non-ownership relationships are an appropriate way to prevent
|
||
reference cycles.</p>
|
||
<h3><a class="header" href="#preventing-reference-cycles-turning-an-rct-into-a-weakt" id="preventing-reference-cycles-turning-an-rct-into-a-weakt">Preventing Reference Cycles: Turning an <code>Rc<T></code> into a <code>Weak<T></code></a></h3>
|
||
<p>So far, we’ve demonstrated that calling <code>Rc::clone</code> increases the
|
||
<code>strong_count</code> of an <code>Rc<T></code> instance, and an <code>Rc<T></code> instance is only cleaned
|
||
up if its <code>strong_count</code> is 0. You can also create a <em>weak reference</em> to the
|
||
value within an <code>Rc<T></code> instance by calling <code>Rc::downgrade</code> and passing a
|
||
reference to the <code>Rc<T></code>. When you call <code>Rc::downgrade</code>, you get a smart
|
||
pointer of type <code>Weak<T></code>. Instead of increasing the <code>strong_count</code> in the
|
||
<code>Rc<T></code> instance by 1, calling <code>Rc::downgrade</code> increases the <code>weak_count</code> by 1.
|
||
The <code>Rc<T></code> type uses <code>weak_count</code> to keep track of how many <code>Weak<T></code>
|
||
references exist, similar to <code>strong_count</code>. The difference is the <code>weak_count</code>
|
||
doesn’t need to be 0 for the <code>Rc<T></code> instance to be cleaned up.</p>
|
||
<p>Strong references are how you can share ownership of an <code>Rc<T></code> instance. Weak
|
||
references don’t express an ownership relationship. They won’t cause a
|
||
reference cycle because any cycle involving some weak references will be broken
|
||
once the strong reference count of values involved is 0.</p>
|
||
<p>Because the value that <code>Weak<T></code> references might have been dropped, to do
|
||
anything with the value that a <code>Weak<T></code> is pointing to, you must make sure the
|
||
value still exists. Do this by calling the <code>upgrade</code> method on a <code>Weak<T></code>
|
||
instance, which will return an <code>Option<Rc<T>></code>. You’ll get a result of <code>Some</code>
|
||
if the <code>Rc<T></code> value has not been dropped yet and a result of <code>None</code> if the
|
||
<code>Rc<T></code> value has been dropped. Because <code>upgrade</code> returns an <code>Option<Rc<T>></code>, Rust
|
||
will ensure that the <code>Some</code> case and the <code>None</code> case are handled, and there
|
||
won’t be an invalid pointer.</p>
|
||
<p>As an example, rather than using a list whose items know only about the next
|
||
item, we’ll create a tree whose items know about their child items <em>and</em>
|
||
their parent items.</p>
|
||
<h4><a class="header" href="#creating-a-tree-data-structure-a-node-with-child-nodes" id="creating-a-tree-data-structure-a-node-with-child-nodes">Creating a Tree Data Structure: a <code>Node</code> with Child Nodes</a></h4>
|
||
<p>To start, we’ll build a tree with nodes that know about their child nodes.
|
||
We’ll create a struct named <code>Node</code> that holds its own <code>i32</code> value as well as
|
||
references to its child <code>Node</code> values:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::rc::Rc;
|
||
use std::cell::RefCell;
|
||
|
||
#[derive(Debug)]
|
||
struct Node {
|
||
value: i32,
|
||
children: RefCell<Vec<Rc<Node>>>,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We want a <code>Node</code> to own its children, and we want to share that ownership with
|
||
variables so we can access each <code>Node</code> in the tree directly. To do this, we
|
||
define the <code>Vec<T></code> items to be values of type <code>Rc<Node></code>. We also want to
|
||
modify which nodes are children of another node, so we have a <code>RefCell<T></code> in
|
||
<code>children</code> around the <code>Vec<Rc<Node>></code>.</p>
|
||
<p>Next, we’ll use our struct definition and create one <code>Node</code> instance named
|
||
<code>leaf</code> with the value 3 and no children, and another instance named <code>branch</code>
|
||
with the value 5 and <code>leaf</code> as one of its children, as shown in Listing 15-27:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use std::rc::Rc;
|
||
</span><span class="boring">use std::cell::RefCell;
|
||
</span><span class="boring">
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Node {
|
||
</span><span class="boring"> value: i32,
|
||
</span><span class="boring"> children: RefCell<Vec<Rc<Node>>>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let leaf = Rc::new(Node {
|
||
value: 3,
|
||
children: RefCell::new(vec![]),
|
||
});
|
||
|
||
let branch = Rc::new(Node {
|
||
value: 5,
|
||
children: RefCell::new(vec![Rc::clone(&leaf)]),
|
||
});
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-27: Creating a <code>leaf</code> node with no children
|
||
and a <code>branch</code> node with <code>leaf</code> as one of its children</span></p>
|
||
<p>We clone the <code>Rc<Node></code> in <code>leaf</code> and store that in <code>branch</code>, meaning the
|
||
<code>Node</code> in <code>leaf</code> now has two owners: <code>leaf</code> and <code>branch</code>. We can get from
|
||
<code>branch</code> to <code>leaf</code> through <code>branch.children</code>, but there’s no way to get from
|
||
<code>leaf</code> to <code>branch</code>. The reason is that <code>leaf</code> has no reference to <code>branch</code> and
|
||
doesn’t know they’re related. We want <code>leaf</code> to know that <code>branch</code> is its
|
||
parent. We’ll do that next.</p>
|
||
<h4><a class="header" href="#adding-a-reference-from-a-child-to-its-parent" id="adding-a-reference-from-a-child-to-its-parent">Adding a Reference from a Child to Its Parent</a></h4>
|
||
<p>To make the child node aware of its parent, we need to add a <code>parent</code> field to
|
||
our <code>Node</code> struct definition. The trouble is in deciding what the type of
|
||
<code>parent</code> should be. We know it can’t contain an <code>Rc<T></code>, because that would
|
||
create a reference cycle with <code>leaf.parent</code> pointing to <code>branch</code> and
|
||
<code>branch.children</code> pointing to <code>leaf</code>, which would cause their <code>strong_count</code>
|
||
values to never be 0.</p>
|
||
<p>Thinking about the relationships another way, a parent node should own its
|
||
children: if a parent node is dropped, its child nodes should be dropped as
|
||
well. However, a child should not own its parent: if we drop a child node, the
|
||
parent should still exist. This is a case for weak references!</p>
|
||
<p>So instead of <code>Rc<T></code>, we’ll make the type of <code>parent</code> use <code>Weak<T></code>,
|
||
specifically a <code>RefCell<Weak<Node>></code>. Now our <code>Node</code> struct definition looks
|
||
like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::rc::{Rc, Weak};
|
||
use std::cell::RefCell;
|
||
|
||
#[derive(Debug)]
|
||
struct Node {
|
||
value: i32,
|
||
parent: RefCell<Weak<Node>>,
|
||
children: RefCell<Vec<Rc<Node>>>,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>A node will be able to refer to its parent node but doesn’t own its parent.
|
||
In Listing 15-28, we update <code>main</code> to use this new definition so the <code>leaf</code>
|
||
node will have a way to refer to its parent, <code>branch</code>:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use std::rc::{Rc, Weak};
|
||
</span><span class="boring">use std::cell::RefCell;
|
||
</span><span class="boring">
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Node {
|
||
</span><span class="boring"> value: i32,
|
||
</span><span class="boring"> parent: RefCell<Weak<Node>>,
|
||
</span><span class="boring"> children: RefCell<Vec<Rc<Node>>>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let leaf = Rc::new(Node {
|
||
value: 3,
|
||
parent: RefCell::new(Weak::new()),
|
||
children: RefCell::new(vec![]),
|
||
});
|
||
|
||
println!("leaf parent = {:?}", leaf.parent.borrow().upgrade());
|
||
|
||
let branch = Rc::new(Node {
|
||
value: 5,
|
||
parent: RefCell::new(Weak::new()),
|
||
children: RefCell::new(vec![Rc::clone(&leaf)]),
|
||
});
|
||
|
||
*leaf.parent.borrow_mut() = Rc::downgrade(&branch);
|
||
|
||
println!("leaf parent = {:?}", leaf.parent.borrow().upgrade());
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-28: A <code>leaf</code> node with a weak reference to its
|
||
parent node <code>branch</code></span></p>
|
||
<p>Creating the <code>leaf</code> node looks similar to how we created it
|
||
in Listing 15-27 with the exception of the <code>parent</code> field: <code>leaf</code> starts out
|
||
without a parent, so we create a new, empty <code>Weak<Node></code> reference instance.</p>
|
||
<p>At this point, when we try to get a reference to the parent of <code>leaf</code> by using
|
||
the <code>upgrade</code> method, we get a <code>None</code> value. We see this in the output from the
|
||
first <code>println!</code> statement:</p>
|
||
<pre><code class="language-text">leaf parent = None
|
||
</code></pre>
|
||
<p>When we create the <code>branch</code> node, it will also have a new <code>Weak<Node></code>
|
||
reference in the <code>parent</code> field, because <code>branch</code> doesn’t have a parent node.
|
||
We still have <code>leaf</code> as one of the children of <code>branch</code>. Once we have the
|
||
<code>Node</code> instance in <code>branch</code>, we can modify <code>leaf</code> to give it a <code>Weak<Node></code>
|
||
reference to its parent. We use the <code>borrow_mut</code> method on the
|
||
<code>RefCell<Weak<Node>></code> in the <code>parent</code> field of <code>leaf</code>, and then we use the
|
||
<code>Rc::downgrade</code> function to create a <code>Weak<Node></code> reference to <code>branch</code> from
|
||
the <code>Rc<Node></code> in <code>branch</code>.</p>
|
||
<p>When we print the parent of <code>leaf</code> again, this time we’ll get a <code>Some</code> variant
|
||
holding <code>branch</code>: now <code>leaf</code> can access its parent! When we print <code>leaf</code>, we
|
||
also avoid the cycle that eventually ended in a stack overflow like we had in
|
||
Listing 15-26; the <code>Weak<Node></code> references are printed as <code>(Weak)</code>:</p>
|
||
<pre><code class="language-text">leaf parent = Some(Node { value: 5, parent: RefCell { value: (Weak) },
|
||
children: RefCell { value: [Node { value: 3, parent: RefCell { value: (Weak) },
|
||
children: RefCell { value: [] } }] } })
|
||
</code></pre>
|
||
<p>The lack of infinite output indicates that this code didn’t create a reference
|
||
cycle. We can also tell this by looking at the values we get from calling
|
||
<code>Rc::strong_count</code> and <code>Rc::weak_count</code>.</p>
|
||
<h4><a class="header" href="#visualizing-changes-to-strong_count-and-weak_count" id="visualizing-changes-to-strong_count-and-weak_count">Visualizing Changes to <code>strong_count</code> and <code>weak_count</code></a></h4>
|
||
<p>Let’s look at how the <code>strong_count</code> and <code>weak_count</code> values of the <code>Rc<Node></code>
|
||
instances change by creating a new inner scope and moving the creation of
|
||
<code>branch</code> into that scope. By doing so, we can see what happens when <code>branch</code> is
|
||
created and then dropped when it goes out of scope. The modifications are shown
|
||
in Listing 15-29:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use std::rc::{Rc, Weak};
|
||
</span><span class="boring">use std::cell::RefCell;
|
||
</span><span class="boring">
|
||
</span><span class="boring">#[derive(Debug)]
|
||
</span><span class="boring">struct Node {
|
||
</span><span class="boring"> value: i32,
|
||
</span><span class="boring"> parent: RefCell<Weak<Node>>,
|
||
</span><span class="boring"> children: RefCell<Vec<Rc<Node>>>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let leaf = Rc::new(Node {
|
||
value: 3,
|
||
parent: RefCell::new(Weak::new()),
|
||
children: RefCell::new(vec![]),
|
||
});
|
||
|
||
println!(
|
||
"leaf strong = {}, weak = {}",
|
||
Rc::strong_count(&leaf),
|
||
Rc::weak_count(&leaf),
|
||
);
|
||
|
||
{
|
||
let branch = Rc::new(Node {
|
||
value: 5,
|
||
parent: RefCell::new(Weak::new()),
|
||
children: RefCell::new(vec![Rc::clone(&leaf)]),
|
||
});
|
||
|
||
*leaf.parent.borrow_mut() = Rc::downgrade(&branch);
|
||
|
||
println!(
|
||
"branch strong = {}, weak = {}",
|
||
Rc::strong_count(&branch),
|
||
Rc::weak_count(&branch),
|
||
);
|
||
|
||
println!(
|
||
"leaf strong = {}, weak = {}",
|
||
Rc::strong_count(&leaf),
|
||
Rc::weak_count(&leaf),
|
||
);
|
||
}
|
||
|
||
println!("leaf parent = {:?}", leaf.parent.borrow().upgrade());
|
||
println!(
|
||
"leaf strong = {}, weak = {}",
|
||
Rc::strong_count(&leaf),
|
||
Rc::weak_count(&leaf),
|
||
);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 15-29: Creating <code>branch</code> in an inner scope and
|
||
examining strong and weak reference counts</span></p>
|
||
<p>After <code>leaf</code> is created, its <code>Rc<Node></code> has a strong count of 1 and a weak
|
||
count of 0. In the inner scope, we create <code>branch</code> and associate it with
|
||
<code>leaf</code>, at which point when we print the counts, the <code>Rc<Node></code> in <code>branch</code>
|
||
will have a strong count of 1 and a weak count of 1 (for <code>leaf.parent</code> pointing
|
||
to <code>branch</code> with a <code>Weak<Node></code>). When we print the counts in <code>leaf</code>, we’ll see
|
||
it will have a strong count of 2, because <code>branch</code> now has a clone of the
|
||
<code>Rc<Node></code> of <code>leaf</code> stored in <code>branch.children</code>, but will still have a weak
|
||
count of 0.</p>
|
||
<p>When the inner scope ends, <code>branch</code> goes out of scope and the strong count of
|
||
the <code>Rc<Node></code> decreases to 0, so its <code>Node</code> is dropped. The weak count of 1
|
||
from <code>leaf.parent</code> has no bearing on whether or not <code>Node</code> is dropped, so we
|
||
don’t get any memory leaks!</p>
|
||
<p>If we try to access the parent of <code>leaf</code> after the end of the scope, we’ll get
|
||
<code>None</code> again. At the end of the program, the <code>Rc&lt;Node&gt;</code> in <code>leaf</code> has a strong
|
||
count of 1 and a weak count of 0, because the variable <code>leaf</code> is now the only
|
||
reference to the <code>Rc&lt;Node&gt;</code> again.</p>
|
||
<p>All of the logic that manages the counts and value dropping is built into
|
||
<code>Rc&lt;T&gt;</code> and <code>Weak&lt;T&gt;</code> and their implementations of the <code>Drop</code> trait. By
|
||
specifying that the relationship from a child to its parent should be a
|
||
<code>Weak&lt;T&gt;</code> reference in the definition of <code>Node</code>, you’re able to have parent
|
||
nodes point to child nodes and vice versa without creating a reference cycle
|
||
and memory leaks.</p>
|
||
<h2><a class="header" href="#summary-14" id="summary-14">Summary</a></h2>
|
||
<p>This chapter covered how to use smart pointers to make different guarantees and
|
||
trade-offs from those Rust makes by default with regular references. The
|
||
<code>Box&lt;T&gt;</code> type has a known size and points to data allocated on the heap. The
|
||
<code>Rc&lt;T&gt;</code> type keeps track of the number of references to data on the heap so
|
||
that data can have multiple owners. The <code>RefCell&lt;T&gt;</code> type with its interior
|
||
mutability gives us a type that we can use when we need an immutable type but
|
||
need to change an inner value of that type; it also enforces the borrowing
|
||
rules at runtime instead of at compile time.</p>
|
||
<p>Also discussed were the <code>Deref</code> and <code>Drop</code> traits, which enable a lot of the
|
||
functionality of smart pointers. We explored reference cycles that can cause
|
||
memory leaks and how to prevent them using <code>Weak&lt;T&gt;</code>.</p>
|
||
<p>If this chapter has piqued your interest and you want to implement your own
|
||
smart pointers, check out <a href="https://doc.rust-lang.org/stable/nomicon/">“The Rustonomicon”</a> for more useful
|
||
information.</p>
|
||
<p>Next, we’ll talk about concurrency in Rust. You’ll even learn about a few new
|
||
smart pointers.</p>
|
||
<h1><a class="header" href="#fearless-concurrency" id="fearless-concurrency">Fearless Concurrency</a></h1>
|
||
<p>Handling concurrent programming safely and efficiently is another of Rust’s
|
||
major goals. <em>Concurrent programming</em>, where different parts of a program
|
||
execute independently, and <em>parallel programming</em>, where different parts of a
|
||
program execute at the same time, are becoming increasingly important as more
|
||
computers take advantage of their multiple processors. Historically,
|
||
programming in these contexts has been difficult and error prone: Rust hopes to
|
||
change that.</p>
|
||
<p>Initially, the Rust team thought that ensuring memory safety and preventing
|
||
concurrency problems were two separate challenges to be solved with different
|
||
methods. Over time, the team discovered that the ownership and type systems are
|
||
a powerful set of tools to help manage memory safety <em>and</em> concurrency
|
||
problems! By leveraging ownership and type checking, many concurrency errors
|
||
are compile-time errors in Rust rather than runtime errors. Therefore, rather
|
||
than making you spend lots of time trying to reproduce the exact circumstances
|
||
under which a runtime concurrency bug occurs, incorrect code will refuse to
|
||
compile and present an error explaining the problem. As a result, you can fix
|
||
your code while you’re working on it rather than potentially after it has been
|
||
shipped to production. We’ve nicknamed this aspect of Rust <em>fearless</em>
|
||
<em>concurrency</em>. Fearless concurrency allows you to write code that is free of
|
||
subtle bugs and is easy to refactor without introducing new bugs.</p>
|
||
<blockquote>
|
||
<p>Note: For simplicity’s sake, we’ll refer to many of the problems as
|
||
<em>concurrent</em> rather than being more precise by saying <em>concurrent and/or
|
||
parallel</em>. If this book were about concurrency and/or parallelism, we’d be
|
||
more specific. For this chapter, please mentally substitute <em>concurrent
|
||
and/or parallel</em> whenever we use <em>concurrent</em>.</p>
|
||
</blockquote>
|
||
<p>Many languages are dogmatic about the solutions they offer for handling
|
||
concurrent problems. For example, Erlang has elegant functionality for
|
||
message-passing concurrency but has only obscure ways to share state between
|
||
threads. Supporting only a subset of possible solutions is a reasonable
|
||
strategy for higher-level languages, because a higher-level language promises
|
||
benefits from giving up some control to gain abstractions. However, lower-level
|
||
languages are expected to provide the solution with the best performance in any
|
||
given situation and have fewer abstractions over the hardware. Therefore, Rust
|
||
offers a variety of tools for modeling problems in whatever way is appropriate
|
||
for your situation and requirements.</p>
|
||
<p>Here are the topics we’ll cover in this chapter:</p>
|
||
<ul>
|
||
<li>How to create threads to run multiple pieces of code at the same time</li>
|
||
<li><em>Message-passing</em> concurrency, where channels send messages between threads</li>
|
||
<li><em>Shared-state</em> concurrency, where multiple threads have access to some piece
|
||
of data</li>
|
||
<li>The <code>Sync</code> and <code>Send</code> traits, which extend Rust’s concurrency guarantees to
|
||
user-defined types as well as types provided by the standard library</li>
|
||
</ul>
|
||
<h2><a class="header" href="#using-threads-to-run-code-simultaneously" id="using-threads-to-run-code-simultaneously">Using Threads to Run Code Simultaneously</a></h2>
|
||
<p>In most current operating systems, an executed program’s code is run in a
|
||
<em>process</em>, and the operating system manages multiple processes at once. Within
|
||
your program, you can also have independent parts that run simultaneously. The
|
||
features that run these independent parts are called <em>threads</em>.</p>
|
||
<p>Splitting the computation in your program into multiple threads can improve
|
||
performance because the program does multiple tasks at the same time, but it
|
||
also adds complexity. Because threads can run simultaneously, there’s no
|
||
inherent guarantee about the order in which parts of your code on different
|
||
threads will run. This can lead to problems, such as:</p>
|
||
<ul>
|
||
<li>Race conditions, where threads are accessing data or resources in an
|
||
inconsistent order</li>
|
||
<li>Deadlocks, where two threads are waiting for each other to finish using a
|
||
resource the other thread has, preventing both threads from continuing</li>
|
||
<li>Bugs that happen only in certain situations and are hard to reproduce and fix
|
||
reliably</li>
|
||
</ul>
|
||
<p>Rust attempts to mitigate the negative effects of using threads, but
|
||
programming in a multithreaded context still takes careful thought and requires
|
||
a code structure that is different from that in programs running in a single
|
||
thread.</p>
|
||
<p>Programming languages implement threads in a few different ways. Many operating
|
||
systems provide an API for creating new threads. This model where a language
|
||
calls the operating system APIs to create threads is sometimes called <em>1:1</em>,
|
||
meaning one operating system thread per one language thread.</p>
|
||
<p>Many programming languages provide their own special implementation of threads.
|
||
Programming language-provided threads are known as <em>green</em> threads, and
|
||
languages that use these green threads will execute them in the context of a
|
||
different number of operating system threads. For this reason, the
|
||
green-threaded model is called the <em>M:N</em> model: there are <code>M</code> green threads per
|
||
<code>N</code> operating system threads, where <code>M</code> and <code>N</code> are not necessarily the same
|
||
number.</p>
|
||
<p>Each model has its own advantages and trade-offs, and the trade-off most
|
||
important to Rust is runtime support. <em>Runtime</em> is a confusing term and can
|
||
have different meanings in different contexts.</p>
|
||
<p>In this context, by <em>runtime</em> we mean code that is included by the language in
|
||
every binary. This code can be large or small depending on the language, but
|
||
every non-assembly language will have some amount of runtime code. For that
|
||
reason, colloquially when people say a language has “no runtime,” they often
|
||
mean “small runtime.” Smaller runtimes have fewer features but have the
|
||
advantage of resulting in smaller binaries, which make it easier to combine the
|
||
language with other languages in more contexts. Although many languages are
|
||
okay with increasing the runtime size in exchange for more features, Rust needs
|
||
to have nearly no runtime and cannot compromise on being able to call into C to
|
||
maintain performance.</p>
|
||
<p>The green-threading M:N model requires a larger language runtime to manage
|
||
threads. As such, the Rust standard library only provides an implementation of
|
||
1:1 threading. Because Rust is such a low-level language, there are crates that
|
||
implement M:N threading if you would rather trade overhead for aspects such as
|
||
more control over which threads run when and lower costs of context switching,
|
||
for example.</p>
|
||
<p>Now that we’ve defined threads in Rust, let’s explore how to use the
|
||
thread-related API provided by the standard library.</p>
|
||
<h3><a class="header" href="#creating-a-new-thread-with-spawn" id="creating-a-new-thread-with-spawn">Creating a New Thread with <code>spawn</code></a></h3>
|
||
<p>To create a new thread, we call the <code>thread::spawn</code> function and pass it a
|
||
closure (we talked about closures in Chapter 13) containing the code we want to
|
||
run in the new thread. The example in Listing 16-1 prints some text from a main
|
||
thread and other text from a new thread:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
use std::time::Duration;
|
||
|
||
fn main() {
|
||
thread::spawn(|| {
|
||
for i in 1..10 {
|
||
println!("hi number {} from the spawned thread!", i);
|
||
thread::sleep(Duration::from_millis(1));
|
||
}
|
||
});
|
||
|
||
for i in 1..5 {
|
||
println!("hi number {} from the main thread!", i);
|
||
thread::sleep(Duration::from_millis(1));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-1: Creating a new thread to print one thing
|
||
while the main thread prints something else</span></p>
|
||
<p>Note that with this function, the new thread will be stopped when the main
|
||
thread ends, whether or not it has finished running. The output from this
|
||
program might be a little different every time, but it will look similar to the
|
||
following:</p>
|
||
<pre><code class="language-text">hi number 1 from the main thread!
|
||
hi number 1 from the spawned thread!
|
||
hi number 2 from the main thread!
|
||
hi number 2 from the spawned thread!
|
||
hi number 3 from the main thread!
|
||
hi number 3 from the spawned thread!
|
||
hi number 4 from the main thread!
|
||
hi number 4 from the spawned thread!
|
||
hi number 5 from the spawned thread!
|
||
</code></pre>
|
||
<p>The calls to <code>thread::sleep</code> force a thread to stop its execution for a short
|
||
duration, allowing a different thread to run. The threads will probably take
|
||
turns, but that isn’t guaranteed: it depends on how your operating system
|
||
schedules the threads. In this run, the main thread printed first, even though
|
||
the print statement from the spawned thread appears first in the code. And even
|
||
though we told the spawned thread to print until <code>i</code> is 9, it only got to 5
|
||
before the main thread shut down.</p>
|
||
<p>If you run this code and only see output from the main thread, or don’t see any
|
||
overlap, try increasing the numbers in the ranges to create more opportunities
|
||
for the operating system to switch between the threads.</p>
|
||
<h3><a class="header" href="#waiting-for-all-threads-to-finish-using-join-handles" id="waiting-for-all-threads-to-finish-using-join-handles">Waiting for All Threads to Finish Using <code>join</code> Handles</a></h3>
|
||
<p>The code in Listing 16-1 not only stops the spawned thread prematurely most of
|
||
the time due to the main thread ending, but also can’t guarantee that the
|
||
spawned thread will get to run at all. The reason is that there is no guarantee
|
||
on the order in which threads run!</p>
|
||
<p>We can fix the problem of the spawned thread not getting to run, or not getting
|
||
to run completely, by saving the return value of <code>thread::spawn</code> in a variable.
|
||
The return type of <code>thread::spawn</code> is <code>JoinHandle</code>. A <code>JoinHandle</code> is an owned
|
||
value that, when we call the <code>join</code> method on it, will wait for its thread to
|
||
finish. Listing 16-2 shows how to use the <code>JoinHandle</code> of the thread we created
|
||
in Listing 16-1 and call <code>join</code> to make sure the spawned thread finishes before
|
||
<code>main</code> exits:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
use std::time::Duration;
|
||
|
||
fn main() {
|
||
let handle = thread::spawn(|| {
|
||
for i in 1..10 {
|
||
println!("hi number {} from the spawned thread!", i);
|
||
thread::sleep(Duration::from_millis(1));
|
||
}
|
||
});
|
||
|
||
for i in 1..5 {
|
||
println!("hi number {} from the main thread!", i);
|
||
thread::sleep(Duration::from_millis(1));
|
||
}
|
||
|
||
handle.join().unwrap();
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-2: Saving a <code>JoinHandle</code> from <code>thread::spawn</code>
|
||
to guarantee the thread is run to completion</span></p>
|
||
<p>Calling <code>join</code> on the handle blocks the thread currently running until the
|
||
thread represented by the handle terminates. <em>Blocking</em> a thread means that
|
||
thread is prevented from performing work or exiting. Because we’ve put the call
|
||
to <code>join</code> after the main thread’s <code>for</code> loop, running Listing 16-2 should
|
||
produce output similar to this:</p>
|
||
<pre><code class="language-text">hi number 1 from the main thread!
|
||
hi number 2 from the main thread!
|
||
hi number 1 from the spawned thread!
|
||
hi number 3 from the main thread!
|
||
hi number 2 from the spawned thread!
|
||
hi number 4 from the main thread!
|
||
hi number 3 from the spawned thread!
|
||
hi number 4 from the spawned thread!
|
||
hi number 5 from the spawned thread!
|
||
hi number 6 from the spawned thread!
|
||
hi number 7 from the spawned thread!
|
||
hi number 8 from the spawned thread!
|
||
hi number 9 from the spawned thread!
|
||
</code></pre>
|
||
<p>The two threads continue alternating, but the main thread waits because of the
|
||
call to <code>handle.join()</code> and does not end until the spawned thread is finished.</p>
|
||
<p>But let’s see what happens when we instead move <code>handle.join()</code> before the
|
||
<code>for</code> loop in <code>main</code>, like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
use std::time::Duration;
|
||
|
||
fn main() {
|
||
let handle = thread::spawn(|| {
|
||
for i in 1..10 {
|
||
println!("hi number {} from the spawned thread!", i);
|
||
thread::sleep(Duration::from_millis(1));
|
||
}
|
||
});
|
||
|
||
handle.join().unwrap();
|
||
|
||
for i in 1..5 {
|
||
println!("hi number {} from the main thread!", i);
|
||
thread::sleep(Duration::from_millis(1));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>The main thread will wait for the spawned thread to finish and then run its
|
||
<code>for</code> loop, so the output won’t be interleaved anymore, as shown here:</p>
|
||
<pre><code class="language-text">hi number 1 from the spawned thread!
|
||
hi number 2 from the spawned thread!
|
||
hi number 3 from the spawned thread!
|
||
hi number 4 from the spawned thread!
|
||
hi number 5 from the spawned thread!
|
||
hi number 6 from the spawned thread!
|
||
hi number 7 from the spawned thread!
|
||
hi number 8 from the spawned thread!
|
||
hi number 9 from the spawned thread!
|
||
hi number 1 from the main thread!
|
||
hi number 2 from the main thread!
|
||
hi number 3 from the main thread!
|
||
hi number 4 from the main thread!
|
||
</code></pre>
|
||
<p>Small details, such as where <code>join</code> is called, can affect whether or not your
|
||
threads run at the same time.</p>
|
||
<h3><a class="header" href="#using-move-closures-with-threads" id="using-move-closures-with-threads">Using <code>move</code> Closures with Threads</a></h3>
|
||
<p>The <code>move</code> closure is often used alongside <code>thread::spawn</code> because it allows
|
||
you to use data from one thread in another thread.</p>
|
||
<p>In Chapter 13, we mentioned we can use the <code>move</code> keyword before the parameter
|
||
list of a closure to force the closure to take ownership of the values it uses
|
||
in the environment. This technique is especially useful when creating new
|
||
threads in order to transfer ownership of values from one thread to another.</p>
|
||
<p>Notice in Listing 16-1 that the closure we pass to <code>thread::spawn</code> takes no
|
||
arguments: we’re not using any data from the main thread in the spawned
|
||
thread’s code. To use data from the main thread in the spawned thread, the
|
||
spawned thread’s closure must capture the values it needs. Listing 16-3 shows
|
||
an attempt to create a vector in the main thread and use it in the spawned
|
||
thread. However, this won’t yet work, as you’ll see in a moment.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::thread;
|
||
|
||
fn main() {
|
||
let v = vec![1, 2, 3];
|
||
|
||
let handle = thread::spawn(|| {
|
||
println!("Here's a vector: {:?}", v);
|
||
});
|
||
|
||
handle.join().unwrap();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 16-3: Attempting to use a vector created by the
|
||
main thread in another thread</span></p>
|
||
<p>The closure uses <code>v</code>, so it will capture <code>v</code> and make it part of the closure’s
|
||
environment. Because <code>thread::spawn</code> runs this closure in a new thread, we
|
||
should be able to access <code>v</code> inside that new thread. But when we compile this
|
||
example, we get the following error:</p>
|
||
<pre><code class="language-text">error[E0373]: closure may outlive the current function, but it borrows `v`,
|
||
which is owned by the current function
|
||
--> src/main.rs:6:32
|
||
|
|
||
6 | let handle = thread::spawn(|| {
|
||
| ^^ may outlive borrowed value `v`
|
||
7 | println!("Here's a vector: {:?}", v);
|
||
| - `v` is borrowed here
|
||
|
|
||
help: to force the closure to take ownership of `v` (and any other referenced
|
||
variables), use the `move` keyword
|
||
|
|
||
6 | let handle = thread::spawn(move || {
|
||
| ^^^^^^^
|
||
</code></pre>
|
||
<p>Rust <em>infers</em> how to capture <code>v</code>, and because <code>println!</code> only needs a reference
|
||
to <code>v</code>, the closure tries to borrow <code>v</code>. However, there’s a problem: Rust can’t
|
||
tell how long the spawned thread will run, so it doesn’t know if the reference
|
||
to <code>v</code> will always be valid.</p>
|
||
<p>Listing 16-4 provides a scenario that’s more likely to have a reference to <code>v</code>
|
||
that won’t be valid:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::thread;
|
||
|
||
fn main() {
|
||
let v = vec![1, 2, 3];
|
||
|
||
let handle = thread::spawn(|| {
|
||
println!("Here's a vector: {:?}", v);
|
||
});
|
||
|
||
drop(v); // oh no!
|
||
|
||
handle.join().unwrap();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 16-4: A thread with a closure that attempts to
|
||
capture a reference to <code>v</code> from a main thread that drops <code>v</code></span></p>
|
||
<p>If we were allowed to run this code, there’s a possibility the spawned thread
|
||
would be immediately put in the background without running at all. The spawned
|
||
thread has a reference to <code>v</code> inside, but the main thread immediately drops
|
||
<code>v</code>, using the <code>drop</code> function we discussed in Chapter 15. Then, when the
|
||
spawned thread starts to execute, <code>v</code> is no longer valid, so a reference to it
|
||
is also invalid. Oh no!</p>
|
||
<p>To fix the compiler error in Listing 16-3, we can use the error message’s
|
||
advice:</p>
|
||
<pre><code class="language-text">help: to force the closure to take ownership of `v` (and any other referenced
|
||
variables), use the `move` keyword
|
||
|
|
||
6 | let handle = thread::spawn(move || {
|
||
| ^^^^^^^
|
||
</code></pre>
|
||
<p>By adding the <code>move</code> keyword before the closure, we force the closure to take
|
||
ownership of the values it’s using rather than allowing Rust to infer that it
|
||
should borrow the values. The modification to Listing 16-3 shown in Listing
|
||
16-5 will compile and run as we intend:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
|
||
fn main() {
|
||
let v = vec![1, 2, 3];
|
||
|
||
let handle = thread::spawn(move || {
|
||
println!("Here's a vector: {:?}", v);
|
||
});
|
||
|
||
handle.join().unwrap();
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-5: Using the <code>move</code> keyword to force a closure
|
||
to take ownership of the values it uses</span></p>
|
||
<p>What would happen to the code in Listing 16-4 where the main thread called
|
||
<code>drop</code> if we use a <code>move</code> closure? Would <code>move</code> fix that case? Unfortunately,
|
||
no; we would get a different error because what Listing 16-4 is trying to do
|
||
isn’t allowed for a different reason. If we added <code>move</code> to the closure, we
|
||
would move <code>v</code> into the closure’s environment, and we could no longer call
|
||
<code>drop</code> on it in the main thread. We would get this compiler error instead:</p>
|
||
<pre><code class="language-text">error[E0382]: use of moved value: `v`
|
||
--> src/main.rs:10:10
|
||
|
|
||
6 | let handle = thread::spawn(move || {
|
||
| ------- value moved (into closure) here
|
||
...
|
||
10 | drop(v); // oh no!
|
||
| ^ value used here after move
|
||
|
|
||
= note: move occurs because `v` has type `std::vec::Vec<i32>`, which does
|
||
not implement the `Copy` trait
|
||
</code></pre>
|
||
<p>Rust’s ownership rules have saved us again! We got an error from the code in
|
||
Listing 16-3 because Rust was being conservative and only borrowing <code>v</code> for the
|
||
thread, which meant the main thread could theoretically invalidate the spawned
|
||
thread’s reference. By telling Rust to move ownership of <code>v</code> to the spawned
|
||
thread, we’re guaranteeing Rust that the main thread won’t use <code>v</code> anymore. If
|
||
we change Listing 16-4 in the same way, we’re then violating the ownership
|
||
rules when we try to use <code>v</code> in the main thread. The <code>move</code> keyword overrides
|
||
Rust’s conservative default of borrowing; it doesn’t let us violate the
|
||
ownership rules.</p>
|
||
<p>With a basic understanding of threads and the thread API, let’s look at what we
|
||
can <em>do</em> with threads.</p>
|
||
<h2><a class="header" href="#using-message-passing-to-transfer-data-between-threads" id="using-message-passing-to-transfer-data-between-threads">Using Message Passing to Transfer Data Between Threads</a></h2>
|
||
<p>One increasingly popular approach to ensuring safe concurrency is <em>message
|
||
passing</em>, where threads or actors communicate by sending each other messages
|
||
containing data. Here’s the idea in a slogan from <a href="https://golang.org/doc/effective_go.html">the Go language
|
||
documentation</a>: “Do not communicate by
|
||
sharing memory; instead, share memory by communicating.”</p>
|
||
<p>One major tool Rust has for accomplishing message-sending concurrency is the
|
||
<em>channel</em>, a programming concept that Rust’s standard library provides an
|
||
implementation of. You can imagine a channel in programming as being like a
|
||
channel of water, such as a stream or a river. If you put something like a
|
||
rubber duck or boat into a stream, it will travel downstream to the end of the
|
||
waterway.</p>
|
||
<p>A channel in programming has two halves: a transmitter and a receiver. The
|
||
transmitter half is the upstream location where you put rubber ducks into the
|
||
river, and the receiver half is where the rubber duck ends up downstream. One
|
||
part of your code calls methods on the transmitter with the data you want to
|
||
send, and another part checks the receiving end for arriving messages. A
|
||
channel is said to be <em>closed</em> if either the transmitter or receiver half is
|
||
dropped.</p>
|
||
<p>Here, we’ll work up to a program that has one thread to generate values and
|
||
send them down a channel, and another thread that will receive the values and
|
||
print them out. We’ll be sending simple values between threads using a channel
|
||
to illustrate the feature. Once you’re familiar with the technique, you could
|
||
use channels to implement a chat system or a system where many threads perform
|
||
parts of a calculation and send the parts to one thread that aggregates the
|
||
results.</p>
|
||
<p>First, in Listing 16-6, we’ll create a channel but not do anything with it.
|
||
Note that this won’t compile yet because Rust can’t tell what type of values we
|
||
want to send over the channel.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::sync::mpsc;
|
||
|
||
fn main() {
|
||
let (tx, rx) = mpsc::channel();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 16-6: Creating a channel and assigning the two
|
||
halves to <code>tx</code> and <code>rx</code></span></p>
|
||
<p>We create a new channel using the <code>mpsc::channel</code> function; <code>mpsc</code> stands for
|
||
<em>multiple producer, single consumer</em>. In short, the way Rust’s standard library
|
||
implements channels means a channel can have multiple <em>sending</em> ends that
|
||
produce values but only one <em>receiving</em> end that consumes those values. Imagine
|
||
multiple streams flowing together into one big river: everything sent down any
|
||
of the streams will end up in one river at the end. We’ll start with a single
|
||
producer for now, but we’ll add multiple producers when we get this example
|
||
working.</p>
|
||
<p>The <code>mpsc::channel</code> function returns a tuple, the first element of which is the
|
||
sending end and the second element is the receiving end. The abbreviations <code>tx</code>
|
||
and <code>rx</code> are traditionally used in many fields for <em>transmitter</em> and <em>receiver</em>
|
||
respectively, so we name our variables as such to indicate each end. We’re
|
||
using a <code>let</code> statement with a pattern that destructures the tuple; we’ll
|
||
discuss the use of patterns in <code>let</code> statements and destructuring in Chapter
|
||
18. Using a <code>let</code> statement this way is a convenient approach to extract the
|
||
pieces of the tuple returned by <code>mpsc::channel</code>.</p>
|
||
<p>Let’s move the transmitting end into a spawned thread and have it send one
|
||
string so the spawned thread is communicating with the main thread, as shown in
|
||
Listing 16-7. This is like putting a rubber duck in the river upstream or
|
||
sending a chat message from one thread to another.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
use std::sync::mpsc;
|
||
|
||
fn main() {
|
||
let (tx, rx) = mpsc::channel();
|
||
|
||
thread::spawn(move || {
|
||
let val = String::from("hi");
|
||
tx.send(val).unwrap();
|
||
});
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-7: Moving <code>tx</code> to a spawned thread and sending
|
||
“hi”</span></p>
|
||
<p>Again, we’re using <code>thread::spawn</code> to create a new thread and then using <code>move</code>
|
||
to move <code>tx</code> into the closure so the spawned thread owns <code>tx</code>. The spawned
|
||
thread needs to own the transmitting end of the channel to be able to send
|
||
messages through the channel.</p>
|
||
<p>The transmitting end has a <code>send</code> method that takes the value we want to send.
|
||
The <code>send</code> method returns a <code>Result&lt;T, E&gt;</code> type, so if the receiving end has
|
||
already been dropped and there’s nowhere to send a value, the send operation
|
||
will return an error. In this example, we’re calling <code>unwrap</code> to panic in case
|
||
of an error. But in a real application, we would handle it properly: return to
|
||
Chapter 9 to review strategies for proper error handling.</p>
|
||
<p>In Listing 16-8, we’ll get the value from the receiving end of the channel in
|
||
the main thread. This is like retrieving the rubber duck from the water at the
|
||
end of the river or like getting a chat message.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
use std::sync::mpsc;
|
||
|
||
fn main() {
|
||
let (tx, rx) = mpsc::channel();
|
||
|
||
thread::spawn(move || {
|
||
let val = String::from("hi");
|
||
tx.send(val).unwrap();
|
||
});
|
||
|
||
let received = rx.recv().unwrap();
|
||
println!("Got: {}", received);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-8: Receiving the value “hi” in the main thread
|
||
and printing it</span></p>
|
||
<p>The receiving end of a channel has two useful methods: <code>recv</code> and <code>try_recv</code>.
|
||
We’re using <code>recv</code>, short for <em>receive</em>, which will block the main thread’s
|
||
execution and wait until a value is sent down the channel. Once a value is
|
||
sent, <code>recv</code> will return it in a <code>Result&lt;T, E&gt;</code>. When the sending end of the
|
||
channel closes, <code>recv</code> will return an error to signal that no more values will
|
||
be coming.</p>
|
||
<p>The <code>try_recv</code> method doesn’t block, but will instead return a <code>Result&lt;T, E&gt;</code>
|
||
immediately: an <code>Ok</code> value holding a message if one is available and an <code>Err</code>
|
||
value if there aren’t any messages this time. Using <code>try_recv</code> is useful if
|
||
this thread has other work to do while waiting for messages: we could write a
|
||
loop that calls <code>try_recv</code> every so often, handles a message if one is
|
||
available, and otherwise does other work for a little while until checking
|
||
again.</p>
|
||
<p>We’ve used <code>recv</code> in this example for simplicity; we don’t have any other work
|
||
for the main thread to do other than wait for messages, so blocking the main
|
||
thread is appropriate.</p>
|
||
<p>When we run the code in Listing 16-8, we’ll see the value printed from the main
|
||
thread:</p>
|
||
<pre><code class="language-text">Got: hi
|
||
</code></pre>
|
||
<p>Perfect!</p>
|
||
<h3><a class="header" href="#channels-and-ownership-transference" id="channels-and-ownership-transference">Channels and Ownership Transference</a></h3>
|
||
<p>The ownership rules play a vital role in message sending because they help you
|
||
write safe, concurrent code. Preventing errors in concurrent programming is the
|
||
advantage of thinking about ownership throughout your Rust programs. Let’s do
|
||
an experiment to show how channels and ownership work together to prevent
|
||
problems: we’ll try to use a <code>val</code> value in the spawned thread <em>after</em> we’ve
|
||
sent it down the channel. Try compiling the code in Listing 16-9 to see why
|
||
this code isn’t allowed:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::thread;
|
||
use std::sync::mpsc;
|
||
|
||
fn main() {
|
||
let (tx, rx) = mpsc::channel();
|
||
|
||
thread::spawn(move || {
|
||
let val = String::from("hi");
|
||
tx.send(val).unwrap();
|
||
println!("val is {}", val);
|
||
});
|
||
|
||
let received = rx.recv().unwrap();
|
||
println!("Got: {}", received);
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 16-9: Attempting to use <code>val</code> after we’ve sent it
|
||
down the channel</span></p>
|
||
<p>Here, we try to print <code>val</code> after we’ve sent it down the channel via <code>tx.send</code>.
|
||
Allowing this would be a bad idea: once the value has been sent to another
|
||
thread, that thread could modify or drop it before we try to use the value
|
||
again. Potentially, the other thread’s modifications could cause errors or
|
||
unexpected results due to inconsistent or nonexistent data. However, Rust gives
|
||
us an error if we try to compile the code in Listing 16-9:</p>
|
||
<pre><code class="language-text">error[E0382]: use of moved value: `val`
|
||
--> src/main.rs:10:31
|
||
|
|
||
9 | tx.send(val).unwrap();
|
||
| --- value moved here
|
||
10 | println!("val is {}", val);
|
||
| ^^^ value used here after move
|
||
|
|
||
= note: move occurs because `val` has type `std::string::String`, which does
|
||
not implement the `Copy` trait
|
||
</code></pre>
|
||
<p>Our concurrency mistake has caused a compile-time error. The <code>send</code> function
|
||
takes ownership of its parameter, and when the value is moved, the receiver
|
||
takes ownership of it. This stops us from accidentally using the value again
|
||
after sending it; the ownership system checks that everything is okay.</p>
|
||
<h3><a class="header" href="#sending-multiple-values-and-seeing-the-receiver-waiting" id="sending-multiple-values-and-seeing-the-receiver-waiting">Sending Multiple Values and Seeing the Receiver Waiting</a></h3>
|
||
<p>The code in Listing 16-8 compiled and ran, but it didn’t clearly show us that
|
||
two separate threads were talking to each other over the channel. In Listing
|
||
16-10 we’ve made some modifications that will prove the code in Listing 16-8 is
|
||
running concurrently: the spawned thread will now send multiple messages and
|
||
pause for a second between each message.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::thread;
|
||
use std::sync::mpsc;
|
||
use std::time::Duration;
|
||
|
||
fn main() {
|
||
let (tx, rx) = mpsc::channel();
|
||
|
||
thread::spawn(move || {
|
||
let vals = vec![
|
||
String::from("hi"),
|
||
String::from("from"),
|
||
String::from("the"),
|
||
String::from("thread"),
|
||
];
|
||
|
||
for val in vals {
|
||
tx.send(val).unwrap();
|
||
thread::sleep(Duration::from_secs(1));
|
||
}
|
||
});
|
||
|
||
for received in rx {
|
||
println!("Got: {}", received);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-10: Sending multiple messages and pausing
|
||
between each</span></p>
|
||
<p>This time, the spawned thread has a vector of strings that we want to send to
|
||
the main thread. We iterate over them, sending each individually, and pause
|
||
between each by calling the <code>thread::sleep</code> function with a <code>Duration</code> value of
|
||
1 second.</p>
|
||
<p>In the main thread, we’re not calling the <code>recv</code> function explicitly anymore:
|
||
instead, we’re treating <code>rx</code> as an iterator. For each value received, we’re
|
||
printing it. When the channel is closed, iteration will end.</p>
|
||
<p>When running the code in Listing 16-10, you should see the following output
|
||
with a 1-second pause in between each line:</p>
|
||
<pre><code class="language-text">Got: hi
|
||
Got: from
|
||
Got: the
|
||
Got: thread
|
||
</code></pre>
|
||
<p>Because we don’t have any code that pauses or delays in the <code>for</code> loop in the
|
||
main thread, we can tell that the main thread is waiting to receive values from
|
||
the spawned thread.</p>
|
||
<h3><a class="header" href="#creating-multiple-producers-by-cloning-the-transmitter" id="creating-multiple-producers-by-cloning-the-transmitter">Creating Multiple Producers by Cloning the Transmitter</a></h3>
|
||
<p>Earlier we mentioned that <code>mpsc</code> was an acronym for <em>multiple producer,
|
||
single consumer</em>. Let’s put <code>mpsc</code> to use and expand the code in Listing 16-10
|
||
to create multiple threads that all send values to the same receiver. We can do
|
||
so by cloning the transmitting half of the channel, as shown in Listing 16-11:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::sync::mpsc;
|
||
</span><span class="boring">use std::time::Duration;
|
||
</span><span class="boring">
|
||
</span><span class="boring">fn main() {
|
||
</span>// --snip--
|
||
|
||
let (tx, rx) = mpsc::channel();
|
||
|
||
let tx1 = mpsc::Sender::clone(&tx);
|
||
thread::spawn(move || {
|
||
let vals = vec![
|
||
String::from("hi"),
|
||
String::from("from"),
|
||
String::from("the"),
|
||
String::from("thread"),
|
||
];
|
||
|
||
for val in vals {
|
||
tx1.send(val).unwrap();
|
||
thread::sleep(Duration::from_secs(1));
|
||
}
|
||
});
|
||
|
||
thread::spawn(move || {
|
||
let vals = vec![
|
||
String::from("more"),
|
||
String::from("messages"),
|
||
String::from("for"),
|
||
String::from("you"),
|
||
];
|
||
|
||
for val in vals {
|
||
tx.send(val).unwrap();
|
||
thread::sleep(Duration::from_secs(1));
|
||
}
|
||
});
|
||
|
||
for received in rx {
|
||
println!("Got: {}", received);
|
||
}
|
||
|
||
// --snip--
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 16-11: Sending multiple messages from multiple
|
||
producers</span></p>
|
||
<p>This time, before we create the first spawned thread, we call <code>clone</code> on the
|
||
sending end of the channel. This will give us a new sending handle we can pass
|
||
to the first spawned thread. We pass the original sending end of the channel to
|
||
a second spawned thread. This gives us two threads, each sending different
|
||
messages to the receiving end of the channel.</p>
|
||
<p>When you run the code, your output should look something like this:</p>
|
||
<pre><code class="language-text">Got: hi
|
||
Got: more
|
||
Got: from
|
||
Got: messages
|
||
Got: for
|
||
Got: the
|
||
Got: thread
|
||
Got: you
|
||
</code></pre>
|
||
<p>You might see the values in another order; it depends on your system. This is
|
||
what makes concurrency interesting as well as difficult. If you experiment with
|
||
<code>thread::sleep</code>, giving it various values in the different threads, each run
|
||
will be more nondeterministic and create different output each time.</p>
|
||
<p>Now that we’ve looked at how channels work, let’s look at a different method of
|
||
concurrency.</p>
|
||
<h2><a class="header" href="#shared-state-concurrency" id="shared-state-concurrency">Shared-State Concurrency</a></h2>
|
||
<p>Message passing is a fine way of handling concurrency, but it’s not the only
|
||
one. Consider this part of the slogan from the Go language documentation again:
|
||
“do not communicate by sharing memory.”</p>
|
||
<p>What would communicating by sharing memory look like? In addition, why would
|
||
message-passing enthusiasts avoid it and do the opposite instead?</p>
|
||
<p>In a way, channels in any programming language are similar to single ownership,
|
||
because once you transfer a value down a channel, you should no longer use that
|
||
value. Shared memory concurrency is like multiple ownership: multiple threads
|
||
can access the same memory location at the same time. As you saw in Chapter 15,
|
||
where smart pointers made multiple ownership possible, multiple ownership can
|
||
add complexity because these different owners need managing. Rust’s type system
|
||
and ownership rules greatly assist in getting this management correct. For an
|
||
example, let’s look at mutexes, one of the more common concurrency primitives
|
||
for shared memory.</p>
|
||
<h3><a class="header" href="#using-mutexes-to-allow-access-to-data-from-one-thread-at-a-time" id="using-mutexes-to-allow-access-to-data-from-one-thread-at-a-time">Using Mutexes to Allow Access to Data from One Thread at a Time</a></h3>
|
||
<p><em>Mutex</em> is an abbreviation for <em>mutual exclusion</em>, as in, a mutex allows only
|
||
one thread to access some data at any given time. To access the data in a
|
||
mutex, a thread must first signal that it wants access by asking to acquire the
|
||
mutex’s <em>lock</em>. The lock is a data structure that is part of the mutex that
|
||
keeps track of who currently has exclusive access to the data. Therefore, the
|
||
mutex is described as <em>guarding</em> the data it holds via the locking system.</p>
|
||
<p>Mutexes have a reputation for being difficult to use because you have to
|
||
remember two rules:</p>
|
||
<ul>
|
||
<li>You must attempt to acquire the lock before using the data.</li>
|
||
<li>When you’re done with the data that the mutex guards, you must unlock the
|
||
data so other threads can acquire the lock.</li>
|
||
</ul>
|
||
<p>For a real-world metaphor for a mutex, imagine a panel discussion at a
|
||
conference with only one microphone. Before a panelist can speak, they have to
|
||
ask or signal that they want to use the microphone. When they get the
|
||
microphone, they can talk for as long as they want to and then hand the
|
||
microphone to the next panelist who requests to speak. If a panelist forgets to
|
||
hand the microphone off when they’re finished with it, no one else is able to
|
||
speak. If management of the shared microphone goes wrong, the panel won’t work
|
||
as planned!</p>
|
||
<p>Management of mutexes can be incredibly tricky to get right, which is why so
|
||
many people are enthusiastic about channels. However, thanks to Rust’s type
|
||
system and ownership rules, you can’t get locking and unlocking wrong.</p>
|
||
<h4><a class="header" href="#the-api-of-mutext" id="the-api-of-mutext">The API of <code>Mutex&lt;T&gt;</code></a></h4>
|
||
<p>As an example of how to use a mutex, let’s start by using a mutex in a
|
||
single-threaded context, as shown in Listing 16-12:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::sync::Mutex;
|
||
|
||
fn main() {
|
||
let m = Mutex::new(5);
|
||
|
||
{
|
||
let mut num = m.lock().unwrap();
|
||
*num = 6;
|
||
}
|
||
|
||
println!("m = {:?}", m);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-12: Exploring the API of <code>Mutex&lt;T&gt;</code> in a
|
||
single-threaded context for simplicity</span></p>
|
||
<p>As with many types, we create a <code>Mutex&lt;T&gt;</code> using the associated function <code>new</code>.
|
||
To access the data inside the mutex, we use the <code>lock</code> method to acquire the
|
||
lock. This call will block the current thread so it can’t do any work until
|
||
it’s our turn to have the lock.</p>
|
||
<p>The call to <code>lock</code> would fail if another thread holding the lock panicked. In
|
||
that case, no one would ever be able to get the lock, so we’ve chosen to
|
||
<code>unwrap</code> and have this thread panic if we’re in that situation.</p>
|
||
<p>After we’ve acquired the lock, we can treat the return value, named <code>num</code> in
|
||
this case, as a mutable reference to the data inside. The type system ensures
|
||
that we acquire a lock before using the value in <code>m</code>: <code>Mutex&lt;i32&gt;</code> is not an
|
||
<code>i32</code>, so we <em>must</em> acquire the lock to be able to use the <code>i32</code> value. We
|
||
can’t forget; the type system won’t let us access the inner <code>i32</code> otherwise.</p>
|
||
<p>As you might suspect, <code>Mutex&lt;T&gt;</code> is a smart pointer. More accurately, the call
|
||
to <code>lock</code> <em>returns</em> a smart pointer called <code>MutexGuard</code>, wrapped in a
|
||
<code>LockResult</code> that we handled with the call to <code>unwrap</code>. The <code>MutexGuard</code> smart
|
||
pointer implements <code>Deref</code> to point at our inner data; the smart pointer also
|
||
has a <code>Drop</code> implementation that releases the lock automatically when a
|
||
<code>MutexGuard</code> goes out of scope, which happens at the end of the inner scope in
|
||
Listing 16-12. As a result, we don’t risk forgetting to release the lock and
|
||
blocking the mutex from being used by other threads because the lock release
|
||
happens automatically.</p>
|
||
<p>After dropping the lock, we can print the mutex value and see that we were able
|
||
to change the inner <code>i32</code> to 6.</p>
|
||
<h4><a class="header" href="#sharing-a-mutext-between-multiple-threads" id="sharing-a-mutext-between-multiple-threads">Sharing a <code>Mutex&lt;T&gt;</code> Between Multiple Threads</a></h4>
|
||
<p>Now, let’s try to share a value between multiple threads using <code>Mutex&lt;T&gt;</code>.
|
||
We’ll spin up 10 threads and have them each increment a counter value by 1, so
|
||
the counter goes from 0 to 10. Note that the next few examples will have
|
||
compiler errors, and we’ll use those errors to learn more about using
|
||
<code>Mutex&lt;T&gt;</code> and how Rust helps us use it correctly. Listing 16-13 has our
|
||
starting example:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::sync::Mutex;
|
||
use std::thread;
|
||
|
||
fn main() {
|
||
let counter = Mutex::new(0);
|
||
let mut handles = vec![];
|
||
|
||
for _ in 0..10 {
|
||
let handle = thread::spawn(move || {
|
||
let mut num = counter.lock().unwrap();
|
||
|
||
*num += 1;
|
||
});
|
||
handles.push(handle);
|
||
}
|
||
|
||
for handle in handles {
|
||
handle.join().unwrap();
|
||
}
|
||
|
||
println!("Result: {}", *counter.lock().unwrap());
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 16-13: Ten threads each increment a counter
|
||
guarded by a <code>Mutex&lt;T&gt;</code></span></p>
|
||
<p>We create a <code>counter</code> variable to hold an <code>i32</code> inside a <code>Mutex&lt;T&gt;</code>, as we
|
||
did in Listing 16-12. Next, we create 10 threads by iterating over a range
|
||
of numbers. We use <code>thread::spawn</code> and give all the threads the same closure,
|
||
one that moves the counter into the thread, acquires a lock on the <code>Mutex&lt;T&gt;</code>
|
||
by calling the <code>lock</code> method, and then adds 1 to the value in the mutex. When a
|
||
thread finishes running its closure, <code>num</code> will go out of scope and release the
|
||
lock so another thread can acquire it.</p>
|
||
<p>In the main thread, we collect all the join handles. Then, as we did in Listing
|
||
16-2, we call <code>join</code> on each handle to make sure all the threads finish. At
|
||
that point, the main thread will acquire the lock and print the result of this
|
||
program.</p>
|
||
<p>We hinted that this example wouldn’t compile. Now let’s find out why!</p>
|
||
<pre><code class="language-text">error[E0382]: capture of moved value: `counter`
|
||
--> src/main.rs:10:27
|
||
|
|
||
9 | let handle = thread::spawn(move || {
|
||
| ------- value moved (into closure) here
|
||
10 | let mut num = counter.lock().unwrap();
|
||
| ^^^^^^^ value captured here after move
|
||
|
|
||
= note: move occurs because `counter` has type `std::sync::Mutex&lt;i32&gt;`,
|
||
which does not implement the `Copy` trait
|
||
|
||
error[E0382]: use of moved value: `counter`
|
||
--> src/main.rs:21:29
|
||
|
|
||
9 | let handle = thread::spawn(move || {
|
||
| ------- value moved (into closure) here
|
||
...
|
||
21 | println!("Result: {}", *counter.lock().unwrap());
|
||
| ^^^^^^^ value used here after move
|
||
|
|
||
= note: move occurs because `counter` has type `std::sync::Mutex&lt;i32&gt;`,
|
||
which does not implement the `Copy` trait
|
||
|
||
error: aborting due to 2 previous errors
|
||
</code></pre>
|
||
<p>The error message states that the <code>counter</code> value is moved into the closure and
|
||
then captured when we call <code>lock</code>. That description sounds like what we wanted,
|
||
but it’s not allowed!</p>
|
||
<p>Let’s figure this out by simplifying the program. Instead of making 10 threads
|
||
in a <code>for</code> loop, let’s just make two threads without a loop and see what
|
||
happens. Replace the first <code>for</code> loop in Listing 16-13 with this code instead:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::sync::Mutex;
|
||
use std::thread;
|
||
|
||
fn main() {
|
||
let counter = Mutex::new(0);
|
||
let mut handles = vec![];
|
||
|
||
let handle = thread::spawn(move || {
|
||
let mut num = counter.lock().unwrap();
|
||
|
||
*num += 1;
|
||
});
|
||
handles.push(handle);
|
||
|
||
let handle2 = thread::spawn(move || {
|
||
let mut num2 = counter.lock().unwrap();
|
||
|
||
*num2 += 1;
|
||
});
|
||
handles.push(handle2);
|
||
|
||
for handle in handles {
|
||
handle.join().unwrap();
|
||
}
|
||
|
||
println!("Result: {}", *counter.lock().unwrap());
|
||
}
|
||
</code></pre>
|
||
<p>We make two threads and change the variable names used with the second thread
|
||
to <code>handle2</code> and <code>num2</code>. When we run the code this time, compiling gives us the
|
||
following:</p>
|
||
<pre><code class="language-text">error[E0382]: capture of moved value: `counter`
|
||
--> src/main.rs:16:24
|
||
|
|
||
8 | let handle = thread::spawn(move || {
|
||
| ------- value moved (into closure) here
|
||
...
|
||
16 | let mut num2 = counter.lock().unwrap();
|
||
| ^^^^^^^ value captured here after move
|
||
|
|
||
= note: move occurs because `counter` has type `std::sync::Mutex&lt;i32&gt;`,
|
||
which does not implement the `Copy` trait
|
||
|
||
error[E0382]: use of moved value: `counter`
|
||
--> src/main.rs:26:29
|
||
|
|
||
8 | let handle = thread::spawn(move || {
|
||
| ------- value moved (into closure) here
|
||
...
|
||
26 | println!("Result: {}", *counter.lock().unwrap());
|
||
| ^^^^^^^ value used here after move
|
||
|
|
||
= note: move occurs because `counter` has type `std::sync::Mutex&lt;i32&gt;`,
|
||
which does not implement the `Copy` trait
|
||
|
||
error: aborting due to 2 previous errors
|
||
</code></pre>
|
||
<p>Aha! The first error message indicates that <code>counter</code> is moved into the closure
|
||
for the thread associated with <code>handle</code>. That move is preventing us from
|
||
capturing <code>counter</code> when we try to call <code>lock</code> on it and store the result in
|
||
<code>num2</code> in the second thread! So Rust is telling us that we can’t move ownership
|
||
of <code>counter</code> into multiple threads. This was hard to see earlier because our
|
||
threads were in a loop, and Rust can’t point to different threads in different
|
||
iterations of the loop. Let’s fix the compiler error with a multiple-ownership
|
||
method we discussed in Chapter 15.</p>
|
||
<h4><a class="header" href="#multiple-ownership-with-multiple-threads" id="multiple-ownership-with-multiple-threads">Multiple Ownership with Multiple Threads</a></h4>
|
||
<p>In Chapter 15, we gave a value multiple owners by using the smart pointer
|
||
<code>Rc&lt;T&gt;</code> to create a reference counted value. Let’s do the same here and see
|
||
what happens. We’ll wrap the <code>Mutex&lt;T&gt;</code> in <code>Rc&lt;T&gt;</code> in Listing 16-14 and clone
|
||
the <code>Rc&lt;T&gt;</code> before moving ownership to the thread. Now that we’ve seen the
|
||
errors, we’ll also switch back to using the <code>for</code> loop, and we’ll keep the
|
||
<code>move</code> keyword with the closure.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use std::rc::Rc;
|
||
use std::sync::Mutex;
|
||
use std::thread;
|
||
|
||
fn main() {
|
||
let counter = Rc::new(Mutex::new(0));
|
||
let mut handles = vec![];
|
||
|
||
for _ in 0..10 {
|
||
let counter = Rc::clone(&counter);
|
||
let handle = thread::spawn(move || {
|
||
let mut num = counter.lock().unwrap();
|
||
|
||
*num += 1;
|
||
});
|
||
handles.push(handle);
|
||
}
|
||
|
||
for handle in handles {
|
||
handle.join().unwrap();
|
||
}
|
||
|
||
println!("Result: {}", *counter.lock().unwrap());
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 16-14: Attempting to use <code>Rc&lt;T&gt;</code> to allow
|
||
multiple threads to own the <code>Mutex&lt;T&gt;</code></span></p>
|
||
<p>Once again, we compile and get... different errors! The compiler is teaching us
|
||
a lot.</p>
|
||
<pre><code class="language-text">error[E0277]: the trait bound `std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;:
|
||
std::marker::Send` is not satisfied in `[closure@src/main.rs:11:36:
|
||
15:10 counter:std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;]`
|
||
--> src/main.rs:11:22
|
||
|
|
||
11 | let handle = thread::spawn(move || {
|
||
| ^^^^^^^^^^^^^ `std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;`
|
||
cannot be sent between threads safely
|
||
|
|
||
= help: within `[closure@src/main.rs:11:36: 15:10
|
||
counter:std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;]`, the trait `std::marker::Send` is
|
||
not implemented for `std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;`
|
||
= note: required because it appears within the type
|
||
`[closure@src/main.rs:11:36: 15:10 counter:std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;]`
|
||
= note: required by `std::thread::spawn`
|
||
</code></pre>
|
||
<p>Wow, that error message is very wordy! Here are some important parts to focus
|
||
on: the first inline error says <code>`std::rc::Rc&lt;std::sync::Mutex&lt;i32&gt;&gt;` cannot be sent between threads safely</code>. The reason for this is in the next important
|
||
part to focus on, the error message. The distilled error message says <code>the trait bound `Send` is not satisfied</code>. We’ll talk about <code>Send</code> in the next
|
||
section: it’s one of the traits that ensures the types we use with threads are
|
||
meant for use in concurrent situations.</p>
|
||
<p>Unfortunately, <code>Rc&lt;T&gt;</code> is not safe to share across threads. When <code>Rc&lt;T&gt;</code>
|
||
manages the reference count, it adds to the count for each call to <code>clone</code> and
|
||
subtracts from the count when each clone is dropped. But it doesn’t use any
|
||
concurrency primitives to make sure that changes to the count can’t be
|
||
interrupted by another thread. This could lead to wrong counts—subtle bugs that
|
||
could in turn lead to memory leaks or a value being dropped before we’re done
|
||
with it. What we need is a type exactly like <code>Rc&lt;T&gt;</code> but one that makes changes
|
||
to the reference count in a thread-safe way.</p>
|
||
<h4><a class="header" href="#atomic-reference-counting-with-arct" id="atomic-reference-counting-with-arct">Atomic Reference Counting with <code>Arc&lt;T&gt;</code></a></h4>
|
||
<p>Fortunately, <code>Arc&lt;T&gt;</code> <em>is</em> a type like <code>Rc&lt;T&gt;</code> that is safe to use in
|
||
concurrent situations. The <em>a</em> stands for <em>atomic</em>, meaning it’s an <em>atomically
|
||
reference counted</em> type. Atomics are an additional kind of concurrency
|
||
primitive that we won’t cover in detail here: see the standard library
|
||
documentation for <code>std::sync::atomic</code> for more details. At this point, you just
|
||
need to know that atomics work like primitive types but are safe to share
|
||
across threads.</p>
|
||
<p>You might then wonder why all primitive types aren’t atomic and why standard
|
||
library types aren’t implemented to use <code>Arc&lt;T&gt;</code> by default. The reason is that
|
||
thread safety comes with a performance penalty that you only want to pay when
|
||
you really need to. If you’re just performing operations on values within a
|
||
single thread, your code can run faster if it doesn’t have to enforce the
|
||
guarantees atomics provide.</p>
|
||
<p>Let’s return to our example: <code>Arc&lt;T&gt;</code> and <code>Rc&lt;T&gt;</code> have the same API, so we fix
|
||
our program by changing the <code>use</code> line, the call to <code>new</code>, and the call to
|
||
<code>clone</code>. The code in Listing 16-15 will finally compile and run:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::sync::{Mutex, Arc};
|
||
use std::thread;
|
||
|
||
fn main() {
|
||
let counter = Arc::new(Mutex::new(0));
|
||
let mut handles = vec![];
|
||
|
||
for _ in 0..10 {
|
||
let counter = Arc::clone(&counter);
|
||
let handle = thread::spawn(move || {
|
||
let mut num = counter.lock().unwrap();
|
||
|
||
*num += 1;
|
||
});
|
||
handles.push(handle);
|
||
}
|
||
|
||
for handle in handles {
|
||
handle.join().unwrap();
|
||
}
|
||
|
||
println!("Result: {}", *counter.lock().unwrap());
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 16-15: Using an <code>Arc&lt;T&gt;</code> to wrap the <code>Mutex&lt;T&gt;</code>
|
||
to be able to share ownership across multiple threads</span></p>
|
||
<p>This code will print the following:</p>
|
||
<pre><code class="language-text">Result: 10
|
||
</code></pre>
|
||
<p>We did it! We counted from 0 to 10, which may not seem very impressive, but it
|
||
did teach us a lot about <code>Mutex&lt;T&gt;</code> and thread safety. You could also use this
|
||
program’s structure to do more complicated operations than just incrementing a
|
||
counter. Using this strategy, you can divide a calculation into independent
|
||
parts, split those parts across threads, and then use a <code>Mutex&lt;T&gt;</code> to have each
|
||
thread update the final result with its part.</p>
|
||
<h3><a class="header" href="#similarities-between-refcelltrct-and-mutextarct" id="similarities-between-refcelltrct-and-mutextarct">Similarities Between <code>RefCell&lt;T&gt;</code>/<code>Rc&lt;T&gt;</code> and <code>Mutex&lt;T&gt;</code>/<code>Arc&lt;T&gt;</code></a></h3>
|
||
<p>You might have noticed that <code>counter</code> is immutable but we could get a mutable
|
||
reference to the value inside it; this means <code>Mutex&lt;T&gt;</code> provides interior
|
||
mutability, as the <code>Cell</code> family does. In the same way we used <code>RefCell&lt;T&gt;</code> in
|
||
Chapter 15 to allow us to mutate contents inside an <code>Rc&lt;T&gt;</code>, we use <code>Mutex&lt;T&gt;</code>
|
||
to mutate contents inside an <code>Arc&lt;T&gt;</code>.</p>
|
||
<p>Another detail to note is that Rust can’t protect you from all kinds of logic
|
||
errors when you use <code>Mutex&lt;T&gt;</code>. Recall in Chapter 15 that using <code>Rc&lt;T&gt;</code> came
|
||
with the risk of creating reference cycles, where two <code>Rc&lt;T&gt;</code> values refer to
|
||
each other, causing memory leaks. Similarly, <code>Mutex&lt;T&gt;</code> comes with the risk of
|
||
creating <em>deadlocks</em>. These occur when an operation needs to lock two resources
|
||
and two threads have each acquired one of the locks, causing them to wait for
|
||
each other forever. If you’re interested in deadlocks, try creating a Rust
|
||
program that has a deadlock; then research deadlock mitigation strategies for
|
||
mutexes in any language and have a go at implementing them in Rust. The
|
||
standard library API documentation for <code>Mutex&lt;T&gt;</code> and <code>MutexGuard</code> offers
|
||
useful information.</p>
|
||
<p>We’ll round out this chapter by talking about the <code>Send</code> and <code>Sync</code> traits and
|
||
how we can use them with custom types.</p>
|
||
<h2><a class="header" href="#extensible-concurrency-with-the-sync-and-send-traits" id="extensible-concurrency-with-the-sync-and-send-traits">Extensible Concurrency with the <code>Sync</code> and <code>Send</code> Traits</a></h2>
|
||
<p>Interestingly, the Rust language has <em>very</em> few concurrency features. Almost
|
||
every concurrency feature we’ve talked about so far in this chapter has been
|
||
part of the standard library, not the language. Your options for handling
|
||
concurrency are not limited to the language or the standard library; you can
|
||
write your own concurrency features or use those written by others.</p>
|
||
<p>However, two concurrency concepts are embedded in the language: the
|
||
<code>std::marker</code> traits <code>Sync</code> and <code>Send</code>.</p>
|
||
<h3><a class="header" href="#allowing-transference-of-ownership-between-threads-with-send" id="allowing-transference-of-ownership-between-threads-with-send">Allowing Transference of Ownership Between Threads with <code>Send</code></a></h3>
|
||
<p>The <code>Send</code> marker trait indicates that ownership of the type implementing
|
||
<code>Send</code> can be transferred between threads. Almost every Rust type is <code>Send</code>,
|
||
but there are some exceptions, including <code>Rc&lt;T&gt;</code>: this cannot be <code>Send</code> because
|
||
if you cloned an <code>Rc&lt;T&gt;</code> value and tried to transfer ownership of the clone to
|
||
another thread, both threads might update the reference count at the same time.
|
||
For this reason, <code>Rc&lt;T&gt;</code> is implemented for use in single-threaded situations
|
||
where you don’t want to pay the thread-safe performance penalty.</p>
|
||
<p>Therefore, Rust’s type system and trait bounds ensure that you can never
|
||
accidentally send an <code>Rc&lt;T&gt;</code> value across threads unsafely. When we tried to do
|
||
this in Listing 16-14, we got the error <code>the trait Send is not implemented for Rc&lt;Mutex&lt;i32&gt;&gt;</code>. When we switched to <code>Arc&lt;T&gt;</code>, which is <code>Send</code>, the code
|
||
compiled.</p>
|
||
<p>Any type composed entirely of <code>Send</code> types is automatically marked as <code>Send</code> as
|
||
well. Almost all primitive types are <code>Send</code>, aside from raw pointers, which
|
||
we’ll discuss in Chapter 19.</p>
|
||
<h3><a class="header" href="#allowing-access-from-multiple-threads-with-sync" id="allowing-access-from-multiple-threads-with-sync">Allowing Access from Multiple Threads with <code>Sync</code></a></h3>
|
||
<p>The <code>Sync</code> marker trait indicates that it is safe for the type implementing
|
||
<code>Sync</code> to be referenced from multiple threads. In other words, any type <code>T</code> is
|
||
<code>Sync</code> if <code>&T</code> (a reference to <code>T</code>) is <code>Send</code>, meaning the reference can be
|
||
sent safely to another thread. Similar to <code>Send</code>, primitive types are <code>Sync</code>,
|
||
and types composed entirely of types that are <code>Sync</code> are also <code>Sync</code>.</p>
|
||
<p>The smart pointer <code>Rc&lt;T&gt;</code> is also not <code>Sync</code> for the same reasons that it’s not
|
||
<code>Send</code>. The <code>RefCell&lt;T&gt;</code> type (which we talked about in Chapter 15) and the
|
||
family of related <code>Cell&lt;T&gt;</code> types are not <code>Sync</code>. The implementation of borrow
|
||
checking that <code>RefCell&lt;T&gt;</code> does at runtime is not thread-safe. The smart
|
||
pointer <code>Mutex&lt;T&gt;</code> is <code>Sync</code> and can be used to share access with multiple
|
||
threads as you saw in the <a href="ch16-03-shared-state.html#sharing-a-mutext-between-multiple-threads">“Sharing a <code>Mutex&lt;T&gt;</code> Between Multiple
|
||
Threads”</a><!-- ignore --> section.</p>
|
||
<h3><a class="header" href="#implementing-send-and-sync-manually-is-unsafe" id="implementing-send-and-sync-manually-is-unsafe">Implementing <code>Send</code> and <code>Sync</code> Manually Is Unsafe</a></h3>
|
||
<p>Because types that are made up entirely of <code>Send</code> and <code>Sync</code> types are automatically
|
||
also <code>Send</code> and <code>Sync</code>, we don’t have to implement those traits manually. As
|
||
marker traits, they don’t even have any methods to implement. They’re just
|
||
useful for enforcing invariants related to concurrency.</p>
|
||
<p>Manually implementing these traits involves implementing unsafe Rust code.
|
||
We’ll talk about using unsafe Rust code in Chapter 19; for now, the important
|
||
information is that building new concurrent types not made up of <code>Send</code> and
|
||
<code>Sync</code> parts requires careful thought to uphold the safety guarantees.
|
||
<a href="https://doc.rust-lang.org/stable/nomicon/">The Rustonomicon</a> has more information about these guarantees and how to
|
||
uphold them.</p>
|
||
<h2><a class="header" href="#summary-15" id="summary-15">Summary</a></h2>
|
||
<p>This isn’t the last you’ll see of concurrency in this book: the project in
|
||
Chapter 20 will use the concepts in this chapter in a more realistic situation
|
||
than the smaller examples discussed here.</p>
|
||
<p>As mentioned earlier, because very little of how Rust handles concurrency is
|
||
part of the language, many concurrency solutions are implemented as crates.
|
||
These evolve more quickly than the standard library, so be sure to search
|
||
online for the current, state-of-the-art crates to use in multithreaded
|
||
situations.</p>
|
||
<p>The Rust standard library provides channels for message passing and smart
|
||
pointer types, such as <code>Mutex&lt;T&gt;</code> and <code>Arc&lt;T&gt;</code>, that are safe to use in
|
||
concurrent contexts. The type system and the borrow checker ensure that the
|
||
code using these solutions won’t end up with data races or invalid references.
|
||
Once you get your code to compile, you can rest assured that it will happily
|
||
run on multiple threads without the kinds of hard-to-track-down bugs common in
|
||
other languages. Concurrent programming is no longer a concept to be afraid of:
|
||
go forth and make your programs concurrent, fearlessly!</p>
|
||
<p>Next, we’ll talk about idiomatic ways to model problems and structure solutions
|
||
as your Rust programs get bigger. In addition, we’ll discuss how Rust’s idioms
|
||
relate to those you might be familiar with from object-oriented programming.</p>
|
||
<h1><a class="header" href="#object-oriented-programming-features-of-rust" id="object-oriented-programming-features-of-rust">Object Oriented Programming Features of Rust</a></h1>
|
||
<p>Object-oriented programming (OOP) is a way of modeling programs. Objects came
|
||
from Simula in the 1960s. Those objects influenced Alan Kay’s programming
|
||
architecture in which objects pass messages to each other. He coined the term
|
||
<em>object-oriented programming</em> in 1967 to describe this architecture. Many
|
||
competing definitions describe what OOP is; some definitions would classify
|
||
Rust as object oriented, but other definitions would not. In this chapter,
|
||
we’ll explore certain characteristics that are commonly considered object
|
||
oriented and how those characteristics translate to idiomatic Rust. We’ll then
|
||
show you how to implement an object-oriented design pattern in Rust and discuss
|
||
the trade-offs of doing so versus implementing a solution using some of Rust’s
|
||
strengths instead.</p>
|
||
<h2><a class="header" href="#characteristics-of-object-oriented-languages" id="characteristics-of-object-oriented-languages">Characteristics of Object-Oriented Languages</a></h2>
|
||
<p>There is no consensus in the programming community about what features a
|
||
language must have to be considered object oriented. Rust is influenced by many
|
||
programming paradigms, including OOP; for example, we explored the features
|
||
that came from functional programming in Chapter 13. Arguably, OOP languages
|
||
share certain common characteristics, namely objects, encapsulation, and
|
||
inheritance. Let’s look at what each of those characteristics means and whether
|
||
Rust supports it.</p>
|
||
<h3><a class="header" href="#objects-contain-data-and-behavior" id="objects-contain-data-and-behavior">Objects Contain Data and Behavior</a></h3>
|
||
<p>The book <em>Design Patterns: Elements of Reusable Object-Oriented Software</em> by
|
||
Erich Gamma, Richard Helm, Ralph Johnson, and John Vlissides (Addison-Wesley
|
||
Professional, 1994), colloquially referred to as <em>The Gang of Four</em> book, is a
|
||
catalog of object-oriented design patterns. It defines OOP this way:</p>
|
||
<blockquote>
|
||
<p>Object-oriented programs are made up of objects. An <em>object</em> packages both
|
||
data and the procedures that operate on that data. The procedures are
|
||
typically called <em>methods</em> or <em>operations</em>.</p>
|
||
</blockquote>
|
||
<p>Using this definition, Rust is object oriented: structs and enums have data,
|
||
and <code>impl</code> blocks provide methods on structs and enums. Even though structs and
|
||
enums with methods aren’t <em>called</em> objects, they provide the same
|
||
functionality, according to the Gang of Four’s definition of objects.</p>
|
||
<h3><a class="header" href="#encapsulation-that-hides-implementation-details" id="encapsulation-that-hides-implementation-details">Encapsulation that Hides Implementation Details</a></h3>
|
||
<p>Another aspect commonly associated with OOP is the idea of <em>encapsulation</em>,
|
||
which means that the implementation details of an object aren’t accessible to
|
||
code using that object. Therefore, the only way to interact with an object is
|
||
through its public API; code using the object shouldn’t be able to reach into
|
||
the object’s internals and change data or behavior directly. This enables the
|
||
programmer to change and refactor an object’s internals without needing to
|
||
change the code that uses the object.</p>
|
||
<p>We discussed how to control encapsulation in Chapter 7: we can use the <code>pub</code>
|
||
keyword to decide which modules, types, functions, and methods in our code
|
||
should be public, and by default everything else is private. For example, we
|
||
can define a struct <code>AveragedCollection</code> that has a field containing a vector
|
||
of <code>i32</code> values. The struct can also have a field that contains the average of
|
||
the values in the vector, meaning the average doesn’t have to be computed
|
||
on demand whenever anyone needs it. In other words, <code>AveragedCollection</code> will
|
||
cache the calculated average for us. Listing 17-1 has the definition of the
|
||
<code>AveragedCollection</code> struct:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct AveragedCollection {
|
||
list: Vec&lt;i32&gt;,
|
||
average: f64,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-1: An <code>AveragedCollection</code> struct that
|
||
maintains a list of integers and the average of the items in the
|
||
collection</span></p>
|
||
<p>The struct is marked <code>pub</code> so that other code can use it, but the fields within
|
||
the struct remain private. This is important in this case because we want to
|
||
ensure that whenever a value is added to or removed from the list, the average is
|
||
also updated. We do this by implementing <code>add</code>, <code>remove</code>, and <code>average</code> methods
|
||
on the struct, as shown in Listing 17-2:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct AveragedCollection {
|
||
</span><span class="boring"> list: Vec&lt;i32&gt;,
|
||
</span><span class="boring"> average: f64,
|
||
</span><span class="boring">}
|
||
</span>impl AveragedCollection {
|
||
pub fn add(&mut self, value: i32) {
|
||
self.list.push(value);
|
||
self.update_average();
|
||
}
|
||
|
||
pub fn remove(&amp;mut self) -&gt; Option&lt;i32&gt; {
|
||
let result = self.list.pop();
|
||
match result {
|
||
Some(value) => {
|
||
self.update_average();
|
||
Some(value)
|
||
},
|
||
None => None,
|
||
}
|
||
}
|
||
|
||
pub fn average(&self) -> f64 {
|
||
self.average
|
||
}
|
||
|
||
fn update_average(&mut self) {
|
||
let total: i32 = self.list.iter().sum();
|
||
self.average = total as f64 / self.list.len() as f64;
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-2: Implementations of the public methods
|
||
<code>add</code>, <code>remove</code>, and <code>average</code> on <code>AveragedCollection</code></span></p>
|
||
<p>The public methods <code>add</code>, <code>remove</code>, and <code>average</code> are the only ways to access
|
||
or modify data in an instance of <code>AveragedCollection</code>. When an item is added
|
||
to <code>list</code> using the <code>add</code> method or removed using the <code>remove</code> method, the
|
||
implementations of each call the private <code>update_average</code> method that handles
|
||
updating the <code>average</code> field as well.</p>
|
||
<p>We leave the <code>list</code> and <code>average</code> fields private so there is no way for
|
||
external code to add or remove items to or from the <code>list</code> field directly; otherwise,
|
||
the <code>average</code> field might become out of sync when the <code>list</code> changes. The
|
||
<code>average</code> method returns the value in the <code>average</code> field, allowing external
|
||
code to read the <code>average</code> but not modify it.</p>
|
||
<p>Because we’ve encapsulated the implementation details of the struct
|
||
<code>AveragedCollection</code>, we can easily change aspects, such as the data structure,
|
||
in the future. For instance, we could use a <code>HashSet&lt;i32&gt;</code> instead of a
|
||
<code>Vec&lt;i32&gt;</code> for the <code>list</code> field. As long as the signatures of the <code>add</code>,
|
||
<code>remove</code>, and <code>average</code> public methods stay the same, code using
|
||
<code>AveragedCollection</code> wouldn’t need to change. If we made <code>list</code> public instead,
|
||
this wouldn’t necessarily be the case: <code>HashSet&lt;i32&gt;</code> and <code>Vec&lt;i32&gt;</code> have
|
||
different methods for adding and removing items, so the external code would
|
||
likely have to change if it were modifying <code>list</code> directly.</p>
|
||
<p>If encapsulation is a required aspect for a language to be considered object
|
||
oriented, then Rust meets that requirement. The option to use <code>pub</code> or not for
|
||
different parts of code enables encapsulation of implementation details.</p>
|
||
<h3><a class="header" href="#inheritance-as-a-type-system-and-as-code-sharing" id="inheritance-as-a-type-system-and-as-code-sharing">Inheritance as a Type System and as Code Sharing</a></h3>
|
||
<p><em>Inheritance</em> is a mechanism whereby an object can inherit from another
|
||
object’s definition, thus gaining the parent object’s data and behavior without
|
||
you having to define them again.</p>
|
||
<p>If a language must have inheritance to be an object-oriented language, then
|
||
Rust is not one. There is no way to define a struct that inherits the parent
|
||
struct’s fields and method implementations. However, if you’re used to having
|
||
inheritance in your programming toolbox, you can use other solutions in Rust,
|
||
depending on your reason for reaching for inheritance in the first place.</p>
|
||
<p>You choose inheritance for two main reasons. One is for reuse of code: you can
|
||
implement particular behavior for one type, and inheritance enables you to
|
||
reuse that implementation for a different type. You can share Rust code using
|
||
default trait method implementations instead, which you saw in Listing 10-14
|
||
when we added a default implementation of the <code>summarize</code> method on the
|
||
<code>Summary</code> trait. Any type implementing the <code>Summary</code> trait would have the
|
||
<code>summarize</code> method available on it without any further code. This is similar to
|
||
a parent class having an implementation of a method and an inheriting child
|
||
class also having the implementation of the method. We can also override the
|
||
default implementation of the <code>summarize</code> method when we implement the
|
||
<code>Summary</code> trait, which is similar to a child class overriding the
|
||
implementation of a method inherited from a parent class.</p>
|
||
<p>The other reason to use inheritance relates to the type system: to enable a
|
||
child type to be used in the same places as the parent type. This is also
|
||
called <em>polymorphism</em>, which means that you can substitute multiple objects for
|
||
each other at runtime if they share certain characteristics.</p>
|
||
<blockquote>
|
||
<h3><a class="header" href="#polymorphism" id="polymorphism">Polymorphism</a></h3>
|
||
<p>To many people, polymorphism is synonymous with inheritance. But it’s
|
||
actually a more general concept that refers to code that can work with data
|
||
of multiple types. For inheritance, those types are generally subclasses.</p>
|
||
<p>Rust instead uses generics to abstract over different possible types and
|
||
trait bounds to impose constraints on what those types must provide. This is
|
||
sometimes called <em>bounded parametric polymorphism</em>.</p>
|
||
</blockquote>
|
||
<p>Inheritance has recently fallen out of favor as a programming design solution
|
||
in many programming languages because it’s often at risk of sharing more code
|
||
than necessary. Subclasses shouldn’t always share all characteristics of their
|
||
parent class but will do so with inheritance. This can make a program’s design
|
||
less flexible. It also introduces the possibility of calling methods on
|
||
subclasses that don’t make sense or that cause errors because the methods don’t
|
||
apply to the subclass. In addition, some languages will only allow a subclass
|
||
to inherit from one class, further restricting the flexibility of a program’s
|
||
design.</p>
|
||
<p>For these reasons, Rust takes a different approach, using trait objects instead
|
||
of inheritance. Let’s look at how trait objects enable polymorphism in Rust.</p>
|
||
<h2><a class="header" href="#using-trait-objects-that-allow-for-values-of-different-types" id="using-trait-objects-that-allow-for-values-of-different-types">Using Trait Objects That Allow for Values of Different Types</a></h2>
|
||
<p>In Chapter 8, we mentioned that one limitation of vectors is that they can
|
||
store elements of only one type. We created a workaround in Listing 8-10 where
|
||
we defined a <code>SpreadsheetCell</code> enum that had variants to hold integers, floats,
|
||
and text. This meant we could store different types of data in each cell and
|
||
still have a vector that represented a row of cells. This is a perfectly good
|
||
solution when our interchangeable items are a fixed set of types that we know
|
||
when our code is compiled.</p>
|
||
<p>However, sometimes we want our library user to be able to extend the set of
|
||
types that are valid in a particular situation. To show how we might achieve
|
||
this, we’ll create an example graphical user interface (GUI) tool that iterates
|
||
through a list of items, calling a <code>draw</code> method on each one to draw it to the
|
||
screen—a common technique for GUI tools. We’ll create a library crate called
|
||
<code>gui</code> that contains the structure of a GUI library. This crate might include
|
||
some types for people to use, such as <code>Button</code> or <code>TextField</code>. In addition,
|
||
<code>gui</code> users will want to create their own types that can be drawn: for
|
||
instance, one programmer might add an <code>Image</code> and another might add a
|
||
<code>SelectBox</code>.</p>
|
||
<p>We won’t implement a fully fledged GUI library for this example but will show
|
||
how the pieces would fit together. At the time of writing the library, we can’t
|
||
know and define all the types other programmers might want to create. But we do
|
||
know that <code>gui</code> needs to keep track of many values of different types, and it
|
||
needs to call a <code>draw</code> method on each of these differently typed values. It
|
||
doesn’t need to know exactly what will happen when we call the <code>draw</code> method,
|
||
just that the value will have that method available for us to call.</p>
|
||
<p>To do this in a language with inheritance, we might define a class named
|
||
<code>Component</code> that has a method named <code>draw</code> on it. The other classes, such as
|
||
<code>Button</code>, <code>Image</code>, and <code>SelectBox</code>, would inherit from <code>Component</code> and thus
|
||
inherit the <code>draw</code> method. They could each override the <code>draw</code> method to define
|
||
their custom behavior, but the framework could treat all of the types as if
|
||
they were <code>Component</code> instances and call <code>draw</code> on them. But because Rust
|
||
doesn’t have inheritance, we need another way to structure the <code>gui</code> library to
|
||
allow users to extend it with new types.</p>
|
||
<h3><a class="header" href="#defining-a-trait-for-common-behavior" id="defining-a-trait-for-common-behavior">Defining a Trait for Common Behavior</a></h3>
|
||
<p>To implement the behavior we want <code>gui</code> to have, we’ll define a trait named
|
||
<code>Draw</code> that will have one method named <code>draw</code>. Then we can define a vector that
|
||
takes a <em>trait object</em>. A trait object points to both an instance of a type
|
||
implementing our specified trait and a table used to look up trait
|
||
methods on that type at runtime. We create a trait object by specifying some
|
||
sort of pointer, such as a <code>&amp;</code> reference or a <code>Box&lt;T&gt;</code> smart pointer, then the
|
||
<code>dyn</code> keyword, and then specifying the relevant trait. (We’ll talk about the
|
||
reason trait objects must use a pointer in Chapter 19 in the section
|
||
<a href="ch19-04-advanced-types.html#dynamically-sized-types-and-the-sized-trait">“Dynamically Sized Types and the <code>Sized</code> Trait.”</a><!--
|
||
ignore -->) We can use trait objects in place of a generic or concrete type.
|
||
Wherever we use a trait object, Rust’s type system will ensure at compile time
|
||
that any value used in that context will implement the trait object’s trait.
|
||
Consequently, we don’t need to know all the possible types at compile time.</p>
|
||
<p>We’ve mentioned that in Rust, we refrain from calling structs and enums
|
||
“objects” to distinguish them from other languages’ objects. In a struct or
|
||
enum, the data in the struct fields and the behavior in <code>impl</code> blocks are
|
||
separated, whereas in other languages, the data and behavior combined into one
|
||
concept is often labeled an object. However, trait objects <em>are</em> more like
|
||
objects in other languages in the sense that they combine data and behavior.
|
||
But trait objects differ from traditional objects in that we can’t add data to
|
||
a trait object. Trait objects aren’t as generally useful as objects in other
|
||
languages: their specific purpose is to allow abstraction across common
|
||
behavior.</p>
|
||
<p>Listing 17-3 shows how to define a trait named <code>Draw</code> with one method named
|
||
<code>draw</code>:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Draw {
|
||
fn draw(&self);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-3: Definition of the <code>Draw</code> trait</span></p>
|
||
<p>This syntax should look familiar from our discussions on how to define traits
|
||
in Chapter 10. Next comes some new syntax: Listing 17-4 defines a struct named
|
||
<code>Screen</code> that holds a vector named <code>components</code>. The elements of this vector are of type
|
||
<code>Box&lt;dyn Draw&gt;</code>, which is a trait object; it’s a stand-in for any type inside
|
||
a <code>Box</code> that implements the <code>Draw</code> trait.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub trait Draw {
|
||
</span><span class="boring"> fn draw(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>pub struct Screen {
|
||
pub components: Vec&lt;Box&lt;dyn Draw&gt;&gt;,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-4: Definition of the <code>Screen</code> struct with a
|
||
<code>components</code> field holding a vector of trait objects that implement the <code>Draw</code>
|
||
trait</span></p>
|
||
<p>On the <code>Screen</code> struct, we’ll define a method named <code>run</code> that will call the
|
||
<code>draw</code> method on each of its <code>components</code>, as shown in Listing 17-5:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub trait Draw {
|
||
</span><span class="boring"> fn draw(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">pub struct Screen {
|
||
</span><span class="boring"> pub components: Vec&lt;Box&lt;dyn Draw&gt;&gt;,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Screen {
|
||
pub fn run(&self) {
|
||
for component in self.components.iter() {
|
||
component.draw();
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-5: A <code>run</code> method on <code>Screen</code> that calls the
|
||
<code>draw</code> method on each component</span></p>
|
||
<p>This works differently from defining a struct that uses a generic type
|
||
parameter with trait bounds. A generic type parameter can only be substituted
|
||
with one concrete type at a time, whereas trait objects allow for multiple
|
||
concrete types to fill in for the trait object at runtime. For example, we
|
||
could have defined the <code>Screen</code> struct using a generic type and a trait bound
|
||
as in Listing 17-6:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub trait Draw {
|
||
</span><span class="boring"> fn draw(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>pub struct Screen&lt;T: Draw&gt; {
|
||
pub components: Vec&lt;T&gt;,
|
||
}
|
||
|
||
impl&lt;T&gt; Screen&lt;T&gt;
|
||
where T: Draw {
|
||
pub fn run(&self) {
|
||
for component in self.components.iter() {
|
||
component.draw();
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-6: An alternate implementation of the <code>Screen</code>
|
||
struct and its <code>run</code> method using generics and trait bounds</span></p>
|
||
<p>This restricts us to a <code>Screen</code> instance that has a list of components all of
|
||
type <code>Button</code> or all of type <code>TextField</code>. If you’ll only ever have homogeneous
|
||
collections, using generics and trait bounds is preferable because the
|
||
definitions will be monomorphized at compile time to use the concrete types.</p>
|
||
<p>On the other hand, with the method using trait objects, one <code>Screen</code> instance
|
||
can hold a <code>Vec&lt;T&gt;</code> that contains a <code>Box&lt;Button&gt;</code> as well as a
|
||
<code>Box&lt;TextField&gt;</code>. Let’s look at how this works, and then we’ll talk about the
|
||
runtime performance implications.</p>
|
||
<h3><a class="header" href="#implementing-the-trait" id="implementing-the-trait">Implementing the Trait</a></h3>
|
||
<p>Now we’ll add some types that implement the <code>Draw</code> trait. We’ll provide the
|
||
<code>Button</code> type. Again, actually implementing a GUI library is beyond the scope
|
||
of this book, so the <code>draw</code> method won’t have any useful implementation in its
|
||
body. To imagine what the implementation might look like, a <code>Button</code> struct
|
||
might have fields for <code>width</code>, <code>height</code>, and <code>label</code>, as shown in Listing 17-7:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub trait Draw {
|
||
</span><span class="boring"> fn draw(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>pub struct Button {
|
||
pub width: u32,
|
||
pub height: u32,
|
||
pub label: String,
|
||
}
|
||
|
||
impl Draw for Button {
|
||
fn draw(&self) {
|
||
// code to actually draw a button
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-7: A <code>Button</code> struct that implements the
|
||
<code>Draw</code> trait</span></p>
|
||
<p>The <code>width</code>, <code>height</code>, and <code>label</code> fields on <code>Button</code> will differ from the
|
||
fields on other components, such as a <code>TextField</code> type, that might have those
|
||
fields plus a <code>placeholder</code> field instead. Each of the types we want to draw on
|
||
the screen will implement the <code>Draw</code> trait but will use different code in the
|
||
<code>draw</code> method to define how to draw that particular type, as <code>Button</code> has here
|
||
(without the actual GUI code, which is beyond the scope of this chapter). The
|
||
<code>Button</code> type, for instance, might have an additional <code>impl</code> block containing
|
||
methods related to what happens when a user clicks the button. These kinds of
|
||
methods won’t apply to types like <code>TextField</code>.</p>
|
||
<p>If someone using our library decides to implement a <code>SelectBox</code> struct that has
|
||
<code>width</code>, <code>height</code>, and <code>options</code> fields, they implement the <code>Draw</code> trait on the
|
||
<code>SelectBox</code> type as well, as shown in Listing 17-8:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use gui::Draw;
|
||
|
||
struct SelectBox {
|
||
width: u32,
|
||
height: u32,
|
||
options: Vec&lt;String&gt;,
|
||
}
|
||
|
||
impl Draw for SelectBox {
|
||
fn draw(&self) {
|
||
// code to actually draw a select box
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 17-8: Another crate using <code>gui</code> and implementing
|
||
the <code>Draw</code> trait on a <code>SelectBox</code> struct</span></p>
|
||
<p>Our library’s user can now write their <code>main</code> function to create a <code>Screen</code>
|
||
instance. To the <code>Screen</code> instance, they can add a <code>SelectBox</code> and a <code>Button</code>
|
||
by putting each in a <code>Box&lt;T&gt;</code> to become a trait object. They can then call the
|
||
<code>run</code> method on the <code>Screen</code> instance, which will call <code>draw</code> on each of the
|
||
components. Listing 17-9 shows this implementation:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use gui::{Screen, Button};
|
||
|
||
fn main() {
|
||
let screen = Screen {
|
||
components: vec![
|
||
Box::new(SelectBox {
|
||
width: 75,
|
||
height: 10,
|
||
options: vec![
|
||
String::from("Yes"),
|
||
String::from("Maybe"),
|
||
String::from("No")
|
||
],
|
||
}),
|
||
Box::new(Button {
|
||
width: 50,
|
||
height: 10,
|
||
label: String::from("OK"),
|
||
}),
|
||
],
|
||
};
|
||
|
||
screen.run();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 17-9: Using trait objects to store values of
|
||
different types that implement the same trait</span></p>
|
||
<p>When we wrote the library, we didn’t know that someone might add the
|
||
<code>SelectBox</code> type, but our <code>Screen</code> implementation was able to operate on the
|
||
new type and draw it because <code>SelectBox</code> implements the <code>Draw</code> trait, which
|
||
means it implements the <code>draw</code> method.</p>
|
||
<p>This concept—of being concerned only with the messages a value responds to
|
||
rather than the value’s concrete type—is similar to the concept of <em>duck
|
||
typing</em> in dynamically typed languages: if it walks like a duck and quacks
|
||
like a duck, then it must be a duck! In the implementation of <code>run</code> on <code>Screen</code>
|
||
in Listing 17-5, <code>run</code> doesn’t need to know what the concrete type of each
|
||
component is. It doesn’t check whether a component is an instance of a <code>Button</code>
|
||
or a <code>SelectBox</code>; it just calls the <code>draw</code> method on the component. By
|
||
specifying <code>Box&lt;dyn Draw&gt;</code> as the type of the values in the <code>components</code>
|
||
vector, we’ve defined <code>Screen</code> to need values that we can call the <code>draw</code>
|
||
method on.</p>
|
||
<p>The advantage of using trait objects and Rust’s type system to write code
|
||
similar to code using duck typing is that we never have to check whether a
|
||
value implements a particular method at runtime or worry about getting errors
|
||
if a value doesn’t implement a method but we call it anyway. Rust won’t compile
|
||
our code if the values don’t implement the traits that the trait objects need.</p>
|
||
<p>For example, Listing 17-10 shows what happens if we try to create a <code>Screen</code>
|
||
with a <code>String</code> as a component:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">use gui::Screen;
|
||
|
||
fn main() {
|
||
let screen = Screen {
|
||
components: vec![
|
||
Box::new(String::from("Hi")),
|
||
],
|
||
};
|
||
|
||
screen.run();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 17-10: Attempting to use a type that doesn’t
|
||
implement the trait object’s trait</span></p>
|
||
<p>We’ll get this error because <code>String</code> doesn’t implement the <code>Draw</code> trait:</p>
|
||
<pre><code class="language-text">error[E0277]: the trait bound `std::string::String: gui::Draw` is not satisfied
|
||
--> src/main.rs:7:13
|
||
|
|
||
7 | Box::new(String::from("Hi")),
|
||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the trait gui::Draw is not
|
||
implemented for `std::string::String`
|
||
|
|
||
= note: required for the cast to the object type `gui::Draw`
|
||
</code></pre>
|
||
<p>This error lets us know that either we’re passing something to <code>Screen</code> we
|
||
didn’t mean to pass and we should pass a different type or we should implement
|
||
<code>Draw</code> on <code>String</code> so that <code>Screen</code> is able to call <code>draw</code> on it.</p>
|
||
<h3><a class="header" href="#trait-objects-perform-dynamic-dispatch" id="trait-objects-perform-dynamic-dispatch">Trait Objects Perform Dynamic Dispatch</a></h3>
|
||
<p>Recall in the <a href="ch10-01-syntax.html#performance-of-code-using-generics">“Performance of Code Using
|
||
Generics”</a><!-- ignore --> section in
|
||
Chapter 10 our discussion on the monomorphization process performed by the
|
||
compiler when we use trait bounds on generics: the compiler generates
|
||
nongeneric implementations of functions and methods for each concrete type
|
||
that we use in place of a generic type parameter. The code that results from
|
||
monomorphization is doing <em>static dispatch</em>, which is when the compiler knows
|
||
what method you’re calling at compile time. This is opposed to <em>dynamic
|
||
dispatch</em>, which is when the compiler can’t tell at compile time which method
|
||
you’re calling. In dynamic dispatch cases, the compiler emits code that at
|
||
runtime will figure out which method to call.</p>
|
||
<p>When we use trait objects, Rust must use dynamic dispatch. The compiler doesn’t
|
||
know all the types that might be used with the code that is using trait
|
||
objects, so it doesn’t know which method implemented on which type to call.
|
||
Instead, at runtime, Rust uses the pointers inside the trait object to know
|
||
which method to call. There is a runtime cost when this lookup happens that
|
||
doesn’t occur with static dispatch. Dynamic dispatch also prevents the compiler
|
||
from choosing to inline a method’s code, which in turn prevents some
|
||
optimizations. However, we did get extra flexibility in the code that we wrote
|
||
in Listing 17-5 and were able to support in Listing 17-9, so it’s a trade-off
|
||
to consider.</p>
|
||
<h3><a class="header" href="#object-safety-is-required-for-trait-objects" id="object-safety-is-required-for-trait-objects">Object Safety Is Required for Trait Objects</a></h3>
|
||
<p>You can only make <em>object-safe</em> traits into trait objects. Some complex rules
|
||
govern all the properties that make a trait object safe, but in practice, only
|
||
two rules are relevant. A trait is object safe if all the methods defined in
|
||
the trait have the following properties:</p>
|
||
<ul>
|
||
<li>The return type isn’t <code>Self</code>.</li>
|
||
<li>There are no generic type parameters.</li>
|
||
</ul>
|
||
<p>The <code>Self</code> keyword is an alias for the type we’re implementing the traits or
|
||
methods on. Trait objects must be object safe because once you’ve used a trait
|
||
object, Rust no longer knows the concrete type that’s implementing that trait.
|
||
If a trait method returns the concrete <code>Self</code> type, but a trait object forgets
|
||
the exact type that <code>Self</code> is, there is no way the method can use the original
|
||
concrete type. The same is true of generic type parameters that are filled in
|
||
with concrete type parameters when the trait is used: the concrete types become
|
||
part of the type that implements the trait. When the type is forgotten through
|
||
the use of a trait object, there is no way to know what types to fill in the
|
||
generic type parameters with.</p>
|
||
<p>An example of a trait whose methods are not object safe is the standard
|
||
library’s <code>Clone</code> trait. The signature for the <code>clone</code> method in the <code>Clone</code>
|
||
trait looks like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Clone {
|
||
fn clone(&self) -> Self;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>String</code> type implements the <code>Clone</code> trait, and when we call the <code>clone</code>
|
||
method on an instance of <code>String</code> we get back an instance of <code>String</code>.
|
||
Similarly, if we call <code>clone</code> on an instance of <code>Vec&lt;T&gt;</code>, we get back an
|
||
instance of <code>Vec&lt;T&gt;</code>. The signature of <code>clone</code> needs to know what type will
|
||
stand in for <code>Self</code>, because that’s the return type.</p>
|
||
<p>The compiler will indicate when you’re trying to do something that violates the
|
||
rules of object safety in regard to trait objects. For example, let’s say we
|
||
tried to implement the <code>Screen</code> struct in Listing 17-4 to hold types that
|
||
implement the <code>Clone</code> trait instead of the <code>Draw</code> trait, like this:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">pub struct Screen {
|
||
pub components: Vec<Box<dyn Clone>>,
|
||
}
|
||
</code></pre>
|
||
<p>We would get this error:</p>
|
||
<pre><code class="language-text">error[E0038]: the trait `std::clone::Clone` cannot be made into an object
|
||
--> src/lib.rs:2:5
|
||
|
|
||
2 | pub components: Vec<Box<dyn Clone>>,
|
||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the trait `std::clone::Clone`
|
||
cannot be made into an object
|
||
|
|
||
= note: the trait cannot require that `Self : Sized`
|
||
</code></pre>
|
||
<p>This error means you can’t use this trait as a trait object in this way. If
|
||
you’re interested in more details on object safety, see <a href="https://github.com/rust-lang/rfcs/blob/master/text/0255-object-safety.md">Rust RFC 255</a>.</p>
|
||
<h2><a class="header" href="#implementing-an-object-oriented-design-pattern" id="implementing-an-object-oriented-design-pattern">Implementing an Object-Oriented Design Pattern</a></h2>
|
||
<p>The <em>state pattern</em> is an object-oriented design pattern. The crux of the
|
||
pattern is that a value has some internal state, which is represented by a set
|
||
of <em>state objects</em>, and the value’s behavior changes based on the internal
|
||
state. The state objects share functionality: in Rust, of course, we use
|
||
structs and traits rather than objects and inheritance. Each state object is
|
||
responsible for its own behavior and for governing when it should change into
|
||
another state. The value that holds a state object knows nothing about the
|
||
different behavior of the states or when to transition between states.</p>
|
||
<p>Using the state pattern means when the business requirements of the program
|
||
change, we won’t need to change the code of the value holding the state or the
|
||
code that uses the value. We’ll only need to update the code inside one of the
|
||
state objects to change its rules or perhaps add more state objects. Let’s look
|
||
at an example of the state design pattern and how to use it in Rust.</p>
|
||
<p>We’ll implement a blog post workflow in an incremental way. The blog’s final
|
||
functionality will look like this:</p>
|
||
<ol>
|
||
<li>A blog post starts as an empty draft.</li>
|
||
<li>When the draft is done, a review of the post is requested.</li>
|
||
<li>When the post is approved, it gets published.</li>
|
||
<li>Only published blog posts return content to print, so unapproved posts can’t
|
||
accidentally be published.</li>
|
||
</ol>
|
||
<p>Any other changes attempted on a post should have no effect. For example, if we
|
||
try to approve a draft blog post before we’ve requested a review, the post
|
||
should remain an unpublished draft.</p>
|
||
<p>Listing 17-11 shows this workflow in code form: this is an example usage of the
|
||
API we’ll implement in a library crate named <code>blog</code>. This won’t compile yet
|
||
because we haven’t implemented the <code>blog</code> crate yet.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use blog::Post;
|
||
|
||
fn main() {
|
||
let mut post = Post::new();
|
||
|
||
post.add_text("I ate a salad for lunch today");
|
||
assert_eq!("", post.content());
|
||
|
||
post.request_review();
|
||
assert_eq!("", post.content());
|
||
|
||
post.approve();
|
||
assert_eq!("I ate a salad for lunch today", post.content());
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 17-11: Code that demonstrates the desired
|
||
behavior we want our <code>blog</code> crate to have</span></p>
|
||
<p>We want to allow the user to create a new draft blog post with <code>Post::new</code>.
|
||
Then we want to allow text to be added to the blog post while it’s in the draft
|
||
state. If we try to get the post’s content immediately, before approval,
|
||
we shouldn’t get any text because the post is still a draft. We’ve added
|
||
<code>assert_eq!</code> in the code for demonstration purposes. An excellent unit test for
|
||
this would be to assert that a draft blog post returns an empty string from the
|
||
<code>content</code> method, but we’re not going to write tests for this example.</p>
|
||
<p>Next, we want to enable a request for a review of the post, and we want
|
||
<code>content</code> to return an empty string while waiting for the review. When the post
|
||
receives approval, it should get published, meaning the text of the post will
|
||
be returned when <code>content</code> is called.</p>
|
||
<p>Notice that the only type we’re interacting with from the crate is the <code>Post</code>
|
||
type. This type will use the state pattern and will hold a value that will be
|
||
one of three state objects representing the various states a post can be
|
||
in—draft, waiting for review, or published. Changing from one state to another
|
||
will be managed internally within the <code>Post</code> type. The states change in
|
||
response to the methods called by our library’s users on the <code>Post</code> instance,
|
||
but they don’t have to manage the state changes directly. Also, users can’t
|
||
make a mistake with the states, like publishing a post before it’s reviewed.</p>
|
||
<h3><a class="header" href="#defining-post-and-creating-a-new-instance-in-the-draft-state" id="defining-post-and-creating-a-new-instance-in-the-draft-state">Defining <code>Post</code> and Creating a New Instance in the Draft State</a></h3>
|
||
<p>Let’s get started on the implementation of the library! We know we need a
|
||
public <code>Post</code> struct that holds some content, so we’ll start with the
|
||
definition of the struct and an associated public <code>new</code> function to create an
|
||
instance of <code>Post</code>, as shown in Listing 17-12. We’ll also make a private
|
||
<code>State</code> trait. Then <code>Post</code> will hold a trait object of <code>Box&lt;dyn State&gt;</code>
|
||
inside an <code>Option&lt;T&gt;</code> in a private field named <code>state</code>. You’ll see why the
|
||
<code>Option&lt;T&gt;</code> is necessary in a bit.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct Post {
|
||
state: Option<Box<dyn State>>,
|
||
content: String,
|
||
}
|
||
|
||
impl Post {
|
||
pub fn new() -> Post {
|
||
Post {
|
||
state: Some(Box::new(Draft {})),
|
||
content: String::new(),
|
||
}
|
||
}
|
||
}
|
||
|
||
trait State {}
|
||
|
||
struct Draft {}
|
||
|
||
impl State for Draft {}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-12: Definition of a <code>Post</code> struct and a <code>new</code>
|
||
function that creates a new <code>Post</code> instance, a <code>State</code> trait, and a <code>Draft</code>
|
||
struct</span></p>
|
||
<p>The <code>State</code> trait defines the behavior shared by different post states, and the
|
||
<code>Draft</code>, <code>PendingReview</code>, and <code>Published</code> states will all implement the <code>State</code>
|
||
trait. For now, the trait doesn’t have any methods, and we’ll start by defining
|
||
just the <code>Draft</code> state because that is the state we want a post to start in.</p>
|
||
<p>When we create a new <code>Post</code>, we set its <code>state</code> field to a <code>Some</code> value that
|
||
holds a <code>Box</code>. This <code>Box</code> points to a new instance of the <code>Draft</code> struct. This
|
||
ensures whenever we create a new instance of <code>Post</code>, it will start out as a
|
||
draft. Because the <code>state</code> field of <code>Post</code> is private, there is no way to
|
||
create a <code>Post</code> in any other state! In the <code>Post::new</code> function, we set the
|
||
<code>content</code> field to a new, empty <code>String</code>.</p>
|
||
<h3><a class="header" href="#storing-the-text-of-the-post-content" id="storing-the-text-of-the-post-content">Storing the Text of the Post Content</a></h3>
|
||
<p>Listing 17-11 showed that we want to be able to call a method named
|
||
<code>add_text</code> and pass it a <code>&str</code> that is then added to the text content of the
|
||
blog post. We implement this as a method rather than exposing the <code>content</code>
|
||
field as <code>pub</code>. This means we can implement a method later that will control
|
||
how the <code>content</code> field’s data is read. The <code>add_text</code> method is pretty
|
||
straightforward, so let’s add the implementation in Listing 17-13 to the <code>impl Post</code> block:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Post {
|
||
// --snip--
|
||
pub fn add_text(&mut self, text: &str) {
|
||
self.content.push_str(text);
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-13: Implementing the <code>add_text</code> method to add
|
||
text to a post’s <code>content</code></span></p>
|
||
<p>The <code>add_text</code> method takes a mutable reference to <code>self</code>, because we’re
|
||
changing the <code>Post</code> instance that we’re calling <code>add_text</code> on. We then call
|
||
<code>push_str</code> on the <code>String</code> in <code>content</code> and pass the <code>text</code> argument to add to
|
||
the saved <code>content</code>. This behavior doesn’t depend on the state the post is in,
|
||
so it’s not part of the state pattern. The <code>add_text</code> method doesn’t interact
|
||
with the <code>state</code> field at all, but it is part of the behavior we want to
|
||
support.</p>
|
||
<h3><a class="header" href="#ensuring-the-content-of-a-draft-post-is-empty" id="ensuring-the-content-of-a-draft-post-is-empty">Ensuring the Content of a Draft Post Is Empty</a></h3>
|
||
<p>Even after we’ve called <code>add_text</code> and added some content to our post, we still
|
||
want the <code>content</code> method to return an empty string slice because the post is
|
||
still in the draft state, as shown on line 7 of Listing 17-11. For now, let’s
|
||
implement the <code>content</code> method with the simplest thing that will fulfill this
|
||
requirement: always returning an empty string slice. We’ll change this later
|
||
once we implement the ability to change a post’s state so it can be published.
|
||
So far, posts can only be in the draft state, so the post content should always
|
||
be empty. Listing 17-14 shows this placeholder implementation:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Post {
|
||
// --snip--
|
||
pub fn content(&self) -> &str {
|
||
""
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-14: Adding a placeholder implementation for
|
||
the <code>content</code> method on <code>Post</code> that always returns an empty string slice</span></p>
|
||
<p>With this added <code>content</code> method, everything in Listing 17-11 up to line 7
|
||
works as intended.</p>
|
||
<h3><a class="header" href="#requesting-a-review-of-the-post-changes-its-state" id="requesting-a-review-of-the-post-changes-its-state">Requesting a Review of the Post Changes Its State</a></h3>
|
||
<p>Next, we need to add functionality to request a review of a post, which should
|
||
change its state from <code>Draft</code> to <code>PendingReview</code>. Listing 17-15 shows this code:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> state: Option<Box<dyn State>>,
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Post {
|
||
// --snip--
|
||
pub fn request_review(&mut self) {
|
||
if let Some(s) = self.state.take() {
|
||
self.state = Some(s.request_review())
|
||
}
|
||
}
|
||
}
|
||
|
||
trait State {
|
||
fn request_review(self: Box<Self>) -> Box<dyn State>;
|
||
}
|
||
|
||
struct Draft {}
|
||
|
||
impl State for Draft {
|
||
fn request_review(self: Box<Self>) -> Box<dyn State> {
|
||
Box::new(PendingReview {})
|
||
}
|
||
}
|
||
|
||
struct PendingReview {}
|
||
|
||
impl State for PendingReview {
|
||
fn request_review(self: Box<Self>) -> Box<dyn State> {
|
||
self
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-15: Implementing <code>request_review</code> methods on
|
||
<code>Post</code> and the <code>State</code> trait</span></p>
|
||
<p>We give <code>Post</code> a public method named <code>request_review</code> that will take a mutable
|
||
reference to <code>self</code>. Then we call an internal <code>request_review</code> method on the
|
||
current state of <code>Post</code>, and this second <code>request_review</code> method consumes the
|
||
current state and returns a new state.</p>
|
||
<p>We’ve added the <code>request_review</code> method to the <code>State</code> trait; all types that
|
||
implement the trait will now need to implement the <code>request_review</code> method.
|
||
Note that rather than having <code>self</code>, <code>&self</code>, or <code>&mut self</code> as the first
|
||
parameter of the method, we have <code>self: Box&lt;Self&gt;</code>. This syntax means the
|
||
method is only valid when called on a <code>Box</code> holding the type. This syntax takes
|
||
ownership of <code>Box&lt;Self&gt;</code>, invalidating the old state so the state value of the
|
||
<code>Post</code> can transform into a new state.</p>
|
||
<p>To consume the old state, the <code>request_review</code> method needs to take ownership
|
||
of the state value. This is where the <code>Option</code> in the <code>state</code> field of <code>Post</code>
|
||
comes in: we call the <code>take</code> method to take the <code>Some</code> value out of the <code>state</code>
|
||
field and leave a <code>None</code> in its place, because Rust doesn’t let us have
|
||
unpopulated fields in structs. This lets us move the <code>state</code> value out of
|
||
<code>Post</code> rather than borrowing it. Then we’ll set the post’s <code>state</code> value to the
|
||
result of this operation.</p>
|
||
<p>We need to set <code>state</code> to <code>None</code> temporarily rather than setting it directly
|
||
with code like <code>self.state = self.state.request_review();</code> to get ownership of
|
||
the <code>state</code> value. This ensures <code>Post</code> can’t use the old <code>state</code> value after
|
||
we’ve transformed it into a new state.</p>
|
||
<p>The <code>request_review</code> method on <code>Draft</code> needs to return a new, boxed instance of
|
||
a new <code>PendingReview</code> struct, which represents the state when a post is waiting
|
||
for a review. The <code>PendingReview</code> struct also implements the <code>request_review</code>
|
||
method but doesn’t do any transformations. Rather, it returns itself, because
|
||
when we request a review on a post already in the <code>PendingReview</code> state, it
|
||
should stay in the <code>PendingReview</code> state.</p>
|
||
<p>Now we can start seeing the advantages of the state pattern: the
|
||
<code>request_review</code> method on <code>Post</code> is the same no matter its <code>state</code> value. Each
|
||
state is responsible for its own rules.</p>
|
||
<p>We’ll leave the <code>content</code> method on <code>Post</code> as is, returning an empty string
|
||
slice. We can now have a <code>Post</code> in the <code>PendingReview</code> state as well as in the
|
||
<code>Draft</code> state, but we want the same behavior in the <code>PendingReview</code> state.
|
||
Listing 17-11 now works up to line 10!</p>
|
||
<h3><a class="header" href="#adding-the-approve-method-that-changes-the-behavior-of-content" id="adding-the-approve-method-that-changes-the-behavior-of-content">Adding the <code>approve</code> Method that Changes the Behavior of <code>content</code></a></h3>
|
||
<p>The <code>approve</code> method will be similar to the <code>request_review</code> method: it will
|
||
set <code>state</code> to the value that the current state says it should have when that
|
||
state is approved, as shown in Listing 17-16:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> state: Option<Box<dyn State>>,
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Post {
|
||
// --snip--
|
||
pub fn approve(&mut self) {
|
||
if let Some(s) = self.state.take() {
|
||
self.state = Some(s.approve())
|
||
}
|
||
}
|
||
}
|
||
|
||
trait State {
|
||
fn request_review(self: Box<Self>) -> Box<dyn State>;
|
||
fn approve(self: Box<Self>) -> Box<dyn State>;
|
||
}
|
||
|
||
struct Draft {}
|
||
|
||
impl State for Draft {
|
||
<span class="boring"> fn request_review(self: Box<Self>) -> Box<dyn State> {
|
||
</span><span class="boring"> Box::new(PendingReview {})
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">
|
||
</span> // --snip--
|
||
fn approve(self: Box<Self>) -> Box<dyn State> {
|
||
self
|
||
}
|
||
}
|
||
|
||
struct PendingReview {}
|
||
|
||
impl State for PendingReview {
|
||
<span class="boring"> fn request_review(self: Box<Self>) -> Box<dyn State> {
|
||
</span><span class="boring"> self
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">
|
||
</span> // --snip--
|
||
fn approve(self: Box<Self>) -> Box<dyn State> {
|
||
Box::new(Published {})
|
||
}
|
||
}
|
||
|
||
struct Published {}
|
||
|
||
impl State for Published {
|
||
fn request_review(self: Box<Self>) -> Box<dyn State> {
|
||
self
|
||
}
|
||
|
||
fn approve(self: Box<Self>) -> Box<dyn State> {
|
||
self
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-16: Implementing the <code>approve</code> method on
|
||
<code>Post</code> and the <code>State</code> trait</span></p>
|
||
<p>We add the <code>approve</code> method to the <code>State</code> trait and add a new struct that
|
||
implements <code>State</code>, the <code>Published</code> state.</p>
|
||
<p>Similar to the way <code>request_review</code> on <code>PendingReview</code> works, if we call the <code>approve</code> method on a <code>Draft</code>, it
|
||
will have no effect because it will return <code>self</code>. When we call <code>approve</code> on
|
||
<code>PendingReview</code>, it returns a new, boxed instance of the <code>Published</code> struct.
|
||
The <code>Published</code> struct implements the <code>State</code> trait, and for both the
|
||
<code>request_review</code> method and the <code>approve</code> method, it returns itself, because
|
||
the post should stay in the <code>Published</code> state in those cases.</p>
|
||
<p>Now we need to update the <code>content</code> method on <code>Post</code>: if the state is
|
||
<code>Published</code>, we want to return the value in the post’s <code>content</code> field;
|
||
otherwise, we want to return an empty string slice, as shown in Listing 17-17:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">trait State {
|
||
</span><span class="boring"> fn content<'a>(&self, post: &'a Post) -> &'a str;
|
||
</span><span class="boring">}
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> state: Option<Box<dyn State>>,
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Post {
|
||
// --snip--
|
||
pub fn content(&self) -> &str {
|
||
self.state.as_ref().unwrap().content(self)
|
||
}
|
||
// --snip--
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-17: Updating the <code>content</code> method on <code>Post</code> to
|
||
delegate to a <code>content</code> method on <code>State</code></span></p>
|
||
<p>Because the goal is to keep all these rules inside the structs that implement
|
||
<code>State</code>, we call a <code>content</code> method on the value in <code>state</code> and pass the post
|
||
instance (that is, <code>self</code>) as an argument. Then we return the value that is
|
||
returned from using the <code>content</code> method on the <code>state</code> value.</p>
|
||
<p>We call the <code>as_ref</code> method on the <code>Option</code> because we want a reference to the
|
||
value inside the <code>Option</code> rather than ownership of the value. Because <code>state</code>
|
||
is an <code>Option&lt;Box&lt;dyn State&gt;&gt;</code>, when we call <code>as_ref</code>, an <code>Option&lt;&amp;Box&lt;dyn State&gt;&gt;</code> is
|
||
returned. If we didn’t call <code>as_ref</code>, we would get an error because we can’t
|
||
move <code>state</code> out of the borrowed <code>&self</code> of the function parameter.</p>
|
||
<p>We then call the <code>unwrap</code> method, which we know will never panic, because we
|
||
know the methods on <code>Post</code> ensure that <code>state</code> will always contain a <code>Some</code>
|
||
value when those methods are done. This is one of the cases we talked about in
|
||
the <a href="ch09-03-to-panic-or-not-to-panic.html#cases-in-which-you-have-more-information-than-the-compiler">“Cases In Which You Have More Information Than the
|
||
Compiler”</a><!-- ignore --> section of Chapter 9 when we
|
||
know that a <code>None</code> value is never possible, even though the compiler isn’t able
|
||
to understand that.</p>
|
||
<p>At this point, when we call <code>content</code> on the <code>&amp;Box&lt;dyn State&gt;</code>, deref coercion will
|
||
take effect on the <code>&</code> and the <code>Box</code> so the <code>content</code> method will ultimately be
|
||
called on the type that implements the <code>State</code> trait. That means we need to add
|
||
<code>content</code> to the <code>State</code> trait definition, and that is where we’ll put the
|
||
logic for what content to return depending on which state we have, as shown in
|
||
Listing 17-18:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> content: String
|
||
</span><span class="boring">}
|
||
</span>trait State {
|
||
// --snip--
|
||
fn content<'a>(&self, post: &'a Post) -> &'a str {
|
||
""
|
||
}
|
||
}
|
||
|
||
// --snip--
|
||
struct Published {}
|
||
|
||
impl State for Published {
|
||
// --snip--
|
||
fn content<'a>(&self, post: &'a Post) -> &'a str {
|
||
&post.content
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-18: Adding the <code>content</code> method to the <code>State</code>
|
||
trait</span></p>
|
||
<p>We add a default implementation for the <code>content</code> method that returns an empty
|
||
string slice. That means we don’t need to implement <code>content</code> on the <code>Draft</code>
|
||
and <code>PendingReview</code> structs. The <code>Published</code> struct will override the <code>content</code>
|
||
method and return the value in <code>post.content</code>.</p>
|
||
<p>Note that we need lifetime annotations on this method, as we discussed in
|
||
Chapter 10. We’re taking a reference to a <code>post</code> as an argument and returning a
|
||
reference to part of that <code>post</code>, so the lifetime of the returned reference is
|
||
related to the lifetime of the <code>post</code> argument.</p>
|
||
<p>And we’re done—all of Listing 17-11 now works! We’ve implemented the state
|
||
pattern with the rules of the blog post workflow. The logic related to the
|
||
rules lives in the state objects rather than being scattered throughout <code>Post</code>.</p>
|
||
<h3><a class="header" href="#trade-offs-of-the-state-pattern" id="trade-offs-of-the-state-pattern">Trade-offs of the State Pattern</a></h3>
|
||
<p>We’ve shown that Rust is capable of implementing the object-oriented state
|
||
pattern to encapsulate the different kinds of behavior a post should have in
|
||
each state. The methods on <code>Post</code> know nothing about the various behaviors. The
|
||
way we organized the code, we have to look in only one place to know the
|
||
different ways a published post can behave: the implementation of the <code>State</code>
|
||
trait on the <code>Published</code> struct.</p>
|
||
<p>If we were to create an alternative implementation that didn’t use the state
|
||
pattern, we might instead use <code>match</code> expressions in the methods on <code>Post</code> or
|
||
even in the <code>main</code> code that checks the state of the post and changes behavior
|
||
in those places. That would mean we would have to look in several places to
|
||
understand all the implications of a post being in the published state! This
|
||
would only increase the more states we added: each of those <code>match</code> expressions
|
||
would need another arm.</p>
|
||
<p>With the state pattern, the <code>Post</code> methods and the places we use <code>Post</code> don’t
|
||
need <code>match</code> expressions, and to add a new state, we would only need to add a
|
||
new struct and implement the trait methods on that one struct.</p>
|
||
<p>The implementation using the state pattern is easy to extend to add more
|
||
functionality. To see the simplicity of maintaining code that uses the state
|
||
pattern, try a few of these suggestions:</p>
|
||
<ul>
|
||
<li>Add a <code>reject</code> method that changes the post’s state from <code>PendingReview</code> back
|
||
to <code>Draft</code>.</li>
|
||
<li>Require two calls to <code>approve</code> before the state can be changed to <code>Published</code>.</li>
|
||
<li>Allow users to add text content only when a post is in the <code>Draft</code> state.
|
||
Hint: have the state object responsible for what might change about the
|
||
content but not responsible for modifying the <code>Post</code>.</li>
|
||
</ul>
|
||
<p>One downside of the state pattern is that, because the states implement the
|
||
transitions between states, some of the states are coupled to each other. If we
|
||
add another state between <code>PendingReview</code> and <code>Published</code>, such as <code>Scheduled</code>,
|
||
we would have to change the code in <code>PendingReview</code> to transition to
|
||
<code>Scheduled</code> instead. It would be less work if <code>PendingReview</code> didn’t need to
|
||
change with the addition of a new state, but that would mean switching to
|
||
another design pattern.</p>
|
||
<p>Another downside is that we’ve duplicated some logic. To eliminate some of the
|
||
duplication, we might try to make default implementations for the
|
||
<code>request_review</code> and <code>approve</code> methods on the <code>State</code> trait that return <code>self</code>;
|
||
however, this wouldn’t compile, because the trait doesn’t know what
|
||
the concrete <code>self</code> will be exactly. We want to be able to use <code>State</code> as a
|
||
trait object, so we need its methods to be object safe.</p>
|
||
<p>Other duplication includes the similar implementations of the <code>request_review</code>
|
||
and <code>approve</code> methods on <code>Post</code>. Both methods delegate to the implementation of
|
||
the same method on the value in the <code>Option</code> held in the <code>state</code> field and set the new
|
||
value of the <code>state</code> field to the result. If we had a lot of methods on <code>Post</code>
|
||
that followed this pattern, we might consider defining a macro to eliminate the
|
||
repetition (see the <a href="ch19-06-macros.html#macros">“Macros”</a><!-- ignore --> section in Chapter 19).</p>
|
||
<p>By implementing the state pattern exactly as it’s defined for object-oriented
|
||
languages, we’re not taking as full advantage of Rust’s strengths as we could.
|
||
Let’s look at some changes we can make to the <code>blog</code> crate that can make
|
||
invalid states and transitions into compile time errors.</p>
|
||
<h4><a class="header" href="#encoding-states-and-behavior-as-types" id="encoding-states-and-behavior-as-types">Encoding States and Behavior as Types</a></h4>
|
||
<p>We’ll show you how to rethink the state pattern to get a different set of
|
||
trade-offs. Rather than encapsulating the states and transitions completely so
|
||
outside code has no knowledge of them, we’ll encode the states into different
|
||
types. Consequently, Rust’s type checking system will prevent attempts to use
|
||
draft posts where only published posts are allowed by issuing a compiler error.</p>
|
||
<p>Let’s consider the first part of <code>main</code> in Listing 17-11:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore"><span class="boring">use blog::Post;
|
||
</span>
|
||
fn main() {
|
||
let mut post = Post::new();
|
||
|
||
post.add_text("I ate a salad for lunch today");
|
||
assert_eq!("", post.content());
|
||
}
|
||
</code></pre>
|
||
<p>We still enable the creation of new posts in the draft state using <code>Post::new</code>
|
||
and the ability to add text to the post’s content. But instead of having a
|
||
<code>content</code> method on a draft post that returns an empty string, we’ll make it so
|
||
draft posts don’t have the <code>content</code> method at all. That way, if we try to get
|
||
a draft post’s content, we’ll get a compiler error telling us the method
|
||
doesn’t exist. As a result, it will be impossible for us to accidentally
|
||
display draft post content in production, because that code won’t even compile.
|
||
Listing 17-19 shows the definition of a <code>Post</code> struct and a <code>DraftPost</code> struct,
|
||
as well as methods on each:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct Post {
|
||
content: String,
|
||
}
|
||
|
||
pub struct DraftPost {
|
||
content: String,
|
||
}
|
||
|
||
impl Post {
|
||
pub fn new() -> DraftPost {
|
||
DraftPost {
|
||
content: String::new(),
|
||
}
|
||
}
|
||
|
||
pub fn content(&self) -> &str {
|
||
&self.content
|
||
}
|
||
}
|
||
|
||
impl DraftPost {
|
||
pub fn add_text(&mut self, text: &str) {
|
||
self.content.push_str(text);
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-19: A <code>Post</code> with a <code>content</code> method and a
|
||
<code>DraftPost</code> without a <code>content</code> method</span></p>
|
||
<p>Both the <code>Post</code> and <code>DraftPost</code> structs have a private <code>content</code> field that
|
||
stores the blog post text. The structs no longer have the <code>state</code> field because
|
||
we’re moving the encoding of the state to the types of the structs. The <code>Post</code>
|
||
struct will represent a published post, and it has a <code>content</code> method that
|
||
returns the <code>content</code>.</p>
|
||
<p>We still have a <code>Post::new</code> function, but instead of returning an instance of
|
||
<code>Post</code>, it returns an instance of <code>DraftPost</code>. Because <code>content</code> is private
|
||
and there aren’t any functions that return <code>Post</code>, it’s not possible to create
|
||
an instance of <code>Post</code> right now.</p>
|
||
<p>The <code>DraftPost</code> struct has an <code>add_text</code> method, so we can add text to
|
||
<code>content</code> as before, but note that <code>DraftPost</code> does not have a <code>content</code> method
|
||
defined! So now the program ensures all posts start as draft posts, and draft
|
||
posts don’t have their content available for display. Any attempt to get around
|
||
these constraints will result in a compiler error.</p>
|
||
<h4><a class="header" href="#implementing-transitions-as-transformations-into-different-types" id="implementing-transitions-as-transformations-into-different-types">Implementing Transitions as Transformations into Different Types</a></h4>
|
||
<p>So how do we get a published post? We want to enforce the rule that a draft
|
||
post has to be reviewed and approved before it can be published. A post in the
|
||
pending review state should still not display any content. Let’s implement
|
||
these constraints by adding another struct, <code>PendingReviewPost</code>, defining the
|
||
<code>request_review</code> method on <code>DraftPost</code> to return a <code>PendingReviewPost</code>, and
|
||
defining an <code>approve</code> method on <code>PendingReviewPost</code> to return a <code>Post</code>, as
|
||
shown in Listing 17-20:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct Post {
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">pub struct DraftPost {
|
||
</span><span class="boring"> content: String,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl DraftPost {
|
||
// --snip--
|
||
|
||
pub fn request_review(self) -> PendingReviewPost {
|
||
PendingReviewPost {
|
||
content: self.content,
|
||
}
|
||
}
|
||
}
|
||
|
||
pub struct PendingReviewPost {
|
||
content: String,
|
||
}
|
||
|
||
impl PendingReviewPost {
|
||
pub fn approve(self) -> Post {
|
||
Post {
|
||
content: self.content,
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 17-20: A <code>PendingReviewPost</code> that gets created by
|
||
calling <code>request_review</code> on <code>DraftPost</code> and an <code>approve</code> method that turns a
|
||
<code>PendingReviewPost</code> into a published <code>Post</code></span></p>
|
||
<p>The <code>request_review</code> and <code>approve</code> methods take ownership of <code>self</code>, thus
|
||
consuming the <code>DraftPost</code> and <code>PendingReviewPost</code> instances and transforming
|
||
them into a <code>PendingReviewPost</code> and a published <code>Post</code>, respectively. This way,
|
||
we won’t have any lingering <code>DraftPost</code> instances after we’ve called
|
||
<code>request_review</code> on them, and so forth. The <code>PendingReviewPost</code> struct doesn’t
|
||
have a <code>content</code> method defined on it, so attempting to read its content
|
||
results in a compiler error, as with <code>DraftPost</code>. Because the only way to get a
|
||
published <code>Post</code> instance that does have a <code>content</code> method defined is to call
|
||
the <code>approve</code> method on a <code>PendingReviewPost</code>, and the only way to get a
|
||
<code>PendingReviewPost</code> is to call the <code>request_review</code> method on a <code>DraftPost</code>,
|
||
we’ve now encoded the blog post workflow into the type system.</p>
|
||
<p>But we also have to make some small changes to <code>main</code>. The <code>request_review</code> and
|
||
<code>approve</code> methods return new instances rather than modifying the struct they’re
|
||
called on, so we need to add more <code>let post =</code> shadowing assignments to save
|
||
the returned instances. We also can’t have the assertions about the draft and
|
||
pending review post’s contents be empty strings, nor do we need them: we can’t
|
||
compile code that tries to use the content of posts in those states any longer.
|
||
The updated code in <code>main</code> is shown in Listing 17-21:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use blog::Post;
|
||
|
||
fn main() {
|
||
let mut post = Post::new();
|
||
|
||
post.add_text("I ate a salad for lunch today");
|
||
|
||
let post = post.request_review();
|
||
|
||
let post = post.approve();
|
||
|
||
assert_eq!("I ate a salad for lunch today", post.content());
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 17-21: Modifications to <code>main</code> to use the new
|
||
implementation of the blog post workflow</span></p>
|
||
<p>The changes we needed to make to <code>main</code> to reassign <code>post</code> mean that this
|
||
implementation doesn’t quite follow the object-oriented state pattern anymore:
|
||
the transformations between the states are no longer encapsulated entirely
|
||
within the <code>Post</code> implementation. However, our gain is that invalid states are
|
||
now impossible because of the type system and the type checking that happens at
|
||
compile time! This ensures that certain bugs, such as display of the content of
|
||
an unpublished post, will be discovered before they make it to production.</p>
|
||
<p>Try the tasks suggested for additional requirements that we mentioned at the
|
||
start of this section on the <code>blog</code> crate as it is after Listing 17-20 to see
|
||
what you think about the design of this version of the code. Note that some of
|
||
the tasks might be completed already in this design.</p>
|
||
<p>We’ve seen that even though Rust is capable of implementing object-oriented
|
||
design patterns, other patterns, such as encoding state into the type system,
|
||
are also available in Rust. These patterns have different trade-offs. Although
|
||
you might be very familiar with object-oriented patterns, rethinking the
|
||
problem to take advantage of Rust’s features can provide benefits, such as
|
||
preventing some bugs at compile time. Object-oriented patterns won’t always be
|
||
the best solution in Rust due to certain features, like ownership, that
|
||
object-oriented languages don’t have.</p>
|
||
<h2><a class="header" href="#summary-16" id="summary-16">Summary</a></h2>
|
||
<p>No matter whether or not you think Rust is an object-oriented language after
|
||
reading this chapter, you now know that you can use trait objects to get some
|
||
object-oriented features in Rust. Dynamic dispatch can give your code some
|
||
flexibility in exchange for a bit of runtime performance. You can use this
|
||
flexibility to implement object-oriented patterns that can help your code’s
|
||
maintainability. Rust also has other features, like ownership, that
|
||
object-oriented languages don’t have. An object-oriented pattern won’t always
|
||
be the best way to take advantage of Rust’s strengths, but is an available
|
||
option.</p>
|
||
<p>Next, we’ll look at patterns, which are another of Rust’s features that enable
|
||
lots of flexibility. We’ve looked at them briefly throughout the book but
|
||
haven’t seen their full capability yet. Let’s go!</p>
|
||
<h1><a class="header" href="#patterns-and-matching" id="patterns-and-matching">Patterns and Matching</a></h1>
|
||
<p>Patterns are a special syntax in Rust for matching against the structure of
|
||
types, both complex and simple. Using patterns in conjunction with <code>match</code>
|
||
expressions and other constructs gives you more control over a program’s
|
||
control flow. A pattern consists of some combination of the following:</p>
|
||
<ul>
|
||
<li>Literals</li>
|
||
<li>Destructured arrays, enums, structs, or tuples</li>
|
||
<li>Variables</li>
|
||
<li>Wildcards</li>
|
||
<li>Placeholders</li>
|
||
</ul>
|
||
<p>These components describe the shape of the data we’re working with, which we
|
||
then match against values to determine whether our program has the correct data
|
||
to continue running a particular piece of code.</p>
|
||
<p>To use a pattern, we compare it to some value. If the pattern matches the
|
||
value, we use the value parts in our code. Recall the <code>match</code> expressions in
|
||
Chapter 6 that used patterns, such as the coin-sorting machine example. If the
|
||
value fits the shape of the pattern, we can use the named pieces. If it
|
||
doesn’t, the code associated with the pattern won’t run.</p>
|
||
<p>This chapter is a reference on all things related to patterns. We’ll cover the
|
||
valid places to use patterns, the difference between refutable and irrefutable
|
||
patterns, and the different kinds of pattern syntax that you might see. By the
|
||
end of the chapter, you’ll know how to use patterns to express many concepts in
|
||
a clear way.</p>
|
||
<h2><a class="header" href="#all-the-places-patterns-can-be-used" id="all-the-places-patterns-can-be-used">All the Places Patterns Can Be Used</a></h2>
|
||
<p>Patterns pop up in a number of places in Rust, and you’ve been using them a lot
|
||
without realizing it! This section discusses all the places where patterns are
|
||
valid.</p>
|
||
<h3><a class="header" href="#match-arms" id="match-arms"><code>match</code> Arms</a></h3>
|
||
<p>As discussed in Chapter 6, we use patterns in the arms of <code>match</code> expressions.
|
||
Formally, <code>match</code> expressions are defined as the keyword <code>match</code>, a value to
|
||
match on, and one or more match arms that consist of a pattern and an
|
||
expression to run if the value matches that arm’s pattern, like this:</p>
|
||
<pre><code class="language-text">match VALUE {
|
||
PATTERN => EXPRESSION,
|
||
PATTERN => EXPRESSION,
|
||
PATTERN => EXPRESSION,
|
||
}
|
||
</code></pre>
|
||
<p>One requirement for <code>match</code> expressions is that they need to be <em>exhaustive</em> in
|
||
the sense that all possibilities for the value in the <code>match</code> expression must
|
||
be accounted for. One way to ensure you’ve covered every possibility is to have
|
||
a catchall pattern for the last arm: for example, a variable name matching any
|
||
value can never fail and thus covers every remaining case.</p>
|
||
<p>The particular pattern <code>_</code> will match anything, but it never binds to a variable,
|
||
so it’s often used in the last match arm. The <code>_</code> pattern can be useful when
|
||
you want to ignore any value not specified, for example. We’ll cover the <code>_</code>
|
||
pattern in more detail in the <a href="ch18-03-pattern-syntax.html#ignoring-values-in-a-pattern">“Ignoring Values in a
|
||
Pattern”</a><!-- ignore --> section later in this
|
||
chapter.</p>
|
||
<h3><a class="header" href="#conditional-if-let-expressions" id="conditional-if-let-expressions">Conditional <code>if let</code> Expressions</a></h3>
|
||
<p>In Chapter 6 we discussed how to use <code>if let</code> expressions mainly as a shorter
|
||
way to write the equivalent of a <code>match</code> that only matches one case.
|
||
Optionally, <code>if let</code> can have a corresponding <code>else</code> containing code to run if
|
||
the pattern in the <code>if let</code> doesn’t match.</p>
|
||
<p>Listing 18-1 shows that it’s also possible to mix and match <code>if let</code>, <code>else if</code>, and <code>else if let</code> expressions. Doing so gives us more flexibility than a
|
||
<code>match</code> expression in which we can express only one value to compare with the
|
||
patterns. Also, the conditions in a series of <code>if let</code>, <code>else if</code>, <code>else if let</code> arms aren’t required to relate to each other.</p>
|
||
<p>The code in Listing 18-1 shows a series of checks for several conditions that
|
||
decide what the background color should be. For this example, we’ve created
|
||
variables with hardcoded values that a real program might receive from user
|
||
input.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let favorite_color: Option&lt;&amp;str&gt; = None;
|
||
let is_tuesday = false;
|
||
let age: Result&lt;u8, _&gt; = "34".parse();
|
||
|
||
if let Some(color) = favorite_color {
|
||
println!("Using your favorite color, {}, as the background", color);
|
||
} else if is_tuesday {
|
||
println!("Tuesday is green day!");
|
||
} else if let Ok(age) = age {
|
||
if age > 30 {
|
||
println!("Using purple as the background color");
|
||
} else {
|
||
println!("Using orange as the background color");
|
||
}
|
||
} else {
|
||
println!("Using blue as the background color");
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-1: Mixing <code>if let</code>, <code>else if</code>, <code>else if let</code>,
|
||
and <code>else</code></span></p>
|
||
<p>If the user specifies a favorite color, that color is the background color. If
|
||
no favorite color is specified and today is Tuesday, the background color is green. Otherwise, if the user specifies
|
||
their age as a string and we can parse it as a number successfully, the color
|
||
is either purple or orange depending on the value of the number. If none of
|
||
these conditions apply, the background color is blue.</p>
|
||
<p>This conditional structure lets us support complex requirements. With the
|
||
hardcoded values we have here, this example will print <code>Using purple as the background color</code>.</p>
|
||
<p>You can see that <code>if let</code> can also introduce shadowed variables in the same way
|
||
that <code>match</code> arms can: the line <code>if let Ok(age) = age</code> introduces a new
|
||
shadowed <code>age</code> variable that contains the value inside the <code>Ok</code> variant. This
|
||
means we need to place the <code>if age > 30</code> condition within that block: we can’t
|
||
combine these two conditions into <code>if let Ok(age) = age && age > 30</code>. The
|
||
shadowed <code>age</code> we want to compare to 30 isn’t valid until the new scope starts
|
||
with the curly bracket.</p>
|
||
<p>The downside of using <code>if let</code> expressions is that the compiler doesn’t check
|
||
exhaustiveness, whereas with <code>match</code> expressions it does. If we omitted the
|
||
last <code>else</code> block and therefore missed handling some cases, the compiler would
|
||
not alert us to the possible logic bug.</p>
|
||
<h3><a class="header" href="#while-let-conditional-loops" id="while-let-conditional-loops"><code>while let</code> Conditional Loops</a></h3>
|
||
<p>Similar in construction to <code>if let</code>, the <code>while let</code> conditional loop allows a
|
||
<code>while</code> loop to run for as long as a pattern continues to match. The example in
|
||
Listing 18-2 shows a <code>while let</code> loop that uses a vector as a stack and prints
|
||
the values in the vector in the opposite order in which they were pushed.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut stack = Vec::new();
|
||
|
||
stack.push(1);
|
||
stack.push(2);
|
||
stack.push(3);
|
||
|
||
while let Some(top) = stack.pop() {
|
||
println!("{}", top);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-2: Using a <code>while let</code> loop to print values
|
||
for as long as <code>stack.pop()</code> returns <code>Some</code></span></p>
|
||
<p>This example prints 3, 2, and then 1. The <code>pop</code> method takes the last element
|
||
out of the vector and returns <code>Some(value)</code>. If the vector is empty, <code>pop</code>
|
||
returns <code>None</code>. The <code>while</code> loop continues running the code in its block as
|
||
long as <code>pop</code> returns <code>Some</code>. When <code>pop</code> returns <code>None</code>, the loop stops. We can
|
||
use <code>while let</code> to pop every element off our stack.</p>
|
||
<h3><a class="header" href="#for-loops" id="for-loops"><code>for</code> Loops</a></h3>
|
||
<p>In Chapter 3, we mentioned that the <code>for</code> loop is the most common loop
|
||
construction in Rust code, but we haven’t yet discussed the pattern that <code>for</code>
|
||
takes. In a <code>for</code> loop, the pattern is the value that directly follows the
|
||
keyword <code>for</code>, so in <code>for x in y</code> the <code>x</code> is the pattern.</p>
|
||
<p>Listing 18-3 demonstrates how to use a pattern in a <code>for</code> loop to destructure,
|
||
or break apart, a tuple as part of the <code>for</code> loop.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v = vec!['a', 'b', 'c'];
|
||
|
||
for (index, value) in v.iter().enumerate() {
|
||
println!("{} is at index {}", value, index);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-3: Using a pattern in a <code>for</code> loop to
|
||
destructure a tuple</span></p>
|
||
<p>The code in Listing 18-3 will print the following:</p>
|
||
<pre><code class="language-text">a is at index 0
|
||
b is at index 1
|
||
c is at index 2
|
||
</code></pre>
|
||
<p>We use the <code>enumerate</code> method to adapt an iterator to produce a value and that
|
||
value’s index in the iterator, placed into a tuple. The first value that
|
||
<code>enumerate</code> produces is the tuple <code>(0, 'a')</code>. When this value is matched to the
|
||
pattern <code>(index, value)</code>, <code>index</code> will be <code>0</code> and <code>value</code> will be <code>'a'</code>,
|
||
printing the first line of the output.</p>
|
||
<h3><a class="header" href="#let-statements" id="let-statements"><code>let</code> Statements</a></h3>
|
||
<p>Prior to this chapter, we had only explicitly discussed using patterns with
|
||
<code>match</code> and <code>if let</code>, but in fact, we’ve used patterns in other places as well,
|
||
including in <code>let</code> statements. For example, consider this straightforward
|
||
variable assignment with <code>let</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 5;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Throughout this book, we’ve used <code>let</code> like this hundreds of times, and
|
||
although you might not have realized it, you were using patterns! More
|
||
formally, a <code>let</code> statement looks like this:</p>
|
||
<pre><code class="language-text">let PATTERN = EXPRESSION;
|
||
</code></pre>
|
||
<p>In statements like <code>let x = 5;</code> with a variable name in the <code>PATTERN</code> slot, the
|
||
variable name is just a particularly simple form of a pattern. Rust compares
|
||
the expression against the pattern and assigns any names it finds. So in the
|
||
<code>let x = 5;</code> example, <code>x</code> is a pattern that means “bind what matches here to
|
||
the variable <code>x</code>.” Because the name <code>x</code> is the whole pattern, this pattern
|
||
effectively means “bind everything to the variable <code>x</code>, whatever the value is.”</p>
|
||
<p>To see the pattern matching aspect of <code>let</code> more clearly, consider Listing
|
||
18-4, which uses a pattern with <code>let</code> to destructure a tuple.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let (x, y, z) = (1, 2, 3);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-4: Using a pattern to destructure a tuple and
|
||
create three variables at once</span></p>
|
||
<p>Here, we match a tuple against a pattern. Rust compares the value <code>(1, 2, 3)</code>
|
||
to the pattern <code>(x, y, z)</code> and sees that the value matches the pattern, so Rust
|
||
binds <code>1</code> to <code>x</code>, <code>2</code> to <code>y</code>, and <code>3</code> to <code>z</code>. You can think of this tuple
|
||
pattern as nesting three individual variable patterns inside it.</p>
|
||
<p>If the number of elements in the pattern doesn’t match the number of elements
|
||
in the tuple, the overall type won’t match and we’ll get a compiler error. For
|
||
example, Listing 18-5 shows an attempt to destructure a tuple with three
|
||
elements into two variables, which won’t work.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let (x, y) = (1, 2, 3);
|
||
</code></pre>
|
||
<p><span class="caption">Listing 18-5: Incorrectly constructing a pattern whose
|
||
variables don’t match the number of elements in the tuple</span></p>
|
||
<p>Attempting to compile this code results in this type error:</p>
|
||
<pre><code class="language-text">error[E0308]: mismatched types
|
||
--> src/main.rs:2:9
|
||
|
|
||
2 | let (x, y) = (1, 2, 3);
|
||
| ^^^^^^ expected a tuple with 3 elements, found one with 2 elements
|
||
|
|
||
= note: expected type `({integer}, {integer}, {integer})`
|
||
found type `(_, _)`
|
||
</code></pre>
|
||
<p>If we wanted to ignore one or more of the values in the tuple, we could use <code>_</code>
|
||
or <code>..</code>, as you’ll see in the <a href="ch18-03-pattern-syntax.html#ignoring-values-in-a-pattern">“Ignoring Values in a
|
||
Pattern”</a><!-- ignore --> section. If the problem
|
||
is that we have too many variables in the pattern, the solution is to make the
|
||
types match by removing variables so the number of variables equals the number
|
||
of elements in the tuple.</p>
|
||
<h3><a class="header" href="#function-parameters-1" id="function-parameters-1">Function Parameters</a></h3>
|
||
<p>Function parameters can also be patterns. The code in Listing 18-6, which
|
||
declares a function named <code>foo</code> that takes one parameter named <code>x</code> of type
|
||
<code>i32</code>, should by now look familiar.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn foo(x: i32) {
|
||
// code goes here
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-6: A function signature uses patterns in the
|
||
parameters</span></p>
|
||
<p>The <code>x</code> part is a pattern! As we did with <code>let</code>, we could match a tuple in a
|
||
function’s arguments to the pattern. Listing 18-7 splits the values in a tuple
|
||
as we pass it to a function.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn print_coordinates(&(x, y): &(i32, i32)) {
|
||
println!("Current location: ({}, {})", x, y);
|
||
}
|
||
|
||
fn main() {
|
||
let point = (3, 5);
|
||
print_coordinates(&point);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-7: A function with parameters that destructure
|
||
a tuple</span></p>
|
||
<p>This code prints <code>Current location: (3, 5)</code>. The values <code>&(3, 5)</code> match the
|
||
pattern <code>&(x, y)</code>, so <code>x</code> is the value <code>3</code> and <code>y</code> is the value <code>5</code>.</p>
|
||
<p>We can also use patterns in closure parameter lists in the same way as in
|
||
function parameter lists, because closures are similar to functions, as
|
||
discussed in Chapter 13.</p>
|
||
<p>At this point, you’ve seen several ways of using patterns, but patterns don’t
|
||
work the same in every place we can use them. In some places, the patterns must
|
||
be irrefutable; in other circumstances, they can be refutable. We’ll discuss
|
||
these two concepts next.</p>
|
||
<h2><a class="header" href="#refutability-whether-a-pattern-might-fail-to-match" id="refutability-whether-a-pattern-might-fail-to-match">Refutability: Whether a Pattern Might Fail to Match</a></h2>
|
||
<p>Patterns come in two forms: refutable and irrefutable. Patterns that will match
|
||
for any possible value passed are <em>irrefutable</em>. An example would be <code>x</code> in the
|
||
statement <code>let x = 5;</code> because <code>x</code> matches anything and therefore cannot fail
|
||
to match. Patterns that can fail to match for some possible value are
|
||
<em>refutable</em>. An example would be <code>Some(x)</code> in the expression <code>if let Some(x) = a_value</code> because if the value in the <code>a_value</code> variable is <code>None</code> rather than
|
||
<code>Some</code>, the <code>Some(x)</code> pattern will not match.</p>
|
||
<p>Function parameters, <code>let</code> statements, and <code>for</code> loops can only accept
|
||
irrefutable patterns, because the program cannot do anything meaningful when
|
||
values don’t match. The <code>if let</code> and <code>while let</code> expressions accept
|
||
refutable and irrefutable patterns, but the compiler warns against
|
||
irrefutable patterns because by definition they’re intended to handle possible
|
||
failure: the functionality of a conditional is in its ability to perform
|
||
differently depending on success or failure.</p>
|
||
<p>In general, you shouldn’t have to worry about the distinction between refutable
|
||
and irrefutable patterns; however, you do need to be familiar with the concept
|
||
of refutability so you can respond when you see it in an error message. In
|
||
those cases, you’ll need to change either the pattern or the construct you’re
|
||
using the pattern with, depending on the intended behavior of the code.</p>
|
||
<p>Let’s look at an example of what happens when we try to use a refutable pattern
|
||
where Rust requires an irrefutable pattern and vice versa. Listing 18-8 shows a
|
||
<code>let</code> statement, but for the pattern we’ve specified <code>Some(x)</code>, a refutable
|
||
pattern. As you might expect, this code will not compile.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let Some(x) = some_option_value;
|
||
</code></pre>
|
||
<p><span class="caption">Listing 18-8: Attempting to use a refutable pattern with
|
||
<code>let</code></span></p>
|
||
<p>If <code>some_option_value</code> was a <code>None</code> value, it would fail to match the pattern
|
||
<code>Some(x)</code>, meaning the pattern is refutable. However, the <code>let</code> statement can
|
||
only accept an irrefutable pattern because there is nothing valid the code can
|
||
do with a <code>None</code> value. At compile time, Rust will complain that we’ve tried to
|
||
use a refutable pattern where an irrefutable pattern is required:</p>
|
||
<pre><code class="language-text">error[E0005]: refutable pattern in local binding: `None` not covered
|
||
-->
|
||
|
|
||
3 | let Some(x) = some_option_value;
|
||
| ^^^^^^^ pattern `None` not covered
|
||
</code></pre>
|
||
<p>Because we didn’t cover (and couldn’t cover!) every valid value with the
|
||
pattern <code>Some(x)</code>, Rust rightfully produces a compiler error.</p>
|
||
<p>To fix the problem where we have a refutable pattern where an irrefutable
|
||
pattern is needed, we can change the code that uses the pattern: instead of
|
||
using <code>let</code>, we can use <code>if let</code>. Then if the pattern doesn’t match, the code
|
||
will just skip the code in the curly brackets, giving it a way to continue
|
||
validly. Listing 18-9 shows how to fix the code in Listing 18-8.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">let some_option_value: Option&lt;i32&gt; = None;
|
||
</span>if let Some(x) = some_option_value {
|
||
println!("{}", x);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-9: Using <code>if let</code> and a block with refutable
|
||
patterns instead of <code>let</code></span></p>
|
||
<p>We’ve given the code an out! This code is perfectly valid, although it means we
|
||
cannot use an irrefutable pattern without receiving a warning. If we give <code>if let</code> a pattern that will always match, such as <code>x</code>, as shown in Listing 18-10,
|
||
the compiler will give a warning.</p>
|
||
<pre><code class="language-rust ignore">if let x = 5 {
|
||
println!("{}", x);
|
||
};
|
||
</code></pre>
|
||
<p><span class="caption">Listing 18-10: Attempting to use an irrefutable pattern
|
||
with <code>if let</code></span></p>
|
||
<p>Rust complains that it doesn’t make sense to use <code>if let</code> with an irrefutable
|
||
pattern:</p>
|
||
<pre><code class="language-text">warning: irrefutable if-let pattern
|
||
--> &lt;anon&gt;:2:5
|
||
|
|
||
2 | / if let x = 5 {
|
||
3 | | println!("{}", x);
|
||
4 | | };
|
||
| |_^
|
||
|
|
||
= note: #[warn(irrefutable_let_patterns)] on by default
|
||
</code></pre>
|
||
<p>For this reason, match arms must use refutable patterns, except for the last
|
||
arm, which should match any remaining values with an irrefutable pattern. Rust
|
||
allows us to use an irrefutable pattern in a <code>match</code> with only one arm, but
|
||
this syntax isn’t particularly useful and could be replaced with a simpler
|
||
<code>let</code> statement.</p>
|
||
<p>Now that you know where to use patterns and the difference between refutable
|
||
and irrefutable patterns, let’s cover all the syntax we can use to create
|
||
patterns.</p>
|
||
<h2><a class="header" href="#pattern-syntax" id="pattern-syntax">Pattern Syntax</a></h2>
|
||
<p>Throughout the book, you’ve seen examples of many kinds of patterns. In this
|
||
section, we gather all the syntax valid in patterns and discuss why you might
|
||
want to use each one.</p>
|
||
<h3><a class="header" href="#matching-literals" id="matching-literals">Matching Literals</a></h3>
|
||
<p>As you saw in Chapter 6, you can match patterns against literals directly. The
|
||
following code gives some examples:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 1;
|
||
|
||
match x {
|
||
1 => println!("one"),
|
||
2 => println!("two"),
|
||
3 => println!("three"),
|
||
_ => println!("anything"),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code prints <code>one</code> because the value in <code>x</code> is 1. This syntax is useful
|
||
when you want your code to take an action if it gets a particular concrete
|
||
value.</p>
|
||
<h3><a class="header" href="#matching-named-variables" id="matching-named-variables">Matching Named Variables</a></h3>
|
||
<p>Named variables are irrefutable patterns that match any value, and we’ve used
|
||
them many times in the book. However, there is a complication when you use
|
||
named variables in <code>match</code> expressions. Because <code>match</code> starts a new scope,
|
||
variables declared as part of a pattern inside the <code>match</code> expression will
|
||
shadow those with the same name outside the <code>match</code> construct, as is the case
|
||
with all variables. In Listing 18-11, we declare a variable named <code>x</code> with the
|
||
value <code>Some(5)</code> and a variable <code>y</code> with the value <code>10</code>. We then create a
|
||
<code>match</code> expression on the value <code>x</code>. Look at the patterns in the match arms and
|
||
<code>println!</code> at the end, and try to figure out what the code will print before
|
||
running this code or reading further.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = Some(5);
|
||
let y = 10;
|
||
|
||
match x {
|
||
Some(50) => println!("Got 50"),
|
||
Some(y) => println!("Matched, y = {:?}", y),
|
||
_ => println!("Default case, x = {:?}", x),
|
||
}
|
||
|
||
println!("at the end: x = {:?}, y = {:?}", x, y);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-11: A <code>match</code> expression with an arm that
|
||
introduces a shadowed variable <code>y</code></span></p>
|
||
<p>Let’s walk through what happens when the <code>match</code> expression runs. The pattern
|
||
in the first match arm doesn’t match the defined value of <code>x</code>, so the code
|
||
continues.</p>
|
||
<p>The pattern in the second match arm introduces a new variable named <code>y</code> that
|
||
will match any value inside a <code>Some</code> value. Because we’re in a new scope inside
|
||
the <code>match</code> expression, this is a new <code>y</code> variable, not the <code>y</code> we declared at
|
||
the beginning with the value 10. This new <code>y</code> binding will match any value
|
||
inside a <code>Some</code>, which is what we have in <code>x</code>. Therefore, this new <code>y</code> binds to
|
||
the inner value of the <code>Some</code> in <code>x</code>. That value is <code>5</code>, so the expression for
|
||
that arm executes and prints <code>Matched, y = 5</code>.</p>
|
||
<p>If <code>x</code> had been a <code>None</code> value instead of <code>Some(5)</code>, the patterns in the first
|
||
two arms wouldn’t have matched, so the value would have matched to the
|
||
underscore. We didn’t introduce the <code>x</code> variable in the pattern of the
|
||
underscore arm, so the <code>x</code> in the expression is still the outer <code>x</code> that hasn’t
|
||
been shadowed. In this hypothetical case, the <code>match</code> would print <code>Default case, x = None</code>.</p>
|
||
<p>When the <code>match</code> expression is done, its scope ends, and so does the scope of
|
||
the inner <code>y</code>. The last <code>println!</code> produces <code>at the end: x = Some(5), y = 10</code>.</p>
|
||
<p>To create a <code>match</code> expression that compares the values of the outer <code>x</code> and
|
||
<code>y</code>, rather than introducing a shadowed variable, we would need to use a match
|
||
guard conditional instead. We’ll talk about match guards later in the <a href="ch18-03-pattern-syntax.html#extra-conditionals-with-match-guards">“Extra
|
||
Conditionals with Match Guards”</a><!--
|
||
ignore --> section.</p>
|
||
<h3><a class="header" href="#multiple-patterns" id="multiple-patterns">Multiple Patterns</a></h3>
|
||
<p>In <code>match</code> expressions, you can match multiple patterns using the <code>|</code> syntax,
|
||
which means <em>or</em>. For example, the following code matches the value of <code>x</code>
|
||
against the match arms, the first of which has an <em>or</em> option, meaning if the
|
||
value of <code>x</code> matches either of the values in that arm, that arm’s code will
|
||
run:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 1;
|
||
|
||
match x {
|
||
1 | 2 => println!("one or two"),
|
||
3 => println!("three"),
|
||
_ => println!("anything"),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code prints <code>one or two</code>.</p>
|
||
<h3><a class="header" href="#matching-ranges-of-values-with-" id="matching-ranges-of-values-with-">Matching Ranges of Values with <code>..=</code></a></h3>
|
||
<p>The <code>..=</code> syntax allows us to match to an inclusive range of values. In the
|
||
following code, when a pattern matches any of the values within the range, that
|
||
arm will execute:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 5;
|
||
|
||
match x {
|
||
1..=5 => println!("one through five"),
|
||
_ => println!("something else"),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>If <code>x</code> is 1, 2, 3, 4, or 5, the first arm will match. This syntax is more
|
||
convenient than using the <code>|</code> operator to express the same idea; instead of
|
||
<code>1..=5</code>, we would have to specify <code>1 | 2 | 3 | 4 | 5</code> if we used <code>|</code>.
|
||
Specifying a range is much shorter, especially if we want to match, say, any
|
||
number between 1 and 1,000!</p>
|
||
<p>Ranges are only allowed with numeric values or <code>char</code> values, because the
|
||
compiler checks that the range isn’t empty at compile time. The only types for
|
||
which Rust can tell if a range is empty or not are <code>char</code> and numeric values.</p>
|
||
<p>Here is an example using ranges of <code>char</code> values:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 'c';
|
||
|
||
match x {
|
||
'a'..='j' => println!("early ASCII letter"),
|
||
'k'..='z' => println!("late ASCII letter"),
|
||
_ => println!("something else"),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Rust can tell that <code>c</code> is within the first pattern’s range and prints <code>early ASCII letter</code>.</p>
|
||
<h3><a class="header" href="#destructuring-to-break-apart-values" id="destructuring-to-break-apart-values">Destructuring to Break Apart Values</a></h3>
|
||
<p>We can also use patterns to destructure structs, enums, tuples, and references
|
||
to use different parts of these values. Let’s walk through each value.</p>
|
||
<h4><a class="header" href="#destructuring-structs" id="destructuring-structs">Destructuring Structs</a></h4>
|
||
<p>Listing 18-12 shows a <code>Point</code> struct with two fields, <code>x</code> and <code>y</code>, that we can
|
||
break apart using a pattern with a <code>let</code> statement.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Point {
|
||
x: i32,
|
||
y: i32,
|
||
}
|
||
|
||
fn main() {
|
||
let p = Point { x: 0, y: 7 };
|
||
|
||
let Point { x: a, y: b } = p;
|
||
assert_eq!(0, a);
|
||
assert_eq!(7, b);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-12: Destructuring a struct’s fields into
|
||
separate variables</span></p>
|
||
<p>This code creates the variables <code>a</code> and <code>b</code> that match the values of the <code>x</code>
|
||
and <code>y</code> fields of the <code>p</code> struct. This example shows that the names of the
|
||
variables in the pattern don’t have to match the field names of the struct. But
|
||
it’s common to want the variable names to match the field names to make it
|
||
easier to remember which variables came from which fields.</p>
|
||
<p>Because having variable names match the fields is common and because writing
|
||
<code>let Point { x: x, y: y } = p;</code> contains a lot of duplication, there is a
|
||
shorthand for patterns that match struct fields: you only need to list the name
|
||
of the struct field, and the variables created from the pattern will have the
|
||
same names. Listing 18-13 shows code that behaves in the same way as the code
|
||
in Listing 18-12, but the variables created in the <code>let</code> pattern are <code>x</code> and
|
||
<code>y</code> instead of <code>a</code> and <code>b</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">struct Point {
|
||
x: i32,
|
||
y: i32,
|
||
}
|
||
|
||
fn main() {
|
||
let p = Point { x: 0, y: 7 };
|
||
|
||
let Point { x, y } = p;
|
||
assert_eq!(0, x);
|
||
assert_eq!(7, y);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-13: Destructuring struct fields using struct
|
||
field shorthand</span></p>
|
||
<p>This code creates the variables <code>x</code> and <code>y</code> that match the <code>x</code> and <code>y</code> fields
|
||
of the <code>p</code> variable. The outcome is that the variables <code>x</code> and <code>y</code> contain the
|
||
values from the <code>p</code> struct.</p>
|
||
<p>We can also destructure with literal values as part of the struct pattern
|
||
rather than creating variables for all the fields. Doing so allows us to test
|
||
some of the fields for particular values while creating variables to
|
||
destructure the other fields.</p>
|
||
<p>Listing 18-14 shows a <code>match</code> expression that separates <code>Point</code> values into
|
||
three cases: points that lie directly on the <code>x</code> axis (which is true when <code>y = 0</code>), on the <code>y</code> axis (<code>x = 0</code>), or neither.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">struct Point {
|
||
</span><span class="boring"> x: i32,
|
||
</span><span class="boring"> y: i32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let p = Point { x: 0, y: 7 };
|
||
|
||
match p {
|
||
Point { x, y: 0 } => println!("On the x axis at {}", x),
|
||
Point { x: 0, y } => println!("On the y axis at {}", y),
|
||
Point { x, y } => println!("On neither axis: ({}, {})", x, y),
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-14: Destructuring and matching literal values
|
||
in one pattern</span></p>
|
||
<p>The first arm will match any point that lies on the <code>x</code> axis by specifying that
|
||
the <code>y</code> field matches if its value matches the literal <code>0</code>. The pattern still
|
||
creates an <code>x</code> variable that we can use in the code for this arm.</p>
|
||
<p>Similarly, the second arm matches any point on the <code>y</code> axis by specifying that
|
||
the <code>x</code> field matches if its value is <code>0</code> and creates a variable <code>y</code> for the
|
||
value of the <code>y</code> field. The third arm doesn’t specify any literals, so it
|
||
matches any other <code>Point</code> and creates variables for both the <code>x</code> and <code>y</code> fields.</p>
|
||
<p>In this example, the value <code>p</code> matches the second arm by virtue of <code>x</code>
|
||
containing a 0, so this code will print <code>On the y axis at 7</code>.</p>
|
||
<h4><a class="header" href="#destructuring-enums" id="destructuring-enums">Destructuring Enums</a></h4>
|
||
<p>We’ve destructured enums earlier in this book, for example, when we
|
||
destructured <code>Option&lt;i32&gt;</code> in Listing 6-5 in Chapter 6. One detail we haven’t
|
||
mentioned explicitly is that the pattern to destructure an enum should
|
||
correspond to the way the data stored within the enum is defined. As an
|
||
example, in Listing 18-15 we use the <code>Message</code> enum from Listing 6-2 and write
|
||
a <code>match</code> with patterns that will destructure each inner value.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">enum Message {
|
||
Quit,
|
||
Move { x: i32, y: i32 },
|
||
Write(String),
|
||
ChangeColor(i32, i32, i32),
|
||
}
|
||
|
||
fn main() {
|
||
let msg = Message::ChangeColor(0, 160, 255);
|
||
|
||
match msg {
|
||
Message::Quit => {
|
||
println!("The Quit variant has no data to destructure.")
|
||
},
|
||
Message::Move { x, y } => {
|
||
println!(
|
||
"Move in the x direction {} and in the y direction {}",
|
||
x,
|
||
y
|
||
);
|
||
}
|
||
Message::Write(text) => println!("Text message: {}", text),
|
||
Message::ChangeColor(r, g, b) => {
|
||
println!(
|
||
"Change the color to red {}, green {}, and blue {}",
|
||
r,
|
||
g,
|
||
b
|
||
)
|
||
}
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-15: Destructuring enum variants that hold
|
||
different kinds of values</span></p>
|
||
<p>This code will print <code>Change the color to red 0, green 160, and blue 255</code>. Try
|
||
changing the value of <code>msg</code> to see the code from the other arms run.</p>
|
||
<p>For enum variants without any data, like <code>Message::Quit</code>, we can’t destructure
|
||
the value any further. We can only match on the literal <code>Message::Quit</code> value,
|
||
and no variables are in that pattern.</p>
|
||
<p>For struct-like enum variants, such as <code>Message::Move</code>, we can use a pattern
|
||
similar to the pattern we specify to match structs. After the variant name, we
|
||
place curly brackets and then list the fields with variables so we break apart
|
||
the pieces to use in the code for this arm. Here we use the shorthand form as
|
||
we did in Listing 18-13.</p>
|
||
<p>For tuple-like enum variants, like <code>Message::Write</code> that holds a tuple with one
|
||
element and <code>Message::ChangeColor</code> that holds a tuple with three elements, the
|
||
pattern is similar to the pattern we specify to match tuples. The number of
|
||
variables in the pattern must match the number of elements in the variant we’re
|
||
matching.</p>
|
||
<h4><a class="header" href="#destructuring-nested-structs-and-enums" id="destructuring-nested-structs-and-enums">Destructuring Nested Structs and Enums</a></h4>
|
||
<p>Until now, all our examples have been matching structs or enums that were one
|
||
level deep. Matching can work on nested items too!</p>
|
||
<p>For example, we can refactor the code in Listing 18-15 to support RGB and HSV
|
||
colors in the <code>ChangeColor</code> message, as shown in Listing 18-16.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">enum Color {
|
||
Rgb(i32, i32, i32),
|
||
Hsv(i32, i32, i32),
|
||
}
|
||
|
||
enum Message {
|
||
Quit,
|
||
Move { x: i32, y: i32 },
|
||
Write(String),
|
||
ChangeColor(Color),
|
||
}
|
||
|
||
fn main() {
|
||
let msg = Message::ChangeColor(Color::Hsv(0, 160, 255));
|
||
|
||
match msg {
|
||
Message::ChangeColor(Color::Rgb(r, g, b)) => {
|
||
println!(
|
||
"Change the color to red {}, green {}, and blue {}",
|
||
r,
|
||
g,
|
||
b
|
||
)
|
||
},
|
||
Message::ChangeColor(Color::Hsv(h, s, v)) => {
|
||
println!(
|
||
"Change the color to hue {}, saturation {}, and value {}",
|
||
h,
|
||
s,
|
||
v
|
||
)
|
||
}
|
||
_ => ()
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-16: Matching on nested enums</span></p>
|
||
<p>The pattern of the first arm in the <code>match</code> expression matches a
|
||
<code>Message::ChangeColor</code> enum variant that contains a <code>Color::Rgb</code> variant; then
|
||
the pattern binds to the three inner <code>i32</code> values. The pattern of the second
|
||
arm also matches a <code>Message::ChangeColor</code> enum variant, but the inner enum
|
||
matches the <code>Color::Hsv</code> variant instead. We can specify these complex
|
||
conditions in one <code>match</code> expression, even though two enums are involved.</p>
|
||
<h4><a class="header" href="#destructuring-structs-and-tuples" id="destructuring-structs-and-tuples">Destructuring Structs and Tuples</a></h4>
|
||
<p>We can mix, match, and nest destructuring patterns in even more complex ways.
|
||
The following example shows a complicated destructure where we nest structs and
|
||
tuples inside a tuple and destructure all the primitive values out:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Point {
|
||
</span><span class="boring"> x: i32,
|
||
</span><span class="boring"> y: i32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>let ((feet, inches), Point {x, y}) = ((3, 10), Point { x: 3, y: -10 });
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code lets us break complex types into their component parts so we can use
|
||
the values we’re interested in separately.</p>
|
||
<p>Destructuring with patterns is a convenient way to use pieces of values, such
|
||
as the value from each field in a struct, separately from each other.</p>
|
||
<h3><a class="header" href="#ignoring-values-in-a-pattern" id="ignoring-values-in-a-pattern">Ignoring Values in a Pattern</a></h3>
|
||
<p>You’ve seen that it’s sometimes useful to ignore values in a pattern, such as
|
||
in the last arm of a <code>match</code>, to get a catchall that doesn’t actually do
|
||
anything but does account for all remaining possible values. There are a few
|
||
ways to ignore entire values or parts of values in a pattern: using the <code>_</code>
|
||
pattern (which you’ve seen), using the <code>_</code> pattern within another pattern,
|
||
using a name that starts with an underscore, or using <code>..</code> to ignore remaining
|
||
parts of a value. Let’s explore how and why to use each of these patterns.</p>
|
||
<h4><a class="header" href="#ignoring-an-entire-value-with-_" id="ignoring-an-entire-value-with-_">Ignoring an Entire Value with <code>_</code></a></h4>
|
||
<p>We’ve used the underscore (<code>_</code>) as a wildcard pattern that will match any value
|
||
but not bind to the value. Although the underscore <code>_</code> pattern is especially
|
||
useful as the last arm in a <code>match</code> expression, we can use it in any pattern,
|
||
including function parameters, as shown in Listing 18-17.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn foo(_: i32, y: i32) {
|
||
println!("This code only uses the y parameter: {}", y);
|
||
}
|
||
|
||
fn main() {
|
||
foo(3, 4);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-17: Using <code>_</code> in a function signature</span></p>
|
||
<p>This code will completely ignore the value passed as the first argument, <code>3</code>,
|
||
and will print <code>This code only uses the y parameter: 4</code>.</p>
|
||
<p>In most cases when you no longer need a particular function parameter, you
|
||
would change the signature so it doesn’t include the unused parameter. Ignoring
|
||
a function parameter can be especially useful in some cases, for example, when
|
||
implementing a trait when you need a certain type signature but the function
|
||
body in your implementation doesn’t need one of the parameters. The compiler
|
||
will then not warn about unused function parameters, as it would if you used a
|
||
name instead.</p>
|
||
<h4><a class="header" href="#ignoring-parts-of-a-value-with-a-nested-_" id="ignoring-parts-of-a-value-with-a-nested-_">Ignoring Parts of a Value with a Nested <code>_</code></a></h4>
|
||
<p>We can also use <code>_</code> inside another pattern to ignore just part of a value, for
|
||
example, when we want to test for only part of a value but have no use for the
|
||
other parts in the corresponding code we want to run. Listing 18-18 shows code
|
||
responsible for managing a setting’s value. The business requirements are that
|
||
the user should not be allowed to overwrite an existing customization of a
|
||
setting but can unset the setting and give it a value if it is currently unset.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut setting_value = Some(5);
|
||
let new_setting_value = Some(10);
|
||
|
||
match (setting_value, new_setting_value) {
|
||
(Some(_), Some(_)) => {
|
||
println!("Can't overwrite an existing customized value");
|
||
}
|
||
_ => {
|
||
setting_value = new_setting_value;
|
||
}
|
||
}
|
||
|
||
println!("setting is {:?}", setting_value);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-18: Using an underscore within patterns that
|
||
match <code>Some</code> variants when we don’t need to use the value inside the
|
||
<code>Some</code></span></p>
|
||
<p>This code will print <code>Can't overwrite an existing customized value</code> and then
|
||
<code>setting is Some(5)</code>. In the first match arm, we don’t need to match on or use
|
||
the values inside either <code>Some</code> variant, but we do need to test for the case
|
||
when <code>setting_value</code> and <code>new_setting_value</code> are the <code>Some</code> variant. In that
|
||
case, we print why we’re not changing <code>setting_value</code>, and it doesn’t get
|
||
changed.</p>
|
||
<p>In all other cases (if either <code>setting_value</code> or <code>new_setting_value</code> is
|
||
<code>None</code>) expressed by the <code>_</code> pattern in the second arm, we want to allow
|
||
<code>new_setting_value</code> to become <code>setting_value</code>.</p>
|
||
<p>We can also use underscores in multiple places within one pattern to ignore
|
||
particular values. Listing 18-19 shows an example of ignoring the second and
|
||
fourth values in a tuple of five items.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let numbers = (2, 4, 8, 16, 32);
|
||
|
||
match numbers {
|
||
(first, _, third, _, fifth) => {
|
||
println!("Some numbers: {}, {}, {}", first, third, fifth)
|
||
},
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-19: Ignoring multiple parts of a tuple</span></p>
|
||
<p>This code will print <code>Some numbers: 2, 8, 32</code>, and the values 4 and 16 will be
|
||
ignored.</p>
|
||
<h4><a class="header" href="#ignoring-an-unused-variable-by-starting-its-name-with-_" id="ignoring-an-unused-variable-by-starting-its-name-with-_">Ignoring an Unused Variable by Starting Its Name with <code>_</code></a></h4>
|
||
<p>If you create a variable but don’t use it anywhere, Rust will usually issue a
|
||
warning because that could be a bug. But sometimes it’s useful to create a
|
||
variable you won’t use yet, such as when you’re prototyping or just starting a
|
||
project. In this situation, you can tell Rust not to warn you about the unused
|
||
variable by starting the name of the variable with an underscore. In Listing
|
||
18-20, we create two unused variables, but when we run this code, we should
|
||
only get a warning about one of them.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let _x = 5;
|
||
let y = 10;
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-20: Starting a variable name with an
|
||
underscore to avoid getting unused variable warnings</span></p>
|
||
<p>Here we get a warning about not using the variable <code>y</code>, but we don’t get a
|
||
warning about not using the variable preceded by the underscore.</p>
|
||
<p>Note that there is a subtle difference between using only <code>_</code> and using a name
|
||
that starts with an underscore. The syntax <code>_x</code> still binds the value to the
|
||
variable, whereas <code>_</code> doesn’t bind at all. To show a case where this
|
||
distinction matters, Listing 18-21 will provide us with an error.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let s = Some(String::from("Hello!"));
|
||
|
||
if let Some(_s) = s {
|
||
println!("found a string");
|
||
}
|
||
|
||
println!("{:?}", s);
|
||
</code></pre>
|
||
<p><span class="caption">Listing 18-21: An unused variable starting with an
|
||
underscore still binds the value, which might take ownership of the value</span></p>
|
||
<p>We’ll receive an error because the <code>s</code> value will still be moved into <code>_s</code>,
|
||
which prevents us from using <code>s</code> again. However, using the underscore by itself
|
||
doesn’t ever bind to the value. Listing 18-22 will compile without any errors
|
||
because <code>s</code> doesn’t get moved into <code>_</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let s = Some(String::from("Hello!"));
|
||
|
||
if let Some(_) = s {
|
||
println!("found a string");
|
||
}
|
||
|
||
println!("{:?}", s);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-22: Using an underscore does not bind the
|
||
value</span></p>
|
||
<p>This code works just fine because we never bind <code>s</code> to anything; it isn’t moved.</p>
|
||
<h4><a class="header" href="#ignoring-remaining-parts-of-a-value-with-" id="ignoring-remaining-parts-of-a-value-with-">Ignoring Remaining Parts of a Value with <code>..</code></a></h4>
|
||
<p>With values that have many parts, we can use the <code>..</code> syntax to use only a few
|
||
parts and ignore the rest, avoiding the need to list underscores for each
|
||
ignored value. The <code>..</code> pattern ignores any parts of a value that we haven’t
|
||
explicitly matched in the rest of the pattern. In Listing 18-23, we have a
|
||
<code>Point</code> struct that holds a coordinate in three-dimensional space. In the
|
||
<code>match</code> expression, we want to operate only on the <code>x</code> coordinate and ignore
|
||
the values in the <code>y</code> and <code>z</code> fields.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>struct Point {
|
||
x: i32,
|
||
y: i32,
|
||
z: i32,
|
||
}
|
||
|
||
let origin = Point { x: 0, y: 0, z: 0 };
|
||
|
||
match origin {
|
||
Point { x, .. } => println!("x is {}", x),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-23: Ignoring all fields of a <code>Point</code> except
|
||
for <code>x</code> by using <code>..</code></span></p>
|
||
<p>We list the <code>x</code> value and then just include the <code>..</code> pattern. This is quicker
|
||
than having to list <code>y: _</code> and <code>z: _</code>, particularly when we’re working with
|
||
structs that have lots of fields in situations where only one or two fields are
|
||
relevant.</p>
|
||
<p>The syntax <code>..</code> will expand to cover as many values as needed. Listing 18-24
|
||
shows how to use <code>..</code> with a tuple.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let numbers = (2, 4, 8, 16, 32);
|
||
|
||
match numbers {
|
||
(first, .., last) => {
|
||
println!("Some numbers: {}, {}", first, last);
|
||
},
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-24: Matching only the first and last values in
|
||
a tuple and ignoring all other values</span></p>
|
||
<p>In this code, the first and last values are matched with <code>first</code> and <code>last</code>. The
|
||
<code>..</code> will match and ignore everything in the middle.</p>
|
||
<p>However, using <code>..</code> must be unambiguous. If it is unclear which values are
|
||
intended for matching and which should be ignored, Rust will give us an error.
|
||
Listing 18-25 shows an example of using <code>..</code> ambiguously, so it will not
|
||
compile.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
let numbers = (2, 4, 8, 16, 32);
|
||
|
||
match numbers {
|
||
(.., second, ..) => {
|
||
println!("Some numbers: {}", second)
|
||
},
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 18-25: An attempt to use <code>..</code> in an ambiguous
|
||
way</span></p>
|
||
<p>When we compile this example, we get this error:</p>
|
||
<pre><code class="language-text">error: `..` can only be used once per tuple or tuple struct pattern
|
||
--> src/main.rs:5:22
|
||
|
|
||
5 | (.., second, ..) => {
|
||
| ^^
|
||
</code></pre>
|
||
<p>It’s impossible for Rust to determine how many values in the tuple to ignore
|
||
before matching a value with <code>second</code> and then how many further values to
|
||
ignore thereafter. This code could mean that we want to ignore <code>2</code>, bind
|
||
<code>second</code> to <code>4</code>, and then ignore <code>8</code>, <code>16</code>, and <code>32</code>; or that we want to ignore
|
||
<code>2</code> and <code>4</code>, bind <code>second</code> to <code>8</code>, and then ignore <code>16</code> and <code>32</code>; and so forth.
|
||
The variable name <code>second</code> doesn’t mean anything special to Rust, so we get a
|
||
compiler error because using <code>..</code> in two places like this is ambiguous.</p>
|
||
<h3><a class="header" href="#extra-conditionals-with-match-guards" id="extra-conditionals-with-match-guards">Extra Conditionals with Match Guards</a></h3>
|
||
<p>A <em>match guard</em> is an additional <code>if</code> condition specified after the pattern in
|
||
a <code>match</code> arm that must also match, along with the pattern matching, for that
|
||
arm to be chosen. Match guards are useful for expressing more complex ideas
|
||
than a pattern alone allows.</p>
|
||
<p>The condition can use variables created in the pattern. Listing 18-26 shows a
|
||
<code>match</code> where the first arm has the pattern <code>Some(x)</code> and also has a match
|
||
guard of <code>if x &lt; 5</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let num = Some(4);
|
||
|
||
match num {
|
||
Some(x) if x < 5 => println!("less than five: {}", x),
|
||
Some(x) => println!("{}", x),
|
||
None => (),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-26: Adding a match guard to a pattern</span></p>
|
||
<p>This example will print <code>less than five: 4</code>. When <code>num</code> is compared to the
|
||
pattern in the first arm, it matches, because <code>Some(4)</code> matches <code>Some(x)</code>. Then
|
||
the match guard checks whether the value in <code>x</code> is less than <code>5</code>, and because
|
||
it is, the first arm is selected.</p>
|
||
<p>If <code>num</code> had been <code>Some(10)</code> instead, the match guard in the first arm would
|
||
have been false because 10 is not less than 5. Rust would then go to the second
|
||
arm, which would match because the second arm doesn’t have a match guard and
|
||
therefore matches any <code>Some</code> variant.</p>
|
||
<p>There is no way to express the <code>if x &lt; 5</code> condition within a pattern, so the
|
||
match guard gives us the ability to express this logic.</p>
|
||
<p>In Listing 18-11, we mentioned that we could use match guards to solve our
|
||
pattern-shadowing problem. Recall that a new variable was created inside the
|
||
pattern in the <code>match</code> expression instead of using the variable outside the
|
||
<code>match</code>. That new variable meant we couldn’t test against the value of the
|
||
outer variable. Listing 18-27 shows how we can use a match guard to fix this
|
||
problem.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = Some(5);
|
||
let y = 10;
|
||
|
||
match x {
|
||
Some(50) => println!("Got 50"),
|
||
Some(n) if n == y => println!("Matched, n = {}", n),
|
||
_ => println!("Default case, x = {:?}", x),
|
||
}
|
||
|
||
println!("at the end: x = {:?}, y = {}", x, y);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 18-27: Using a match guard to test for equality
|
||
with an outer variable</span></p>
|
||
<p>This code will now print <code>Default case, x = Some(5)</code>. The pattern in the second
|
||
match arm doesn’t introduce a new variable <code>y</code> that would shadow the outer <code>y</code>,
|
||
meaning we can use the outer <code>y</code> in the match guard. Instead of specifying the
|
||
pattern as <code>Some(y)</code>, which would have shadowed the outer <code>y</code>, we specify
|
||
<code>Some(n)</code>. This creates a new variable <code>n</code> that doesn’t shadow anything because
|
||
there is no <code>n</code> variable outside the <code>match</code>.</p>
|
||
<p>The match guard <code>if n == y</code> is not a pattern and therefore doesn’t introduce
|
||
new variables. This <code>y</code> <em>is</em> the outer <code>y</code> rather than a new shadowed <code>y</code>, and
|
||
we can look for a value that has the same value as the outer <code>y</code> by comparing
|
||
<code>n</code> to <code>y</code>.</p>
|
||
<p>You can also use the <em>or</em> operator <code>|</code> in an arm that has a match guard to specify multiple
|
||
patterns; the match guard condition will apply to all the patterns. Listing
|
||
18-28 shows the precedence of combining a match guard with a pattern that uses
|
||
<code>|</code>. The important part of this example is that the <code>if y</code> match guard applies
|
||
to <code>4</code>, <code>5</code>, <em>and</em> <code>6</code>, even though it might look like <code>if y</code> only applies to
|
||
<code>6</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let x = 4;
|
||
let y = false;
|
||
|
||
match x {
|
||
4 | 5 | 6 if y => println!("yes"),
|
||
_ => println!("no"),
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-28: Combining multiple patterns with a match
|
||
guard</span></p>
|
||
<p>The match condition states that the arm only matches if the value of <code>x</code> is
|
||
equal to <code>4</code>, <code>5</code>, or <code>6</code> <em>and</em> if <code>y</code> is <code>true</code>. When this code runs, the
|
||
pattern of the first arm matches because <code>x</code> is <code>4</code>, but the match guard <code>if y</code>
|
||
is false, so the first arm is not chosen. The code moves on to the second arm,
|
||
which does match, and this program prints <code>no</code>. The reason is that the <code>if</code>
|
||
condition applies to the whole pattern <code>4 | 5 | 6</code>, not only to the last value
|
||
<code>6</code>. In other words, the precedence of a match guard in relation to a pattern
|
||
behaves like this:</p>
|
||
<pre><code class="language-text">(4 | 5 | 6) if y => ...
|
||
</code></pre>
|
||
<p>rather than this:</p>
|
||
<pre><code class="language-text">4 | 5 | (6 if y) => ...
|
||
</code></pre>
|
||
<p>After running the code, the precedence behavior is evident: if the match guard
|
||
were applied only to the final value in the list of values specified using the
|
||
<code>|</code> operator, the arm would have matched and the program would have printed
|
||
<code>yes</code>.</p>
|
||
<h3><a class="header" href="#-bindings" id="-bindings"><code>@</code> Bindings</a></h3>
|
||
<p>The <em>at</em> operator (<code>@</code>) lets us create a variable that holds a value at the
|
||
same time we’re testing that value to see whether it matches a pattern. Listing
|
||
18-29 shows an example where we want to test that a <code>Message::Hello</code> <code>id</code> field
|
||
is within the range <code>3..=7</code>. But we also want to bind the value to the variable
|
||
<code>id_variable</code> so we can use it in the code associated with the arm. We could
|
||
name this variable <code>id</code>, the same as the field, but for this example we’ll use
|
||
a different name.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Message {
|
||
Hello { id: i32 },
|
||
}
|
||
|
||
let msg = Message::Hello { id: 5 };
|
||
|
||
match msg {
|
||
Message::Hello { id: id_variable @ 3..=7 } => {
|
||
println!("Found an id in range: {}", id_variable)
|
||
},
|
||
Message::Hello { id: 10..=12 } => {
|
||
println!("Found an id in another range")
|
||
},
|
||
Message::Hello { id } => {
|
||
println!("Found some other id: {}", id)
|
||
},
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 18-29: Using <code>@</code> to bind to a value in a pattern
|
||
while also testing it</span></p>
|
||
<p>This example will print <code>Found an id in range: 5</code>. By specifying <code>id_variable @</code> before the range <code>3..=7</code>, we’re capturing whatever value matched the range
|
||
while also testing that the value matched the range pattern.</p>
|
||
<p>In the second arm, where we only have a range specified in the pattern, the code
|
||
associated with the arm doesn’t have a variable that contains the actual value
|
||
of the <code>id</code> field. The <code>id</code> field’s value could have been 10, 11, or 12, but
|
||
the code that goes with that pattern doesn’t know which it is. The pattern code
|
||
isn’t able to use the value from the <code>id</code> field, because we haven’t saved the
|
||
<code>id</code> value in a variable.</p>
|
||
<p>In the last arm, where we’ve specified a variable without a range, we do have
|
||
the value available to use in the arm’s code in a variable named <code>id</code>. The
|
||
reason is that we’ve used the struct field shorthand syntax. But we haven’t
|
||
applied any test to the value in the <code>id</code> field in this arm, as we did with the
|
||
first two arms: any value would match this pattern.</p>
|
||
<p>Using <code>@</code> lets us test a value and save it in a variable within one pattern.</p>
|
||
<h2><a class="header" href="#summary-17" id="summary-17">Summary</a></h2>
|
||
<p>Rust’s patterns are very useful in that they help distinguish between different
|
||
kinds of data. When used in <code>match</code> expressions, Rust ensures your patterns
|
||
cover every possible value, or your program won’t compile. Patterns in <code>let</code>
|
||
statements and function parameters make those constructs more useful, enabling
|
||
the destructuring of values into smaller parts at the same time as assigning to
|
||
variables. We can create simple or complex patterns to suit our needs.</p>
|
||
<p>Next, for the penultimate chapter of the book, we’ll look at some advanced
|
||
aspects of a variety of Rust’s features.</p>
|
||
<h1><a class="header" href="#advanced-features" id="advanced-features">Advanced Features</a></h1>
|
||
<p>By now, you’ve learned the most commonly used parts of the Rust programming
|
||
language. Before we do one more project in Chapter 20, we’ll look at a few
|
||
aspects of the language you might run into every once in a while. You can use
|
||
this chapter as a reference for when you encounter any unknowns when using
|
||
Rust. The features you’ll learn to use in this chapter are useful in very
|
||
specific situations. Although you might not reach for them often, we want to
|
||
make sure you have a grasp of all the features Rust has to offer.</p>
|
||
<p>In this chapter, we’ll cover:</p>
|
||
<ul>
|
||
<li>Unsafe Rust: how to opt out of some of Rust’s guarantees and take
|
||
responsibility for manually upholding those guarantees</li>
|
||
<li>Advanced traits: associated types, default type parameters, fully qualified
|
||
syntax, supertraits, and the newtype pattern in relation to traits</li>
|
||
<li>Advanced types: more about the newtype pattern, type aliases, the never type,
|
||
and dynamically sized types</li>
|
||
<li>Advanced functions and closures: function pointers and returning closures</li>
|
||
<li>Macros: ways to define code that defines more code at compile time</li>
|
||
</ul>
|
||
<p>It’s a panoply of Rust features with something for everyone! Let’s dive in!</p>
|
||
<h2><a class="header" href="#unsafe-rust" id="unsafe-rust">Unsafe Rust</a></h2>
|
||
<p>All the code we’ve discussed so far has had Rust’s memory safety guarantees
|
||
enforced at compile time. However, Rust has a second language hidden inside it
|
||
that doesn’t enforce these memory safety guarantees: it’s called <em>unsafe Rust</em>
|
||
and works just like regular Rust, but gives us extra superpowers.</p>
|
||
<p>Unsafe Rust exists because, by nature, static analysis is conservative. When
|
||
the compiler tries to determine whether or not code upholds the guarantees,
|
||
it’s better for it to reject some valid programs rather than accept some
|
||
invalid programs. Although the code might be okay, as far as Rust is able to
|
||
tell, it’s not! In these cases, you can use unsafe code to tell the compiler,
|
||
“Trust me, I know what I’m doing.” The downside is that you use it at your own
|
||
risk: if you use unsafe code incorrectly, problems due to memory unsafety, such
|
||
as null pointer dereferencing, can occur.</p>
|
||
<p>Another reason Rust has an unsafe alter ego is that the underlying computer
|
||
hardware is inherently unsafe. If Rust didn’t let you do unsafe operations, you
|
||
couldn’t do certain tasks. Rust needs to allow you to do low-level systems
|
||
programming, such as directly interacting with the operating system or even
|
||
writing your own operating system. Working with low-level systems programming
|
||
is one of the goals of the language. Let’s explore what we can do with unsafe
|
||
Rust and how to do it.</p>
|
||
<h3><a class="header" href="#unsafe-superpowers" id="unsafe-superpowers">Unsafe Superpowers</a></h3>
|
||
<p>To switch to unsafe Rust, use the <code>unsafe</code> keyword and then start a new block
|
||
that holds the unsafe code. You can take five actions in unsafe Rust, called
|
||
<em>unsafe superpowers</em>, that you can’t in safe Rust. Those superpowers include
|
||
the ability to:</p>
|
||
<ul>
|
||
<li>Dereference a raw pointer</li>
|
||
<li>Call an unsafe function or method</li>
|
||
<li>Access or modify a mutable static variable</li>
|
||
<li>Implement an unsafe trait</li>
|
||
<li>Access fields of <code>union</code>s</li>
|
||
</ul>
|
||
<p>It’s important to understand that <code>unsafe</code> doesn’t turn off the borrow checker
|
||
or disable any other of Rust’s safety checks: if you use a reference in unsafe
|
||
code, it will still be checked. The <code>unsafe</code> keyword only gives you access to
|
||
these five features that are then not checked by the compiler for memory
|
||
safety. You’ll still get some degree of safety inside of an unsafe block.</p>
|
||
<p>In addition, <code>unsafe</code> does not mean the code inside the block is necessarily
|
||
dangerous or that it will definitely have memory safety problems: the intent is
|
||
that as the programmer, you’ll ensure the code inside an <code>unsafe</code> block will
|
||
access memory in a valid way.</p>
|
||
<p>People are fallible, and mistakes will happen, but by requiring these five
|
||
unsafe operations to be inside blocks annotated with <code>unsafe</code> you’ll know that
|
||
any errors related to memory safety must be within an <code>unsafe</code> block. Keep
|
||
<code>unsafe</code> blocks small; you’ll be thankful later when you investigate memory
|
||
bugs.</p>
|
||
<p>To isolate unsafe code as much as possible, it’s best to enclose unsafe code
|
||
within a safe abstraction and provide a safe API, which we’ll discuss later in
|
||
the chapter when we examine unsafe functions and methods. Parts of the standard
|
||
library are implemented as safe abstractions over unsafe code that has been
|
||
audited. Wrapping unsafe code in a safe abstraction prevents uses of <code>unsafe</code>
|
||
from leaking out into all the places that you or your users might want to use
|
||
the functionality implemented with <code>unsafe</code> code, because using a safe
|
||
abstraction is safe.</p>
|
||
<p>Let’s look at each of the four unsafe superpowers in turn. We’ll also look at
|
||
some abstractions that provide a safe interface to unsafe code.</p>
|
||
<h3><a class="header" href="#dereferencing-a-raw-pointer" id="dereferencing-a-raw-pointer">Dereferencing a Raw Pointer</a></h3>
|
||
<p>In Chapter 4, in the <a href="ch04-02-references-and-borrowing.html#dangling-references">“Dangling References”</a><!-- ignore
|
||
--> section, we mentioned that the compiler ensures references are always
|
||
valid. Unsafe Rust has two new types called <em>raw pointers</em> that are similar to
|
||
references. As with references, raw pointers can be immutable or mutable and
|
||
are written as <code>*const T</code> and <code>*mut T</code>, respectively. The asterisk isn’t the
|
||
dereference operator; it’s part of the type name. In the context of raw
|
||
pointers, <em>immutable</em> means that the pointer can’t be directly assigned to
|
||
after being dereferenced.</p>
|
||
<p>Different from references and smart pointers, raw pointers:</p>
|
||
<ul>
|
||
<li>Are allowed to ignore the borrowing rules by having both immutable and
|
||
mutable pointers or multiple mutable pointers to the same location</li>
|
||
<li>Aren’t guaranteed to point to valid memory</li>
|
||
<li>Are allowed to be null</li>
|
||
<li>Don’t implement any automatic cleanup</li>
|
||
</ul>
|
||
<p>By opting out of having Rust enforce these guarantees, you can give up
|
||
guaranteed safety in exchange for greater performance or the ability to
|
||
interface with another language or hardware where Rust’s guarantees don’t apply.</p>
|
||
<p>Listing 19-1 shows how to create an immutable and a mutable raw pointer from
|
||
references.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut num = 5;
|
||
|
||
let r1 = &num as *const i32;
|
||
let r2 = &mut num as *mut i32;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-1: Creating raw pointers from references</span></p>
|
||
<p>Notice that we don’t include the <code>unsafe</code> keyword in this code. We can create
|
||
raw pointers in safe code; we just can’t dereference raw pointers outside an
|
||
unsafe block, as you’ll see in a bit.</p>
|
||
<p>We’ve created raw pointers by using <code>as</code> to cast an immutable and a mutable
|
||
reference into their corresponding raw pointer types. Because we created them
|
||
directly from references guaranteed to be valid, we know these particular raw
|
||
pointers are valid, but we can’t make that assumption about just any raw
|
||
pointer.</p>
|
||
<p>Next, we’ll create a raw pointer whose validity we can’t be so certain of.
|
||
Listing 19-2 shows how to create a raw pointer to an arbitrary location in
|
||
memory. Trying to use arbitrary memory is undefined: there might be data at
|
||
that address or there might not, the compiler might optimize the code so there
|
||
is no memory access, or the program might error with a segmentation fault.
|
||
Usually, there is no good reason to write code like this, but it is possible.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let address = 0x012345usize;
|
||
let r = address as *const i32;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-2: Creating a raw pointer to an arbitrary
|
||
memory address</span></p>
|
||
<p>Recall that we can create raw pointers in safe code, but we can’t <em>dereference</em>
|
||
raw pointers and read the data being pointed to. In Listing 19-3, we use the
|
||
dereference operator <code>*</code> on a raw pointer, which requires an <code>unsafe</code> block.</p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut num = 5;
|
||
|
||
let r1 = &num as *const i32;
|
||
let r2 = &mut num as *mut i32;
|
||
|
||
unsafe {
|
||
println!("r1 is: {}", *r1);
|
||
println!("r2 is: {}", *r2);
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-3: Dereferencing raw pointers within an
|
||
<code>unsafe</code> block</span></p>
|
||
<p>Creating a pointer does no harm; it’s only when we try to access the value that
|
||
it points at that we might end up dealing with an invalid value.</p>
|
||
<p>Note also that in Listings 19-1 and 19-3, we created <code>*const i32</code> and <code>*mut i32</code>
|
||
raw pointers that both pointed to the same memory location, where <code>num</code> is
|
||
stored. If we instead tried to create an immutable and a mutable reference to
|
||
<code>num</code>, the code would not have compiled because Rust’s ownership rules don’t
|
||
allow a mutable reference at the same time as any immutable references. With
|
||
raw pointers, we can create a mutable pointer and an immutable pointer to the
|
||
same location and change data through the mutable pointer, potentially creating
|
||
a data race. Be careful!</p>
|
||
<p>With all of these dangers, why would you ever use raw pointers? One major use
|
||
case is when interfacing with C code, as you’ll see in the next section,
|
||
<a href="ch19-01-unsafe-rust.html#calling-an-unsafe-function-or-method">“Calling an Unsafe Function or
|
||
Method.”</a><!-- ignore --> Another case is
|
||
when building up safe abstractions that the borrow checker doesn’t understand.
|
||
We’ll introduce unsafe functions and then look at an example of a safe
|
||
abstraction that uses unsafe code.</p>
|
||
<h3><a class="header" href="#calling-an-unsafe-function-or-method" id="calling-an-unsafe-function-or-method">Calling an Unsafe Function or Method</a></h3>
|
||
<p>The second type of operation that requires an unsafe block is calls to unsafe
|
||
functions. Unsafe functions and methods look exactly like regular functions and
|
||
methods, but they have an extra <code>unsafe</code> before the rest of the definition. The
|
||
<code>unsafe</code> keyword in this context indicates the function has requirements we
|
||
need to uphold when we call this function, because Rust can’t guarantee we’ve
|
||
met these requirements. By calling an unsafe function within an <code>unsafe</code> block,
|
||
we’re saying that we’ve read this function’s documentation and take
|
||
responsibility for upholding the function’s contracts.</p>
|
||
<p>Here is an unsafe function named <code>dangerous</code> that doesn’t do anything in its
|
||
body:</p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>unsafe fn dangerous() {}
|
||
|
||
unsafe {
|
||
dangerous();
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We must call the <code>dangerous</code> function within a separate <code>unsafe</code> block. If we
|
||
try to call <code>dangerous</code> without the <code>unsafe</code> block, we’ll get an error:</p>
|
||
<pre><code class="language-text">error[E0133]: call to unsafe function requires unsafe function or block
|
||
-->
|
||
|
|
||
4 | dangerous();
|
||
| ^^^^^^^^^^^ call to unsafe function
|
||
</code></pre>
|
||
<p>By inserting the <code>unsafe</code> block around our call to <code>dangerous</code>, we’re asserting
|
||
to Rust that we’ve read the function’s documentation, we understand how to use
|
||
it properly, and we’ve verified that we’re fulfilling the contract of the
|
||
function.</p>
|
||
<p>Bodies of unsafe functions are effectively <code>unsafe</code> blocks, so to perform other
|
||
unsafe operations within an unsafe function, we don’t need to add another
|
||
<code>unsafe</code> block.</p>
|
||
<h4><a class="header" href="#creating-a-safe-abstraction-over-unsafe-code" id="creating-a-safe-abstraction-over-unsafe-code">Creating a Safe Abstraction over Unsafe Code</a></h4>
|
||
<p>Just because a function contains unsafe code doesn’t mean we need to mark the
|
||
entire function as unsafe. In fact, wrapping unsafe code in a safe function is
|
||
a common abstraction. As an example, let’s study a function from the standard
|
||
library, <code>split_at_mut</code>, that requires some unsafe code and explore how we
|
||
might implement it. This safe method is defined on mutable slices: it takes one
|
||
slice and makes it two by splitting the slice at the index given as an
|
||
argument. Listing 19-4 shows how to use <code>split_at_mut</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let mut v = vec![1, 2, 3, 4, 5, 6];
|
||
|
||
let r = &mut v[..];
|
||
|
||
let (a, b) = r.split_at_mut(3);
|
||
|
||
assert_eq!(a, &mut [1, 2, 3]);
|
||
assert_eq!(b, &mut [4, 5, 6]);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-4: Using the safe <code>split_at_mut</code>
|
||
function</span></p>
|
||
<p>We can’t implement this function using only safe Rust. An attempt might look
|
||
something like Listing 19-5, which won’t compile. For simplicity, we’ll
|
||
implement <code>split_at_mut</code> as a function rather than a method and only for slices
|
||
of <code>i32</code> values rather than for a generic type <code>T</code>.</p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn split_at_mut(slice: &mut [i32], mid: usize) -> (&mut [i32], &mut [i32]) {
|
||
let len = slice.len();
|
||
|
||
assert!(mid <= len);
|
||
|
||
(&mut slice[..mid],
|
||
&mut slice[mid..])
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-5: An attempted implementation of
|
||
<code>split_at_mut</code> using only safe Rust</span></p>
|
||
<p>This function first gets the total length of the slice. Then it asserts that
|
||
the index given as a parameter is within the slice by checking whether it’s
|
||
less than or equal to the length. The assertion means that if we pass an index
|
||
that is greater than the length to split the slice at, the function will panic
|
||
before it attempts to use that index.</p>
|
||
<p>Then we return two mutable slices in a tuple: one from the start of the
|
||
original slice to the <code>mid</code> index and another from <code>mid</code> to the end of the
|
||
slice.</p>
|
||
<p>When we try to compile the code in Listing 19-5, we’ll get an error.</p>
|
||
<pre><code class="language-text">error[E0499]: cannot borrow `*slice` as mutable more than once at a time
|
||
-->
|
||
|
|
||
6 | (&mut slice[..mid],
|
||
| ----- first mutable borrow occurs here
|
||
7 | &mut slice[mid..])
|
||
| ^^^^^ second mutable borrow occurs here
|
||
8 | }
|
||
| - first borrow ends here
|
||
</code></pre>
|
||
<p>Rust’s borrow checker can’t understand that we’re borrowing different parts of
|
||
the slice; it only knows that we’re borrowing from the same slice twice.
|
||
Borrowing different parts of a slice is fundamentally okay because the two
|
||
slices aren’t overlapping, but Rust isn’t smart enough to know this. When we
|
||
know code is okay, but Rust doesn’t, it’s time to reach for unsafe code.</p>
|
||
<p>Listing 19-6 shows how to use an <code>unsafe</code> block, a raw pointer, and some calls
|
||
to unsafe functions to make the implementation of <code>split_at_mut</code> work.</p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::slice;
|
||
|
||
fn split_at_mut(slice: &mut [i32], mid: usize) -> (&mut [i32], &mut [i32]) {
|
||
let len = slice.len();
|
||
let ptr = slice.as_mut_ptr();
|
||
|
||
assert!(mid <= len);
|
||
|
||
unsafe {
|
||
(slice::from_raw_parts_mut(ptr, mid),
|
||
slice::from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-6: Using unsafe code in the implementation of
|
||
the <code>split_at_mut</code> function</span></p>
|
||
<p>Recall from the <a href="ch04-03-slices.html#the-slice-type">“The Slice Type”</a><!-- ignore --> section in
|
||
Chapter 4 that a slice is a pointer to some data and the length of the slice.
|
||
We use the <code>len</code> method to get the length of a slice and the <code>as_mut_ptr</code>
|
||
method to access the raw pointer of a slice. In this case, because we have a
|
||
mutable slice to <code>i32</code> values, <code>as_mut_ptr</code> returns a raw pointer with the type
|
||
<code>*mut i32</code>, which we’ve stored in the variable <code>ptr</code>.</p>
|
||
<p>We keep the assertion that the <code>mid</code> index is within the slice. Then we get to
|
||
the unsafe code: the <code>slice::from_raw_parts_mut</code> function takes a raw pointer
|
||
and a length, and it creates a slice. We use this function to create a slice
|
||
that starts from <code>ptr</code> and is <code>mid</code> items long. Then we call the <code>offset</code>
|
||
method on <code>ptr</code> with <code>mid</code> as an argument to get a raw pointer that starts at
|
||
<code>mid</code>, and we create a slice using that pointer and the remaining number of
|
||
items after <code>mid</code> as the length.</p>
|
||
<p>The function <code>slice::from_raw_parts_mut</code> is unsafe because it takes a raw
|
||
pointer and must trust that this pointer is valid. The <code>offset</code> method on raw
|
||
pointers is also unsafe, because it must trust that the offset location is also
|
||
a valid pointer. Therefore, we had to put an <code>unsafe</code> block around our calls to
|
||
<code>slice::from_raw_parts_mut</code> and <code>offset</code> so we could call them. By looking at
|
||
the code and by adding the assertion that <code>mid</code> must be less than or equal to
|
||
<code>len</code>, we can tell that all the raw pointers used within the <code>unsafe</code> block
|
||
will be valid pointers to data within the slice. This is an acceptable and
|
||
appropriate use of <code>unsafe</code>.</p>
|
||
<p>Note that we don’t need to mark the resulting <code>split_at_mut</code> function as
|
||
<code>unsafe</code>, and we can call this function from safe Rust. We’ve created a safe
|
||
abstraction to the unsafe code with an implementation of the function that uses
|
||
<code>unsafe</code> code in a safe way, because it creates only valid pointers from the
|
||
data this function has access to.</p>
|
||
<p>In contrast, the use of <code>slice::from_raw_parts_mut</code> in Listing 19-7 would
|
||
likely crash when the slice is used. This code takes an arbitrary memory
|
||
location and creates a slice 10,000 items long.</p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::slice;
|
||
|
||
let address = 0x01234usize;
|
||
let r = address as *mut i32;
|
||
|
||
let slice: &[i32] = unsafe {
|
||
slice::from_raw_parts_mut(r, 10000)
|
||
};
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-7: Creating a slice from an arbitrary memory
|
||
location</span></p>
|
||
<p>We don’t own the memory at this arbitrary location, and there is no guarantee
|
||
that the slice this code creates contains valid <code>i32</code> values. Attempting to use
|
||
<code>slice</code> as though it’s a valid slice results in undefined behavior.</p>
|
||
<h4><a class="header" href="#using-extern-functions-to-call-external-code" id="using-extern-functions-to-call-external-code">Using <code>extern</code> Functions to Call External Code</a></h4>
|
||
<p>Sometimes, your Rust code might need to interact with code written in another
|
||
language. For this, Rust has a keyword, <code>extern</code>, that facilitates the creation
|
||
and use of a <em>Foreign Function Interface (FFI)</em>. An FFI is a way for a
|
||
programming language to define functions and enable a different (foreign)
|
||
programming language to call those functions.</p>
|
||
<p>Listing 19-8 demonstrates how to set up an integration with the <code>abs</code> function
|
||
from the C standard library. Functions declared within <code>extern</code> blocks are
|
||
always unsafe to call from Rust code. The reason is that other languages don’t
|
||
enforce Rust’s rules and guarantees, and Rust can’t check them, so
|
||
responsibility falls on the programmer to ensure safety.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">extern "C" {
|
||
fn abs(input: i32) -> i32;
|
||
}
|
||
|
||
fn main() {
|
||
unsafe {
|
||
println!("Absolute value of -3 according to C: {}", abs(-3));
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-8: Declaring and calling an <code>extern</code> function
|
||
defined in another language</span></p>
|
||
<p>Within the <code>extern "C"</code> block, we list the names and signatures of external
|
||
functions from another language we want to call. The <code>"C"</code> part defines which
|
||
<em>application binary interface (ABI)</em> the external function uses: the ABI
|
||
defines how to call the function at the assembly level. The <code>"C"</code> ABI is the
|
||
most common and follows the C programming language’s ABI.</p>
|
||
<blockquote>
|
||
<h4><a class="header" href="#calling-rust-functions-from-other-languages" id="calling-rust-functions-from-other-languages">Calling Rust Functions from Other Languages</a></h4>
|
||
<p>We can also use <code>extern</code> to create an interface that allows other languages
|
||
to call Rust functions. Instead of an <code>extern</code> block, we add the <code>extern</code>
|
||
keyword and specify the ABI to use just before the <code>fn</code> keyword. We also need
|
||
to add a <code>#[no_mangle]</code> annotation to tell the Rust compiler not to mangle
|
||
the name of this function. <em>Mangling</em> is when a compiler changes the name
|
||
we’ve given a function to a different name that contains more information for
|
||
other parts of the compilation process to consume but is less human readable.
|
||
Every programming language compiler mangles names slightly differently, so
|
||
for a Rust function to be nameable by other languages, we must disable the
|
||
Rust compiler’s name mangling.</p>
|
||
<p>In the following example, we make the <code>call_from_c</code> function accessible from
|
||
C code, after it’s compiled to a shared library and linked from C:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[no_mangle]
|
||
pub extern "C" fn call_from_c() {
|
||
println!("Just called a Rust function from C!");
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This usage of <code>extern</code> does not require <code>unsafe</code>.</p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#accessing-or-modifying-a-mutable-static-variable" id="accessing-or-modifying-a-mutable-static-variable">Accessing or Modifying a Mutable Static Variable</a></h3>
|
||
<p>Until now, we’ve not talked about <em>global variables</em>, which Rust does support
|
||
but can be problematic with Rust’s ownership rules. If two threads are
|
||
accessing the same mutable global variable, it can cause a data race.</p>
|
||
<p>In Rust, global variables are called <em>static</em> variables. Listing 19-9 shows an
|
||
example declaration and use of a static variable with a string slice as a
|
||
value.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">static HELLO_WORLD: &str = "Hello, world!";
|
||
|
||
fn main() {
|
||
println!("name is: {}", HELLO_WORLD);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-9: Defining and using an immutable static
|
||
variable</span></p>
|
||
<p>Static variables are similar to constants, which we discussed in the
|
||
<a href="ch03-01-variables-and-mutability.html#differences-between-variables-and-constants">“Differences Between Variables and
|
||
Constants”</a><!-- ignore -->
|
||
section in Chapter 3. The names of static variables are in
|
||
<code>SCREAMING_SNAKE_CASE</code> by convention, and we <em>must</em> annotate the variable’s
|
||
type, which is <code>&'static str</code> in this example. Static variables can only store
|
||
references with the <code>'static</code> lifetime, which means the Rust compiler can
|
||
figure out the lifetime; we don’t need to annotate it explicitly. Accessing an
|
||
immutable static variable is safe.</p>
|
||
<p>Constants and immutable static variables might seem similar, but a subtle
|
||
difference is that values in a static variable have a fixed address in memory.
|
||
Using the value will always access the same data. Constants, on the other hand,
|
||
are allowed to duplicate their data whenever they’re used.</p>
|
||
<p>Another difference between constants and static variables is that static
|
||
variables can be mutable. Accessing and modifying mutable static variables is
|
||
<em>unsafe</em>. Listing 19-10 shows how to declare, access, and modify a mutable
|
||
static variable named <code>COUNTER</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">static mut COUNTER: u32 = 0;
|
||
|
||
fn add_to_count(inc: u32) {
|
||
unsafe {
|
||
COUNTER += inc;
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
add_to_count(3);
|
||
|
||
unsafe {
|
||
println!("COUNTER: {}", COUNTER);
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-10: Reading from or writing to a mutable
|
||
static variable is unsafe</span></p>
|
||
<p>As with regular variables, we specify mutability using the <code>mut</code> keyword. Any
|
||
code that reads from or writes to <code>COUNTER</code> must be within an <code>unsafe</code> block. This
|
||
code compiles and prints <code>COUNTER: 3</code> as we would expect because it’s single
|
||
threaded. Having multiple threads access <code>COUNTER</code> would likely result in data
|
||
races.</p>
|
||
<p>With mutable data that is globally accessible, it’s difficult to ensure there
|
||
are no data races, which is why Rust considers mutable static variables to be
|
||
unsafe. Where possible, it’s preferable to use the concurrency techniques and
|
||
thread-safe smart pointers we discussed in Chapter 16 so the compiler checks
|
||
that data is accessed from different threads safely.</p>
|
||
<h3><a class="header" href="#implementing-an-unsafe-trait" id="implementing-an-unsafe-trait">Implementing an Unsafe Trait</a></h3>
|
||
<p>The final action that works only with <code>unsafe</code> is implementing an unsafe trait.
|
||
A trait is unsafe when at least one of its methods has some invariant that the
|
||
compiler can’t verify. We can declare that a trait is <code>unsafe</code> by adding the
|
||
<code>unsafe</code> keyword before <code>trait</code> and marking the implementation of the trait as
|
||
<code>unsafe</code> too, as shown in Listing 19-11.</p>
|
||
<pre><pre class="playpen"><code class="language-rust unsafe">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>unsafe trait Foo {
|
||
// methods go here
|
||
}
|
||
|
||
unsafe impl Foo for i32 {
|
||
// method implementations go here
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-11: Defining and implementing an unsafe
|
||
trait</span></p>
|
||
<p>By using <code>unsafe impl</code>, we’re promising that we’ll uphold the invariants that
|
||
the compiler can’t verify.</p>
|
||
<p>As an example, recall the <code>Sync</code> and <code>Send</code> marker traits we discussed in the
|
||
<a href="ch16-04-extensible-concurrency-sync-and-send.html#extensible-concurrency-with-the-sync-and-send-traits">“Extensible Concurrency with the <code>Sync</code> and <code>Send</code>
|
||
Traits”</a><!-- ignore -->
|
||
section in Chapter 16: the compiler implements these traits automatically if
|
||
our types are composed entirely of <code>Send</code> and <code>Sync</code> types. If we implement a
|
||
type that contains a type that is not <code>Send</code> or <code>Sync</code>, such as raw pointers,
|
||
and we want to mark that type as <code>Send</code> or <code>Sync</code>, we must use <code>unsafe</code>. Rust
|
||
can’t verify that our type upholds the guarantees that it can be safely sent
|
||
across threads or accessed from multiple threads; therefore, we need to do
|
||
those checks manually and indicate as such with <code>unsafe</code>.</p>
|
||
<h3><a class="header" href="#when-to-use-unsafe-code" id="when-to-use-unsafe-code">When to Use Unsafe Code</a></h3>
|
||
<p>Using <code>unsafe</code> to take one of the four actions (superpowers) just discussed
|
||
isn’t wrong or even frowned upon. But it is trickier to get <code>unsafe</code> code
|
||
correct because the compiler can’t help uphold memory safety. When you have a
|
||
reason to use <code>unsafe</code> code, you can do so, and having the explicit <code>unsafe</code>
|
||
annotation makes it easier to track down the source of problems if they occur.</p>
|
||
<h2><a class="header" href="#advanced-traits" id="advanced-traits">Advanced Traits</a></h2>
|
||
<p>We first covered traits in the <a href="ch10-02-traits.html#traits-defining-shared-behavior">“Traits: Defining Shared
|
||
Behavior”</a><!-- ignore --> section of Chapter
|
||
10, but as with lifetimes, we didn’t discuss the more advanced details. Now
|
||
that you know more about Rust, we can get into the nitty-gritty.</p>
|
||
<h3><a class="header" href="#specifying-placeholder-types-in-trait-definitions-with-associated-types" id="specifying-placeholder-types-in-trait-definitions-with-associated-types">Specifying Placeholder Types in Trait Definitions with Associated Types</a></h3>
|
||
<p><em>Associated types</em> connect a type placeholder with a trait such that the trait
|
||
method definitions can use these placeholder types in their signatures. The
|
||
implementor of a trait will specify the concrete type to be used in this type’s
|
||
place for the particular implementation. That way, we can define a trait that
|
||
uses some types without needing to know exactly what those types are until the
|
||
trait is implemented.</p>
|
||
<p>We’ve described most of the advanced features in this chapter as being rarely
|
||
needed. Associated types are somewhere in the middle: they’re used more rarely
|
||
than features explained in the rest of the book but more commonly than many of
|
||
the other features discussed in this chapter.</p>
|
||
<p>One example of a trait with an associated type is the <code>Iterator</code> trait that the
|
||
standard library provides. The associated type is named <code>Item</code> and stands in
|
||
for the type of the values the type implementing the <code>Iterator</code> trait is
|
||
iterating over. In the <a href="ch13-02-iterators.html#the-iterator-trait-and-the-next-method">“The <code>Iterator</code> Trait and the <code>next</code>
|
||
Method”</a><!-- ignore --> section of
|
||
Chapter 13, we mentioned that the definition of the <code>Iterator</code> trait is as
|
||
shown in Listing 19-12.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Iterator {
|
||
type Item;
|
||
|
||
fn next(&mut self) -> Option<Self::Item>;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-12: The definition of the <code>Iterator</code> trait
|
||
that has an associated type <code>Item</code></span></p>
|
||
<p>The type <code>Item</code> is a placeholder type, and the <code>next</code> method’s definition shows
|
||
that it will return values of type <code>Option<Self::Item></code>. Implementors of the
|
||
<code>Iterator</code> trait will specify the concrete type for <code>Item</code>, and the <code>next</code>
|
||
method will return an <code>Option</code> containing a value of that concrete type.</p>
|
||
<p>Associated types might seem like a similar concept to generics, in that the
|
||
latter allow us to define a function without specifying what types it can
|
||
handle. So why use associated types?</p>
|
||
<p>Let’s examine the difference between the two concepts with an example from
|
||
Chapter 13 that implements the <code>Iterator</code> trait on the <code>Counter</code> struct. In
|
||
Listing 13-21, we specified that the <code>Item</code> type was <code>u32</code>:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Iterator for Counter {
|
||
type Item = u32;
|
||
|
||
fn next(&mut self) -> Option<Self::Item> {
|
||
// --snip--
|
||
</code></pre>
|
||
<p>This syntax seems comparable to that of generics. So why not just define the
|
||
<code>Iterator</code> trait with generics, as shown in Listing 19-13?</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait Iterator<T> {
|
||
fn next(&mut self) -> Option<T>;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-13: A hypothetical definition of the
|
||
<code>Iterator</code> trait using generics</span></p>
|
||
<p>The difference is that when using generics, as in Listing 19-13, we must
|
||
annotate the types in each implementation; because we can also implement
|
||
<code>Iterator<String> for Counter</code> or any other type, we could have multiple
|
||
implementations of <code>Iterator</code> for <code>Counter</code>. In other words, when a trait has a
|
||
generic parameter, it can be implemented for a type multiple times, changing
|
||
the concrete types of the generic type parameters each time. When we use the
|
||
<code>next</code> method on <code>Counter</code>, we would have to provide type annotations to
|
||
indicate which implementation of <code>Iterator</code> we want to use.</p>
|
||
<p>With associated types, we don’t need to annotate types because we can’t
|
||
implement a trait on a type multiple times. In Listing 19-12 with the
|
||
definition that uses associated types, we can only choose what the type of
|
||
<code>Item</code> will be once, because there can only be one <code>impl Iterator for Counter</code>.
|
||
We don’t have to specify that we want an iterator of <code>u32</code> values everywhere
|
||
that we call <code>next</code> on <code>Counter</code>.</p>
|
||
<h3><a class="header" href="#default-generic-type-parameters-and-operator-overloading" id="default-generic-type-parameters-and-operator-overloading">Default Generic Type Parameters and Operator Overloading</a></h3>
|
||
<p>When we use generic type parameters, we can specify a default concrete type for
|
||
the generic type. This eliminates the need for implementors of the trait to
|
||
specify a concrete type if the default type works. The syntax for specifying a
|
||
default type for a generic type is <code><PlaceholderType=ConcreteType></code> when
|
||
declaring the generic type.</p>
|
||
<p>A great example of a situation where this technique is useful is with operator
|
||
overloading. <em>Operator overloading</em> is customizing the behavior of an operator
|
||
(such as <code>+</code>) in particular situations.</p>
|
||
<p>Rust doesn’t allow you to create your own operators or overload arbitrary
|
||
operators. But you can overload the operations and corresponding traits listed
|
||
in <code>std::ops</code> by implementing the traits associated with the operator. For
|
||
example, in Listing 19-14 we overload the <code>+</code> operator to add two <code>Point</code>
|
||
instances together. We do this by implementing the <code>Add</code> trait on a <code>Point</code>
|
||
struct:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::ops::Add;
|
||
|
||
#[derive(Debug, PartialEq)]
|
||
struct Point {
|
||
x: i32,
|
||
y: i32,
|
||
}
|
||
|
||
impl Add for Point {
|
||
type Output = Point;
|
||
|
||
fn add(self, other: Point) -> Point {
|
||
Point {
|
||
x: self.x + other.x,
|
||
y: self.y + other.y,
|
||
}
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
assert_eq!(Point { x: 1, y: 0 } + Point { x: 2, y: 3 },
|
||
Point { x: 3, y: 3 });
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-14: Implementing the <code>Add</code> trait to overload
|
||
the <code>+</code> operator for <code>Point</code> instances</span></p>
|
||
<p>The <code>add</code> method adds the <code>x</code> values of two <code>Point</code> instances and the <code>y</code>
|
||
values of two <code>Point</code> instances to create a new <code>Point</code>. The <code>Add</code> trait has an
|
||
associated type named <code>Output</code> that determines the type returned from the <code>add</code>
|
||
method.</p>
|
||
<p>The default generic type in this code is within the <code>Add</code> trait. Here is its
|
||
definition:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>trait Add<RHS=Self> {
|
||
type Output;
|
||
|
||
fn add(self, rhs: RHS) -> Self::Output;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code should look generally familiar: a trait with one method and an
|
||
associated type. The new part is <code>RHS=Self</code>: this syntax is called <em>default
|
||
type parameters</em>. The <code>RHS</code> generic type parameter (short for “right hand
|
||
side”) defines the type of the <code>rhs</code> parameter in the <code>add</code> method. If we don’t
|
||
specify a concrete type for <code>RHS</code> when we implement the <code>Add</code> trait, the type
|
||
of <code>RHS</code> will default to <code>Self</code>, which will be the type we’re implementing
|
||
<code>Add</code> on.</p>
|
||
<p>When we implemented <code>Add</code> for <code>Point</code>, we used the default for <code>RHS</code> because we
|
||
wanted to add two <code>Point</code> instances. Let’s look at an example of implementing
|
||
the <code>Add</code> trait where we want to customize the <code>RHS</code> type rather than using the
|
||
default.</p>
|
||
<p>We have two structs, <code>Millimeters</code> and <code>Meters</code>, holding values in different
|
||
units. We want to add values in millimeters to values in meters and have the
|
||
implementation of <code>Add</code> do the conversion correctly. We can implement <code>Add</code> for
|
||
<code>Millimeters</code> with <code>Meters</code> as the <code>RHS</code>, as shown in Listing 19-15.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::ops::Add;
|
||
|
||
struct Millimeters(u32);
|
||
struct Meters(u32);
|
||
|
||
impl Add<Meters> for Millimeters {
|
||
type Output = Millimeters;
|
||
|
||
fn add(self, other: Meters) -> Millimeters {
|
||
Millimeters(self.0 + (other.0 * 1000))
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-15: Implementing the <code>Add</code> trait on
|
||
<code>Millimeters</code> to add <code>Millimeters</code> to <code>Meters</code></span></p>
|
||
<p>To add <code>Millimeters</code> and <code>Meters</code>, we specify <code>impl Add<Meters></code> to set the
|
||
value of the <code>RHS</code> type parameter instead of using the default of <code>Self</code>.</p>
|
||
<p>You’ll use default type parameters in two main ways:</p>
|
||
<ul>
|
||
<li>To extend a type without breaking existing code</li>
|
||
<li>To allow customization in specific cases most users won’t need</li>
|
||
</ul>
|
||
<p>The standard library’s <code>Add</code> trait is an example of the second purpose:
|
||
usually, you’ll add two like types, but the <code>Add</code> trait provides the ability to
|
||
customize beyond that. Using a default type parameter in the <code>Add</code> trait
|
||
definition means you don’t have to specify the extra parameter most of the
|
||
time. In other words, a bit of implementation boilerplate isn’t needed, making
|
||
it easier to use the trait.</p>
|
||
<p>The first purpose is similar to the second but in reverse: if you want to add a
|
||
type parameter to an existing trait, you can give it a default to allow
|
||
extension of the functionality of the trait without breaking the existing
|
||
implementation code.</p>
|
||
<h3><a class="header" href="#fully-qualified-syntax-for-disambiguation-calling-methods-with-the-same-name" id="fully-qualified-syntax-for-disambiguation-calling-methods-with-the-same-name">Fully Qualified Syntax for Disambiguation: Calling Methods with the Same Name</a></h3>
|
||
<p>Nothing in Rust prevents a trait from having a method with the same name as
|
||
another trait’s method, nor does Rust prevent you from implementing both traits
|
||
on one type. It’s also possible to implement a method directly on the type with
|
||
the same name as methods from traits.</p>
|
||
<p>When calling methods with the same name, you’ll need to tell Rust which one you
|
||
want to use. Consider the code in Listing 19-16 where we’ve defined two traits,
|
||
<code>Pilot</code> and <code>Wizard</code>, that both have a method called <code>fly</code>. We then implement
|
||
both traits on a type <code>Human</code> that already has a method named <code>fly</code> implemented
|
||
on it. Each <code>fly</code> method does something different.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>trait Pilot {
|
||
fn fly(&self);
|
||
}
|
||
|
||
trait Wizard {
|
||
fn fly(&self);
|
||
}
|
||
|
||
struct Human;
|
||
|
||
impl Pilot for Human {
|
||
fn fly(&self) {
|
||
println!("This is your captain speaking.");
|
||
}
|
||
}
|
||
|
||
impl Wizard for Human {
|
||
fn fly(&self) {
|
||
println!("Up!");
|
||
}
|
||
}
|
||
|
||
impl Human {
|
||
fn fly(&self) {
|
||
println!("*waving arms furiously*");
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-16: Two traits are defined to have a <code>fly</code>
|
||
method and are implemented on the <code>Human</code> type, and a <code>fly</code> method is
|
||
implemented on <code>Human</code> directly</span></p>
|
||
<p>When we call <code>fly</code> on an instance of <code>Human</code>, the compiler defaults to calling
|
||
the method that is directly implemented on the type, as shown in Listing 19-17.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">trait Pilot {
|
||
</span><span class="boring"> fn fly(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">trait Wizard {
|
||
</span><span class="boring"> fn fly(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct Human;
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Pilot for Human {
|
||
</span><span class="boring"> fn fly(&self) {
|
||
</span><span class="boring"> println!("This is your captain speaking.");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Wizard for Human {
|
||
</span><span class="boring"> fn fly(&self) {
|
||
</span><span class="boring"> println!("Up!");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Human {
|
||
</span><span class="boring"> fn fly(&self) {
|
||
</span><span class="boring"> println!("*waving arms furiously*");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let person = Human;
|
||
person.fly();
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-17: Calling <code>fly</code> on an instance of
|
||
<code>Human</code></span></p>
|
||
<p>Running this code will print <code>*waving arms furiously*</code>, showing that Rust
|
||
called the <code>fly</code> method implemented on <code>Human</code> directly.</p>
|
||
<p>To call the <code>fly</code> methods from either the <code>Pilot</code> trait or the <code>Wizard</code> trait,
|
||
we need to use more explicit syntax to specify which <code>fly</code> method we mean.
|
||
Listing 19-18 demonstrates this syntax.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">trait Pilot {
|
||
</span><span class="boring"> fn fly(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">trait Wizard {
|
||
</span><span class="boring"> fn fly(&self);
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct Human;
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Pilot for Human {
|
||
</span><span class="boring"> fn fly(&self) {
|
||
</span><span class="boring"> println!("This is your captain speaking.");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Wizard for Human {
|
||
</span><span class="boring"> fn fly(&self) {
|
||
</span><span class="boring"> println!("Up!");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Human {
|
||
</span><span class="boring"> fn fly(&self) {
|
||
</span><span class="boring"> println!("*waving arms furiously*");
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let person = Human;
|
||
Pilot::fly(&person);
|
||
Wizard::fly(&person);
|
||
person.fly();
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-18: Specifying which trait’s <code>fly</code> method we
|
||
want to call</span></p>
|
||
<p>Specifying the trait name before the method name clarifies to Rust which
|
||
implementation of <code>fly</code> we want to call. We could also write
|
||
<code>Human::fly(&person)</code>, which is equivalent to the <code>person.fly()</code> that we used
|
||
in Listing 19-18, but this is a bit longer to write if we don’t need to
|
||
disambiguate.</p>
|
||
<p>Running this code prints the following:</p>
|
||
<pre><code class="language-text">This is your captain speaking.
|
||
Up!
|
||
*waving arms furiously*
|
||
</code></pre>
|
||
<p>Because the <code>fly</code> method takes a <code>self</code> parameter, if we had two <em>types</em> that
|
||
both implement one <em>trait</em>, Rust could figure out which implementation of a
|
||
trait to use based on the type of <code>self</code>.</p>
|
||
<p>However, associated functions that are part of traits don’t have a <code>self</code>
|
||
parameter. When two types in the same scope implement that trait, Rust can’t
|
||
figure out which type you mean unless you use <em>fully qualified syntax</em>. For
|
||
example, Listing 19-19 shows the <code>Animal</code> trait with the associated function
|
||
<code>baby_name</code>, the implementation of <code>Animal</code> for the struct <code>Dog</code>, and the
|
||
associated function <code>baby_name</code> defined on <code>Dog</code> directly.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">trait Animal {
|
||
fn baby_name() -> String;
|
||
}
|
||
|
||
struct Dog;
|
||
|
||
impl Dog {
|
||
fn baby_name() -> String {
|
||
String::from("Spot")
|
||
}
|
||
}
|
||
|
||
impl Animal for Dog {
|
||
fn baby_name() -> String {
|
||
String::from("puppy")
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
println!("A baby dog is called a {}", Dog::baby_name());
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-19: A trait with an associated function and a
|
||
type with an associated function of the same name that also implements the
|
||
trait</span></p>
|
||
<p>This code is for an animal shelter that wants to name all puppies Spot, which
|
||
is implemented in the <code>baby_name</code> associated function that is defined on <code>Dog</code>.
|
||
The <code>Dog</code> type also implements the trait <code>Animal</code>, which describes
|
||
characteristics that all animals have. Baby dogs are called puppies, and that
|
||
is expressed in the implementation of the <code>Animal</code> trait on <code>Dog</code> in the
|
||
<code>baby_name</code> function associated with the <code>Animal</code> trait.</p>
|
||
<p>In <code>main</code>, we call the <code>Dog::baby_name</code> function, which calls the associated
|
||
function defined on <code>Dog</code> directly. This code prints the following:</p>
|
||
<pre><code class="language-text">A baby dog is called a Spot
|
||
</code></pre>
|
||
<p>This output isn’t what we wanted. We want to call the <code>baby_name</code> function that
|
||
is part of the <code>Animal</code> trait that we implemented on <code>Dog</code> so the code prints
|
||
<code>A baby dog is called a puppy</code>. The technique of specifying the trait name that
|
||
we used in Listing 19-18 doesn’t help here; if we change <code>main</code> to the code in
|
||
Listing 19-20, we’ll get a compilation error.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn main() {
|
||
println!("A baby dog is called a {}", Animal::baby_name());
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-20: Attempting to call the <code>baby_name</code>
|
||
function from the <code>Animal</code> trait, but Rust doesn’t know which implementation to
|
||
use</span></p>
|
||
<p>Because <code>Animal::baby_name</code> is an associated function rather than a method, and
|
||
thus doesn’t have a <code>self</code> parameter, Rust can’t figure out which
|
||
implementation of <code>Animal::baby_name</code> we want. We’ll get this compiler error:</p>
|
||
<pre><code class="language-text">error[E0283]: type annotations required: cannot resolve `_: Animal`
|
||
--> src/main.rs:20:43
|
||
|
|
||
20 | println!("A baby dog is called a {}", Animal::baby_name());
|
||
| ^^^^^^^^^^^^^^^^^
|
||
|
|
||
= note: required by `Animal::baby_name`
|
||
</code></pre>
|
||
<p>To disambiguate and tell Rust that we want to use the implementation of
|
||
<code>Animal</code> for <code>Dog</code>, we need to use fully qualified syntax. Listing 19-21
|
||
demonstrates how to use fully qualified syntax.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust"><span class="boring">trait Animal {
|
||
</span><span class="boring"> fn baby_name() -> String;
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">struct Dog;
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Dog {
|
||
</span><span class="boring"> fn baby_name() -> String {
|
||
</span><span class="boring"> String::from("Spot")
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Animal for Dog {
|
||
</span><span class="boring"> fn baby_name() -> String {
|
||
</span><span class="boring"> String::from("puppy")
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
    println!("A baby dog is called a {}", &lt;Dog as Animal&gt;::baby_name());
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-21: Using fully qualified syntax to specify
|
||
that we want to call the <code>baby_name</code> function from the <code>Animal</code> trait as
|
||
implemented on <code>Dog</code></span></p>
|
||
<p>We’re providing Rust with a type annotation within the angle brackets, which
|
||
indicates we want to call the <code>baby_name</code> function from the <code>Animal</code> trait as
|
||
implemented on <code>Dog</code> by saying that we want to treat the <code>Dog</code> type as an
|
||
<code>Animal</code> for this function call. This code will now print what we want:</p>
|
||
<pre><code class="language-text">A baby dog is called a puppy
|
||
</code></pre>
|
||
<p>In general, fully qualified syntax is defined as follows:</p>
|
||
<pre><code class="language-rust ignore">&lt;Type as Trait&gt;::function(receiver_if_method, next_arg, ...);
|
||
</code></pre>
|
||
<p>For associated functions, there would not be a <code>receiver</code>: there would only be
|
||
the list of other arguments. You could use fully qualified syntax everywhere
|
||
that you call functions or methods. However, you’re allowed to omit any part of
|
||
this syntax that Rust can figure out from other information in the program. You
|
||
only need to use this more verbose syntax in cases where there are multiple
|
||
implementations that use the same name and Rust needs help to identify which
|
||
implementation you want to call.</p>
|
||
<h3><a class="header" href="#using-supertraits-to-require-one-traits-functionality-within-another-trait" id="using-supertraits-to-require-one-traits-functionality-within-another-trait">Using Supertraits to Require One Trait’s Functionality Within Another Trait</a></h3>
|
||
<p>Sometimes, you might need one trait to use another trait’s functionality. In
|
||
this case, you need to rely on the dependent trait also being implemented.
|
||
The trait you rely on is a <em>supertrait</em> of the trait you’re implementing.</p>
|
||
<p>For example, let’s say we want to make an <code>OutlinePrint</code> trait with an
|
||
<code>outline_print</code> method that will print a value framed in asterisks. That is,
|
||
given a <code>Point</code> struct that implements <code>Display</code> to result in <code>(x, y)</code>, when we
|
||
call <code>outline_print</code> on a <code>Point</code> instance that has <code>1</code> for <code>x</code> and <code>3</code> for
|
||
<code>y</code>, it should print the following:</p>
|
||
<pre><code class="language-text">**********
|
||
* *
|
||
* (1, 3) *
|
||
* *
|
||
**********
|
||
</code></pre>
|
||
<p>In the implementation of <code>outline_print</code>, we want to use the <code>Display</code> trait’s
|
||
functionality. Therefore, we need to specify that the <code>OutlinePrint</code> trait will
|
||
work only for types that also implement <code>Display</code> and provide the functionality
|
||
that <code>OutlinePrint</code> needs. We can do that in the trait definition by specifying
|
||
<code>OutlinePrint: Display</code>. This technique is similar to adding a trait bound to
|
||
the trait. Listing 19-22 shows an implementation of the <code>OutlinePrint</code> trait.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::fmt;
|
||
|
||
trait OutlinePrint: fmt::Display {
|
||
fn outline_print(&self) {
|
||
let output = self.to_string();
|
||
let len = output.len();
|
||
println!("{}", "*".repeat(len + 4));
|
||
println!("*{}*", " ".repeat(len + 2));
|
||
println!("* {} *", output);
|
||
println!("*{}*", " ".repeat(len + 2));
|
||
println!("{}", "*".repeat(len + 4));
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-22: Implementing the <code>OutlinePrint</code> trait that
|
||
requires the functionality from <code>Display</code></span></p>
|
||
<p>Because we’ve specified that <code>OutlinePrint</code> requires the <code>Display</code> trait, we
|
||
can use the <code>to_string</code> function that is automatically implemented for any type
|
||
that implements <code>Display</code>. If we tried to use <code>to_string</code> without adding a
|
||
colon and specifying the <code>Display</code> trait after the trait name, we’d get an
|
||
error saying that no method named <code>to_string</code> was found for the type <code>&Self</code> in
|
||
the current scope.</p>
|
||
<p>Let’s see what happens when we try to implement <code>OutlinePrint</code> on a type that
|
||
doesn’t implement <code>Display</code>, such as the <code>Point</code> struct:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">trait OutlinePrint {}
|
||
</span>struct Point {
|
||
x: i32,
|
||
y: i32,
|
||
}
|
||
|
||
impl OutlinePrint for Point {}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We get an error saying that <code>Display</code> is required but not implemented:</p>
|
||
<pre><code class="language-text">error[E0277]: the trait bound `Point: std::fmt::Display` is not satisfied
|
||
--> src/main.rs:20:6
|
||
|
|
||
20 | impl OutlinePrint for Point {}
|
||
| ^^^^^^^^^^^^ `Point` cannot be formatted with the default formatter;
|
||
try using `:?` instead if you are using a format string
|
||
|
|
||
= help: the trait `std::fmt::Display` is not implemented for `Point`
|
||
</code></pre>
|
||
<p>To fix this, we implement <code>Display</code> on <code>Point</code> and satisfy the constraint that
|
||
<code>OutlinePrint</code> requires, like so:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Point {
|
||
</span><span class="boring"> x: i32,
|
||
</span><span class="boring"> y: i32,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>use std::fmt;
|
||
|
||
impl fmt::Display for Point {
|
||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
write!(f, "({}, {})", self.x, self.y)
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Then implementing the <code>OutlinePrint</code> trait on <code>Point</code> will compile
|
||
successfully, and we can call <code>outline_print</code> on a <code>Point</code> instance to display
|
||
it within an outline of asterisks.</p>
|
||
<h3><a class="header" href="#using-the-newtype-pattern-to-implement-external-traits-on-external-types" id="using-the-newtype-pattern-to-implement-external-traits-on-external-types">Using the Newtype Pattern to Implement External Traits on External Types</a></h3>
|
||
<p>In Chapter 10 in the <a href="ch10-02-traits.html#implementing-a-trait-on-a-type">“Implementing a Trait on a
|
||
Type”</a><!-- ignore --> section, we mentioned
|
||
the orphan rule that states we’re allowed to implement a trait on a type as
|
||
long as either the trait or the type is local to our crate. It’s possible to
|
||
get around this restriction using the <em>newtype pattern</em>, which involves
|
||
creating a new type in a tuple struct. (We covered tuple structs in the
|
||
<a href="ch05-01-defining-structs.html#using-tuple-structs-without-named-fields-to-create-different-types">“Using Tuple Structs without Named Fields to Create Different
|
||
Types”</a><!-- ignore --> section of Chapter 5.) The tuple struct
|
||
will have one field and be a thin wrapper around the type we want to implement
|
||
a trait for. Then the wrapper type is local to our crate, and we can implement
|
||
the trait on the wrapper. <em>Newtype</em> is a term that originates from the Haskell
|
||
programming language. There is no runtime performance penalty for using this
|
||
pattern, and the wrapper type is elided at compile time.</p>
|
||
<p>As an example, let’s say we want to implement <code>Display</code> on <code>Vec<T></code>, which the
|
||
orphan rule prevents us from doing directly because the <code>Display</code> trait and the
|
||
<code>Vec<T></code> type are defined outside our crate. We can make a <code>Wrapper</code> struct
|
||
that holds an instance of <code>Vec<T></code>; then we can implement <code>Display</code> on
|
||
<code>Wrapper</code> and use the <code>Vec<T></code> value, as shown in Listing 19-23.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">use std::fmt;
|
||
|
||
struct Wrapper(Vec<String>);
|
||
|
||
impl fmt::Display for Wrapper {
|
||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||
write!(f, "[{}]", self.0.join(", "))
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
let w = Wrapper(vec![String::from("hello"), String::from("world")]);
|
||
println!("w = {}", w);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-23: Creating a <code>Wrapper</code> type around
|
||
<code>Vec<String></code> to implement <code>Display</code></span></p>
|
||
<p>The implementation of <code>Display</code> uses <code>self.0</code> to access the inner <code>Vec<T></code>,
|
||
because <code>Wrapper</code> is a tuple struct and <code>Vec<T></code> is the item at index 0 in the
|
||
tuple. Then we can use the functionality of the <code>Display</code> trait on <code>Wrapper</code>.</p>
|
||
<p>The downside of using this technique is that <code>Wrapper</code> is a new type, so it
|
||
doesn’t have the methods of the value it’s holding. We would have to implement
|
||
all the methods of <code>Vec<T></code> directly on <code>Wrapper</code> such that the methods
|
||
delegate to <code>self.0</code>, which would allow us to treat <code>Wrapper</code> exactly like a
|
||
<code>Vec<T></code>. If we wanted the new type to have every method the inner type has,
|
||
implementing the <code>Deref</code> trait (discussed in Chapter 15 in the <a href="ch15-02-deref.html#treating-smart-pointers-like-regular-references-with-the-deref-trait">“Treating Smart
|
||
Pointers Like Regular References with the <code>Deref</code>
|
||
Trait”</a><!-- ignore --> section) on the <code>Wrapper</code> to return
|
||
the inner type would be a solution. If we don’t want the <code>Wrapper</code> type to have
|
||
all the methods of the inner type—for example, to restrict the <code>Wrapper</code> type’s
|
||
behavior—we would have to implement just the methods we do want manually.</p>
|
||
<p>Now you know how the newtype pattern is used in relation to traits; it’s also a
|
||
useful pattern even when traits are not involved. Let’s switch focus and look
|
||
at some advanced ways to interact with Rust’s type system.</p>
|
||
<h2><a class="header" href="#advanced-types" id="advanced-types">Advanced Types</a></h2>
|
||
<p>The Rust type system has some features that we’ve mentioned in this book but
|
||
haven’t yet discussed. We’ll start by discussing newtypes in general as we
|
||
examine why newtypes are useful as types. Then we’ll move on to type aliases, a
|
||
feature similar to newtypes but with slightly different semantics. We’ll also
|
||
discuss the <code>!</code> type and dynamically sized types.</p>
|
||
<blockquote>
|
||
<p>Note: The next section assumes you’ve read the earlier section <a href="ch19-03-advanced-traits.html#using-the-newtype-pattern-to-implement-external-traits-on-external-types">“Using the
|
||
Newtype Pattern to Implement External Traits on External
|
||
Types.”</a><!-- ignore --></p>
|
||
</blockquote>
|
||
<h3><a class="header" href="#using-the-newtype-pattern-for-type-safety-and-abstraction" id="using-the-newtype-pattern-for-type-safety-and-abstraction">Using the Newtype Pattern for Type Safety and Abstraction</a></h3>
|
||
<p>The newtype pattern is useful for tasks beyond those we’ve discussed so far,
|
||
including statically enforcing that values are never confused and indicating
|
||
the units of a value. You saw an example of using newtypes to indicate units in
|
||
Listing 19-15: recall that the <code>Millimeters</code> and <code>Meters</code> structs wrapped <code>u32</code>
|
||
values in a newtype. If we wrote a function with a parameter of type
|
||
<code>Millimeters</code>, we couldn’t compile a program that accidentally tried to call
|
||
that function with a value of type <code>Meters</code> or a plain <code>u32</code>.</p>
|
||
<p>Another use of the newtype pattern is in abstracting away some implementation
|
||
details of a type: the new type can expose a public API that is different from
|
||
the API of the private inner type—for example, to restrict the available
|
||
functionality.</p>
|
||
<p>Newtypes can also hide internal implementation. For example, we could provide a
|
||
<code>People</code> type to wrap a <code>HashMap<i32, String></code> that stores a person’s ID
|
||
associated with their name. Code using <code>People</code> would only interact with the
|
||
public API we provide, such as a method to add a name string to the <code>People</code>
|
||
collection; that code wouldn’t need to know that we assign an <code>i32</code> ID to names
|
||
internally. The newtype pattern is a lightweight way to achieve encapsulation
|
||
to hide implementation details, which we discussed in the <a href="ch17-01-what-is-oo.html#encapsulation-that-hides-implementation-details">“Encapsulation that
|
||
Hides Implementation
|
||
Details”</a><!-- ignore -->
|
||
section of Chapter 17.</p>
|
||
<h3><a class="header" href="#creating-type-synonyms-with-type-aliases" id="creating-type-synonyms-with-type-aliases">Creating Type Synonyms with Type Aliases</a></h3>
|
||
<p>Along with the newtype pattern, Rust provides the ability to declare a <em>type
|
||
alias</em> to give an existing type another name. For this we use the <code>type</code>
|
||
keyword. For example, we can create the alias <code>Kilometers</code> to <code>i32</code> like so:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>type Kilometers = i32;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Now, the alias <code>Kilometers</code> is a <em>synonym</em> for <code>i32</code>; unlike the <code>Millimeters</code>
|
||
and <code>Meters</code> types we created in Listing 19-15, <code>Kilometers</code> is not a separate,
|
||
new type. Values that have the type <code>Kilometers</code> will be treated the same as
|
||
values of type <code>i32</code>:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>type Kilometers = i32;
|
||
|
||
let x: i32 = 5;
|
||
let y: Kilometers = 5;
|
||
|
||
println!("x + y = {}", x + y);
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Because <code>Kilometers</code> and <code>i32</code> are the same type, we can add values of both
|
||
types and we can pass <code>Kilometers</code> values to functions that take <code>i32</code>
|
||
parameters. However, using this method, we don’t get the type checking benefits
|
||
that we get from the newtype pattern discussed earlier.</p>
|
||
<p>The main use case for type synonyms is to reduce repetition. For example, we
|
||
might have a lengthy type like this:</p>
|
||
<pre><code class="language-rust ignore">Box<dyn Fn() + Send + 'static>
|
||
</code></pre>
|
||
<p>Writing this lengthy type in function signatures and as type annotations all
|
||
over the code can be tiresome and error prone. Imagine having a project full of
|
||
code like that in Listing 19-24.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let f: Box<dyn Fn() + Send + 'static> = Box::new(|| println!("hi"));
|
||
|
||
fn takes_long_type(f: Box<dyn Fn() + Send + 'static>) {
|
||
// --snip--
|
||
}
|
||
|
||
fn returns_long_type() -> Box<dyn Fn() + Send + 'static> {
|
||
// --snip--
|
||
<span class="boring"> Box::new(|| ())
|
||
</span>}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-24: Using a long type in many places</span></p>
|
||
<p>A type alias makes this code more manageable by reducing the repetition. In
|
||
Listing 19-25, we’ve introduced an alias named <code>Thunk</code> for the verbose type and
|
||
can replace all uses of the type with the shorter alias <code>Thunk</code>.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>type Thunk = Box<dyn Fn() + Send + 'static>;
|
||
|
||
let f: Thunk = Box::new(|| println!("hi"));
|
||
|
||
fn takes_long_type(f: Thunk) {
|
||
// --snip--
|
||
}
|
||
|
||
fn returns_long_type() -> Thunk {
|
||
// --snip--
|
||
<span class="boring"> Box::new(|| ())
|
||
</span>}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-25: Introducing a type alias <code>Thunk</code> to reduce
|
||
repetition</span></p>
|
||
<p>This code is much easier to read and write! Choosing a meaningful name for a
|
||
type alias can help communicate your intent as well (<em>thunk</em> is a word for code
|
||
to be evaluated at a later time, so it’s an appropriate name for a closure that
|
||
gets stored).</p>
|
||
<p>Type aliases are also commonly used with the <code>Result<T, E></code> type for reducing
|
||
repetition. Consider the <code>std::io</code> module in the standard library. I/O
|
||
operations often return a <code>Result<T, E></code> to handle situations when operations
|
||
fail to work. This library has a <code>std::io::Error</code> struct that represents all
|
||
possible I/O errors. Many of the functions in <code>std::io</code> will be returning
|
||
<code>Result<T, E></code> where the <code>E</code> is <code>std::io::Error</code>, such as these functions in
|
||
the <code>Write</code> trait:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::io::Error;
|
||
use std::fmt;
|
||
|
||
pub trait Write {
|
||
fn write(&mut self, buf: &[u8]) -> Result<usize, Error>;
|
||
fn flush(&mut self) -> Result<(), Error>;
|
||
|
||
fn write_all(&mut self, buf: &[u8]) -> Result<(), Error>;
|
||
fn write_fmt(&mut self, fmt: fmt::Arguments) -> Result<(), Error>;
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>The <code>Result&lt;..., Error&gt;</code> is repeated a lot. As such, <code>std::io</code> has this type
|
||
alias declaration:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>type Result<T> = std::result::Result<T, std::io::Error>;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Because this declaration is in the <code>std::io</code> module, we can use the fully
|
||
qualified alias <code>std::io::Result<T></code>—that is, a <code>Result<T, E></code> with the <code>E</code>
|
||
filled in as <code>std::io::Error</code>. The <code>Write</code> trait function signatures end up
|
||
looking like this:</p>
|
||
<pre><code class="language-rust ignore">pub trait Write {
|
||
fn write(&mut self, buf: &[u8]) -> Result<usize>;
|
||
fn flush(&mut self) -> Result<()>;
|
||
|
||
fn write_all(&mut self, buf: &[u8]) -> Result<()>;
|
||
fn write_fmt(&mut self, fmt: Arguments) -> Result<()>;
|
||
}
|
||
</code></pre>
|
||
<p>The type alias helps in two ways: it makes code easier to write <em>and</em> it gives
|
||
us a consistent interface across all of <code>std::io</code>. Because it’s an alias, it’s
|
||
just another <code>Result<T, E></code>, which means we can use any methods that work on
|
||
<code>Result<T, E></code> with it, as well as special syntax like the <code>?</code> operator.</p>
|
||
<h3><a class="header" href="#the-never-type-that-never-returns" id="the-never-type-that-never-returns">The Never Type that Never Returns</a></h3>
|
||
<p>Rust has a special type named <code>!</code> that’s known in type theory lingo as the
|
||
<em>empty type</em> because it has no values. We prefer to call it the <em>never type</em>
|
||
because it stands in the place of the return type when a function will never
|
||
return. Here is an example:</p>
|
||
<pre><code class="language-rust ignore">fn bar() -> ! {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>This code is read as “the function <code>bar</code> returns never.” Functions that return
|
||
never are called <em>diverging functions</em>. We can’t create values of the type <code>!</code>
|
||
so <code>bar</code> can never possibly return.</p>
|
||
<p>But what use is a type you can never create values for? Recall the code from
|
||
Listing 2-5; we’ve reproduced part of it here in Listing 19-26.</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">let guess = "3";
|
||
</span><span class="boring">loop {
|
||
</span>let guess: u32 = match guess.trim().parse() {
|
||
Ok(num) => num,
|
||
Err(_) => continue,
|
||
};
|
||
<span class="boring">break;
|
||
</span><span class="boring">}
|
||
</span><span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-26: A <code>match</code> with an arm that ends in
|
||
<code>continue</code></span></p>
|
||
<p>At the time, we skipped over some details in this code. In Chapter 6 in <a href="ch06-02-match.html#the-match-control-flow-operator">“The
|
||
<code>match</code> Control Flow Operator”</a><!-- ignore
|
||
--> section, we discussed that <code>match</code> arms must all return the same type. So,
|
||
for example, the following code doesn’t work:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let guess = match guess.trim().parse() {
|
||
Ok(_) => 5,
|
||
Err(_) => "hello",
|
||
}
|
||
</code></pre>
|
||
<p>The type of <code>guess</code> in this code would have to be an integer <em>and</em> a string,
|
||
and Rust requires that <code>guess</code> have only one type. So what does <code>continue</code>
|
||
return? How were we allowed to return a <code>u32</code> from one arm and have another arm
|
||
that ends with <code>continue</code> in Listing 19-26?</p>
|
||
<p>As you might have guessed, <code>continue</code> has a <code>!</code> value. That is, when Rust
|
||
computes the type of <code>guess</code>, it looks at both match arms, the former with a
|
||
value of <code>u32</code> and the latter with a <code>!</code> value. Because <code>!</code> can never have a
|
||
value, Rust decides that the type of <code>guess</code> is <code>u32</code>.</p>
|
||
<p>The formal way of describing this behavior is that expressions of type <code>!</code> can
|
||
be coerced into any other type. We’re allowed to end this <code>match</code> arm with
|
||
<code>continue</code> because <code>continue</code> doesn’t return a value; instead, it moves control
|
||
back to the top of the loop, so in the <code>Err</code> case, we never assign a value to
|
||
<code>guess</code>.</p>
|
||
<p>The never type is useful with the <code>panic!</code> macro as well. Remember the <code>unwrap</code>
|
||
function that we call on <code>Option<T></code> values to produce a value or panic? Here
|
||
is its definition:</p>
|
||
<pre><code class="language-rust ignore">impl<T> Option<T> {
|
||
pub fn unwrap(self) -> T {
|
||
match self {
|
||
Some(val) => val,
|
||
None => panic!("called `Option::unwrap()` on a `None` value"),
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>In this code, the same thing happens as in the <code>match</code> in Listing 19-26: Rust
|
||
sees that <code>val</code> has the type <code>T</code> and <code>panic!</code> has the type <code>!</code>, so the result
|
||
of the overall <code>match</code> expression is <code>T</code>. This code works because <code>panic!</code>
|
||
doesn’t produce a value; it ends the program. In the <code>None</code> case, we won’t be
|
||
returning a value from <code>unwrap</code>, so this code is valid.</p>
|
||
<p>One final expression that has the type <code>!</code> is a <code>loop</code>:</p>
|
||
<pre><code class="language-rust ignore">print!("forever ");
|
||
|
||
loop {
|
||
print!("and ever ");
|
||
}
|
||
</code></pre>
|
||
<p>Here, the loop never ends, so <code>!</code> is the type of the expression. However, this
|
||
wouldn’t be true if we included a <code>break</code>, because the loop would terminate
|
||
when it got to the <code>break</code>.</p>
|
||
<h3><a class="header" href="#dynamically-sized-types-and-the-sized-trait" id="dynamically-sized-types-and-the-sized-trait">Dynamically Sized Types and the <code>Sized</code> Trait</a></h3>
|
||
<p>Due to Rust’s need to know certain details, such as how much space to allocate
|
||
for a value of a particular type, there is a corner of its type system that can
|
||
be confusing: the concept of <em>dynamically sized types</em>. Sometimes referred to
|
||
as <em>DSTs</em> or <em>unsized types</em>, these types let us write code using values whose
|
||
size we can know only at runtime.</p>
|
||
<p>Let’s dig into the details of a dynamically sized type called <code>str</code>, which
|
||
we’ve been using throughout the book. That’s right, not <code>&str</code>, but <code>str</code> on
|
||
its own, is a DST. We can’t know how long the string is until runtime, meaning
|
||
we can’t create a variable of type <code>str</code>, nor can we take an argument of type
|
||
<code>str</code>. Consider the following code, which does not work:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">let s1: str = "Hello there!";
|
||
let s2: str = "How's it going?";
|
||
</code></pre>
|
||
<p>Rust needs to know how much memory to allocate for any value of a particular
|
||
type, and all values of a type must use the same amount of memory. If Rust
|
||
allowed us to write this code, these two <code>str</code> values would need to take up the
|
||
same amount of space. But they have different lengths: <code>s1</code> needs 12 bytes of
|
||
storage and <code>s2</code> needs 15. This is why it’s not possible to create a variable
|
||
holding a dynamically sized type.</p>
|
||
<p>So what do we do? In this case, you already know the answer: we make the types
|
||
of <code>s1</code> and <code>s2</code> a <code>&str</code> rather than a <code>str</code>. Recall that in the <a href="ch04-03-slices.html#string-slices">“String
|
||
Slices”</a><!-- ignore --> section of Chapter 4, we said the slice
|
||
data structure stores the starting position and the length of the slice.</p>
|
||
<p>So although a <code>&T</code> is a single value that stores the memory address of where
|
||
the <code>T</code> is located, a <code>&str</code> is <em>two</em> values: the address of the <code>str</code> and its
|
||
length. As such, we can know the size of a <code>&str</code> value at compile time: it’s
|
||
twice the length of a <code>usize</code>. That is, we always know the size of a <code>&str</code>, no
|
||
matter how long the string it refers to is. In general, this is the way in
|
||
which dynamically sized types are used in Rust: they have an extra bit of
|
||
metadata that stores the size of the dynamic information. The golden rule of
|
||
dynamically sized types is that we must always put values of dynamically sized
|
||
types behind a pointer of some kind.</p>
|
||
<p>We can combine <code>str</code> with all kinds of pointers: for example, <code>Box<str></code> or
|
||
<code>Rc<str></code>. In fact, you’ve seen this before but with a different dynamically
|
||
sized type: traits. Every trait is a dynamically sized type we can refer to by
|
||
using the name of the trait. In Chapter 17 in the <a href="ch17-02-trait-objects.html#using-trait-objects-that-allow-for-values-of-different-types">“Using Trait Objects That
|
||
Allow for Values of Different
|
||
Types”</a><!--
|
||
ignore --> section, we mentioned that to use traits as trait objects, we must
|
||
put them behind a pointer, such as <code>&dyn Trait</code> or <code>Box<dyn Trait></code> (<code>Rc<dyn Trait></code> would work too).</p>
|
||
<p>To work with DSTs, Rust has a particular trait called the <code>Sized</code> trait to
|
||
determine whether or not a type’s size is known at compile time. This trait is
|
||
automatically implemented for everything whose size is known at compile time.
|
||
In addition, Rust implicitly adds a bound on <code>Sized</code> to every generic function.
|
||
That is, a generic function definition like this:</p>
|
||
<pre><code class="language-rust ignore">fn generic<T>(t: T) {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>is actually treated as though we had written this:</p>
|
||
<pre><code class="language-rust ignore">fn generic<T: Sized>(t: T) {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>By default, generic functions will work only on types that have a known size at
|
||
compile time. However, you can use the following special syntax to relax this
|
||
restriction:</p>
|
||
<pre><code class="language-rust ignore">fn generic<T: ?Sized>(t: &T) {
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p>A trait bound on <code>?Sized</code> relaxes the implicit <code>Sized</code> bound rather than negating it: we would
|
||
read this as “<code>T</code> may or may not be <code>Sized</code>.” This syntax is only available for
|
||
<code>Sized</code>, not any other traits.</p>
|
||
<p>Also note that we switched the type of the <code>t</code> parameter from <code>T</code> to <code>&T</code>.
|
||
Because the type might not be <code>Sized</code>, we need to use it behind some kind of
|
||
pointer. In this case, we’ve chosen a reference.</p>
|
||
<p>Next, we’ll talk about functions and closures!</p>
|
||
<h2><a class="header" href="#advanced-functions-and-closures" id="advanced-functions-and-closures">Advanced Functions and Closures</a></h2>
|
||
<p>Finally, we’ll explore some advanced features related to functions and
|
||
closures, which include function pointers and returning closures.</p>
|
||
<h3><a class="header" href="#function-pointers" id="function-pointers">Function Pointers</a></h3>
|
||
<p>We’ve talked about how to pass closures to functions; you can also pass regular
|
||
functions to functions! This technique is useful when you want to pass a
|
||
function you’ve already defined rather than defining a new closure. Doing this
|
||
with function pointers will allow you to use functions as arguments to other
|
||
functions. Functions coerce to the type <code>fn</code> (with a lowercase f), not to be
|
||
confused with the <code>Fn</code> closure trait. The <code>fn</code> type is called a <em>function
|
||
pointer</em>. The syntax for specifying that a parameter is a function pointer is
|
||
similar to that of closures, as shown in Listing 19-27.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn add_one(x: i32) -> i32 {
|
||
x + 1
|
||
}
|
||
|
||
fn do_twice(f: fn(i32) -> i32, arg: i32) -> i32 {
|
||
f(arg) + f(arg)
|
||
}
|
||
|
||
fn main() {
|
||
let answer = do_twice(add_one, 5);
|
||
|
||
println!("The answer is: {}", answer);
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 19-27: Using the <code>fn</code> type to accept a function
|
||
pointer as an argument</span></p>
|
||
<p>This code prints <code>The answer is: 12</code>. We specify that the parameter <code>f</code> in
|
||
<code>do_twice</code> is an <code>fn</code> that takes one parameter of type <code>i32</code> and returns an
|
||
<code>i32</code>. We can then call <code>f</code> in the body of <code>do_twice</code>. In <code>main</code>, we can pass
|
||
the function name <code>add_one</code> as the first argument to <code>do_twice</code>.</p>
|
||
<p>Unlike closures, <code>fn</code> is a type rather than a trait, so we specify <code>fn</code> as the
|
||
parameter type directly rather than declaring a generic type parameter with one
|
||
of the <code>Fn</code> traits as a trait bound.</p>
|
||
<p>Function pointers implement all three of the closure traits (<code>Fn</code>, <code>FnMut</code>, and
|
||
<code>FnOnce</code>), so you can always pass a function pointer as an argument for a
|
||
function that expects a closure. It’s best to write functions using a generic
|
||
type and one of the closure traits so your functions can accept either
|
||
functions or closures.</p>
|
||
<p>An example of where you would want to only accept <code>fn</code> and not closures is when
|
||
interfacing with external code that doesn’t have closures: C functions can
|
||
accept functions as arguments, but C doesn’t have closures.</p>
|
||
<p>As an example of where you could use either a closure defined inline or a named
|
||
function, let’s look at a use of <code>map</code>. To use the <code>map</code> function to turn a
|
||
vector of numbers into a vector of strings, we could use a closure, like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let list_of_numbers = vec![1, 2, 3];
|
||
let list_of_strings: Vec<String> = list_of_numbers
|
||
.iter()
|
||
.map(|i| i.to_string())
|
||
.collect();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Or we could name a function as the argument to <code>map</code> instead of the closure,
|
||
like this:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let list_of_numbers = vec![1, 2, 3];
|
||
let list_of_strings: Vec<String> = list_of_numbers
|
||
.iter()
|
||
.map(ToString::to_string)
|
||
.collect();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Note that we must use the fully qualified syntax that we talked about earlier
|
||
in the <a href="ch19-03-advanced-traits.html#advanced-traits">“Advanced Traits”</a><!-- ignore --> section because
|
||
there are multiple functions available named <code>to_string</code>. Here, we’re using the
|
||
<code>to_string</code> function defined in the <code>ToString</code> trait, which the standard
|
||
library has implemented for any type that implements <code>Display</code>.</p>
|
||
<p>We have another useful pattern that exploits an implementation detail of tuple
|
||
structs and tuple-struct enum variants. These types use <code>()</code> as initializer
|
||
syntax, which looks like a function call. The initializers are actually
|
||
implemented as functions returning an instance that’s constructed from their
|
||
arguments. We can use these initializer functions as function pointers that
|
||
implement the closure traits, which means we can specify the initializer
|
||
functions as arguments for methods that take closures, like so:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>enum Status {
|
||
Value(u32),
|
||
Stop,
|
||
}
|
||
|
||
let list_of_statuses: Vec<Status> =
|
||
(0u32..20)
|
||
.map(Status::Value)
|
||
.collect();
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Here we create <code>Status::Value</code> instances using each <code>u32</code> value in the range
|
||
that <code>map</code> is called on by using the initializer function of <code>Status::Value</code>.
|
||
Some people prefer this style, and some people prefer to use closures. They
|
||
compile to the same code, so use whichever style is clearer to you.</p>
|
||
<h3><a class="header" href="#returning-closures" id="returning-closures">Returning Closures</a></h3>
|
||
<p>Closures are represented by traits, which means you can’t return closures
|
||
directly. In most cases where you might want to return a trait, you can instead
|
||
use the concrete type that implements the trait as the return value of the
|
||
function. But you can’t do that with closures because they don’t have a
|
||
concrete type that is returnable; you’re not allowed to use the function
|
||
pointer <code>fn</code> as a return type if the closure captures any values from its scope, for example.</p>
|
||
<p>The following code tries to return a closure directly, but it won’t compile:</p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn returns_closure() -> Fn(i32) -> i32 {
|
||
|x| x + 1
|
||
}
|
||
</code></pre>
|
||
<p>The compiler error is as follows:</p>
|
||
<pre><code class="language-text">error[E0277]: the trait bound `std::ops::Fn(i32) -> i32 + 'static:
|
||
std::marker::Sized` is not satisfied
|
||
-->
|
||
|
|
||
1 | fn returns_closure() -> Fn(i32) -> i32 {
|
||
| ^^^^^^^^^^^^^^ `std::ops::Fn(i32) -> i32 + 'static`
|
||
does not have a constant size known at compile-time
|
||
|
|
||
= help: the trait `std::marker::Sized` is not implemented for
|
||
`std::ops::Fn(i32) -> i32 + 'static`
|
||
= note: the return type of a function must have a statically known size
|
||
</code></pre>
|
||
<p>The error references the <code>Sized</code> trait again! Rust doesn’t know how much space
|
||
it will need to store the closure. We saw a solution to this problem earlier.
|
||
We can use a trait object:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>fn returns_closure() -> Box<dyn Fn(i32) -> i32> {
|
||
Box::new(|x| x + 1)
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This code will compile just fine. For more about trait objects, refer to the
|
||
section <a href="ch17-02-trait-objects.html#using-trait-objects-that-allow-for-values-of-different-types">“Using Trait Objects That Allow for Values of Different
|
||
Types”</a><!--
|
||
ignore --> in Chapter 17.</p>
|
||
<p>Next, let’s look at macros!</p>
|
||
<h2><a class="header" href="#macros" id="macros">Macros</a></h2>
|
||
<p>We’ve used macros like <code>println!</code> throughout this book, but we haven’t fully
|
||
explored what a macro is and how it works. The term <em>macro</em> refers to a family
|
||
of features in Rust: <em>declarative</em> macros with <code>macro_rules!</code> and three kinds
|
||
of <em>procedural</em> macros:</p>
|
||
<ul>
|
||
<li>Custom <code>#[derive]</code> macros that specify code added with the <code>derive</code> attribute
|
||
used on structs and enums</li>
|
||
<li>Attribute-like macros that define custom attributes usable on any item</li>
|
||
<li>Function-like macros that look like function calls but operate on the tokens
|
||
specified as their argument</li>
|
||
</ul>
|
||
<p>We’ll talk about each of these in turn, but first, let’s look at why we even
|
||
need macros when we already have functions.</p>
|
||
<h3><a class="header" href="#the-difference-between-macros-and-functions" id="the-difference-between-macros-and-functions">The Difference Between Macros and Functions</a></h3>
|
||
<p>Fundamentally, macros are a way of writing code that writes other code, which
|
||
is known as <em>metaprogramming</em>. In Appendix C, we discuss the <code>derive</code>
|
||
attribute, which generates an implementation of various traits for you. We’ve
|
||
also used the <code>println!</code> and <code>vec!</code> macros throughout the book. All of these
|
||
macros <em>expand</em> to produce more code than the code you’ve written manually.</p>
|
||
<p>Metaprogramming is useful for reducing the amount of code you have to write and
|
||
maintain, which is also one of the roles of functions. However, macros have
|
||
some additional powers that functions don’t.</p>
|
||
<p>A function signature must declare the number and type of parameters the
|
||
function has. Macros, on the other hand, can take a variable number of
|
||
parameters: we can call <code>println!("hello")</code> with one argument or
|
||
<code>println!("hello {}", name)</code> with two arguments. Also, macros are expanded
|
||
before the compiler interprets the meaning of the code, so a macro can, for
|
||
example, implement a trait on a given type. A function can’t, because it gets
|
||
called at runtime and a trait needs to be implemented at compile time.</p>
|
||
<p>The downside to implementing a macro instead of a function is that macro
|
||
definitions are more complex than function definitions because you’re writing
|
||
Rust code that writes Rust code. Due to this indirection, macro definitions are
|
||
generally more difficult to read, understand, and maintain than function
|
||
definitions.</p>
|
||
<p>Another important difference between macros and functions is that you must
|
||
define macros or bring them into scope <em>before</em> you call them in a file, as
|
||
opposed to functions you can define anywhere and call anywhere.</p>
|
||
<h3><a class="header" href="#declarative-macros-with-macro_rules-for-general-metaprogramming" id="declarative-macros-with-macro_rules-for-general-metaprogramming">Declarative Macros with <code>macro_rules!</code> for General Metaprogramming</a></h3>
|
||
<p>The most widely used form of macros in Rust is <em>declarative macros</em>. These are
|
||
also sometimes referred to as “macros by example,” “<code>macro_rules!</code> macros,” or
|
||
just plain “macros.” At their core, declarative macros allow you to write
|
||
something similar to a Rust <code>match</code> expression. As discussed in Chapter 6,
|
||
<code>match</code> expressions are control structures that take an expression, compare the
|
||
resulting value of the expression to patterns, and then run the code associated
|
||
with the matching pattern. Macros also compare a value to patterns that are
|
||
associated with particular code: in this situation, the value is the literal
|
||
Rust source code passed to the macro; the patterns are compared with the
|
||
structure of that source code; and the code associated with each pattern, when
|
||
matched, replaces the code passed to the macro. This all happens during
|
||
compilation.</p>
|
||
<p>To define a macro, you use the <code>macro_rules!</code> construct. Let’s explore how to
|
||
use <code>macro_rules!</code> by looking at how the <code>vec!</code> macro is defined. Chapter 8
|
||
covered how we can use the <code>vec!</code> macro to create a new vector with particular
|
||
values. For example, the following macro creates a new vector containing three
|
||
integers:</p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>let v: Vec<u32> = vec![1, 2, 3];
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We could also use the <code>vec!</code> macro to make a vector of two integers or a vector
|
||
of five string slices. We wouldn’t be able to use a function to do the same
|
||
because we wouldn’t know the number or type of values up front.</p>
|
||
<p>Listing 19-28 shows a slightly simplified definition of the <code>vec!</code> macro.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>#[macro_export]
|
||
macro_rules! vec {
|
||
( $( $x:expr ),* ) => {
|
||
{
|
||
let mut temp_vec = Vec::new();
|
||
$(
|
||
temp_vec.push($x);
|
||
)*
|
||
temp_vec
|
||
}
|
||
};
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 19-28: A simplified version of the <code>vec!</code> macro
|
||
definition</span></p>
|
||
<blockquote>
|
||
<p>Note: The actual definition of the <code>vec!</code> macro in the standard library
|
||
includes code to preallocate the correct amount of memory up front. That code
|
||
is an optimization that we don’t include here to make the example simpler.</p>
|
||
</blockquote>
|
||
<p>The <code>#[macro_export]</code> annotation indicates that this macro should be made
|
||
available whenever the crate in which the macro is defined is brought into
|
||
scope. Without this annotation, the macro can’t be brought into scope.</p>
|
||
<p>We then start the macro definition with <code>macro_rules!</code> and the name of the
|
||
macro we’re defining <em>without</em> the exclamation mark. The name, in this case
|
||
<code>vec</code>, is followed by curly brackets denoting the body of the macro definition.</p>
|
||
<p>The structure in the <code>vec!</code> body is similar to the structure of a <code>match</code>
|
||
expression. Here we have one arm with the pattern <code>( $( $x:expr ),* )</code>,
|
||
followed by <code>=></code> and the block of code associated with this pattern. If the
|
||
pattern matches, the associated block of code will be emitted. Given that this
|
||
is the only pattern in this macro, there is only one valid way to match; any
|
||
other pattern will result in an error. More complex macros will have more than
|
||
one arm.</p>
|
||
<p>Valid pattern syntax in macro definitions is different than the pattern syntax
|
||
covered in Chapter 18 because macro patterns are matched against Rust code
|
||
structure rather than values. Let’s walk through what the pattern pieces in
|
||
Listing 19-28 mean; for the full macro pattern syntax, see <a href="../reference/macros.html">the reference</a>.</p>
|
||
<p>First, a set of parentheses encompasses the whole pattern. A dollar sign (<code>$</code>)
|
||
is next, followed by a set of parentheses that captures values that match the
|
||
pattern within the parentheses for use in the replacement code. Within <code>$()</code> is
|
||
<code>$x:expr</code>, which matches any Rust expression and gives the expression the name
|
||
<code>$x</code>.</p>
|
||
<p>The comma following <code>$()</code> indicates that a literal comma separator character
|
||
could optionally appear after the code that matches the code in <code>$()</code>. The <code>*</code>
|
||
specifies that the pattern matches zero or more of whatever precedes the <code>*</code>.</p>
|
||
<p>When we call this macro with <code>vec![1, 2, 3];</code>, the <code>$x</code> pattern matches three
|
||
times with the three expressions <code>1</code>, <code>2</code>, and <code>3</code>.</p>
|
||
<p>Now let’s look at the pattern in the body of the code associated with this arm:
|
||
<code>temp_vec.push()</code> within <code>$()*</code> is generated for each part that matches <code>$()</code>
|
||
in the pattern zero or more times depending on how many times the pattern
|
||
matches. The <code>$x</code> is replaced with each expression matched. When we call this
|
||
macro with <code>vec![1, 2, 3];</code>, the code generated that replaces this macro call
|
||
will be the following:</p>
|
||
<pre><code class="language-rust ignore">let mut temp_vec = Vec::new();
|
||
temp_vec.push(1);
|
||
temp_vec.push(2);
|
||
temp_vec.push(3);
|
||
temp_vec
|
||
</code></pre>
|
||
<p>We’ve defined a macro that can take any number of arguments of any type and can
|
||
generate code to create a vector containing the specified elements.</p>
|
||
<p>There are some strange edge cases with <code>macro_rules!</code>. In the future, Rust will
|
||
have a second kind of declarative macro that will work in a similar fashion but
|
||
fix some of these edge cases. After that update, <code>macro_rules!</code> will be
|
||
effectively deprecated. With this in mind, as well as the fact that most Rust
|
||
programmers will <em>use</em> macros more than <em>write</em> macros, we won’t discuss
|
||
<code>macro_rules!</code> any further. To learn more about how to write macros, consult
|
||
the online documentation or other resources, such as <a href="https://danielkeep.github.io/tlborm/book/index.html">“The Little Book of Rust
|
||
Macros”</a>.</p>
|
||
<h3><a class="header" href="#procedural-macros-for-generating-code-from-attributes" id="procedural-macros-for-generating-code-from-attributes">Procedural Macros for Generating Code from Attributes</a></h3>
|
||
<p>The second form of macros is <em>procedural macros</em>, which act more like functions
|
||
(and are a type of procedure). Procedural macros accept some code as an input,
|
||
operate on that code, and produce some code as an output rather than matching
|
||
against patterns and replacing the code with other code as declarative macros
|
||
do.</p>
|
||
<p>The three kinds of procedural macros (custom derive, attribute-like, and
|
||
function-like) all work in a similar fashion.</p>
|
||
<p>When creating procedural macros, the definitions must reside in their own crate
|
||
with a special crate type. This is for complex technical reasons that we hope
|
||
to eliminate in the future. Using procedural macros looks like the code in
|
||
Listing 19-29, where <code>some_attribute</code> is a placeholder for using a specific
|
||
macro.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">use proc_macro;
|
||
|
||
#[some_attribute]
|
||
pub fn some_name(input: TokenStream) -> TokenStream {
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-29: An example of using a procedural
|
||
macro</span></p>
|
||
<p>The function that defines a procedural macro takes a <code>TokenStream</code> as an input
|
||
and produces a <code>TokenStream</code> as an output. The <code>TokenStream</code> type is defined by
|
||
the <code>proc_macro</code> crate that is included with Rust and represents a sequence of
|
||
tokens. This is the core of the macro: the source code that the macro is
|
||
operating on makes up the input <code>TokenStream</code>, and the code the macro produces
|
||
is the output <code>TokenStream</code>. The function also has an attribute attached to it
|
||
that specifies which kind of procedural macro we’re creating. We can have
|
||
multiple kinds of procedural macros in the same crate.</p>
|
||
<p>Let’s look at the different kinds of procedural macros. We’ll start with a
|
||
custom derive macro and then explain the small dissimilarities that make the
|
||
other forms different.</p>
|
||
<h3><a class="header" href="#how-to-write-a-custom-derive-macro" id="how-to-write-a-custom-derive-macro">How to Write a Custom <code>derive</code> Macro</a></h3>
|
||
<p>Let’s create a crate named <code>hello_macro</code> that defines a trait named
|
||
<code>HelloMacro</code> with one associated function named <code>hello_macro</code>. Rather than
|
||
making our crate users implement the <code>HelloMacro</code> trait for each of their
|
||
types, we’ll provide a procedural macro so users can annotate their type with
|
||
<code>#[derive(HelloMacro)]</code> to get a default implementation of the <code>hello_macro</code>
|
||
function. The default implementation will print <code>Hello, Macro! My name is TypeName!</code> where <code>TypeName</code> is the name of the type on which this trait has
|
||
been defined. In other words, we’ll write a crate that enables another
|
||
programmer to write code like Listing 19-30 using our crate.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use hello_macro::HelloMacro;
|
||
use hello_macro_derive::HelloMacro;
|
||
|
||
#[derive(HelloMacro)]
|
||
struct Pancakes;
|
||
|
||
fn main() {
|
||
Pancakes::hello_macro();
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-30: The code a user of our crate will be able
|
||
to write when using our procedural macro</span></p>
|
||
<p>This code will print <code>Hello, Macro! My name is Pancakes!</code> when we’re done. The
|
||
first step is to make a new library crate, like this:</p>
|
||
<pre><code class="language-text">$ cargo new hello_macro --lib
|
||
</code></pre>
|
||
<p>Next, we’ll define the <code>HelloMacro</code> trait and its associated function:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub trait HelloMacro {
|
||
fn hello_macro();
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We have a trait and its function. At this point, our crate user could implement
|
||
the trait to achieve the desired functionality, like so:</p>
|
||
<pre><code class="language-rust ignore">use hello_macro::HelloMacro;
|
||
|
||
struct Pancakes;
|
||
|
||
impl HelloMacro for Pancakes {
|
||
fn hello_macro() {
|
||
println!("Hello, Macro! My name is Pancakes!");
|
||
}
|
||
}
|
||
|
||
fn main() {
|
||
Pancakes::hello_macro();
|
||
}
|
||
</code></pre>
|
||
<p>However, they would need to write the implementation block for each type they
|
||
wanted to use with <code>hello_macro</code>; we want to spare them from having to do this
|
||
work.</p>
|
||
<p>Additionally, we can’t yet provide the <code>hello_macro</code> function with a default
|
||
implementation that will print the name of the type the trait is implemented
|
||
on: Rust doesn’t have reflection capabilities, so it can’t look up the type’s
|
||
name at runtime. We need a macro to generate code at compile time.</p>
|
||
<p>The next step is to define the procedural macro. At the time of this writing,
|
||
procedural macros need to be in their own crate. Eventually, this restriction
|
||
might be lifted. The convention for structuring crates and macro crates is as
|
||
follows: for a crate named <code>foo</code>, a custom derive procedural macro crate is
|
||
called <code>foo_derive</code>. Let’s start a new crate called <code>hello_macro_derive</code> inside
|
||
our <code>hello_macro</code> project:</p>
|
||
<pre><code class="language-text">$ cargo new hello_macro_derive --lib
|
||
</code></pre>
|
||
<p>Our two crates are tightly related, so we create the procedural macro crate
|
||
within the directory of our <code>hello_macro</code> crate. If we change the trait
|
||
definition in <code>hello_macro</code>, we’ll have to change the implementation of the
|
||
procedural macro in <code>hello_macro_derive</code> as well. The two crates will need to
|
||
be published separately, and programmers using these crates will need to add
|
||
both as dependencies and bring them both into scope. We could instead have the
|
||
<code>hello_macro</code> crate use <code>hello_macro_derive</code> as a dependency and re-export the
|
||
procedural macro code. However, the way we’ve structured the project makes it
|
||
possible for programmers to use <code>hello_macro</code> even if they don’t want the
|
||
<code>derive</code> functionality.</p>
|
||
<p>We need to declare the <code>hello_macro_derive</code> crate as a procedural macro crate.
|
||
We’ll also need functionality from the <code>syn</code> and <code>quote</code> crates, as you’ll see
|
||
in a moment, so we need to add them as dependencies. Add the following to the
|
||
<em>Cargo.toml</em> file for <code>hello_macro_derive</code>:</p>
|
||
<p><span class="filename">Filename: hello_macro_derive/Cargo.toml</span></p>
|
||
<pre><code class="language-toml">[lib]
|
||
proc-macro = true
|
||
|
||
[dependencies]
|
||
syn = "0.14.4"
|
||
quote = "0.6.3"
|
||
</code></pre>
|
||
<p>To start defining the procedural macro, place the code in Listing 19-31 into
|
||
your <em>src/lib.rs</em> file for the <code>hello_macro_derive</code> crate. Note that this code
|
||
won’t compile until we add a definition for the <code>impl_hello_macro</code> function.</p>
|
||
<p><span class="filename">Filename: hello_macro_derive/src/lib.rs</span></p>
|
||
<!--
|
||
This usage of `extern crate` is required for the moment with 1.31.0, see:
|
||
https://github.com/rust-lang/rust/issues/54418
|
||
https://github.com/rust-lang/rust/pull/54658
|
||
https://github.com/rust-lang/rust/issues/55599
|
||
-->
|
||
<pre><code class="language-rust ignore">extern crate proc_macro;
|
||
|
||
use crate::proc_macro::TokenStream;
|
||
use quote::quote;
|
||
use syn;
|
||
|
||
#[proc_macro_derive(HelloMacro)]
|
||
pub fn hello_macro_derive(input: TokenStream) -> TokenStream {
|
||
// Construct a representation of Rust code as a syntax tree
|
||
// that we can manipulate
|
||
let ast = syn::parse(input).unwrap();
|
||
|
||
// Build the trait implementation
|
||
impl_hello_macro(&ast)
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-31: Code that most procedural macro crates
|
||
will require in order to process Rust code</span></p>
|
||
<p>Notice that we’ve split the code into the <code>hello_macro_derive</code> function, which
|
||
is responsible for parsing the <code>TokenStream</code>, and the <code>impl_hello_macro</code>
|
||
function, which is responsible for transforming the syntax tree: this makes
|
||
writing a procedural macro more convenient. The code in the outer function
|
||
(<code>hello_macro_derive</code> in this case) will be the same for almost every
|
||
procedural macro crate you see or create. The code you specify in the body of
|
||
the inner function (<code>impl_hello_macro</code> in this case) will be different
|
||
depending on your procedural macro’s purpose.</p>
|
||
<p>We’ve introduced three new crates: <code>proc_macro</code>, <a href="https://crates.io/crates/syn"><code>syn</code></a>, and <a href="https://crates.io/crates/quote"><code>quote</code></a>. The
|
||
<code>proc_macro</code> crate comes with Rust, so we didn’t need to add that to the
|
||
dependencies in <em>Cargo.toml</em>. The <code>proc_macro</code> crate is the compiler’s API that
|
||
allows us to read and manipulate Rust code from our code.</p>
|
||
<p>The <code>syn</code> crate parses Rust code from a string into a data structure that we
|
||
can perform operations on. The <code>quote</code> crate turns <code>syn</code> data structures back
|
||
into Rust code. These crates make it much simpler to parse any sort of Rust
|
||
code we might want to handle: writing a full parser for Rust code is no simple
|
||
task.</p>
|
||
<p>The <code>hello_macro_derive</code> function will be called when a user of our library
|
||
specifies <code>#[derive(HelloMacro)]</code> on a type. This is possible because we’ve
|
||
annotated the <code>hello_macro_derive</code> function here with <code>proc_macro_derive</code> and
|
||
specified the name, <code>HelloMacro</code>, which matches our trait name; this is the
|
||
convention most procedural macros follow.</p>
|
||
<p>The <code>hello_macro_derive</code> function first converts the <code>input</code> from a
|
||
<code>TokenStream</code> to a data structure that we can then interpret and perform
|
||
operations on. This is where <code>syn</code> comes into play. The <code>parse</code> function in
|
||
<code>syn</code> takes a <code>TokenStream</code> and returns a <code>DeriveInput</code> struct representing the
|
||
parsed Rust code. Listing 19-32 shows the relevant parts of the <code>DeriveInput</code>
|
||
struct we get from parsing the <code>struct Pancakes;</code> string:</p>
|
||
<pre><code class="language-rust ignore">DeriveInput {
|
||
// --snip--
|
||
|
||
ident: Ident {
|
||
ident: "Pancakes",
|
||
span: #0 bytes(95..103)
|
||
},
|
||
data: Struct(
|
||
DataStruct {
|
||
struct_token: Struct,
|
||
fields: Unit,
|
||
semi_token: Some(
|
||
Semi
|
||
)
|
||
}
|
||
)
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-32: The <code>DeriveInput</code> instance we get when
|
||
parsing the code that has the macro’s attribute in Listing 19-30</span></p>
|
||
<p>The fields of this struct show that the Rust code we’ve parsed is a unit struct
|
||
with the <code>ident</code> (identifier, meaning the name) of <code>Pancakes</code>. There are more
|
||
fields on this struct for describing all sorts of Rust code; check the <a href="https://docs.rs/syn/0.14.4/syn/struct.DeriveInput.html"><code>syn</code>
|
||
documentation for <code>DeriveInput</code></a> for more information.</p>
|
||
<p>Soon we’ll define the <code>impl_hello_macro</code> function, which is where we’ll build
|
||
the new Rust code we want to include. But before we do, note that the output
|
||
for our derive macro is also a <code>TokenStream</code>. The returned <code>TokenStream</code> is
|
||
added to the code that our crate users write, so when they compile their crate,
|
||
they’ll get the extra functionality that we provide in the modified
|
||
<code>TokenStream</code>.</p>
|
||
<p>You might have noticed that we’re calling <code>unwrap</code> to cause the
|
||
<code>hello_macro_derive</code> function to panic if the call to the <code>syn::parse</code> function
|
||
fails here. It’s necessary for our procedural macro to panic on errors because
|
||
<code>proc_macro_derive</code> functions must return <code>TokenStream</code> rather than <code>Result</code> to
|
||
conform to the procedural macro API. We’ve simplified this example by using
|
||
<code>unwrap</code>; in production code, you should provide more specific error messages
|
||
about what went wrong by using <code>panic!</code> or <code>expect</code>.</p>
|
||
<p>Now that we have the code to turn the annotated Rust code from a <code>TokenStream</code>
|
||
into a <code>DeriveInput</code> instance, let’s generate the code that implements the
|
||
<code>HelloMacro</code> trait on the annotated type, as shown in Listing 19-33.</p>
|
||
<p><span class="filename">Filename: hello_macro_derive/src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn impl_hello_macro(ast: &syn::DeriveInput) -> TokenStream {
|
||
let name = &ast.ident;
|
||
let gen = quote! {
|
||
impl HelloMacro for #name {
|
||
fn hello_macro() {
|
||
println!("Hello, Macro! My name is {}", stringify!(#name));
|
||
}
|
||
}
|
||
};
|
||
gen.into()
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 19-33: Implementing the <code>HelloMacro</code> trait using
|
||
the parsed Rust code</span></p>
|
||
<p>We get an <code>Ident</code> struct instance containing the name (identifier) of the
|
||
annotated type using <code>ast.ident</code>. The struct in Listing 19-32 shows that when
|
||
we run the <code>impl_hello_macro</code> function on the code in Listing 19-30, the
|
||
<code>ident</code> we get will have the <code>ident</code> field with a value of <code>"Pancakes"</code>. Thus,
|
||
the <code>name</code> variable in Listing 19-33 will contain an <code>Ident</code> struct instance
|
||
that, when printed, will be the string <code>"Pancakes"</code>, the name of the struct in
|
||
Listing 19-30.</p>
|
||
<p>The <code>quote!</code> macro lets us define the Rust code that we want to return. The
|
||
compiler expects something different to the direct result of the <code>quote!</code>
|
||
macro’s execution, so we need to convert it to a <code>TokenStream</code>. We do this by
|
||
calling the <code>into</code> method, which consumes this intermediate representation and
|
||
returns a value of the required <code>TokenStream</code> type.</p>
|
||
<p>The <code>quote!</code> macro also provides some very cool templating mechanics: we can
|
||
enter <code>#name</code>, and <code>quote!</code> will replace it with the value in the variable
|
||
<code>name</code>. You can even do some repetition similar to the way regular macros work.
|
||
Check out <a href="https://docs.rs/quote">the <code>quote</code> crate’s docs</a> for a thorough introduction.</p>
|
||
<p>We want our procedural macro to generate an implementation of our <code>HelloMacro</code>
|
||
trait for the type the user annotated, which we can get by using <code>#name</code>. The
|
||
trait implementation has one function, <code>hello_macro</code>, whose body contains the
|
||
functionality we want to provide: printing <code>Hello, Macro! My name is</code> and then
|
||
the name of the annotated type.</p>
|
||
<p>The <code>stringify!</code> macro used here is built into Rust. It takes a Rust
|
||
expression, such as <code>1 + 2</code>, and at compile time turns the expression into a
|
||
string literal, such as <code>"1 + 2"</code>. This is different than <code>format!</code> or
|
||
<code>println!</code>, macros which evaluate the expression and then turn the result into
|
||
a <code>String</code>. There is a possibility that the <code>#name</code> input might be an
|
||
expression to print literally, so we use <code>stringify!</code>. Using <code>stringify!</code> also
|
||
saves an allocation by converting <code>#name</code> to a string literal at compile time.</p>
|
||
<p>At this point, <code>cargo build</code> should complete successfully in both <code>hello_macro</code>
|
||
and <code>hello_macro_derive</code>. Let’s hook up these crates to the code in Listing
|
||
19-30 to see the procedural macro in action! Create a new binary project in
|
||
your <em>projects</em> directory using <code>cargo new pancakes</code>. We need to add
|
||
<code>hello_macro</code> and <code>hello_macro_derive</code> as dependencies in the <code>pancakes</code>
|
||
crate’s <em>Cargo.toml</em>. If you’re publishing your versions of <code>hello_macro</code> and
|
||
<code>hello_macro_derive</code> to <a href="https://crates.io/">crates.io</a>, they would be regular
|
||
dependencies; if not, you can specify them as <code>path</code> dependencies as follows:</p>
|
||
<pre><code class="language-toml">[dependencies]
|
||
hello_macro = { path = "../hello_macro" }
|
||
hello_macro_derive = { path = "../hello_macro/hello_macro_derive" }
|
||
</code></pre>
|
||
<p>Put the code in Listing 19-30 into <em>src/main.rs</em>, and run <code>cargo run</code>: it
|
||
should print <code>Hello, Macro! My name is Pancakes</code>! The implementation of the
|
||
<code>HelloMacro</code> trait from the procedural macro was included without the
|
||
<code>pancakes</code> crate needing to implement it; the <code>#[derive(HelloMacro)]</code> added the
|
||
trait implementation.</p>
|
||
<p>Next, let’s explore how the other kinds of procedural macros differ from custom
|
||
derive macros.</p>
|
||
<h3><a class="header" href="#attribute-like-macros" id="attribute-like-macros">Attribute-like macros</a></h3>
|
||
<p>Attribute-like macros are similar to custom derive macros, but instead of
|
||
generating code for the <code>derive</code> attribute, they allow you to create new
|
||
attributes. They’re also more flexible: <code>derive</code> only works for structs and
|
||
enums; attributes can be applied to other items as well, such as functions.
|
||
Here’s an example of using an attribute-like macro: say you have an attribute
|
||
named <code>route</code> that annotates functions when using a web application framework:</p>
|
||
<pre><code class="language-rust ignore">#[route(GET, "/")]
|
||
fn index() {
|
||
</code></pre>
|
||
<p>This <code>#[route]</code> attribute would be defined by the framework as a procedural
|
||
macro. The signature of the macro definition function would look like this:</p>
|
||
<pre><code class="language-rust ignore">#[proc_macro_attribute]
|
||
pub fn route(attr: TokenStream, item: TokenStream) -> TokenStream {
|
||
</code></pre>
|
||
<p>Here, we have two parameters of type <code>TokenStream</code>. The first is for the
|
||
contents of the attribute: the <code>GET, "/"</code> part. The second is the body of the
|
||
item the attribute is attached to: in this case, <code>fn index() {}</code> and the rest
|
||
of the function’s body.</p>
|
||
<p>Other than that, attribute-like macros work the same way as custom derive
|
||
macros: you create a crate with the <code>proc-macro</code> crate type and implement a
|
||
function that generates the code you want!</p>
|
||
<h3><a class="header" href="#function-like-macros" id="function-like-macros">Function-like macros</a></h3>
|
||
<p>Function-like macros define macros that look like function calls. Similarly to
|
||
<code>macro_rules!</code> macros, they’re more flexible than functions; for example, they
|
||
can take an unknown number of arguments. However, <code>macro_rules!</code> macros can be
|
||
defined only using the match-like syntax we discussed in the section
|
||
<a href="ch19-06-macros.html#declarative-macros-with-macro_rules-for-general-metaprogramming">“Declarative Macros with <code>macro_rules!</code> for General Metaprogramming”</a>
|
||
earlier. Function-like macros take a <code>TokenStream</code> parameter and their
|
||
definition manipulates that <code>TokenStream</code> using Rust code as the other two
|
||
types of procedural macros do. An example of a function-like macro is an <code>sql!</code>
|
||
macro that might be called like so:</p>
|
||
<pre><code class="language-rust ignore">let sql = sql!(SELECT * FROM posts WHERE id=1);
|
||
</code></pre>
|
||
<p>This macro would parse the SQL statement inside it and check that it’s
|
||
syntactically correct, which is much more complex processing than a
|
||
<code>macro_rules!</code> macro can do. The <code>sql!</code> macro would be defined like this:</p>
|
||
<pre><code class="language-rust ignore">#[proc_macro]
|
||
pub fn sql(input: TokenStream) -> TokenStream {
|
||
</code></pre>
|
||
<p>This definition is similar to the custom derive macro’s signature: we receive
|
||
the tokens that are inside the parentheses and return the code we wanted to
|
||
generate.</p>
|
||
<h2><a class="header" href="#summary-18" id="summary-18">Summary</a></h2>
|
||
<p>Whew! Now you have some Rust features in your toolbox that you won’t use often,
|
||
but you’ll know they’re available in very particular circumstances. We’ve
|
||
introduced several complex topics so that when you encounter them in error
|
||
message suggestions or in other people’s code, you’ll be able to recognize
|
||
these concepts and syntax. Use this chapter as a reference to guide you to
|
||
solutions.</p>
|
||
<p>Next, we’ll put everything we’ve discussed throughout the book into practice
|
||
and do one more project!</p>
|
||
<h1><a class="header" href="#final-project-building-a-multithreaded-web-server" id="final-project-building-a-multithreaded-web-server">Final Project: Building a Multithreaded Web Server</a></h1>
|
||
<p>It’s been a long journey, but we’ve reached the end of the book. In this
|
||
chapter, we’ll build one more project together to demonstrate some of the
|
||
concepts we covered in the final chapters, as well as recap some earlier
|
||
lessons.</p>
|
||
<p>For our final project, we’ll make a web server that says “hello” and looks like
|
||
Figure 20-1 in a web browser.</p>
|
||
<p><img src="img/trpl20-01.png" alt="hello from rust" /></p>
|
||
<p><span class="caption">Figure 20-1: Our final shared project</span></p>
|
||
<p>Here is the plan to build the web server:</p>
|
||
<ol>
|
||
<li>Learn a bit about TCP and HTTP.</li>
|
||
<li>Listen for TCP connections on a socket.</li>
|
||
<li>Parse a small number of HTTP requests.</li>
|
||
<li>Create a proper HTTP response.</li>
|
||
<li>Improve the throughput of our server with a thread pool.</li>
|
||
</ol>
|
||
<p>But before we get started, we should mention one detail: the method we’ll use
|
||
won’t be the best way to build a web server with Rust. A number of
|
||
production-ready crates are available on <a href="https://crates.io/">crates.io</a> that
|
||
provide more complete web server and thread pool implementations than we’ll
|
||
build.</p>
|
||
<p>However, our intention in this chapter is to help you learn, not to take the
|
||
easy route. Because Rust is a systems programming language, we can choose the
|
||
level of abstraction we want to work with and can go to a lower level than is
|
||
possible or practical in other languages. We’ll write the basic HTTP server and
|
||
thread pool manually so you can learn the general ideas and techniques behind
|
||
the crates you might use in the future.</p>
|
||
<h2><a class="header" href="#building-a-single-threaded-web-server" id="building-a-single-threaded-web-server">Building a Single-Threaded Web Server</a></h2>
|
||
<p>We’ll start by getting a single-threaded web server working. Before we begin,
|
||
let’s look at a quick overview of the protocols involved in building web
|
||
servers. The details of these protocols are beyond the scope of this book, but
|
||
a brief overview will give you the information you need.</p>
|
||
<p>The two main protocols involved in web servers are the <em>Hypertext Transfer
|
||
Protocol</em> <em>(HTTP)</em> and the <em>Transmission Control Protocol</em> <em>(TCP)</em>. Both
|
||
protocols are <em>request-response</em> protocols, meaning a <em>client</em> initiates
|
||
requests and a <em>server</em> listens to the requests and provides a response to the
|
||
client. The contents of those requests and responses are defined by the
|
||
protocols.</p>
|
||
<p>TCP is the lower-level protocol that describes the details of how information
|
||
gets from one server to another but doesn’t specify what that information is.
|
||
HTTP builds on top of TCP by defining the contents of the requests and
|
||
responses. It’s technically possible to use HTTP with other protocols, but in
|
||
the vast majority of cases, HTTP sends its data over TCP. We’ll work with the
|
||
raw bytes of TCP and HTTP requests and responses.</p>
|
||
<h3><a class="header" href="#listening-to-the-tcp-connection" id="listening-to-the-tcp-connection">Listening to the TCP Connection</a></h3>
|
||
<p>Our web server needs to listen to a TCP connection, so that’s the first part
|
||
we’ll work on. The standard library offers a <code>std::net</code> module that lets us do
|
||
this. Let’s make a new project in the usual fashion:</p>
|
||
<pre><code class="language-text">$ cargo new hello
|
||
Created binary (application) `hello` project
|
||
$ cd hello
|
||
</code></pre>
|
||
<p>Now enter the code in Listing 20-1 in <em>src/main.rs</em> to start. This code will
|
||
listen at the address <code>127.0.0.1:7878</code> for incoming TCP streams. When it gets
|
||
an incoming stream, it will print <code>Connection established!</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust no_run">use std::net::TcpListener;
|
||
|
||
fn main() {
|
||
let listener = TcpListener::bind("127.0.0.1:7878").unwrap();
|
||
|
||
for stream in listener.incoming() {
|
||
let stream = stream.unwrap();
|
||
|
||
println!("Connection established!");
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 20-1: Listening for incoming streams and printing
|
||
a message when we receive a stream</span></p>
|
||
<p>Using <code>TcpListener</code>, we can listen for TCP connections at the address
|
||
<code>127.0.0.1:7878</code>. In the address, the section before the colon is an IP address
|
||
representing your computer (this is the same on every computer and doesn’t
|
||
represent the authors’ computer specifically), and <code>7878</code> is the port. We’ve
|
||
chosen this port for two reasons: HTTP isn’t normally accepted on this port (so our server is unlikely to conflict with any other web server running on your machine), and
|
||
7878 is <em>rust</em> typed on a telephone.</p>
|
||
<p>The <code>bind</code> function in this scenario works like the <code>new</code> function in that it
|
||
will return a new <code>TcpListener</code> instance. The reason the function is called
|
||
<code>bind</code> is that in networking, connecting to a port to listen to is known as
|
||
“binding to a port.”</p>
|
||
<p>The <code>bind</code> function returns a <code>Result&lt;T, E&gt;</code>, which indicates that binding
|
||
might fail. For example, connecting to port 80 requires administrator
|
||
privileges (nonadministrators can listen only on ports higher than 1023), so if
|
||
we tried to connect to port 80 without being an administrator, binding wouldn’t
|
||
work. As another example, binding wouldn’t work if we ran two instances of our
|
||
program and so had two programs listening to the same port. Because we’re
|
||
writing a basic server just for learning purposes, we won’t worry about
|
||
handling these kinds of errors; instead, we use <code>unwrap</code> to stop the program if
|
||
errors happen.</p>
|
||
<p>The <code>incoming</code> method on <code>TcpListener</code> returns an iterator that gives us a
|
||
sequence of streams (more specifically, streams of type <code>TcpStream</code>). A single
|
||
<em>stream</em> represents an open connection between the client and the server. A
|
||
<em>connection</em> is the name for the full request and response process in which a
|
||
client connects to the server, the server generates a response, and the server
|
||
closes the connection. As such, we will read from the <code>TcpStream</code> to see what
|
||
the client sent and then write our response to the stream. Overall,
|
||
this <code>for</code> loop will process each connection in turn and produce a series of
|
||
streams for us to handle.</p>
|
||
<p>For now, our handling of the stream consists of calling <code>unwrap</code> to terminate
|
||
our program if the stream has any errors; if there aren’t any errors, the
|
||
program prints a message. We’ll add more functionality for the success case in
|
||
the next listing. The reason we might receive errors from the <code>incoming</code> method
|
||
when a client connects to the server is that we’re not actually iterating over
|
||
connections. Instead, we’re iterating over <em>connection attempts</em>. The
|
||
connection might not be successful for a number of reasons, many of them
|
||
operating system specific. For example, many operating systems have a limit to
|
||
the number of simultaneous open connections they can support; new connection
|
||
attempts beyond that number will produce an error until some of the open
|
||
connections are closed.</p>
|
||
<p>Let’s try running this code! Invoke <code>cargo run</code> in the terminal and then load
|
||
<em>127.0.0.1:7878</em> in a web browser. The browser should show an error message
|
||
like “Connection reset,” because the server isn’t currently sending back any
|
||
data. But when you look at your terminal, you should see several messages that
|
||
were printed when the browser connected to the server!</p>
|
||
<pre><code class="language-text"> Running `target/debug/hello`
|
||
Connection established!
|
||
Connection established!
|
||
Connection established!
|
||
</code></pre>
|
||
<p>Sometimes, you’ll see multiple messages printed for one browser request; the
|
||
reason might be that the browser is making a request for the page as well as a
|
||
request for other resources, like the <em>favicon.ico</em> icon that appears in the
|
||
browser tab.</p>
|
||
<p>It could also be that the browser is trying to connect to the server multiple
|
||
times because the server isn’t responding with any data. When <code>stream</code> goes out
|
||
of scope and is dropped at the end of the loop, the connection is closed as
|
||
part of the <code>drop</code> implementation. Browsers sometimes deal with closed
|
||
connections by retrying, because the problem might be temporary. The important
|
||
factor is that we’ve successfully gotten a handle to a TCP connection!</p>
|
||
<p>Remember to stop the program by pressing <span class="keystroke">ctrl-c</span>
|
||
when you’re done running a particular version of the code. Then restart <code>cargo run</code> after you’ve made each set of code changes to make sure you’re running the
|
||
newest code.</p>
|
||
<h3><a class="header" href="#reading-the-request" id="reading-the-request">Reading the Request</a></h3>
|
||
<p>Let’s implement the functionality to read the request from the browser! To
|
||
separate the concerns of first getting a connection and then taking some action
|
||
with the connection, we’ll start a new function for processing connections. In
|
||
this new <code>handle_connection</code> function, we’ll read data from the TCP stream and
|
||
print it so we can see the data being sent from the browser. Change the code to
|
||
look like Listing 20-2.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust no_run">use std::io::prelude::*;
|
||
use std::net::TcpStream;
|
||
use std::net::TcpListener;
|
||
|
||
fn main() {
|
||
let listener = TcpListener::bind("127.0.0.1:7878").unwrap();
|
||
|
||
for stream in listener.incoming() {
|
||
let stream = stream.unwrap();
|
||
|
||
handle_connection(stream);
|
||
}
|
||
}
|
||
|
||
fn handle_connection(mut stream: TcpStream) {
|
||
let mut buffer = [0; 512];
|
||
|
||
stream.read(&mut buffer).unwrap();
|
||
|
||
println!("Request: {}", String::from_utf8_lossy(&buffer[..]));
|
||
}
|
||
</code></pre></pre>
|
||
<p><span class="caption">Listing 20-2: Reading from the <code>TcpStream</code> and printing
|
||
the data</span></p>
|
||
<p>We bring <code>std::io::prelude</code> into scope to get access to certain traits that let
|
||
us read from and write to the stream. In the <code>for</code> loop in the <code>main</code> function,
|
||
instead of printing a message that says we made a connection, we now call the
|
||
new <code>handle_connection</code> function and pass the <code>stream</code> to it.</p>
|
||
<p>In the <code>handle_connection</code> function, we’ve made the <code>stream</code> parameter mutable.
|
||
The reason is that the <code>TcpStream</code> instance keeps track of what data it returns
|
||
to us internally. It might read more data than we asked for and save that data
|
||
for the next time we ask for data. It therefore needs to be <code>mut</code> because its
|
||
internal state might change; usually, we think of “reading” as not needing
|
||
mutation, but in this case we need the <code>mut</code> keyword.</p>
|
||
<p>Next, we need to actually read from the stream. We do this in two steps: first,
|
||
we declare a <code>buffer</code> on the stack to hold the data that is read in. We’ve made
|
||
the buffer 512 bytes in size, which is big enough to hold the data of a basic
|
||
request and sufficient for our purposes in this chapter. If we wanted to handle
|
||
requests of an arbitrary size, buffer management would need to be more
|
||
complicated; we’ll keep it simple for now. We pass the buffer to <code>stream.read</code>,
|
||
which will read bytes from the <code>TcpStream</code> and put them in the buffer.</p>
|
||
<p>Second, we convert the bytes in the buffer to a string and print that string.
|
||
The <code>String::from_utf8_lossy</code> function takes a <code>&[u8]</code> and produces a <code>String</code>
|
||
from it. The “lossy” part of the name indicates the behavior of this function
|
||
when it sees an invalid UTF-8 sequence: it will replace the invalid sequence
|
||
with <code><3E></code>, the <code>U+FFFD REPLACEMENT CHARACTER</code>. You might see replacement
|
||
characters for characters in the buffer that aren’t filled by request data.</p>
|
||
<p>Let’s try this code! Start the program and make a request in a web browser
|
||
again. Note that we’ll still get an error page in the browser, but our
|
||
program’s output in the terminal will now look similar to this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.42 secs
|
||
Running `target/debug/hello`
|
||
Request: GET / HTTP/1.1
|
||
Host: 127.0.0.1:7878
|
||
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101
|
||
Firefox/52.0
|
||
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
|
||
Accept-Language: en-US,en;q=0.5
|
||
Accept-Encoding: gzip, deflate
|
||
Connection: keep-alive
|
||
Upgrade-Insecure-Requests: 1
|
||
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||
</code></pre>
|
||
<p>Depending on your browser, you might get slightly different output. Now that
|
||
we’re printing the request data, we can see why we get multiple connections
|
||
from one browser request by looking at the path after <code>Request: GET</code>. If the
|
||
repeated connections are all requesting <em>/</em>, we know the browser is trying to
|
||
fetch <em>/</em> repeatedly because it’s not getting a response from our program.</p>
|
||
<p>Let’s break down this request data to understand what the browser is asking of
|
||
our program.</p>
|
||
<h3><a class="header" href="#a-closer-look-at-an-http-request" id="a-closer-look-at-an-http-request">A Closer Look at an HTTP Request</a></h3>
|
||
<p>HTTP is a text-based protocol, and a request takes this format:</p>
|
||
<pre><code class="language-text">Method Request-URI HTTP-Version CRLF
|
||
headers CRLF
|
||
message-body
|
||
</code></pre>
|
||
<p>The first line is the <em>request line</em> that holds information about what the
|
||
client is requesting. The first part of the request line indicates the <em>method</em>
|
||
being used, such as <code>GET</code> or <code>POST</code>, which describes how the client is making
|
||
this request. Our client used a <code>GET</code> request.</p>
|
||
<p>The next part of the request line is <em>/</em>, which indicates the <em>Uniform Resource
|
||
Identifier</em> <em>(URI)</em> the client is requesting: a URI is almost, but not quite,
|
||
the same as a <em>Uniform Resource Locator</em> <em>(URL)</em>. The difference between URIs
|
||
and URLs isn’t important for our purposes in this chapter, but the HTTP spec
|
||
uses the term URI, so we can just mentally substitute URL for URI here.</p>
|
||
<p>The last part is the HTTP version the client uses, and then the request line
|
||
ends in a <em>CRLF sequence</em>. (CRLF stands for <em>carriage return</em> and <em>line feed</em>,
|
||
which are terms from the typewriter days!) The CRLF sequence can also be
|
||
written as <code>\r\n</code>, where <code>\r</code> is a carriage return and <code>\n</code> is a line feed. The
|
||
CRLF sequence separates the request line from the rest of the request data.
|
||
Note that when the CRLF is printed, we see a new line start rather than <code>\r\n</code>.</p>
|
||
<p>Looking at the request line data we received from running our program so far,
|
||
we see that <code>GET</code> is the method, <em>/</em> is the request URI, and <code>HTTP/1.1</code> is the
|
||
version.</p>
|
||
<p>After the request line, the remaining lines starting from <code>Host:</code> onward are
|
||
headers. <code>GET</code> requests have no body.</p>
|
||
<p>Try making a request from a different browser or asking for a different
|
||
address, such as <em>127.0.0.1:7878/test</em>, to see how the request data changes.</p>
|
||
<p>Now that we know what the browser is asking for, let’s send back some data!</p>
|
||
<h3><a class="header" href="#writing-a-response" id="writing-a-response">Writing a Response</a></h3>
|
||
<p>Now we’ll implement sending data in response to a client request. Responses
|
||
have the following format:</p>
|
||
<pre><code class="language-text">HTTP-Version Status-Code Reason-Phrase CRLF
|
||
headers CRLF
|
||
message-body
|
||
</code></pre>
|
||
<p>The first line is a <em>status line</em> that contains the HTTP version used in the
|
||
response, a numeric status code that summarizes the result of the request, and
|
||
a reason phrase that provides a text description of the status code. After the
|
||
CRLF sequence are any headers, another CRLF sequence, and the body of the
|
||
response.</p>
|
||
<p>Here is an example response that uses HTTP version 1.1, has a status code of
|
||
200, an OK reason phrase, no headers, and no body:</p>
|
||
<pre><code class="language-text">HTTP/1.1 200 OK\r\n\r\n
|
||
</code></pre>
|
||
<p>The status code 200 is the standard success response. The text is a tiny
|
||
successful HTTP response. Let’s write this to the stream as our response to a
|
||
successful request! From the <code>handle_connection</code> function, remove the
|
||
<code>println!</code> that was printing the request data and replace it with the code in
|
||
Listing 20-3.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span>fn handle_connection(mut stream: TcpStream) {
|
||
let mut buffer = [0; 512];
|
||
|
||
stream.read(&mut buffer).unwrap();
|
||
|
||
let response = "HTTP/1.1 200 OK\r\n\r\n";
|
||
|
||
stream.write(response.as_bytes()).unwrap();
|
||
stream.flush().unwrap();
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-3: Writing a tiny successful HTTP response to
|
||
the stream</span></p>
|
||
<p>The first new line defines the <code>response</code> variable that holds the success
|
||
message’s data. Then we call <code>as_bytes</code> on our <code>response</code> to convert the string
|
||
data to bytes. The <code>write</code> method on <code>stream</code> takes a <code>&[u8]</code> and sends those
|
||
bytes directly down the connection.</p>
|
||
<p>Because the <code>write</code> operation could fail, we use <code>unwrap</code> on any error result
|
||
as before. Again, in a real application you would add error handling here.
|
||
Finally, <code>flush</code> will wait and prevent the program from continuing until all
|
||
the bytes are written to the connection; <code>TcpStream</code> contains an internal
|
||
buffer to minimize calls to the underlying operating system.</p>
|
||
<p>With these changes, let’s run our code and make a request. We’re no longer
|
||
printing any data to the terminal, so we won’t see any output other than the
|
||
output from Cargo. When you load <em>127.0.0.1:7878</em> in a web browser, you should
|
||
get a blank page instead of an error. You’ve just hand-coded an HTTP request
|
||
and response!</p>
|
||
<h3><a class="header" href="#returning-real-html" id="returning-real-html">Returning Real HTML</a></h3>
|
||
<p>Let’s implement the functionality for returning more than a blank page. Create
|
||
a new file, <em>hello.html</em>, in the root of your project directory, not in the
|
||
<em>src</em> directory. You can input any HTML you want; Listing 20-4 shows one
|
||
possibility.</p>
|
||
<p><span class="filename">Filename: hello.html</span></p>
|
||
<pre><code class="language-html">&lt;!DOCTYPE html&gt;
|
||
&lt;html lang="en"&gt;
|
||
&lt;head&gt;
|
||
&lt;meta charset="utf-8"&gt;
|
||
&lt;title&gt;Hello!&lt;/title&gt;
|
||
&lt;/head&gt;
|
||
&lt;body&gt;
|
||
&lt;h1&gt;Hello!&lt;/h1&gt;
|
||
&lt;p&gt;Hi from Rust&lt;/p&gt;
|
||
&lt;/body&gt;
|
||
&lt;/html&gt;
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-4: A sample HTML file to return in a
|
||
response</span></p>
|
||
<p>This is a minimal HTML5 document with a heading and some text. To return this
|
||
from the server when a request is received, we’ll modify <code>handle_connection</code> as
|
||
shown in Listing 20-5 to read the HTML file, add it to the response as a body,
|
||
and send it.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span>use std::fs;
|
||
// --snip--
|
||
|
||
fn handle_connection(mut stream: TcpStream) {
|
||
let mut buffer = [0; 512];
|
||
stream.read(&mut buffer).unwrap();
|
||
|
||
let contents = fs::read_to_string("hello.html").unwrap();
|
||
|
||
let response = format!("HTTP/1.1 200 OK\r\n\r\n{}", contents);
|
||
|
||
stream.write(response.as_bytes()).unwrap();
|
||
stream.flush().unwrap();
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-5: Sending the contents of <em>hello.html</em> as the
|
||
body of the response</span></p>
|
||
<p>We’ve added a line at the top to bring the standard library’s filesystem module
|
||
into scope. The code for reading the contents of a file to a string should look
|
||
familiar; we used it in Chapter 12 when we read the contents of a file for our
|
||
I/O project in Listing 12-4.</p>
|
||
<p>Next, we use <code>format!</code> to add the file’s contents as the body of the success
|
||
response.</p>
|
||
<p>Run this code with <code>cargo run</code> and load <em>127.0.0.1:7878</em> in your browser; you
|
||
should see your HTML rendered!</p>
|
||
<p>Currently, we’re ignoring the request data in <code>buffer</code> and just sending back
|
||
the contents of the HTML file unconditionally. That means if you try requesting
|
||
<em>127.0.0.1:7878/something-else</em> in your browser, you’ll still get back this
|
||
same HTML response. Our server is very limited and is not what most web servers
|
||
do. We want to customize our responses depending on the request and only send
|
||
back the HTML file for a well-formed request to <em>/</em>.</p>
|
||
<h3><a class="header" href="#validating-the-request-and-selectively-responding" id="validating-the-request-and-selectively-responding">Validating the Request and Selectively Responding</a></h3>
|
||
<p>Right now, our web server will return the HTML in the file no matter what the
|
||
client requested. Let’s add functionality to check that the browser is
|
||
requesting <em>/</em> before returning the HTML file and return an error if the
|
||
browser requests anything else. For this we need to modify <code>handle_connection</code>,
|
||
as shown in Listing 20-6. This new code checks the content of the request
|
||
received against what we know a request for <em>/</em> looks like and adds <code>if</code> and
|
||
<code>else</code> blocks to treat requests differently.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span><span class="boring">use std::fs;
|
||
</span>// --snip--
|
||
|
||
fn handle_connection(mut stream: TcpStream) {
|
||
let mut buffer = [0; 512];
|
||
stream.read(&mut buffer).unwrap();
|
||
|
||
let get = b"GET / HTTP/1.1\r\n";
|
||
|
||
if buffer.starts_with(get) {
|
||
let contents = fs::read_to_string("hello.html").unwrap();
|
||
|
||
let response = format!("HTTP/1.1 200 OK\r\n\r\n{}", contents);
|
||
|
||
stream.write(response.as_bytes()).unwrap();
|
||
stream.flush().unwrap();
|
||
} else {
|
||
// some other request
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-6: Matching the request and handling requests
|
||
to <em>/</em> differently from other requests</span></p>
|
||
<p>First, we hardcode the data corresponding to the <em>/</em> request into the <code>get</code>
|
||
variable. Because we’re reading raw bytes into the buffer, we transform <code>get</code>
|
||
into a byte string by adding the <code>b""</code> byte string syntax at the start of the
|
||
content data. Then we check whether <code>buffer</code> starts with the bytes in <code>get</code>. If
|
||
it does, it means we’ve received a well-formed request to <em>/</em>, which is the
|
||
success case we’ll handle in the <code>if</code> block that returns the contents of our
|
||
HTML file.</p>
|
||
<p>If <code>buffer</code> does <em>not</em> start with the bytes in <code>get</code>, it means we’ve received
|
||
some other request. We’ll add code to the <code>else</code> block in a moment to respond
|
||
to all other requests.</p>
|
||
<p>Run this code now and request <em>127.0.0.1:7878</em>; you should get the HTML in
|
||
<em>hello.html</em>. If you make any other request, such as
|
||
<em>127.0.0.1:7878/something-else</em>, you’ll get a connection error like those you
|
||
saw when running the code in Listing 20-1 and Listing 20-2.</p>
|
||
<p>Now let’s add the code in Listing 20-7 to the <code>else</code> block to return a response
|
||
with the status code 404, which signals that the content for the request was
|
||
not found. We’ll also return some HTML for a page to render in the browser
|
||
indicating the response to the end user.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span><span class="boring">use std::fs;
|
||
</span><span class="boring">fn handle_connection(mut stream: TcpStream) {
|
||
</span><span class="boring">if true {
|
||
</span>// --snip--
|
||
|
||
} else {
|
||
let status_line = "HTTP/1.1 404 NOT FOUND\r\n\r\n";
|
||
let contents = fs::read_to_string("404.html").unwrap();
|
||
|
||
let response = format!("{}{}", status_line, contents);
|
||
|
||
stream.write(response.as_bytes()).unwrap();
|
||
stream.flush().unwrap();
|
||
}
|
||
<span class="boring">}
|
||
</span><span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-7: Responding with status code 404 and an
|
||
error page if anything other than <em>/</em> was requested</span></p>
|
||
<p>Here, our response has a status line with status code 404 and the reason
|
||
phrase <code>NOT FOUND</code>. We’re still not returning headers, and the body of the
|
||
response will be the HTML in the file <em>404.html</em>. You’ll need to create a
|
||
<em>404.html</em> file next to <em>hello.html</em> for the error page; again, feel free to use
|
||
any HTML you want or use the example HTML in Listing 20-8.</p>
|
||
<p><span class="filename">Filename: 404.html</span></p>
|
||
<pre><code class="language-html"><!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<title>Hello!</title>
|
||
</head>
|
||
<body>
|
||
<h1>Oops!</h1>
|
||
<p>Sorry, I don't know what you're asking for.</p>
|
||
</body>
|
||
</html>
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-8: Sample content for the page to send back
|
||
with any 404 response</span></p>
|
||
<p>With these changes, run your server again. Requesting <em>127.0.0.1:7878</em>
|
||
should return the contents of <em>hello.html</em>, and any other request, like
|
||
<em>127.0.0.1:7878/foo</em>, should return the error HTML from <em>404.html</em>.</p>
|
||
<h3><a class="header" href="#a-touch-of-refactoring" id="a-touch-of-refactoring">A Touch of Refactoring</a></h3>
|
||
<p>At the moment the <code>if</code> and <code>else</code> blocks have a lot of repetition: they’re both
|
||
reading files and writing the contents of the files to the stream. The only
|
||
differences are the status line and the filename. Let’s make the code more
|
||
concise by pulling out those differences into separate <code>if</code> and <code>else</code> lines
|
||
that will assign the values of the status line and the filename to variables;
|
||
we can then use those variables unconditionally in the code to read the file
|
||
and write the response. Listing 20-9 shows the resulting code after replacing
|
||
the large <code>if</code> and <code>else</code> blocks.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span><span class="boring">use std::fs;
|
||
</span>// --snip--
|
||
|
||
fn handle_connection(mut stream: TcpStream) {
|
||
<span class="boring"> let mut buffer = [0; 512];
|
||
</span><span class="boring"> stream.read(&mut buffer).unwrap();
|
||
</span><span class="boring">
|
||
</span><span class="boring"> let get = b"GET / HTTP/1.1\r\n";
|
||
</span> // --snip--
|
||
|
||
let (status_line, filename) = if buffer.starts_with(get) {
|
||
("HTTP/1.1 200 OK\r\n\r\n", "hello.html")
|
||
} else {
|
||
("HTTP/1.1 404 NOT FOUND\r\n\r\n", "404.html")
|
||
};
|
||
|
||
let contents = fs::read_to_string(filename).unwrap();
|
||
|
||
let response = format!("{}{}", status_line, contents);
|
||
|
||
stream.write(response.as_bytes()).unwrap();
|
||
stream.flush().unwrap();
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-9: Refactoring the <code>if</code> and <code>else</code> blocks to
|
||
contain only the code that differs between the two cases</span></p>
|
||
<p>Now the <code>if</code> and <code>else</code> blocks only return the appropriate values for the
|
||
status line and filename in a tuple; we then use destructuring to assign these
|
||
two values to <code>status_line</code> and <code>filename</code> using a pattern in the <code>let</code>
|
||
statement, as discussed in Chapter 18.</p>
|
||
<p>The previously duplicated code is now outside the <code>if</code> and <code>else</code> blocks and
|
||
uses the <code>status_line</code> and <code>filename</code> variables. This makes it easier to see
|
||
the difference between the two cases, and it means we have only one place to
|
||
update the code if we want to change how the file reading and response writing
|
||
work. The behavior of the code in Listing 20-9 will be the same as that in
|
||
Listing 20-7.</p>
|
||
<p>Awesome! We now have a simple web server in approximately 40 lines of Rust code
|
||
that responds to one request with a page of content and responds to all other
|
||
requests with a 404 response.</p>
|
||
<p>Currently, our server runs in a single thread, meaning it can only serve one
|
||
request at a time. Let’s examine how that can be a problem by simulating some
|
||
slow requests. Then we’ll fix it so our server can handle multiple requests at
|
||
once.</p>
|
||
<h2><a class="header" href="#turning-our-single-threaded-server-into-a-multithreaded-server" id="turning-our-single-threaded-server-into-a-multithreaded-server">Turning Our Single-Threaded Server into a Multithreaded Server</a></h2>
|
||
<p>Right now, the server will process each request in turn, meaning it won’t
|
||
process a second connection until the first is finished processing. If the
|
||
server received more and more requests, this serial execution would be less and
|
||
less optimal. If the server receives a request that takes a long time to
|
||
process, subsequent requests will have to wait until the long request is
|
||
finished, even if the new requests can be processed quickly. We’ll need to fix
|
||
this, but first, we’ll look at the problem in action.</p>
|
||
<h3><a class="header" href="#simulating-a-slow-request-in-the-current-server-implementation" id="simulating-a-slow-request-in-the-current-server-implementation">Simulating a Slow Request in the Current Server Implementation</a></h3>
|
||
<p>We’ll look at how a slow-processing request can affect other requests made to
|
||
our current server implementation. Listing 20-10 implements handling a request
|
||
to <em>/sleep</em> with a simulated slow response that will cause the server to sleep
|
||
for 5 seconds before responding.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::thread;
|
||
use std::time::Duration;
|
||
<span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span><span class="boring">use std::fs::File;
|
||
</span>// --snip--
|
||
|
||
fn handle_connection(mut stream: TcpStream) {
|
||
<span class="boring"> let mut buffer = [0; 512];
|
||
</span><span class="boring"> stream.read(&mut buffer).unwrap();
|
||
</span> // --snip--
|
||
|
||
let get = b"GET / HTTP/1.1\r\n";
|
||
let sleep = b"GET /sleep HTTP/1.1\r\n";
|
||
|
||
let (status_line, filename) = if buffer.starts_with(get) {
|
||
("HTTP/1.1 200 OK\r\n\r\n", "hello.html")
|
||
} else if buffer.starts_with(sleep) {
|
||
thread::sleep(Duration::from_secs(5));
|
||
("HTTP/1.1 200 OK\r\n\r\n", "hello.html")
|
||
} else {
|
||
("HTTP/1.1 404 NOT FOUND\r\n\r\n", "404.html")
|
||
};
|
||
|
||
// --snip--
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-10: Simulating a slow request by recognizing
|
||
<em>/sleep</em> and sleeping for 5 seconds</span></p>
|
||
<p>This code is a bit messy, but it’s good enough for simulation purposes. We
|
||
created a second request <code>sleep</code>, whose data our server recognizes. We added an
|
||
<code>else if</code> after the <code>if</code> block to check for the request to <em>/sleep</em>. When that
|
||
request is received, the server will sleep for 5 seconds before rendering the
|
||
successful HTML page.</p>
|
||
<p>You can see how primitive our server is: real libraries would handle the
|
||
recognition of multiple requests in a much less verbose way!</p>
|
||
<p>Start the server using <code>cargo run</code>. Then open two browser windows: one for
|
||
<em>http://127.0.0.1:7878/</em> and the other for <em>http://127.0.0.1:7878/sleep</em>. If
|
||
you enter the <em>/</em> URI a few times, as before, you’ll see it respond quickly.
|
||
But if you enter <em>/sleep</em> and then load <em>/</em>, you’ll see that <em>/</em> waits until
|
||
<code>sleep</code> has slept for its full 5 seconds before loading.</p>
|
||
<p>There are multiple ways we could change how our web server works to avoid
|
||
having more requests back up behind a slow request; the one we’ll implement is
|
||
a thread pool.</p>
|
||
<h3><a class="header" href="#improving-throughput-with-a-thread-pool" id="improving-throughput-with-a-thread-pool">Improving Throughput with a Thread Pool</a></h3>
|
||
<p>A <em>thread pool</em> is a group of spawned threads that are waiting and ready to
|
||
handle a task. When the program receives a new task, it assigns one of the
|
||
threads in the pool to the task, and that thread will process the task. The
|
||
remaining threads in the pool are available to handle any other tasks that come
|
||
in while the first thread is processing. When the first thread is done
|
||
processing its task, it’s returned to the pool of idle threads, ready to handle
|
||
a new task. A thread pool allows you to process connections concurrently,
|
||
increasing the throughput of your server.</p>
|
||
<p>We’ll limit the number of threads in the pool to a small number to protect us
|
||
from Denial of Service (DoS) attacks; if we had our program create a new thread
|
||
for each request as it came in, someone making 10 million requests to our
|
||
server could create havoc by using up all our server’s resources and grinding
|
||
the processing of requests to a halt.</p>
|
||
<p>Rather than spawning unlimited threads, we’ll have a fixed number of threads
|
||
waiting in the pool. As requests come in, they’ll be sent to the pool for
|
||
processing. The pool will maintain a queue of incoming requests. Each of the
|
||
threads in the pool will pop off a request from this queue, handle the request,
|
||
and then ask the queue for another request. With this design, we can process
|
||
<code>N</code> requests concurrently, where <code>N</code> is the number of threads. If each thread
|
||
is responding to a long-running request, subsequent requests can still back up
|
||
in the queue, but we’ve increased the number of long-running requests we can
|
||
handle before reaching that point.</p>
|
||
<p>This technique is just one of many ways to improve the throughput of a web
|
||
server. Other options you might explore are the fork/join model and the
|
||
single-threaded async I/O model. If you’re interested in this topic, you can
|
||
read more about other solutions and try to implement them in Rust; with a
|
||
low-level language like Rust, all of these options are possible.</p>
|
||
<p>Before we begin implementing a thread pool, let’s talk about what using the
|
||
pool should look like. When you’re trying to design code, writing the client
|
||
interface first can help guide your design. Write the API of the code so it’s
|
||
structured in the way you want to call it; then implement the functionality
|
||
within that structure rather than implementing the functionality and then
|
||
designing the public API.</p>
|
||
<p>Similar to how we used test-driven development in the project in Chapter 12,
|
||
we’ll use compiler-driven development here. We’ll write the code that calls the
|
||
functions we want, and then we’ll look at errors from the compiler to determine
|
||
what we should change next to get the code to work.</p>
|
||
<h4><a class="header" href="#code-structure-if-we-could-spawn-a-thread-for-each-request" id="code-structure-if-we-could-spawn-a-thread-for-each-request">Code Structure If We Could Spawn a Thread for Each Request</a></h4>
|
||
<p>First, let’s explore how our code might look if it did create a new thread for
|
||
every connection. As mentioned earlier, this isn’t our final plan due to the
|
||
problems with potentially spawning an unlimited number of threads, but it is a
|
||
starting point. Listing 20-11 shows the changes to make to <code>main</code> to spawn a
|
||
new thread to handle each stream within the <code>for</code> loop.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust no_run"><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpListener;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let listener = TcpListener::bind("127.0.0.1:7878").unwrap();
|
||
|
||
for stream in listener.incoming() {
|
||
let stream = stream.unwrap();
|
||
|
||
thread::spawn(|| {
|
||
handle_connection(stream);
|
||
});
|
||
}
|
||
}
|
||
<span class="boring">fn handle_connection(mut stream: TcpStream) {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-11: Spawning a new thread for each
|
||
stream</span></p>
|
||
<p>As you learned in Chapter 16, <code>thread::spawn</code> will create a new thread and then
|
||
run the code in the closure in the new thread. If you run this code and load
|
||
<em>/sleep</em> in your browser, then <em>/</em> in two more browser tabs, you’ll indeed see
|
||
that the requests to <em>/</em> don’t have to wait for <em>/sleep</em> to finish. But as we
|
||
mentioned, this will eventually overwhelm the system because you’d be making
|
||
new threads without any limit.</p>
|
||
<h4><a class="header" href="#creating-a-similar-interface-for-a-finite-number-of-threads" id="creating-a-similar-interface-for-a-finite-number-of-threads">Creating a Similar Interface for a Finite Number of Threads</a></h4>
|
||
<p>We want our thread pool to work in a similar, familiar way so switching from
|
||
threads to a thread pool doesn’t require large changes to the code that uses
|
||
our API. Listing 20-12 shows the hypothetical interface for a <code>ThreadPool</code>
|
||
struct we want to use instead of <code>thread::spawn</code>.</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust no_run"><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::io::prelude::*;
|
||
</span><span class="boring">use std::net::TcpListener;
|
||
</span><span class="boring">use std::net::TcpStream;
|
||
</span><span class="boring">struct ThreadPool;
|
||
</span><span class="boring">impl ThreadPool {
|
||
</span><span class="boring"> fn new(size: u32) -> ThreadPool { ThreadPool }
|
||
</span><span class="boring"> fn execute<F>(&self, f: F)
|
||
</span><span class="boring"> where F: FnOnce() + Send + 'static {}
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>fn main() {
|
||
let listener = TcpListener::bind("127.0.0.1:7878").unwrap();
|
||
let pool = ThreadPool::new(4);
|
||
|
||
for stream in listener.incoming() {
|
||
let stream = stream.unwrap();
|
||
|
||
pool.execute(|| {
|
||
handle_connection(stream);
|
||
});
|
||
}
|
||
}
|
||
<span class="boring">fn handle_connection(mut stream: TcpStream) {}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-12: Our ideal <code>ThreadPool</code> interface</span></p>
|
||
<p>We use <code>ThreadPool::new</code> to create a new thread pool with a configurable number
|
||
of threads, in this case four. Then, in the <code>for</code> loop, <code>pool.execute</code> has a
|
||
similar interface to <code>thread::spawn</code> in that it takes a closure the pool should
|
||
run for each stream. We need to implement <code>pool.execute</code> so it takes the
|
||
closure and gives it to a thread in the pool to run. This code won’t yet
|
||
compile, but we’ll try so the compiler can guide us in how to fix it.</p>
|
||
<h4><a class="header" href="#building-the-threadpool-struct-using-compiler-driven-development" id="building-the-threadpool-struct-using-compiler-driven-development">Building the <code>ThreadPool</code> Struct Using Compiler-Driven Development</a></h4>
|
||
<p>Make the changes in Listing 20-12 to <em>src/main.rs</em>, and then let’s use the
|
||
compiler errors from <code>cargo check</code> to drive our development. Here is the first
|
||
error we get:</p>
|
||
<pre><code class="language-text">$ cargo check
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
error[E0433]: failed to resolve. Use of undeclared type or module `ThreadPool`
|
||
--> src\main.rs:10:16
|
||
|
|
||
10 | let pool = ThreadPool::new(4);
|
||
| ^^^^^^^^^^^^^^^ Use of undeclared type or module
|
||
`ThreadPool`
|
||
|
||
error: aborting due to previous error
|
||
</code></pre>
|
||
<p>Great! This error tells us we need a <code>ThreadPool</code> type or module, so we’ll
|
||
build one now. Our <code>ThreadPool</code> implementation will be independent of the kind
|
||
of work our web server is doing. So, let’s switch the <code>hello</code> crate from a
|
||
binary crate to a library crate to hold our <code>ThreadPool</code> implementation. After
|
||
we change to a library crate, we could also use the separate thread pool
|
||
library for any work we want to do using a thread pool, not just for serving
|
||
web requests.</p>
|
||
<p>Create a <em>src/lib.rs</em> that contains the following, which is the simplest
|
||
definition of a <code>ThreadPool</code> struct that we can have for now:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct ThreadPool;
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Then create a new directory, <em>src/bin</em>, and move the binary crate rooted in
|
||
<em>src/main.rs</em> into <em>src/bin/main.rs</em>. Doing so will make the library crate the
|
||
primary crate in the <em>hello</em> directory; we can still run the binary in
|
||
<em>src/bin/main.rs</em> using <code>cargo run</code>. After moving the <em>main.rs</em> file, edit it
|
||
to bring the library crate in and bring <code>ThreadPool</code> into scope by adding the
|
||
following code to the top of <em>src/bin/main.rs</em>:</p>
|
||
<p><span class="filename">Filename: src/bin/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use hello::ThreadPool;
|
||
</code></pre>
|
||
<p>This code still won’t work, but let’s check it again to get the next error that
|
||
we need to address:</p>
|
||
<pre><code class="language-text">$ cargo check
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
error[E0599]: no function or associated item named `new` found for type
|
||
`hello::ThreadPool` in the current scope
|
||
--> src/bin/main.rs:13:16
|
||
|
|
||
13 | let pool = ThreadPool::new(4);
|
||
| ^^^^^^^^^^^^^^^ function or associated item not found in
|
||
`hello::ThreadPool`
|
||
</code></pre>
|
||
<p>This error indicates that next we need to create an associated function named
|
||
<code>new</code> for <code>ThreadPool</code>. We also know that <code>new</code> needs to have one parameter
|
||
that can accept <code>4</code> as an argument and should return a <code>ThreadPool</code> instance.
|
||
Let’s implement the simplest <code>new</code> function that will have those
|
||
characteristics:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>pub struct ThreadPool;
|
||
|
||
impl ThreadPool {
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
ThreadPool
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We chose <code>usize</code> as the type of the <code>size</code> parameter, because we know that a
|
||
negative number of threads doesn’t make any sense. We also know we’ll use this
|
||
4 as the number of elements in a collection of threads, which is what the
|
||
<code>usize</code> type is for, as discussed in the <a href="ch03-02-data-types.html#integer-types">“Integer Types”</a><!--
|
||
ignore --> section of Chapter 3.</p>
|
||
<p>Let’s check the code again:</p>
|
||
<pre><code class="language-text">$ cargo check
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
warning: unused variable: `size`
|
||
--> src/lib.rs:4:16
|
||
|
|
||
4 | pub fn new(size: usize) -> ThreadPool {
|
||
| ^^^^
|
||
|
|
||
= note: #[warn(unused_variables)] on by default
|
||
= note: to avoid this warning, consider using `_size` instead
|
||
|
||
error[E0599]: no method named `execute` found for type `hello::ThreadPool` in the current scope
|
||
--> src/bin/main.rs:18:14
|
||
|
|
||
18 | pool.execute(|| {
|
||
| ^^^^^^^
|
||
</code></pre>
|
||
<p>Now we get a warning and an error. Ignoring the warning for a moment, the error
|
||
occurs because we don’t have an <code>execute</code> method on <code>ThreadPool</code>. Recall from
|
||
the <a href="ch20-02-multithreaded.html#creating-a-similar-interface-for-a-finite-number-of-threads">“Creating a Similar Interface for a Finite Number of
|
||
Threads”</a><!--
|
||
ignore --> section that we decided our thread pool should have an interface
|
||
similar to <code>thread::spawn</code>. In addition, we’ll implement the <code>execute</code> function
|
||
so it takes the closure it’s given and gives it to an idle thread in the pool
|
||
to run.</p>
|
||
<p>We’ll define the <code>execute</code> method on <code>ThreadPool</code> to take a closure as a
|
||
parameter. Recall from the <a href="ch13-01-closures.html#storing-closures-using-generic-parameters-and-the-fn-traits">“Storing Closures Using Generic Parameters and the
|
||
<code>Fn</code> Traits”</a><!--
|
||
ignore --> section in Chapter 13 that we can take closures as parameters with
|
||
three different traits: <code>Fn</code>, <code>FnMut</code>, and <code>FnOnce</code>. We need to decide which
|
||
kind of closure to use here. We know we’ll end up doing something similar to
|
||
the standard library <code>thread::spawn</code> implementation, so we can look at what
|
||
bounds the signature of <code>thread::spawn</code> has on its parameter. The documentation
|
||
shows us the following:</p>
|
||
<pre><code class="language-rust ignore">pub fn spawn<F, T>(f: F) -> JoinHandle<T>
|
||
where
|
||
F: FnOnce() -> T + Send + 'static,
|
||
T: Send + 'static
|
||
</code></pre>
|
||
<p>The <code>F</code> type parameter is the one we’re concerned with here; the <code>T</code> type
|
||
parameter is related to the return value, and we’re not concerned with that. We
|
||
can see that <code>spawn</code> uses <code>FnOnce</code> as the trait bound on <code>F</code>. This is probably
|
||
what we want as well, because we’ll eventually pass the argument we get in
|
||
<code>execute</code> to <code>spawn</code>. We can be further confident that <code>FnOnce</code> is the trait we
|
||
want to use because the thread for running a request will only execute that
|
||
request’s closure one time, which matches the <code>Once</code> in <code>FnOnce</code>.</p>
|
||
<p>The <code>F</code> type parameter also has the trait bound <code>Send</code> and the lifetime bound
|
||
<code>'static</code>, which are useful in our situation: we need <code>Send</code> to transfer the
|
||
closure from one thread to another and <code>'static</code> because we don’t know how long
|
||
the thread will take to execute. Let’s create an <code>execute</code> method on
|
||
<code>ThreadPool</code> that will take a generic parameter of type <code>F</code> with these bounds:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct ThreadPool;
|
||
</span>impl ThreadPool {
|
||
// --snip--
|
||
|
||
pub fn execute<F>(&self, f: F)
|
||
where
|
||
F: FnOnce() + Send + 'static
|
||
{
|
||
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We still use the <code>()</code> after <code>FnOnce</code> because this <code>FnOnce</code> represents a closure
|
||
that takes no parameters and returns the unit type <code>()</code>. Just like function
|
||
definitions, the return type can be omitted from the signature, but even if we
|
||
have no parameters, we still need the parentheses.</p>
|
||
<p>Again, this is the simplest implementation of the <code>execute</code> method: it does
|
||
nothing, but we’re trying only to make our code compile. Let’s check it again:</p>
|
||
<pre><code class="language-text">$ cargo check
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
warning: unused variable: `size`
|
||
--> src/lib.rs:4:16
|
||
|
|
||
4 | pub fn new(size: usize) -> ThreadPool {
|
||
| ^^^^
|
||
|
|
||
= note: #[warn(unused_variables)] on by default
|
||
= note: to avoid this warning, consider using `_size` instead
|
||
|
||
warning: unused variable: `f`
|
||
--> src/lib.rs:8:30
|
||
|
|
||
8 | pub fn execute<F>(&self, f: F)
|
||
| ^
|
||
|
|
||
= note: to avoid this warning, consider using `_f` instead
|
||
</code></pre>
|
||
<p>We’re receiving only warnings now, which means it compiles! But note that if
|
||
you try <code>cargo run</code> and make a request in the browser, you’ll see the errors in
|
||
the browser that we saw at the beginning of the chapter. Our library isn’t
|
||
actually calling the closure passed to <code>execute</code> yet!</p>
|
||
<blockquote>
|
||
<p>Note: A saying you might hear about languages with strict compilers, such as
|
||
Haskell and Rust, is “if the code compiles, it works.” But this saying is not
|
||
universally true. Our project compiles, but it does absolutely nothing! If we
|
||
were building a real, complete project, this would be a good time to start
|
||
writing unit tests to check that the code compiles <em>and</em> has the behavior we
|
||
want.</p>
|
||
</blockquote>
|
||
<h4><a class="header" href="#validating-the-number-of-threads-in-new" id="validating-the-number-of-threads-in-new">Validating the Number of Threads in <code>new</code></a></h4>
|
||
<p>We’ll continue to get warnings because we aren’t doing anything with the
|
||
parameters to <code>new</code> and <code>execute</code>. Let’s implement the bodies of these
|
||
functions with the behavior we want. To start, let’s think about <code>new</code>. Earlier
|
||
we chose an unsigned type for the <code>size</code> parameter, because a pool with a
|
||
negative number of threads makes no sense. However, a pool with zero threads
|
||
also makes no sense, yet zero is a perfectly valid <code>usize</code>. We’ll add code to
|
||
check that <code>size</code> is greater than zero before we return a <code>ThreadPool</code> instance
|
||
and have the program panic if it receives a zero by using the <code>assert!</code> macro,
|
||
as shown in Listing 20-13.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">pub struct ThreadPool;
|
||
</span>impl ThreadPool {
|
||
/// Create a new ThreadPool.
|
||
///
|
||
/// The size is the number of threads in the pool.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// The `new` function will panic if the size is zero.
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
ThreadPool
|
||
}
|
||
|
||
// --snip--
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-13: Implementing <code>ThreadPool::new</code> to panic if
|
||
<code>size</code> is zero</span></p>
|
||
<p>We’ve added some documentation for our <code>ThreadPool</code> with doc comments. Note
|
||
that we followed good documentation practices by adding a section that calls
|
||
out the situations in which our function can panic, as discussed in Chapter 14.
|
||
Try running <code>cargo doc --open</code> and clicking the <code>ThreadPool</code> struct to see what
|
||
the generated docs for <code>new</code> look like!</p>
|
||
<p>Instead of adding the <code>assert!</code> macro as we’ve done here, we could make <code>new</code>
|
||
return a <code>Result</code> like we did with <code>Config::new</code> in the I/O project in Listing
|
||
12-9. But we’ve decided in this case that trying to create a thread pool
|
||
without any threads should be an unrecoverable error. If you’re feeling
|
||
ambitious, try to write a version of <code>new</code> with the following signature to
|
||
compare both versions:</p>
|
||
<pre><code class="language-rust ignore">pub fn new(size: usize) -> Result<ThreadPool, PoolCreationError> {
|
||
</code></pre>
|
||
<h4><a class="header" href="#creating-space-to-store-the-threads" id="creating-space-to-store-the-threads">Creating Space to Store the Threads</a></h4>
|
||
<p>Now that we have a way to know we have a valid number of threads to store in
|
||
the pool, we can create those threads and store them in the <code>ThreadPool</code> struct
|
||
before returning it. But how do we “store” a thread? Let’s take another look at
|
||
the <code>thread::spawn</code> signature:</p>
|
||
<pre><code class="language-rust ignore">pub fn spawn<F, T>(f: F) -> JoinHandle<T>
|
||
where
|
||
F: FnOnce() -> T + Send + 'static,
|
||
T: Send + 'static
|
||
</code></pre>
|
||
<p>The <code>spawn</code> function returns a <code>JoinHandle&lt;T&gt;</code>, where <code>T</code> is the type that the
|
||
closure returns. Let’s try using <code>JoinHandle</code> too and see what happens. In our
|
||
case, the closures we’re passing to the thread pool will handle the connection
|
||
and not return anything, so <code>T</code> will be the unit type <code>()</code>.</p>
|
||
<p>The code in Listing 20-14 will compile but doesn’t create any threads yet.
|
||
We’ve changed the definition of <code>ThreadPool</code> to hold a vector of
|
||
<code>thread::JoinHandle&lt;()&gt;</code> instances, initialized the vector with a capacity of
|
||
<code>size</code>, set up a <code>for</code> loop that will run some code to create the threads, and
|
||
returned a <code>ThreadPool</code> instance containing them.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore not_desired_behavior">use std::thread;
|
||
|
||
pub struct ThreadPool {
|
||
threads: Vec<thread::JoinHandle<()>>,
|
||
}
|
||
|
||
impl ThreadPool {
|
||
// --snip--
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
let mut threads = Vec::with_capacity(size);
|
||
|
||
for _ in 0..size {
|
||
// create some threads and store them in the vector
|
||
}
|
||
|
||
ThreadPool {
|
||
threads
|
||
}
|
||
}
|
||
|
||
// --snip--
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-14: Creating a vector for <code>ThreadPool</code> to hold
|
||
the threads</span></p>
|
||
<p>We’ve brought <code>std::thread</code> into scope in the library crate, because we’re
|
||
using <code>thread::JoinHandle</code> as the type of the items in the vector in
|
||
<code>ThreadPool</code>.</p>
|
||
<p>Once a valid size is received, our <code>ThreadPool</code> creates a new vector that can
|
||
hold <code>size</code> items. We haven’t used the <code>with_capacity</code> function in this book
|
||
yet, which performs the same task as <code>Vec::new</code> but with an important
|
||
difference: it preallocates space in the vector. Because we know we need to
|
||
store <code>size</code> elements in the vector, doing this allocation up front is slightly
|
||
more efficient than using <code>Vec::new</code>, which resizes itself as elements are
|
||
inserted.</p>
|
||
<p>When you run <code>cargo check</code> again, you’ll get a few more warnings, but it should
|
||
succeed.</p>
|
||
<h4><a class="header" href="#a-worker-struct-responsible-for-sending-code-from-the-threadpool-to-a-thread" id="a-worker-struct-responsible-for-sending-code-from-the-threadpool-to-a-thread">A <code>Worker</code> Struct Responsible for Sending Code from the <code>ThreadPool</code> to a Thread</a></h4>
|
||
<p>We left a comment in the <code>for</code> loop in Listing 20-14 regarding the creation of
|
||
threads. Here, we’ll look at how we actually create threads. The standard
|
||
library provides <code>thread::spawn</code> as a way to create threads, and
|
||
<code>thread::spawn</code> expects to get some code the thread should run as soon as the
|
||
thread is created. However, in our case, we want to create the threads and have
|
||
them <em>wait</em> for code that we’ll send later. The standard library’s
|
||
implementation of threads doesn’t include any way to do that; we have to
|
||
implement it manually.</p>
|
||
<p>We’ll implement this behavior by introducing a new data structure between the
|
||
<code>ThreadPool</code> and the threads that will manage this new behavior. We’ll call
|
||
this data structure <code>Worker</code>, which is a common term in pooling
|
||
implementations. Think of people working in the kitchen at a restaurant: the
|
||
workers wait until orders come in from customers, and then they’re responsible
|
||
for taking those orders and filling them.</p>
|
||
<p>Instead of storing a vector of <code>JoinHandle&lt;()&gt;</code> instances in the thread pool,
|
||
we’ll store instances of the <code>Worker</code> struct. Each <code>Worker</code> will store a single
|
||
<code>JoinHandle&lt;()&gt;</code> instance. Then we’ll implement a method on <code>Worker</code> that will
|
||
take a closure of code to run and send it to the already running thread for
|
||
execution. We’ll also give each worker an <code>id</code> so we can distinguish between
|
||
the different workers in the pool when logging or debugging.</p>
|
||
<p>Let’s make the following changes to what happens when we create a <code>ThreadPool</code>.
|
||
We’ll implement the code that sends the closure to the thread after we have
|
||
<code>Worker</code> set up in this way:</p>
|
||
<ol>
|
||
<li>Define a <code>Worker</code> struct that holds an <code>id</code> and a <code>JoinHandle&lt;()&gt;</code>.</li>
|
||
<li>Change <code>ThreadPool</code> to hold a vector of <code>Worker</code> instances.</li>
|
||
<li>Define a <code>Worker::new</code> function that takes an <code>id</code> number and returns a
|
||
<code>Worker</code> instance that holds the <code>id</code> and a thread spawned with an empty
|
||
closure.</li>
|
||
<li>In <code>ThreadPool::new</code>, use the <code>for</code> loop counter to generate an <code>id</code>, create
|
||
a new <code>Worker</code> with that <code>id</code>, and store the worker in the vector.</li>
|
||
</ol>
|
||
<p>If you’re up for a challenge, try implementing these changes on your own before
|
||
looking at the code in Listing 20-15.</p>
|
||
<p>Ready? Here is Listing 20-15 with one way to make the preceding modifications.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::thread;
|
||
|
||
pub struct ThreadPool {
|
||
workers: Vec<Worker>,
|
||
}
|
||
|
||
impl ThreadPool {
|
||
// --snip--
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
let mut workers = Vec::with_capacity(size);
|
||
|
||
for id in 0..size {
|
||
workers.push(Worker::new(id));
|
||
}
|
||
|
||
ThreadPool {
|
||
workers
|
||
}
|
||
}
|
||
// --snip--
|
||
}
|
||
|
||
struct Worker {
|
||
id: usize,
|
||
thread: thread::JoinHandle<()>,
|
||
}
|
||
|
||
impl Worker {
|
||
fn new(id: usize) -> Worker {
|
||
let thread = thread::spawn(|| {});
|
||
|
||
Worker {
|
||
id,
|
||
thread,
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-15: Modifying <code>ThreadPool</code> to hold <code>Worker</code>
|
||
instances instead of holding threads directly</span></p>
|
||
<p>We’ve changed the name of the field on <code>ThreadPool</code> from <code>threads</code> to <code>workers</code>
|
||
because it’s now holding <code>Worker</code> instances instead of <code>JoinHandle&lt;()&gt;</code>
|
||
instances. We use the counter in the <code>for</code> loop as an argument to
|
||
<code>Worker::new</code>, and we store each new <code>Worker</code> in the vector named <code>workers</code>.</p>
|
||
<p>External code (like our server in <em>src/bin/main.rs</em>) doesn’t need to know the
|
||
implementation details regarding using a <code>Worker</code> struct within <code>ThreadPool</code>,
|
||
so we make the <code>Worker</code> struct and its <code>new</code> function private. The
|
||
<code>Worker::new</code> function uses the <code>id</code> we give it and stores a <code>JoinHandle&lt;()&gt;</code>
|
||
instance that is created by spawning a new thread using an empty closure.</p>
|
||
<p>This code will compile and will store the number of <code>Worker</code> instances we
|
||
specified as an argument to <code>ThreadPool::new</code>. But we’re <em>still</em> not processing
|
||
the closure that we get in <code>execute</code>. Let’s look at how to do that next.</p>
|
||
<h4><a class="header" href="#sending-requests-to-threads-via-channels" id="sending-requests-to-threads-via-channels">Sending Requests to Threads via Channels</a></h4>
|
||
<p>Now we’ll tackle the problem that the closures given to <code>thread::spawn</code> do
|
||
absolutely nothing. Currently, we get the closure we want to execute in the
|
||
<code>execute</code> method. But we need to give <code>thread::spawn</code> a closure to run when we
|
||
create each <code>Worker</code> during the creation of the <code>ThreadPool</code>.</p>
|
||
<p>We want the <code>Worker</code> structs that we just created to fetch code to run from a
|
||
queue held in the <code>ThreadPool</code> and send that code to its thread to run.</p>
|
||
<p>In Chapter 16, you learned about <em>channels</em>—a simple way to communicate between
|
||
two threads—that would be perfect for this use case. We’ll use a channel to
|
||
function as the queue of jobs, and <code>execute</code> will send a job from the
|
||
<code>ThreadPool</code> to the <code>Worker</code> instances, one of which will send the job to its thread.
|
||
Here is the plan:</p>
|
||
<ol>
|
||
<li>The <code>ThreadPool</code> will create a channel and hold on to the sending side of
|
||
the channel.</li>
|
||
<li>Each <code>Worker</code> will hold on to the receiving side of the channel.</li>
|
||
<li>We’ll create a new <code>Job</code> struct that will hold the closures we want to send
|
||
down the channel.</li>
|
||
<li>The <code>execute</code> method will send the job it wants to execute down the sending
|
||
side of the channel.</li>
|
||
<li>In its thread, the <code>Worker</code> will loop over its receiving side of the channel
|
||
and execute the closures of any jobs it receives.</li>
|
||
</ol>
|
||
<p>Let’s start by creating a channel in <code>ThreadPool::new</code> and holding the sending
|
||
side in the <code>ThreadPool</code> instance, as shown in Listing 20-16. The <code>Job</code> struct
|
||
doesn’t hold anything for now but will be the type of item we’re sending down
|
||
the channel.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span>// --snip--
|
||
use std::sync::mpsc;
|
||
|
||
pub struct ThreadPool {
|
||
workers: Vec<Worker>,
|
||
sender: mpsc::Sender<Job>,
|
||
}
|
||
|
||
struct Job;
|
||
|
||
impl ThreadPool {
|
||
// --snip--
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
let (sender, receiver) = mpsc::channel();
|
||
|
||
let mut workers = Vec::with_capacity(size);
|
||
|
||
for id in 0..size {
|
||
workers.push(Worker::new(id));
|
||
}
|
||
|
||
ThreadPool {
|
||
workers,
|
||
sender,
|
||
}
|
||
}
|
||
// --snip--
|
||
}
|
||
<span class="boring">
|
||
</span><span class="boring">struct Worker {
|
||
</span><span class="boring"> id: usize,
|
||
</span><span class="boring"> thread: thread::JoinHandle<()>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span><span class="boring">impl Worker {
|
||
</span><span class="boring"> fn new(id: usize) -> Worker {
|
||
</span><span class="boring"> let thread = thread::spawn(|| {});
|
||
</span><span class="boring">
|
||
</span><span class="boring"> Worker {
|
||
</span><span class="boring"> id,
|
||
</span><span class="boring"> thread,
|
||
</span><span class="boring"> }
|
||
</span><span class="boring"> }
|
||
</span><span class="boring">}
|
||
</span><span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-16: Modifying <code>ThreadPool</code> to store the
|
||
sending end of a channel that sends <code>Job</code> instances</span></p>
|
||
<p>In <code>ThreadPool::new</code>, we create our new channel and have the pool hold the
|
||
sending end. This will successfully compile, still with warnings.</p>
|
||
<p>Let’s try passing a receiving end of the channel into each worker as the thread
|
||
pool creates the channel. We know we want to use the receiving end in the
|
||
thread that the workers spawn, so we’ll reference the <code>receiver</code> parameter in
|
||
the closure. The code in Listing 20-17 won’t quite compile yet.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">impl ThreadPool {
|
||
// --snip--
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
let (sender, receiver) = mpsc::channel();
|
||
|
||
let mut workers = Vec::with_capacity(size);
|
||
|
||
for id in 0..size {
|
||
workers.push(Worker::new(id, receiver));
|
||
}
|
||
|
||
ThreadPool {
|
||
workers,
|
||
sender,
|
||
}
|
||
}
|
||
// --snip--
|
||
}
|
||
|
||
// --snip--
|
||
|
||
impl Worker {
|
||
fn new(id: usize, receiver: mpsc::Receiver<Job>) -> Worker {
|
||
let thread = thread::spawn(|| {
|
||
receiver;
|
||
});
|
||
|
||
Worker {
|
||
id,
|
||
thread,
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-17: Passing the receiving end of the channel
|
||
to the workers</span></p>
|
||
<p>We’ve made some small and straightforward changes: we pass the receiving end of
|
||
the channel into <code>Worker::new</code>, and then we use it inside the closure.</p>
|
||
<p>When we try to check this code, we get this error:</p>
|
||
<pre><code class="language-text">$ cargo check
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
error[E0382]: use of moved value: `receiver`
|
||
--> src/lib.rs:27:42
|
||
|
|
||
27 | workers.push(Worker::new(id, receiver));
|
||
| ^^^^^^^^ value moved here in
|
||
previous iteration of loop
|
||
|
|
||
= note: move occurs because `receiver` has type
|
||
`std::sync::mpsc::Receiver<Job>`, which does not implement the `Copy` trait
|
||
</code></pre>
|
||
<p>The code is trying to pass <code>receiver</code> to multiple <code>Worker</code> instances. This
|
||
won’t work, as you’ll recall from Chapter 16: the channel implementation that
|
||
Rust provides is multiple <em>producer</em>, single <em>consumer</em>. This means we can’t
|
||
just clone the consuming end of the channel to fix this code. Even if we could,
|
||
that is not the technique we would want to use; instead, we want to distribute
|
||
the jobs across threads by sharing the single <code>receiver</code> among all the workers.</p>
|
||
<p>Additionally, taking a job off the channel queue involves mutating the
|
||
<code>receiver</code>, so the threads need a safe way to share and modify <code>receiver</code>;
|
||
otherwise, we might get race conditions (as covered in Chapter 16).</p>
|
||
<p>Recall the thread-safe smart pointers discussed in Chapter 16: to share
|
||
ownership across multiple threads and allow the threads to mutate the value, we
|
||
need to use <code>Arc&lt;Mutex&lt;T&gt;&gt;</code>. The <code>Arc</code> type will let multiple workers own the
|
||
receiver, and <code>Mutex</code> will ensure that only one worker gets a job from the
|
||
receiver at a time. Listing 20-18 shows the changes we need to make.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span><span class="boring">use std::sync::mpsc;
|
||
</span>use std::sync::Arc;
|
||
use std::sync::Mutex;
|
||
// --snip--
|
||
|
||
<span class="boring">pub struct ThreadPool {
|
||
</span><span class="boring"> workers: Vec<Worker>,
|
||
</span><span class="boring"> sender: mpsc::Sender<Job>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">struct Job;
|
||
</span><span class="boring">
|
||
</span>impl ThreadPool {
|
||
// --snip--
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
let (sender, receiver) = mpsc::channel();
|
||
|
||
let receiver = Arc::new(Mutex::new(receiver));
|
||
|
||
let mut workers = Vec::with_capacity(size);
|
||
|
||
for id in 0..size {
|
||
workers.push(Worker::new(id, Arc::clone(&receiver)));
|
||
}
|
||
|
||
ThreadPool {
|
||
workers,
|
||
sender,
|
||
}
|
||
}
|
||
|
||
// --snip--
|
||
}
|
||
|
||
<span class="boring">struct Worker {
|
||
</span><span class="boring"> id: usize,
|
||
</span><span class="boring"> thread: thread::JoinHandle<()>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">
|
||
</span>impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Job>>>) -> Worker {
|
||
// --snip--
|
||
<span class="boring"> let thread = thread::spawn(|| {
|
||
</span><span class="boring"> receiver;
|
||
</span><span class="boring"> });
|
||
</span><span class="boring">
|
||
</span><span class="boring"> Worker {
|
||
</span><span class="boring"> id,
|
||
</span><span class="boring"> thread,
|
||
</span><span class="boring"> }
|
||
</span> }
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-18: Sharing the receiving end of the channel
|
||
among the workers using <code>Arc</code> and <code>Mutex</code></span></p>
|
||
<p>In <code>ThreadPool::new</code>, we put the receiving end of the channel in an <code>Arc</code> and a
|
||
<code>Mutex</code>. For each new worker, we clone the <code>Arc</code> to bump the reference count so
|
||
the workers can share ownership of the receiving end.</p>
|
||
<p>With these changes, the code compiles! We’re getting there!</p>
|
||
<h4><a class="header" href="#implementing-the-execute-method" id="implementing-the-execute-method">Implementing the <code>execute</code> Method</a></h4>
|
||
<p>Let’s finally implement the <code>execute</code> method on <code>ThreadPool</code>. We’ll also change
|
||
<code>Job</code> from a struct to a type alias for a trait object that holds the type of
|
||
closure that <code>execute</code> receives. As discussed in the <a href="ch19-04-advanced-types.html#creating-type-synonyms-with-type-aliases">“Creating Type Synonyms
|
||
with Type Aliases”</a><!-- ignore -->
|
||
section of Chapter 19, type aliases allow us to make long types shorter. Look
|
||
at Listing 20-19.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>// --snip--
|
||
<span class="boring">pub struct ThreadPool {
|
||
</span><span class="boring"> workers: Vec<Worker>,
|
||
</span><span class="boring"> sender: mpsc::Sender<Job>,
|
||
</span><span class="boring">}
|
||
</span><span class="boring">use std::sync::mpsc;
|
||
</span><span class="boring">struct Worker {}
|
||
</span>
|
||
type Job = Box<dyn FnOnce() + Send + 'static>;
|
||
|
||
impl ThreadPool {
|
||
// --snip--
|
||
|
||
pub fn execute<F>(&self, f: F)
|
||
where
|
||
F: FnOnce() + Send + 'static
|
||
{
|
||
let job = Box::new(f);
|
||
|
||
self.sender.send(job).unwrap();
|
||
}
|
||
}
|
||
|
||
// --snip--
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p><span class="caption">Listing 20-19: Creating a <code>Job</code> type alias for a <code>Box</code>
|
||
that holds each closure and then sending the job down the channel</span></p>
|
||
<p>After creating a new <code>Job</code> instance using the closure we get in <code>execute</code>, we
|
||
send that job down the sending end of the channel. We’re calling <code>unwrap</code> on
|
||
<code>send</code> for the case that sending fails. This might happen if, for example, we
|
||
stop all our threads from executing, meaning the receiving end has stopped
|
||
receiving new messages. At the moment, we can’t stop our threads from
|
||
executing: our threads continue executing as long as the pool exists. The
|
||
reason we use <code>unwrap</code> is that we know the failure case won’t happen, but the
|
||
compiler doesn’t know that.</p>
|
||
<p>But we’re not quite done yet! In the worker, our closure being passed to
|
||
<code>thread::spawn</code> still only <em>references</em> the receiving end of the channel.
|
||
Instead, we need the closure to loop forever, asking the receiving end of the
|
||
channel for a job and running the job when it gets one. Let’s make the change
|
||
shown in Listing 20-20 to <code>Worker::new</code>.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">// --snip--
|
||
|
||
impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Job>>>) -> Worker {
|
||
let thread = thread::spawn(move || {
|
||
loop {
|
||
let job = receiver.lock().unwrap().recv().unwrap();
|
||
|
||
println!("Worker {} got a job; executing.", id);
|
||
|
||
(*job)();
|
||
}
|
||
});
|
||
|
||
Worker {
|
||
id,
|
||
thread,
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-20: Receiving and executing the jobs in the
|
||
worker’s thread</span></p>
|
||
<p>Here, we first call <code>lock</code> on the <code>receiver</code> to acquire the mutex, and then we
|
||
call <code>unwrap</code> to panic on any errors. Acquiring a lock might fail if the mutex
|
||
is in a <em>poisoned</em> state, which can happen if some other thread panicked while
|
||
holding the lock rather than releasing the lock. In this situation, calling
|
||
<code>unwrap</code> to have this thread panic is the correct action to take. Feel free to
|
||
change this <code>unwrap</code> to an <code>expect</code> with an error message that is meaningful to
|
||
you.</p>
|
||
<p>If we get the lock on the mutex, we call <code>recv</code> to receive a <code>Job</code> from the
|
||
channel. A final <code>unwrap</code> moves past any errors here as well, which might occur
|
||
if the thread holding the sending side of the channel has shut down, similar to
|
||
how the <code>send</code> method returns <code>Err</code> if the receiving side shuts down.</p>
|
||
<p>The call to <code>recv</code> blocks, so if there is no job yet, the current thread will
|
||
wait until a job becomes available. The <code>Mutex&lt;T&gt;</code> ensures that only one
|
||
<code>Worker</code> thread at a time is trying to request a job.</p>
|
||
<p>Theoretically, this code should compile. Unfortunately, the Rust compiler isn’t
|
||
perfect yet, and we get this error:</p>
|
||
<pre><code class="language-text">error[E0161]: cannot move a value of type std::ops::FnOnce() +
|
||
std::marker::Send: the size of std::ops::FnOnce() + std::marker::Send cannot be
|
||
statically determined
|
||
--> src/lib.rs:63:17
|
||
|
|
||
63 | (*job)();
|
||
| ^^^^^^
|
||
</code></pre>
|
||
<p>This error is fairly cryptic because the problem is fairly cryptic. To call a
|
||
<code>FnOnce</code> closure that is stored in a <code>Box&lt;T&gt;</code> (which is what our <code>Job</code> type
|
||
alias is), the closure needs to move itself <em>out</em> of the <code>Box&lt;T&gt;</code> because the
|
||
closure takes ownership of <code>self</code> when we call it. In general, Rust doesn’t
|
||
allow us to move a value out of a <code>Box&lt;T&gt;</code> because Rust doesn’t know how big
|
||
the value inside the <code>Box&lt;T&gt;</code> will be: recall in Chapter 15 that we used
|
||
<code>Box&lt;T&gt;</code> precisely because we had something of an unknown size that we wanted
|
||
to store in a <code>Box&lt;T&gt;</code> to get a value of a known size.</p>
|
||
<p>As you saw in Listing 17-15, we can write methods that use the syntax <code>self: Box&lt;Self&gt;</code>, which allows the method to take ownership of a <code>Self</code> value stored
|
||
in a <code>Box&lt;T&gt;</code>. That’s exactly what we want to do here, but unfortunately Rust
|
||
won’t let us: the part of Rust that implements behavior when a closure is
|
||
called isn’t implemented using <code>self: Box&lt;Self&gt;</code>. So Rust doesn’t yet
|
||
understand that it could use <code>self: Box&lt;Self&gt;</code> in this situation to take
|
||
ownership of the closure and move the closure out of the <code>Box&lt;T&gt;</code>.</p>
|
||
<p>Rust is still a work in progress with places where the compiler could be
|
||
improved, but in the future, the code in Listing 20-20 should work just fine.
|
||
People just like you are working to fix this and other issues! After you’ve
|
||
finished this book, we would love for you to join in.</p>
|
||
<p>But for now, let’s work around this problem using a handy trick. We can tell
|
||
Rust explicitly that in this case we can take ownership of the value inside the
|
||
<code>Box&lt;T&gt;</code> using <code>self: Box&lt;Self&gt;</code>; then, once we have ownership of the closure,
|
||
we can call it. This involves defining a new trait <code>FnBox</code> with the method
|
||
<code>call_box</code> that will use <code>self: Box&lt;Self&gt;</code> in its signature, defining <code>FnBox</code>
|
||
for any type that implements <code>FnOnce()</code>, changing our type alias to use the new
|
||
trait, and changing <code>Worker</code> to use the <code>call_box</code> method. These changes are
|
||
shown in Listing 20-21.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">trait FnBox {
|
||
fn call_box(self: Box<Self>);
|
||
}
|
||
|
||
impl<F: FnOnce()> FnBox for F {
|
||
fn call_box(self: Box<F>) {
|
||
(*self)()
|
||
}
|
||
}
|
||
|
||
type Job = Box<dyn FnBox + Send + 'static>;
|
||
|
||
// --snip--
|
||
|
||
impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Job>>>) -> Worker {
|
||
let thread = thread::spawn(move || {
|
||
loop {
|
||
let job = receiver.lock().unwrap().recv().unwrap();
|
||
|
||
println!("Worker {} got a job; executing.", id);
|
||
|
||
job.call_box();
|
||
}
|
||
});
|
||
|
||
Worker {
|
||
id,
|
||
thread,
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-21: Adding a new trait <code>FnBox</code> to work around
|
||
the current limitations of <code>Box&lt;FnOnce()&gt;</code></span></p>
|
||
<p>First, we create a new trait named <code>FnBox</code>. This trait has the one method
|
||
<code>call_box</code>, which is similar to the <code>call</code> methods on the other <code>Fn*</code> traits
|
||
except that it takes <code>self: Box&lt;Self&gt;</code> to take ownership of <code>self</code> and move the
|
||
value out of the <code>Box&lt;T&gt;</code>.</p>
|
||
<p>Next, we implement the <code>FnBox</code> trait for any type <code>F</code> that implements the
|
||
<code>FnOnce()</code> trait. Effectively, this means that any <code>FnOnce()</code> closures can use
|
||
our <code>call_box</code> method. The implementation of <code>call_box</code> uses <code>(*self)()</code> to
|
||
move the closure out of the <code>Box&lt;T&gt;</code> and call the closure.</p>
|
||
<p>We now need our <code>Job</code> type alias to be a <code>Box</code> of anything that implements our
|
||
new trait <code>FnBox</code>. This will allow us to use <code>call_box</code> in <code>Worker</code> when we get
|
||
a <code>Job</code> value instead of invoking the closure directly. Implementing the
|
||
<code>FnBox</code> trait for any <code>FnOnce()</code> closure means we don’t have to change anything
|
||
about the actual values we’re sending down the channel. Now Rust is able to
|
||
recognize that what we want to do is fine.</p>
|
||
<p>This trick is very sneaky and complicated. Don’t worry if it doesn’t make
|
||
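perfect sense; someday, it will be completely unnecessary. (In fact, Rust 1.35 and later implement <code>FnOnce()</code> for <code>Box&lt;dyn FnOnce()&gt;</code>, so on a current compiler you can call the boxed closure directly with <code>job()</code> instead of using <code>FnBox</code>.)</p>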
|
||
<p>With the implementation of this trick, our thread pool is in a working state!
|
||
Give it a <code>cargo run</code> and make some requests:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
warning: field is never used: `workers`
|
||
--> src/lib.rs:7:5
|
||
|
|
||
7 | workers: Vec<Worker>,
|
||
| ^^^^^^^^^^^^^^^^^^^^
|
||
|
|
||
= note: #[warn(dead_code)] on by default
|
||
|
||
warning: field is never used: `id`
|
||
--> src/lib.rs:61:5
|
||
|
|
||
61 | id: usize,
|
||
| ^^^^^^^^^
|
||
|
|
||
= note: #[warn(dead_code)] on by default
|
||
|
||
warning: field is never used: `thread`
|
||
--> src/lib.rs:62:5
|
||
|
|
||
62 | thread: thread::JoinHandle<()>,
|
||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
|
|
||
= note: #[warn(dead_code)] on by default
|
||
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.99 secs
|
||
Running `target/debug/hello`
|
||
Worker 0 got a job; executing.
|
||
Worker 2 got a job; executing.
|
||
Worker 1 got a job; executing.
|
||
Worker 3 got a job; executing.
|
||
Worker 0 got a job; executing.
|
||
Worker 2 got a job; executing.
|
||
Worker 1 got a job; executing.
|
||
Worker 3 got a job; executing.
|
||
Worker 0 got a job; executing.
|
||
Worker 2 got a job; executing.
|
||
</code></pre>
|
||
<p>Success! We now have a thread pool that executes connections asynchronously.
|
||
There are never more than four threads created, so our system won’t get
|
||
overloaded if the server receives a lot of requests. If we make a request to
|
||
<em>/sleep</em>, the server will be able to serve other requests by having another
|
||
thread run them.</p>
|
||
<blockquote>
|
||
<p>Note: if you open <em>/sleep</em> in multiple browser windows simultaneously, they
|
||
might load one at a time in 5-second intervals. Some web browsers execute
|
||
multiple instances of the same request sequentially for caching reasons. This
|
||
limitation is not caused by our web server.</p>
|
||
</blockquote>
|
||
<p>After learning about the <code>while let</code> loop in Chapter 18, you might be wondering
|
||
why we didn’t write the worker thread code as shown in Listing 20-22.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore not_desired_behavior">// --snip--
|
||
|
||
impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Job>>>) -> Worker {
|
||
let thread = thread::spawn(move || {
|
||
while let Ok(job) = receiver.lock().unwrap().recv() {
|
||
println!("Worker {} got a job; executing.", id);
|
||
|
||
job.call_box();
|
||
}
|
||
});
|
||
|
||
Worker {
|
||
id,
|
||
thread,
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-22: An alternative implementation of
|
||
<code>Worker::new</code> using <code>while let</code></span></p>
|
||
<p>This code compiles and runs but doesn’t result in the desired threading
|
||
behavior: a slow request will still cause other requests to wait to be
|
||
processed. The reason is somewhat subtle: the <code>Mutex</code> struct has no public
|
||
<code>unlock</code> method because the ownership of the lock is based on the lifetime of
|
||
the <code>MutexGuard&lt;T&gt;</code> within the <code>LockResult&lt;MutexGuard&lt;T&gt;&gt;</code> that the <code>lock</code>
|
||
method returns. At compile time, the borrow checker can then enforce the rule
|
||
that a resource guarded by a <code>Mutex</code> cannot be accessed unless we hold the
|
||
lock. But this implementation can also result in the lock being held longer
|
||
than intended if we don’t think carefully about the lifetime of the
|
||
<code>MutexGuard&lt;T&gt;</code>. Because the values in the <code>while</code> expression remain in scope
|
||
for the duration of the block, the lock remains held for the duration of the
|
||
call to <code>job.call_box()</code>, meaning other workers cannot receive jobs.</p>
|
||
<p>By using <code>loop</code> instead and acquiring the lock and a job within the block
|
||
rather than outside it, the <code>MutexGuard</code> returned from the <code>lock</code> method is
|
||
dropped as soon as the <code>let job</code> statement ends. This ensures that the lock is
|
||
held during the call to <code>recv</code>, but it is released before the call to
|
||
<code>job.call_box()</code>, allowing multiple requests to be serviced concurrently.</p>
|
||
<h2><a class="header" href="#graceful-shutdown-and-cleanup" id="graceful-shutdown-and-cleanup">Graceful Shutdown and Cleanup</a></h2>
|
||
<p>The code in Listing 20-21 is responding to requests asynchronously through the
|
||
use of a thread pool, as we intended. We get some warnings about the <code>workers</code>,
|
||
<code>id</code>, and <code>thread</code> fields that we’re not using in a direct way, which reminds us
|
||
we’re not cleaning up anything. When we use the less elegant <span
|
||
class="keystroke">ctrl-c</span> method to halt the main thread, all other
|
||
threads are stopped immediately as well, even if they’re in the middle of
|
||
serving a request.</p>
|
||
<p>Now we’ll implement the <code>Drop</code> trait to call <code>join</code> on each of the threads in
|
||
the pool so they can finish the requests they’re working on before closing.
|
||
Then we’ll implement a way to tell the threads they should stop accepting new
|
||
requests and shut down. To see this code in action, we’ll modify our server to
|
||
accept only two requests before gracefully shutting down its thread pool.</p>
|
||
<h3><a class="header" href="#implementing-the-drop-trait-on-threadpool" id="implementing-the-drop-trait-on-threadpool">Implementing the <code>Drop</code> Trait on <code>ThreadPool</code></a></h3>
|
||
<p>Let’s start with implementing <code>Drop</code> on our thread pool. When the pool is
|
||
dropped, our threads should all join to make sure they finish their work.
|
||
Listing 20-23 shows a first attempt at a <code>Drop</code> implementation; this code won’t
|
||
quite work yet.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">impl Drop for ThreadPool {
|
||
fn drop(&mut self) {
|
||
for worker in &mut self.workers {
|
||
println!("Shutting down worker {}", worker.id);
|
||
|
||
worker.thread.join().unwrap();
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-23: Joining each thread when the thread pool
|
||
goes out of scope</span></p>
|
||
<p>First, we loop through each of the thread pool <code>workers</code>. We use <code>&mut</code> for
|
||
this because <code>self</code> is a mutable reference, and we also need to be able to
|
||
mutate <code>worker</code>. For each worker, we print a message saying that this
|
||
particular worker is shutting down, and then we call <code>join</code> on that worker’s
|
||
thread. If the call to <code>join</code> fails, we use <code>unwrap</code> to make Rust panic and go
|
||
into an ungraceful shutdown.</p>
|
||
<p>Here is the error we get when we compile this code:</p>
|
||
<pre><code class="language-text">error[E0507]: cannot move out of borrowed content
|
||
--> src/lib.rs:65:13
|
||
|
|
||
65 | worker.thread.join().unwrap();
|
||
| ^^^^^^ cannot move out of borrowed content
|
||
</code></pre>
|
||
<p>The error tells us we can’t call <code>join</code> because we only have a mutable borrow
|
||
of each <code>worker</code> and <code>join</code> takes ownership of its argument. To solve this
|
||
issue, we need to move the thread out of the <code>Worker</code> instance that owns
|
||
<code>thread</code> so <code>join</code> can consume the thread. We did this in Listing 17-15: if
|
||
<code>Worker</code> holds an <code>Option&lt;thread::JoinHandle&lt;()&gt;&gt;</code> instead, we can call the
|
||
<code>take</code> method on the <code>Option</code> to move the value out of the <code>Some</code> variant and
|
||
leave a <code>None</code> variant in its place. In other words, a <code>Worker</code> that is running
|
||
will have a <code>Some</code> variant in <code>thread</code>, and when we want to clean up a
|
||
<code>Worker</code>, we’ll replace <code>Some</code> with <code>None</code> so the <code>Worker</code> doesn’t have a
|
||
thread to run.</p>
|
||
<p>So we know we want to update the definition of <code>Worker</code> like this:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">use std::thread;
|
||
</span>struct Worker {
|
||
id: usize,
|
||
thread: Option<thread::JoinHandle<()>>,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>Now let’s lean on the compiler to find the other places that need to change.
|
||
Checking this code, we get two errors:</p>
|
||
<pre><code class="language-text">error[E0599]: no method named `join` found for type
|
||
`std::option::Option<std::thread::JoinHandle<()>>` in the current scope
|
||
--> src/lib.rs:65:27
|
||
|
|
||
65 | worker.thread.join().unwrap();
|
||
| ^^^^
|
||
|
||
error[E0308]: mismatched types
|
||
--> src/lib.rs:89:13
|
||
|
|
||
89 | thread,
|
||
| ^^^^^^
|
||
| |
|
||
| expected enum `std::option::Option`, found struct
|
||
`std::thread::JoinHandle`
|
||
| help: try using a variant of the expected type: `Some(thread)`
|
||
|
|
||
= note: expected type `std::option::Option<std::thread::JoinHandle<()>>`
|
||
found type `std::thread::JoinHandle<_>`
|
||
</code></pre>
|
||
<p>Let’s address the second error, which points to the code at the end of
|
||
<code>Worker::new</code>; we need to wrap the <code>thread</code> value in <code>Some</code> when we create a
|
||
new <code>Worker</code>. Make the following changes to fix this error:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Job>>>) -> Worker {
|
||
// --snip--
|
||
|
||
Worker {
|
||
id,
|
||
thread: Some(thread),
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>The first error is in our <code>Drop</code> implementation. We mentioned earlier that we
|
||
intended to call <code>take</code> on the <code>Option</code> value to move <code>thread</code> out of <code>worker</code>.
|
||
The following changes will do so:</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Drop for ThreadPool {
|
||
fn drop(&mut self) {
|
||
for worker in &mut self.workers {
|
||
println!("Shutting down worker {}", worker.id);
|
||
|
||
if let Some(thread) = worker.thread.take() {
|
||
thread.join().unwrap();
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p>As discussed in Chapter 17, the <code>take</code> method on <code>Option</code> takes the <code>Some</code>
|
||
variant out and leaves <code>None</code> in its place. We’re using <code>if let</code> to destructure
|
||
the <code>Some</code> and get the thread; then we call <code>join</code> on the thread. If a worker’s
|
||
thread is already <code>None</code>, we know that worker has already had its thread
|
||
cleaned up, so nothing happens in that case.</p>
|
||
<h3><a class="header" href="#signaling-to-the-threads-to-stop-listening-for-jobs" id="signaling-to-the-threads-to-stop-listening-for-jobs">Signaling to the Threads to Stop Listening for Jobs</a></h3>
|
||
<p>With all the changes we’ve made, our code compiles without any warnings. But
|
||
the bad news is this code doesn’t function the way we want it to yet. The key
|
||
is the logic in the closures run by the threads of the <code>Worker</code> instances: at
|
||
the moment, we call <code>join</code>, but that won’t shut down the threads because they
|
||
<code>loop</code> forever looking for jobs. If we try to drop our <code>ThreadPool</code> with our
|
||
current implementation of <code>drop</code>, the main thread will block forever waiting
|
||
for the first thread to finish.</p>
|
||
<p>To fix this problem, we’ll modify the threads so they listen for either a <code>Job</code>
|
||
to run or a signal that they should stop listening and exit the infinite loop.
|
||
Instead of <code>Job</code> instances, our channel will send one of these two enum
|
||
variants.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span><span class="boring">struct Job;
|
||
</span>enum Message {
|
||
NewJob(Job),
|
||
Terminate,
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>This <code>Message</code> enum will either be a <code>NewJob</code> variant that holds the <code>Job</code> the
|
||
thread should run, or it will be a <code>Terminate</code> variant that will cause the
|
||
thread to exit its loop and stop.</p>
|
||
<p>We need to adjust the channel to use values of type <code>Message</code> rather than type
|
||
<code>Job</code>, as shown in Listing 20-24.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">pub struct ThreadPool {
|
||
workers: Vec<Worker>,
|
||
sender: mpsc::Sender<Message>,
|
||
}
|
||
|
||
// --snip--
|
||
|
||
impl ThreadPool {
|
||
// --snip--
|
||
|
||
pub fn execute<F>(&self, f: F)
|
||
where
|
||
F: FnOnce() + Send + 'static
|
||
{
|
||
let job = Box::new(f);
|
||
|
||
self.sender.send(Message::NewJob(job)).unwrap();
|
||
}
|
||
}
|
||
|
||
// --snip--
|
||
|
||
impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Message>>>) ->
|
||
Worker {
|
||
|
||
let thread = thread::spawn(move ||{
|
||
loop {
|
||
let message = receiver.lock().unwrap().recv().unwrap();
|
||
|
||
match message {
|
||
Message::NewJob(job) => {
|
||
println!("Worker {} got a job; executing.", id);
|
||
|
||
job.call_box();
|
||
},
|
||
Message::Terminate => {
|
||
println!("Worker {} was told to terminate.", id);
|
||
|
||
break;
|
||
},
|
||
}
|
||
}
|
||
});
|
||
|
||
Worker {
|
||
id,
|
||
thread: Some(thread),
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-24: Sending and receiving <code>Message</code> values and
|
||
exiting the loop if a <code>Worker</code> receives <code>Message::Terminate</code></span></p>
|
||
<p>To incorporate the <code>Message</code> enum, we need to change <code>Job</code> to <code>Message</code> in two
|
||
places: the definition of <code>ThreadPool</code> and the signature of <code>Worker::new</code>. The
|
||
<code>execute</code> method of <code>ThreadPool</code> needs to send jobs wrapped in the
|
||
<code>Message::NewJob</code> variant. Then, in <code>Worker::new</code> where a <code>Message</code> is received
|
||
from the channel, the job will be processed if the <code>NewJob</code> variant is
|
||
received, and the thread will break out of the loop if the <code>Terminate</code> variant
|
||
is received.</p>
|
||
<p>With these changes, the code will compile and continue to function in the same
|
||
way as it did after Listing 20-21. But we’ll get a warning because we aren’t
|
||
creating any messages of the <code>Terminate</code> variety. Let’s fix this warning by
|
||
changing our <code>Drop</code> implementation to look like Listing 20-25.</p>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><code class="language-rust ignore">impl Drop for ThreadPool {
|
||
fn drop(&mut self) {
|
||
println!("Sending terminate message to all workers.");
|
||
|
||
for _ in &mut self.workers {
|
||
self.sender.send(Message::Terminate).unwrap();
|
||
}
|
||
|
||
println!("Shutting down all workers.");
|
||
|
||
for worker in &mut self.workers {
|
||
println!("Shutting down worker {}", worker.id);
|
||
|
||
if let Some(thread) = worker.thread.take() {
|
||
thread.join().unwrap();
|
||
}
|
||
}
|
||
}
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-25: Sending <code>Message::Terminate</code> to the
|
||
workers before calling <code>join</code> on each worker thread</span></p>
|
||
<p>We’re now iterating over the workers twice: once to send one <code>Terminate</code>
|
||
message for each worker and once to call <code>join</code> on each worker’s thread. If we
|
||
tried to send a message and <code>join</code> immediately in the same loop, we couldn’t
|
||
guarantee that the worker in the current iteration would be the one to get the
|
||
message from the channel.</p>
|
||
<p>To better understand why we need two separate loops, imagine a scenario with
|
||
two workers. If we used a single loop to iterate through each worker, on the
|
||
first iteration a terminate message would be sent down the channel and <code>join</code>
|
||
called on the first worker’s thread. If that first worker was busy processing a
|
||
request at that moment, the second worker would pick up the terminate message
|
||
from the channel and shut down. We would be left waiting on the first worker to
|
||
shut down, but it never would because the second thread picked up the terminate
|
||
message. Deadlock!</p>
|
||
<p>To prevent this scenario, we first put all of our <code>Terminate</code> messages on the
|
||
channel in one loop; then we join on all the threads in another loop. Each
|
||
worker will stop receiving requests on the channel once it gets a terminate
|
||
message. So, we can be sure that if we send the same number of terminate
|
||
messages as there are workers, each worker will receive a terminate message
|
||
before <code>join</code> is called on its thread.</p>
|
||
<p>To see this code in action, let’s modify <code>main</code> to accept only two requests
|
||
before gracefully shutting down the server, as shown in Listing 20-26.</p>
|
||
<p><span class="filename">Filename: src/bin/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">fn main() {
|
||
let listener = TcpListener::bind("127.0.0.1:7878").unwrap();
|
||
let pool = ThreadPool::new(4);
|
||
|
||
for stream in listener.incoming().take(2) {
|
||
let stream = stream.unwrap();
|
||
|
||
pool.execute(|| {
|
||
handle_connection(stream);
|
||
});
|
||
}
|
||
|
||
println!("Shutting down.");
|
||
}
|
||
</code></pre>
|
||
<p><span class="caption">Listing 20-26: Shut down the server after serving two
|
||
requests by exiting the loop</span></p>
|
||
<p>You wouldn’t want a real-world web server to shut down after serving only two
|
||
requests. This code just demonstrates that the graceful shutdown and cleanup is
|
||
in working order.</p>
|
||
<p>The <code>take</code> method is defined in the <code>Iterator</code> trait and limits the iteration
|
||
to the first two items at most. The <code>ThreadPool</code> will go out of scope at the
|
||
end of <code>main</code>, and the <code>drop</code> implementation will run.</p>
|
||
<p>Start the server with <code>cargo run</code>, and make three requests. The third request
|
||
should error, and in your terminal you should see output similar to this:</p>
|
||
<pre><code class="language-text">$ cargo run
|
||
Compiling hello v0.1.0 (file:///projects/hello)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 1.0 secs
|
||
Running `target/debug/hello`
|
||
Worker 0 got a job; executing.
|
||
Worker 3 got a job; executing.
|
||
Shutting down.
|
||
Sending terminate message to all workers.
|
||
Shutting down all workers.
|
||
Shutting down worker 0
|
||
Worker 1 was told to terminate.
|
||
Worker 2 was told to terminate.
|
||
Worker 0 was told to terminate.
|
||
Worker 3 was told to terminate.
|
||
Shutting down worker 1
|
||
Shutting down worker 2
|
||
Shutting down worker 3
|
||
</code></pre>
|
||
<p>You might see a different ordering of workers and messages printed. We can see
|
||
how this code works from the messages: workers 0 and 3 got the first two
|
||
requests. After the second connection, the server stopped accepting
|
||
connections. When the <code>ThreadPool</code> goes out of scope at the end of <code>main</code>, its
|
||
<code>Drop</code> implementation kicks in, and the pool tells all workers to terminate.
|
||
The workers each print a message when they see the terminate message, and then
|
||
the thread pool calls <code>join</code> to shut down each worker thread.</p>
|
||
<p>Notice one interesting aspect of this particular execution: the <code>ThreadPool</code>
|
||
sent the terminate messages down the channel, and before any worker received
|
||
the messages, we tried to join worker 0. Worker 0 had not yet received the
|
||
terminate message, so the main thread blocked waiting for worker 0 to finish.
|
||
In the meantime, each of the workers received the termination messages. When
|
||
worker 0 finished, the main thread waited for the rest of the workers to
|
||
finish. At that point, they had all received the termination message and were
|
||
able to shut down.</p>
|
||
<p>Congrats! We’ve now completed our project; we have a basic web server that uses
|
||
a thread pool to respond asynchronously. We’re able to perform a graceful
|
||
shutdown of the server, which cleans up all the threads in the pool.</p>
|
||
<p>Here’s the full code for reference:</p>
|
||
<p><span class="filename">Filename: src/bin/main.rs</span></p>
|
||
<pre><code class="language-rust ignore">use hello::ThreadPool;
|
||
|
||
use std::io::prelude::*;
|
||
use std::net::TcpListener;
|
||
use std::net::TcpStream;
|
||
use std::fs;
|
||
use std::thread;
|
||
use std::time::Duration;
|
||
|
||
fn main() {
|
||
let listener = TcpListener::bind("127.0.0.1:7878").unwrap();
|
||
let pool = ThreadPool::new(4);
|
||
|
||
for stream in listener.incoming().take(2) {
|
||
let stream = stream.unwrap();
|
||
|
||
pool.execute(|| {
|
||
handle_connection(stream);
|
||
});
|
||
}
|
||
|
||
println!("Shutting down.");
|
||
}
|
||
|
||
fn handle_connection(mut stream: TcpStream) {
|
||
let mut buffer = [0; 512];
|
||
stream.read(&mut buffer).unwrap();
|
||
|
||
let get = b"GET / HTTP/1.1\r\n";
|
||
let sleep = b"GET /sleep HTTP/1.1\r\n";
|
||
|
||
let (status_line, filename) = if buffer.starts_with(get) {
|
||
("HTTP/1.1 200 OK\r\n\r\n", "hello.html")
|
||
} else if buffer.starts_with(sleep) {
|
||
thread::sleep(Duration::from_secs(5));
|
||
("HTTP/1.1 200 OK\r\n\r\n", "hello.html")
|
||
} else {
|
||
("HTTP/1.1 404 NOT FOUND\r\n\r\n", "404.html")
|
||
};
|
||
|
||
let contents = fs::read_to_string(filename).unwrap();
|
||
|
||
let response = format!("{}{}", status_line, contents);
|
||
|
||
stream.write(response.as_bytes()).unwrap();
|
||
stream.flush().unwrap();
|
||
}
|
||
</code></pre>
|
||
<p><span class="filename">Filename: src/lib.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">
|
||
<span class="boring">#![allow(unused_variables)]
|
||
</span><span class="boring">fn main() {
|
||
</span>use std::thread;
|
||
use std::sync::mpsc;
|
||
use std::sync::Arc;
|
||
use std::sync::Mutex;
|
||
|
||
enum Message {
|
||
NewJob(Job),
|
||
Terminate,
|
||
}
|
||
|
||
pub struct ThreadPool {
|
||
workers: Vec<Worker>,
|
||
sender: mpsc::Sender<Message>,
|
||
}
|
||
|
||
trait FnBox {
|
||
fn call_box(self: Box<Self>);
|
||
}
|
||
|
||
impl<F: FnOnce()> FnBox for F {
|
||
fn call_box(self: Box<F>) {
|
||
(*self)()
|
||
}
|
||
}
|
||
|
||
type Job = Box<dyn FnBox + Send + 'static>;
|
||
|
||
impl ThreadPool {
|
||
/// Create a new ThreadPool.
|
||
///
|
||
/// The size is the number of threads in the pool.
|
||
///
|
||
/// # Panics
|
||
///
|
||
/// The `new` function will panic if the size is zero.
|
||
pub fn new(size: usize) -> ThreadPool {
|
||
assert!(size > 0);
|
||
|
||
let (sender, receiver) = mpsc::channel();
|
||
|
||
let receiver = Arc::new(Mutex::new(receiver));
|
||
|
||
let mut workers = Vec::with_capacity(size);
|
||
|
||
for id in 0..size {
|
||
workers.push(Worker::new(id, Arc::clone(&receiver)));
|
||
}
|
||
|
||
ThreadPool {
|
||
workers,
|
||
sender,
|
||
}
|
||
}
|
||
|
||
pub fn execute<F>(&self, f: F)
|
||
where
|
||
F: FnOnce() + Send + 'static
|
||
{
|
||
let job = Box::new(f);
|
||
|
||
self.sender.send(Message::NewJob(job)).unwrap();
|
||
}
|
||
}
|
||
|
||
impl Drop for ThreadPool {
|
||
fn drop(&mut self) {
|
||
println!("Sending terminate message to all workers.");
|
||
|
||
for _ in &mut self.workers {
|
||
self.sender.send(Message::Terminate).unwrap();
|
||
}
|
||
|
||
println!("Shutting down all workers.");
|
||
|
||
for worker in &mut self.workers {
|
||
println!("Shutting down worker {}", worker.id);
|
||
|
||
if let Some(thread) = worker.thread.take() {
|
||
thread.join().unwrap();
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
struct Worker {
|
||
id: usize,
|
||
thread: Option<thread::JoinHandle<()>>,
|
||
}
|
||
|
||
impl Worker {
|
||
fn new(id: usize, receiver: Arc<Mutex<mpsc::Receiver<Message>>>) ->
|
||
Worker {
|
||
|
||
let thread = thread::spawn(move ||{
|
||
loop {
|
||
let message = receiver.lock().unwrap().recv().unwrap();
|
||
|
||
match message {
|
||
Message::NewJob(job) => {
|
||
println!("Worker {} got a job; executing.", id);
|
||
|
||
job.call_box();
|
||
},
|
||
Message::Terminate => {
|
||
println!("Worker {} was told to terminate.", id);
|
||
|
||
break;
|
||
},
|
||
}
|
||
}
|
||
});
|
||
|
||
Worker {
|
||
id,
|
||
thread: Some(thread),
|
||
}
|
||
}
|
||
}
|
||
<span class="boring">}
|
||
</span></code></pre></pre>
|
||
<p>We could do more here! If you want to continue enhancing this project, here are
|
||
some ideas:</p>
|
||
<ul>
|
||
<li>Add more documentation to <code>ThreadPool</code> and its public methods.</li>
|
||
<li>Add tests of the library’s functionality.</li>
|
||
<li>Change calls to <code>unwrap</code> to more robust error handling.</li>
|
||
<li>Use <code>ThreadPool</code> to perform some task other than serving web requests.</li>
|
||
<li>Find a thread pool crate on <a href="https://crates.io/">crates.io</a> and implement a
|
||
similar web server using the crate instead. Then compare its API and
|
||
robustness to the thread pool we implemented.</li>
|
||
</ul>
|
||
<h2><a class="header" href="#summary-19" id="summary-19">Summary</a></h2>
|
||
<p>Well done! You’ve made it to the end of the book! We want to thank you for
|
||
joining us on this tour of Rust. You’re now ready to implement your own Rust
|
||
projects and help with other people’s projects. Keep in mind that there is a
|
||
welcoming community of other Rustaceans who would love to help you with any
|
||
challenges you encounter on your Rust journey.</p>
|
||
<h1><a class="header" href="#appendix" id="appendix">Appendix</a></h1>
|
||
<p>The following sections contain reference material you may find useful in your
|
||
Rust journey.</p>
|
||
<h2><a class="header" href="#appendix-a-keywords" id="appendix-a-keywords">Appendix A: Keywords</a></h2>
|
||
<p>The following list contains keywords that are reserved for current or future
|
||
use by the Rust language. As such, they cannot be used as identifiers (except
|
||
as raw identifiers as we’ll discuss in the “<a href="appendix-01-keywords.html#raw-identifiers">Raw
|
||
Identifiers</a><!-- ignore -->” section), including names of
|
||
functions, variables, parameters, struct fields, modules, crates, constants,
|
||
macros, static values, attributes, types, traits, or lifetimes.</p>
|
||
<h3><a class="header" href="#keywords-currently-in-use" id="keywords-currently-in-use">Keywords Currently in Use</a></h3>
|
||
<p>The following keywords currently have the functionality described.</p>
|
||
<ul>
|
||
<li><code>as</code> - perform primitive casting, disambiguate the specific trait containing
|
||
an item, or rename items in <code>use</code> and <code>extern crate</code> statements</li>
|
||
<li><code>break</code> - exit a loop immediately</li>
|
||
<li><code>const</code> - define constant items or constant raw pointers</li>
|
||
<li><code>continue</code> - continue to the next loop iteration</li>
|
||
<li><code>crate</code> - link an external crate or a macro variable representing the crate in
|
||
which the macro is defined</li>
|
||
<li><code>dyn</code> - dynamic dispatch to a trait object</li>
|
||
<li><code>else</code> - fallback for <code>if</code> and <code>if let</code> control flow constructs</li>
|
||
<li><code>enum</code> - define an enumeration</li>
|
||
<li><code>extern</code> - link an external crate, function, or variable</li>
|
||
<li><code>false</code> - Boolean false literal</li>
|
||
<li><code>fn</code> - define a function or the function pointer type</li>
|
||
<li><code>for</code> - loop over items from an iterator, implement a trait, or specify a
|
||
higher-ranked lifetime</li>
|
||
<li><code>if</code> - branch based on the result of a conditional expression</li>
|
||
<li><code>impl</code> - implement inherent or trait functionality</li>
|
||
<li><code>in</code> - part of <code>for</code> loop syntax</li>
|
||
<li><code>let</code> - bind a variable</li>
|
||
<li><code>loop</code> - loop unconditionally</li>
|
||
<li><code>match</code> - match a value to patterns</li>
|
||
<li><code>mod</code> - define a module</li>
|
||
<li><code>move</code> - make a closure take ownership of all its captures</li>
|
||
<li><code>mut</code> - denote mutability in references, raw pointers, or pattern bindings</li>
|
||
<li><code>pub</code> - denote public visibility in struct fields, <code>impl</code> blocks, or modules</li>
|
||
<li><code>ref</code> - bind by reference</li>
|
||
<li><code>return</code> - return from function</li>
|
||
<li><code>Self</code> - a type alias for the type implementing a trait</li>
|
||
<li><code>self</code> - method subject or current module</li>
|
||
<li><code>static</code> - global variable or lifetime lasting the entire program execution</li>
|
||
<li><code>struct</code> - define a structure</li>
|
||
<li><code>super</code> - parent module of the current module</li>
|
||
<li><code>trait</code> - define a trait</li>
|
||
<li><code>true</code> - Boolean true literal</li>
|
||
<li><code>type</code> - define a type alias or associated type</li>
|
||
<li><code>unsafe</code> - denote unsafe code, functions, traits, or implementations</li>
|
||
<li><code>use</code> - bring symbols into scope</li>
|
||
<li><code>where</code> - denote clauses that constrain a type</li>
|
||
<li><code>while</code> - loop conditionally based on the result of an expression</li>
|
||
</ul>
|
||
<h3><a class="header" href="#keywords-reserved-for-future-use" id="keywords-reserved-for-future-use">Keywords Reserved for Future Use</a></h3>
|
||
<p>The following keywords do not have any functionality but are reserved by Rust
|
||
for potential future use.</p>
|
||
<ul>
|
||
<li><code>abstract</code></li>
|
||
<li><code>async</code></li>
|
||
<li><code>await</code></li>
|
||
<li><code>become</code></li>
|
||
<li><code>box</code></li>
|
||
<li><code>do</code></li>
|
||
<li><code>final</code></li>
|
||
<li><code>macro</code></li>
|
||
<li><code>override</code></li>
|
||
<li><code>priv</code></li>
|
||
<li><code>try</code></li>
|
||
<li><code>typeof</code></li>
|
||
<li><code>unsized</code></li>
|
||
<li><code>virtual</code></li>
|
||
<li><code>yield</code></li>
|
||
</ul>
|
||
<h3><a class="header" href="#raw-identifiers" id="raw-identifiers">Raw Identifiers</a></h3>
|
||
<p><em>Raw identifiers</em> are the syntax that lets you use keywords where they wouldn’t
|
||
normally be allowed. You use a raw identifier by prefixing a keyword with <code>r#</code>.</p>
|
||
<p>For example, <code>match</code> is a keyword. If you try to compile the following function
|
||
that uses <code>match</code> as its name:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><code class="language-rust ignore does_not_compile">fn match(needle: &str, haystack: &str) -> bool {
|
||
haystack.contains(needle)
|
||
}
|
||
</code></pre>
|
||
<p>you’ll get this error:</p>
|
||
<pre><code class="language-text">error: expected identifier, found keyword `match`
|
||
--> src/main.rs:4:4
|
||
|
|
||
4 | fn match(needle: &str, haystack: &str) -> bool {
|
||
| ^^^^^ expected identifier, found keyword
|
||
</code></pre>
|
||
<p>The error shows that you can’t use the keyword <code>match</code> as the function
|
||
identifier. To use <code>match</code> as a function name, you need to use the raw
|
||
identifier syntax, like this:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn r#match(needle: &str, haystack: &str) -> bool {
|
||
haystack.contains(needle)
|
||
}
|
||
|
||
fn main() {
|
||
assert!(r#match("foo", "foobar"));
|
||
}
|
||
</code></pre></pre>
|
||
<p>This code will compile without any errors. Note the <code>r#</code> prefix on the function
|
||
name in its definition as well as where the function is called in <code>main</code>.</p>
|
||
<p>Raw identifiers allow you to use any word you choose as an identifier, even if
|
||
that word happens to be a reserved keyword. In addition, raw identifiers allow
|
||
you to use libraries written in a different Rust edition than your crate uses.
|
||
For example, <code>try</code> isn’t a keyword in the 2015 edition but is in the 2018
|
||
edition. If you depend on a library that’s written using the 2015 edition and
|
||
has a <code>try</code> function, you’ll need to use the raw identifier syntax, <code>r#try</code> in
|
||
this case, to call that function from your 2018 edition code. See <a href="appendix-05-editions.html">Appendix
|
||
E</a><!-- ignore --> for more information on editions.</p>
|
||
<h2><a class="header" href="#appendix-b-operators-and-symbols" id="appendix-b-operators-and-symbols">Appendix B: Operators and Symbols</a></h2>
|
||
<p>This appendix contains a glossary of Rust’s syntax, including operators and
|
||
other symbols that appear by themselves or in the context of paths, generics,
|
||
trait bounds, macros, attributes, comments, tuples, and brackets.</p>
|
||
<h3><a class="header" href="#operators" id="operators">Operators</a></h3>
|
||
<p>Table B-1 contains the operators in Rust, an example of how the operator would
|
||
appear in context, a short explanation, and whether that operator is
|
||
overloadable. If an operator is overloadable, the relevant trait to use to
|
||
overload that operator is listed.</p>
|
||
<p><span class="caption">Table B-1: Operators</span></p>
|
||
<table><thead><tr><th>Operator</th><th>Example</th><th>Explanation</th><th>Overloadable?</th></tr></thead><tbody>
|
||
<tr><td><code>!</code></td><td><code>ident!(...)</code>, <code>ident!{...}</code>, <code>ident![...]</code></td><td>Macro expansion</td><td></td></tr>
|
||
<tr><td><code>!</code></td><td><code>!expr</code></td><td>Bitwise or logical complement</td><td><code>Not</code></td></tr>
|
||
<tr><td><code>!=</code></td><td><code>var != expr</code></td><td>Nonequality comparison</td><td><code>PartialEq</code></td></tr>
|
||
<tr><td><code>%</code></td><td><code>expr % expr</code></td><td>Arithmetic remainder</td><td><code>Rem</code></td></tr>
|
||
<tr><td><code>%=</code></td><td><code>var %= expr</code></td><td>Arithmetic remainder and assignment</td><td><code>RemAssign</code></td></tr>
|
||
<tr><td><code>&</code></td><td><code>&expr</code>, <code>&mut expr</code></td><td>Borrow</td><td></td></tr>
|
||
<tr><td><code>&</code></td><td><code>&type</code>, <code>&mut type</code>, <code>&'a type</code>, <code>&'a mut type</code></td><td>Borrowed pointer type</td><td></td></tr>
|
||
<tr><td><code>&</code></td><td><code>expr & expr</code></td><td>Bitwise AND</td><td><code>BitAnd</code></td></tr>
|
||
<tr><td><code>&=</code></td><td><code>var &= expr</code></td><td>Bitwise AND and assignment</td><td><code>BitAndAssign</code></td></tr>
|
||
<tr><td><code>&&</code></td><td><code>expr && expr</code></td><td>Logical AND</td><td></td></tr>
|
||
<tr><td><code>*</code></td><td><code>expr * expr</code></td><td>Arithmetic multiplication</td><td><code>Mul</code></td></tr>
|
||
<tr><td><code>*=</code></td><td><code>var *= expr</code></td><td>Arithmetic multiplication and assignment</td><td><code>MulAssign</code></td></tr>
|
||
<tr><td><code>*</code></td><td><code>*expr</code></td><td>Dereference</td><td></td></tr>
|
||
<tr><td><code>*</code></td><td><code>*const type</code>, <code>*mut type</code></td><td>Raw pointer</td><td></td></tr>
|
||
<tr><td><code>+</code></td><td><code>trait + trait</code>, <code>'a + trait</code></td><td>Compound type constraint</td><td></td></tr>
|
||
<tr><td><code>+</code></td><td><code>expr + expr</code></td><td>Arithmetic addition</td><td><code>Add</code></td></tr>
|
||
<tr><td><code>+=</code></td><td><code>var += expr</code></td><td>Arithmetic addition and assignment</td><td><code>AddAssign</code></td></tr>
|
||
<tr><td><code>,</code></td><td><code>expr, expr</code></td><td>Argument and element separator</td><td></td></tr>
|
||
<tr><td><code>-</code></td><td><code>- expr</code></td><td>Arithmetic negation</td><td><code>Neg</code></td></tr>
|
||
<tr><td><code>-</code></td><td><code>expr - expr</code></td><td>Arithmetic subtraction</td><td><code>Sub</code></td></tr>
|
||
<tr><td><code>-=</code></td><td><code>var -= expr</code></td><td>Arithmetic subtraction and assignment</td><td><code>SubAssign</code></td></tr>
|
||
<tr><td><code>-></code></td><td><code>fn(...) -> type</code>, <code>|...| -> type</code></td><td>Function and closure return type</td><td></td></tr>
|
||
<tr><td><code>.</code></td><td><code>expr.ident</code></td><td>Member access</td><td></td></tr>
|
||
<tr><td><code>..</code></td><td><code>..</code>, <code>expr..</code>, <code>..expr</code>, <code>expr..expr</code></td><td>Right-exclusive range literal</td><td></td></tr>
|
||
<tr><td><code>..=</code></td><td><code>..=expr</code>, <code>expr..=expr</code></td><td>Right-inclusive range literal</td><td></td></tr>
|
||
<tr><td><code>..</code></td><td><code>..expr</code></td><td>Struct literal update syntax</td><td></td></tr>
|
||
<tr><td><code>..</code></td><td><code>variant(x, ..)</code>, <code>struct_type { x, .. }</code></td><td>“And the rest” pattern binding</td><td></td></tr>
|
||
<tr><td><code>...</code></td><td><code>expr...expr</code></td><td>In a pattern: inclusive range pattern</td><td></td></tr>
|
||
<tr><td><code>/</code></td><td><code>expr / expr</code></td><td>Arithmetic division</td><td><code>Div</code></td></tr>
|
||
<tr><td><code>/=</code></td><td><code>var /= expr</code></td><td>Arithmetic division and assignment</td><td><code>DivAssign</code></td></tr>
|
||
<tr><td><code>:</code></td><td><code>pat: type</code>, <code>ident: type</code></td><td>Constraints</td><td></td></tr>
|
||
<tr><td><code>:</code></td><td><code>ident: expr</code></td><td>Struct field initializer</td><td></td></tr>
|
||
<tr><td><code>:</code></td><td><code>'a: loop {...}</code></td><td>Loop label</td><td></td></tr>
|
||
<tr><td><code>;</code></td><td><code>expr;</code></td><td>Statement and item terminator</td><td></td></tr>
|
||
<tr><td><code>;</code></td><td><code>[...; len]</code></td><td>Part of fixed-size array syntax</td><td></td></tr>
|
||
<tr><td><code><<</code></td><td><code>expr << expr</code></td><td>Left-shift</td><td><code>Shl</code></td></tr>
|
||
<tr><td><code><<=</code></td><td><code>var <<= expr</code></td><td>Left-shift and assignment</td><td><code>ShlAssign</code></td></tr>
|
||
<tr><td><code><</code></td><td><code>expr < expr</code></td><td>Less than comparison</td><td><code>PartialOrd</code></td></tr>
|
||
<tr><td><code><=</code></td><td><code>expr <= expr</code></td><td>Less than or equal to comparison</td><td><code>PartialOrd</code></td></tr>
|
||
<tr><td><code>=</code></td><td><code>var = expr</code>, <code>ident = type</code></td><td>Assignment/equivalence</td><td></td></tr>
|
||
<tr><td><code>==</code></td><td><code>expr == expr</code></td><td>Equality comparison</td><td><code>PartialEq</code></td></tr>
|
||
<tr><td><code>=></code></td><td><code>pat => expr</code></td><td>Part of match arm syntax</td><td></td></tr>
|
||
<tr><td><code>></code></td><td><code>expr > expr</code></td><td>Greater than comparison</td><td><code>PartialOrd</code></td></tr>
|
||
<tr><td><code>>=</code></td><td><code>expr >= expr</code></td><td>Greater than or equal to comparison</td><td><code>PartialOrd</code></td></tr>
|
||
<tr><td><code>>></code></td><td><code>expr >> expr</code></td><td>Right-shift</td><td><code>Shr</code></td></tr>
|
||
<tr><td><code>>>=</code></td><td><code>var >>= expr</code></td><td>Right-shift and assignment</td><td><code>ShrAssign</code></td></tr>
|
||
<tr><td><code>@</code></td><td><code>ident @ pat</code></td><td>Pattern binding</td><td></td></tr>
|
||
<tr><td><code>^</code></td><td><code>expr ^ expr</code></td><td>Bitwise exclusive OR</td><td><code>BitXor</code></td></tr>
|
||
<tr><td><code>^=</code></td><td><code>var ^= expr</code></td><td>Bitwise exclusive OR and assignment</td><td><code>BitXorAssign</code></td></tr>
|
||
<tr><td><code>|</code></td><td><code>pat | pat</code></td><td>Pattern alternatives</td><td></td></tr>
|
||
<tr><td><code>|</code></td><td><code>expr | expr</code></td><td>Bitwise OR</td><td><code>BitOr</code></td></tr>
|
||
<tr><td><code>|=</code></td><td><code>var |= expr</code></td><td>Bitwise OR and assignment</td><td><code>BitOrAssign</code></td></tr>
|
||
<tr><td><code>||</code></td><td><code>expr || expr</code></td><td>Logical OR</td><td></td></tr>
|
||
<tr><td><code>?</code></td><td><code>expr?</code></td><td>Error propagation</td><td></td></tr>
|
||
</tbody></table>
|
||
<h3><a class="header" href="#non-operator-symbols" id="non-operator-symbols">Non-operator Symbols</a></h3>
|
||
<p>The following tables contain all symbols that don’t function as operators;
|
||
that is, they don’t behave like a function or method call.</p>
|
||
<p>Table B-2 shows symbols that appear on their own and are valid in a variety of
|
||
locations.</p>
|
||
<p><span class="caption">Table B-2: Stand-Alone Syntax</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>'ident</code></td><td>Named lifetime or loop label</td></tr>
|
||
<tr><td><code>...u8</code>, <code>...i32</code>, <code>...f64</code>, <code>...usize</code>, etc.</td><td>Numeric literal of specific type</td></tr>
|
||
<tr><td><code>"..."</code></td><td>String literal</td></tr>
|
||
<tr><td><code>r"..."</code>, <code>r#"..."#</code>, <code>r##"..."##</code>, etc.</td><td>Raw string literal, escape characters not processed</td></tr>
|
||
<tr><td><code>b"..."</code></td><td>Byte string literal; constructs an array of bytes instead of a string</td></tr>
|
||
<tr><td><code>br"..."</code>, <code>br#"..."#</code>, <code>br##"..."##</code>, etc.</td><td>Raw byte string literal, combination of raw and byte string literal</td></tr>
|
||
<tr><td><code>'...'</code></td><td>Character literal</td></tr>
|
||
<tr><td><code>b'...'</code></td><td>ASCII byte literal</td></tr>
|
||
<tr><td><code>|...| expr</code></td><td>Closure</td></tr>
|
||
<tr><td><code>!</code></td><td>Always empty bottom type for diverging functions</td></tr>
|
||
<tr><td><code>_</code></td><td>“Ignored” pattern binding; also used to make integer literals readable</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-3 shows symbols that appear in the context of a path through the module
|
||
hierarchy to an item.</p>
|
||
<p><span class="caption">Table B-3: Path-Related Syntax</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>ident::ident</code></td><td>Namespace path</td></tr>
|
||
<tr><td><code>::path</code></td><td>Path relative to the crate root (i.e., an explicitly absolute path)</td></tr>
|
||
<tr><td><code>self::path</code></td><td>Path relative to the current module (i.e., an explicitly relative path)</td></tr>
|
||
<tr><td><code>super::path</code></td><td>Path relative to the parent of the current module</td></tr>
|
||
<tr><td><code>type::ident</code>, <code>&lt;type as trait&gt;::ident</code></td><td>Associated constants, functions, and types</td></tr>
|
||
<tr><td><code>&lt;type&gt;::...</code></td><td>Associated item for a type that cannot be directly named (e.g., <code>&lt;&amp;T&gt;::...</code>, <code>&lt;[T]&gt;::...</code>, etc.)</td></tr>
|
||
<tr><td><code>trait::method(...)</code></td><td>Disambiguating a method call by naming the trait that defines it</td></tr>
|
||
<tr><td><code>type::method(...)</code></td><td>Disambiguating a method call by naming the type for which it’s defined</td></tr>
|
||
<tr><td><code>&lt;type as trait&gt;::method(...)</code></td><td>Disambiguating a method call by naming the trait and type</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-4 shows symbols that appear in the context of using generic type
|
||
parameters.</p>
|
||
<p><span class="caption">Table B-4: Generics</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>path&lt;...&gt;</code></td><td>Specifies parameters to generic type in a type (e.g., <code>Vec&lt;u8&gt;</code>)</td></tr>
|
||
<tr><td><code>path::&lt;...&gt;</code>, <code>method::&lt;...&gt;</code></td><td>Specifies parameters to generic type, function, or method in an expression; often referred to as turbofish (e.g., <code>"42".parse::&lt;i32&gt;()</code>)</td></tr>
|
||
<tr><td><code>fn ident<...> ...</code></td><td>Define generic function</td></tr>
|
||
<tr><td><code>struct ident<...> ...</code></td><td>Define generic structure</td></tr>
|
||
<tr><td><code>enum ident<...> ...</code></td><td>Define generic enumeration</td></tr>
|
||
<tr><td><code>impl<...> ...</code></td><td>Define generic implementation</td></tr>
|
||
<tr><td><code>for<...> type</code></td><td>Higher-ranked lifetime bounds</td></tr>
|
||
<tr><td><code>type&lt;ident=type&gt;</code></td><td>A generic type where one or more associated types have specific assignments (e.g., <code>Iterator&lt;Item=T&gt;</code>)</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-5 shows symbols that appear in the context of constraining generic type
|
||
parameters with trait bounds.</p>
|
||
<p><span class="caption">Table B-5: Trait Bound Constraints</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>T: U</code></td><td>Generic parameter <code>T</code> constrained to types that implement <code>U</code></td></tr>
|
||
<tr><td><code>T: 'a</code></td><td>Generic type <code>T</code> must outlive lifetime <code>'a</code> (meaning the type cannot transitively contain any references with lifetimes shorter than <code>'a</code>)</td></tr>
|
||
<tr><td><code>T: 'static</code></td><td>Generic type <code>T</code> contains no borrowed references other than <code>'static</code> ones</td></tr>
|
||
<tr><td><code>'b: 'a</code></td><td>Generic lifetime <code>'b</code> must outlive lifetime <code>'a</code></td></tr>
|
||
<tr><td><code>T: ?Sized</code></td><td>Allow generic type parameter to be a dynamically sized type</td></tr>
|
||
<tr><td><code>'a + trait</code>, <code>trait + trait</code></td><td>Compound type constraint</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-6 shows symbols that appear in the context of calling or defining
|
||
macros and specifying attributes on an item.</p>
|
||
<p><span class="caption">Table B-6: Macros and Attributes</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>#[meta]</code></td><td>Outer attribute</td></tr>
|
||
<tr><td><code>#![meta]</code></td><td>Inner attribute</td></tr>
|
||
<tr><td><code>$ident</code></td><td>Macro substitution</td></tr>
|
||
<tr><td><code>$ident:kind</code></td><td>Macro capture</td></tr>
|
||
<tr><td><code>$(…)…</code></td><td>Macro repetition</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-7 shows symbols that create comments.</p>
|
||
<p><span class="caption">Table B-7: Comments</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>//</code></td><td>Line comment</td></tr>
|
||
<tr><td><code>//!</code></td><td>Inner line doc comment</td></tr>
|
||
<tr><td><code>///</code></td><td>Outer line doc comment</td></tr>
|
||
<tr><td><code>/*...*/</code></td><td>Block comment</td></tr>
|
||
<tr><td><code>/*!...*/</code></td><td>Inner block doc comment</td></tr>
|
||
<tr><td><code>/**...*/</code></td><td>Outer block doc comment</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-8 shows symbols that appear in the context of using tuples.</p>
|
||
<p><span class="caption">Table B-8: Tuples</span></p>
|
||
<table><thead><tr><th>Symbol</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>()</code></td><td>Empty tuple (aka unit), both literal and type</td></tr>
|
||
<tr><td><code>(expr)</code></td><td>Parenthesized expression</td></tr>
|
||
<tr><td><code>(expr,)</code></td><td>Single-element tuple expression</td></tr>
|
||
<tr><td><code>(type,)</code></td><td>Single-element tuple type</td></tr>
|
||
<tr><td><code>(expr, ...)</code></td><td>Tuple expression</td></tr>
|
||
<tr><td><code>(type, ...)</code></td><td>Tuple type</td></tr>
|
||
<tr><td><code>expr(expr, ...)</code></td><td>Function call expression; also used to initialize tuple <code>struct</code>s and tuple <code>enum</code> variants</td></tr>
|
||
<tr><td><code>ident!(...)</code>, <code>ident!{...}</code>, <code>ident![...]</code></td><td>Macro invocation</td></tr>
|
||
<tr><td><code>expr.0</code>, <code>expr.1</code>, etc.</td><td>Tuple indexing</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-9 shows the contexts in which curly brackets are used.</p>
|
||
<p><span class="caption">Table B-9: Curly Brackets</span></p>
|
||
<table><thead><tr><th>Context</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>{...}</code></td><td>Block expression</td></tr>
|
||
<tr><td><code>Type {...}</code></td><td><code>struct</code> literal</td></tr>
|
||
</tbody></table>
|
||
<p>Table B-10 shows the contexts in which square brackets are used.</p>
|
||
<p><span class="caption">Table B-10: Square Brackets</span></p>
|
||
<table><thead><tr><th>Context</th><th>Explanation</th></tr></thead><tbody>
|
||
<tr><td><code>[...]</code></td><td>Array literal</td></tr>
|
||
<tr><td><code>[expr; len]</code></td><td>Array literal containing <code>len</code> copies of <code>expr</code></td></tr>
|
||
<tr><td><code>[type; len]</code></td><td>Array type containing <code>len</code> instances of <code>type</code></td></tr>
|
||
<tr><td><code>expr[expr]</code></td><td>Collection indexing. Overloadable (<code>Index</code>, <code>IndexMut</code>)</td></tr>
|
||
<tr><td><code>expr[..]</code>, <code>expr[a..]</code>, <code>expr[..b]</code>, <code>expr[a..b]</code></td><td>Collection indexing pretending to be collection slicing, using <code>Range</code>, <code>RangeFrom</code>, <code>RangeTo</code>, or <code>RangeFull</code> as the “index”</td></tr>
|
||
</tbody></table>
|
||
<h2><a class="header" href="#appendix-c-derivable-traits" id="appendix-c-derivable-traits">Appendix C: Derivable Traits</a></h2>
|
||
<p>In various places in the book, we’ve discussed the <code>derive</code> attribute, which
|
||
you can apply to a struct or enum definition. The <code>derive</code> attribute generates
|
||
code that will implement a trait with its own default implementation on the
|
||
type you’ve annotated with the <code>derive</code> syntax.</p>
|
||
<p>In this appendix, we provide a reference of all the traits in the standard
|
||
library that you can use with <code>derive</code>. Each section covers:</p>
|
||
<ul>
|
||
<li>What operators and methods deriving this trait will enable</li>
|
||
<li>What the implementation of the trait provided by <code>derive</code> does</li>
|
||
<li>What implementing the trait signifies about the type</li>
|
||
<li>The conditions in which you’re allowed or not allowed to implement the trait</li>
|
||
<li>Examples of operations that require the trait</li>
|
||
</ul>
|
||
<p>If you want different behavior from that provided by the <code>derive</code> attribute,
|
||
consult the <a href="../std/index.html">standard library documentation</a><!-- ignore -->
|
||
for each trait for details of how to manually implement them.</p>
|
||
<p>The rest of the traits defined in the standard library can’t be implemented on
|
||
your types using <code>derive</code>. These traits don’t have sensible default behavior,
|
||
so it’s up to you to implement them in the way that makes sense for what you’re
|
||
trying to accomplish.</p>
|
||
<p>An example of a trait that can’t be derived is <code>Display</code>, which handles
|
||
formatting for end users. You should always consider the appropriate way to
|
||
display a type to an end user. What parts of the type should an end user be
|
||
allowed to see? What parts would they find relevant? What format of the data
|
||
would be most relevant to them? The Rust compiler doesn’t have this insight, so
|
||
it can’t provide appropriate default behavior for you.</p>
|
||
<p>The list of derivable traits provided in this appendix is not comprehensive:
|
||
libraries can implement <code>derive</code> for their own traits, making the list of
|
||
traits you can use <code>derive</code> with truly open-ended. Implementing <code>derive</code>
|
||
involves using a procedural macro, which is covered in the
|
||
<a href="ch19-06-macros.html#macros">“Macros”</a><!-- ignore --> section of Chapter 19.</p>
|
||
<h3><a class="header" href="#debug-for-programmer-output" id="debug-for-programmer-output"><code>Debug</code> for Programmer Output</a></h3>
|
||
<p>The <code>Debug</code> trait enables debug formatting in format strings, which you
|
||
indicate by adding <code>:?</code> within <code>{}</code> placeholders.</p>
|
||
<p>The <code>Debug</code> trait allows you to print instances of a type for debugging
|
||
purposes, so you and other programmers using your type can inspect an instance
|
||
at a particular point in a program’s execution.</p>
|
||
<p>The <code>Debug</code> trait is required, for example, when using the <code>assert_eq!</code> macro.
|
||
This macro prints the values of instances given as arguments if the equality
|
||
assertion fails so programmers can see why the two instances weren’t equal.</p>
|
||
<h3><a class="header" href="#partialeq-and-eq-for-equality-comparisons" id="partialeq-and-eq-for-equality-comparisons"><code>PartialEq</code> and <code>Eq</code> for Equality Comparisons</a></h3>
|
||
<p>The <code>PartialEq</code> trait allows you to compare instances of a type to check for
|
||
equality and enables use of the <code>==</code> and <code>!=</code> operators.</p>
|
||
<p>Deriving <code>PartialEq</code> implements the <code>eq</code> method. When <code>PartialEq</code> is derived on
|
||
structs, two instances are equal only if <em>all</em> fields are equal, and the
|
||
instances are not equal if any fields are not equal. When derived on enums,
|
||
each variant is equal to itself and not equal to the other variants.</p>
|
||
<p>The <code>PartialEq</code> trait is required, for example, with the use of the
|
||
<code>assert_eq!</code> macro, which needs to be able to compare two instances of a type
|
||
for equality.</p>
|
||
<p>The <code>Eq</code> trait has no methods. Its purpose is to signal that for every value of
|
||
the annotated type, the value is equal to itself. The <code>Eq</code> trait can only be
|
||
applied to types that also implement <code>PartialEq</code>, although not all types that
|
||
implement <code>PartialEq</code> can implement <code>Eq</code>. One example of this is floating point
|
||
number types: the implementation of floating point numbers states that two
|
||
instances of the not-a-number (<code>NaN</code>) value are not equal to each other.</p>
|
||
<p>An example of when <code>Eq</code> is required is for keys in a <code>HashMap&lt;K, V&gt;</code> so the
|
||
<code>HashMap&lt;K, V&gt;</code> can tell whether two keys are the same.</p>
|
||
<h3><a class="header" href="#partialord-and-ord-for-ordering-comparisons" id="partialord-and-ord-for-ordering-comparisons"><code>PartialOrd</code> and <code>Ord</code> for Ordering Comparisons</a></h3>
|
||
<p>The <code>PartialOrd</code> trait allows you to compare instances of a type for sorting
|
||
purposes. A type that implements <code>PartialOrd</code> can be used with the <code><</code>, <code>></code>,
|
||
<code><=</code>, and <code>>=</code> operators. You can only apply the <code>PartialOrd</code> trait to types
|
||
that also implement <code>PartialEq</code>.</p>
|
||
<p>Deriving <code>PartialOrd</code> implements the <code>partial_cmp</code> method, which returns an
|
||
<code>Option&lt;Ordering&gt;</code> that will be <code>None</code> when the values given don’t produce an
|
||
ordering. An example of a value that doesn’t produce an ordering, even though
|
||
most values of that type can be compared, is the not-a-number (<code>NaN</code>) floating
|
||
point value. Calling <code>partial_cmp</code> with any floating point number and the <code>NaN</code>
|
||
floating point value will return <code>None</code>.</p>
|
||
<p>When derived on structs, <code>PartialOrd</code> compares two instances by comparing the
|
||
value in each field in the order in which the fields appear in the struct
|
||
definition. When derived on enums, variants of the enum declared earlier in the
|
||
enum definition are considered less than the variants listed later.</p>
|
||
<p>The <code>PartialOrd</code> trait is required, for example, for the <code>gen_range</code> method
|
||
from the <code>rand</code> crate that generates a random value in the range specified by a
|
||
low value and a high value.</p>
|
||
<p>The <code>Ord</code> trait allows you to know that for any two values of the annotated
|
||
type, a valid ordering will exist. Deriving <code>Ord</code> implements the <code>cmp</code> method,
|
||
which returns an <code>Ordering</code> rather than an <code>Option&lt;Ordering&gt;</code> because a valid
|
||
ordering will always be possible. You can only apply the <code>Ord</code> trait to types
|
||
that also implement <code>PartialOrd</code> and <code>Eq</code> (and <code>Eq</code> requires <code>PartialEq</code>). When
|
||
derived on structs and enums, <code>cmp</code> behaves the same way as the derived
|
||
implementation for <code>partial_cmp</code> does with <code>PartialOrd</code>.</p>
|
||
<p>An example of when <code>Ord</code> is required is when storing values in a <code>BTreeSet&lt;T&gt;</code>,
|
||
a data structure that stores data based on the sort order of the values.</p>
|
||
<h3><a class="header" href="#clone-and-copy-for-duplicating-values" id="clone-and-copy-for-duplicating-values"><code>Clone</code> and <code>Copy</code> for Duplicating Values</a></h3>
|
||
<p>The <code>Clone</code> trait allows you to explicitly create a deep copy of a value, and
|
||
the duplication process might involve running arbitrary code and copying heap
|
||
data. See the <a href="ch04-01-what-is-ownership.html#ways-variables-and-data-interact-clone">“Ways Variables and Data Interact:
|
||
Clone”</a><!-- ignore --> section in
|
||
Chapter 4 for more information on <code>Clone</code>.</p>
|
||
<p>Deriving <code>Clone</code> implements the <code>clone</code> method, which, when implemented for the
|
||
whole type, calls <code>clone</code> on each of the parts of the type. This means all the
|
||
fields or values in the type must also implement <code>Clone</code> to derive <code>Clone</code>.</p>
|
||
<p>An example of when <code>Clone</code> is required is when calling the <code>to_vec</code> method on a
|
||
slice. The slice doesn’t own the type instances it contains, but the vector
|
||
returned from <code>to_vec</code> will need to own its instances, so <code>to_vec</code> calls
|
||
<code>clone</code> on each item. Thus, the type stored in the slice must implement <code>Clone</code>.</p>
|
||
<p>The <code>Copy</code> trait allows you to duplicate a value by only copying bits stored on
|
||
the stack; no arbitrary code is necessary. See the <a href="ch04-01-what-is-ownership.html#stack-only-data-copy">“Stack-Only Data:
|
||
Copy”</a><!-- ignore --> section in Chapter 4 for more
|
||
information on <code>Copy</code>.</p>
|
||
<p>The <code>Copy</code> trait doesn’t define any methods, which prevents programmers from
|
||
overloading those methods and violating the assumption that no arbitrary code
|
||
is being run. That way, all programmers can assume that copying a value will be
|
||
very fast.</p>
|
||
<p>You can derive <code>Copy</code> on any type whose parts all implement <code>Copy</code>. You can
|
||
only apply the <code>Copy</code> trait to types that also implement <code>Clone</code>, because a
|
||
type that implements <code>Copy</code> has a trivial implementation of <code>Clone</code> that
|
||
performs the same task as <code>Copy</code>.</p>
|
||
<p>The <code>Copy</code> trait is rarely required; types that implement <code>Copy</code> have
|
||
optimizations available, meaning you don’t have to call <code>clone</code>, which makes
|
||
the code more concise.</p>
|
||
<p>Everything possible with <code>Copy</code> you can also accomplish with <code>Clone</code>, but the
|
||
code might be slower or have to use <code>clone</code> in places.</p>
|
||
<h3><a class="header" href="#hash-for-mapping-a-value-to-a-value-of-fixed-size" id="hash-for-mapping-a-value-to-a-value-of-fixed-size"><code>Hash</code> for Mapping a Value to a Value of Fixed Size</a></h3>
|
||
<p>The <code>Hash</code> trait allows you to take an instance of a type of arbitrary size and
|
||
map that instance to a value of fixed size using a hash function. Deriving
|
||
<code>Hash</code> implements the <code>hash</code> method. The derived implementation of the <code>hash</code>
|
||
method combines the result of calling <code>hash</code> on each of the parts of the type,
|
||
meaning all fields or values must also implement <code>Hash</code> to derive <code>Hash</code>.</p>
|
||
<p>An example of when <code>Hash</code> is required is in storing keys in a <code>HashMap&lt;K, V&gt;</code>
|
||
to store data efficiently.</p>
|
||
<h3><a class="header" href="#default-for-default-values" id="default-for-default-values"><code>Default</code> for Default Values</a></h3>
|
||
<p>The <code>Default</code> trait allows you to create a default value for a type. Deriving
|
||
<code>Default</code> implements the <code>default</code> function. The derived implementation of the
|
||
<code>default</code> function calls the <code>default</code> function on each part of the type,
|
||
meaning all fields or values in the type must also implement <code>Default</code> to
|
||
derive <code>Default</code>.</p>
|
||
<p>The <code>Default::default</code> function is commonly used in combination with the struct
|
||
update syntax discussed in the <a href="ch05-01-defining-structs.html#creating-instances-from-other-instances-with-struct-update-syntax">“Creating Instances From Other Instances With
|
||
Struct Update
|
||
Syntax”</a><!-- ignore -->
|
||
section in Chapter 5. You can customize a few fields of a struct and then
|
||
set and use a default value for the rest of the fields by using
|
||
<code>..Default::default()</code>.</p>
|
||
<p>The <code>Default</code> trait is required when you use the method <code>unwrap_or_default</code> on
|
||
<code>Option&lt;T&gt;</code> instances, for example. If the <code>Option&lt;T&gt;</code> is <code>None</code>, the method
|
||
<code>unwrap_or_default</code> will return the result of <code>Default::default</code> for the type
|
||
<code>T</code> stored in the <code>Option&lt;T&gt;</code>.</p>
|
||
<h2><a class="header" href="#appendix-d---useful-development-tools" id="appendix-d---useful-development-tools">Appendix D: Useful Development Tools</a></h2>
|
||
<p>In this appendix, we talk about some useful development tools that the Rust
|
||
project provides. We’ll look at automatic formatting, quick ways to apply
|
||
warning fixes, a linter, and integrating with IDEs.</p>
|
||
<h3><a class="header" href="#automatic-formatting-with-rustfmt" id="automatic-formatting-with-rustfmt">Automatic Formatting with <code>rustfmt</code></a></h3>
|
||
<p>The <code>rustfmt</code> tool reformats your code according to the community code style.
|
||
Many collaborative projects use <code>rustfmt</code> to prevent arguments about which
|
||
style to use when writing Rust: everyone formats their code using the tool.</p>
|
||
<p>To install <code>rustfmt</code>, enter the following:</p>
|
||
<pre><code class="language-text">$ rustup component add rustfmt
|
||
</code></pre>
|
||
<p>This command gives you <code>rustfmt</code> and <code>cargo-fmt</code>, similar to how Rust gives you
|
||
both <code>rustc</code> and <code>cargo</code>. To format any Cargo project, enter the following:</p>
|
||
<pre><code class="language-text">$ cargo fmt
|
||
</code></pre>
|
||
<p>Running this command reformats all the Rust code in the current crate. This
|
||
should only change the code style, not the code semantics. For more information
|
||
on <code>rustfmt</code>, see <a href="https://github.com/rust-lang/rustfmt">its documentation</a>.</p>
|
||
<h3><a class="header" href="#fix-your-code-with-rustfix" id="fix-your-code-with-rustfix">Fix Your Code with <code>rustfix</code></a></h3>
|
||
<p>The <code>rustfix</code> tool is included with Rust installations and can automatically fix
|
||
some compiler warnings. If you’ve written code in Rust, you’ve probably seen
|
||
compiler warnings. For example, consider this code:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn do_something() {}
|
||
|
||
fn main() {
|
||
for i in 0..100 {
|
||
do_something();
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>Here, we’re calling the <code>do_something</code> function 100 times, but we never use the
|
||
variable <code>i</code> in the body of the <code>for</code> loop. Rust warns us about that:</p>
|
||
<pre><code class="language-text">$ cargo build
|
||
Compiling myprogram v0.1.0 (file:///projects/myprogram)
|
||
warning: unused variable: `i`
|
||
--> src/main.rs:4:9
|
||
|
|
||
4 | for i in 0..100 {
|
||
| ^ help: consider using `_i` instead
|
||
|
|
||
= note: #[warn(unused_variables)] on by default
|
||
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.50s
|
||
</code></pre>
|
||
<p>The warning suggests that we use <code>_i</code> as a name instead: the underscore
|
||
indicates that we intend for this variable to be unused. We can automatically
|
||
apply that suggestion using the <code>rustfix</code> tool by running the command <code>cargo fix</code>:</p>
|
||
<pre><code class="language-text">$ cargo fix
|
||
Checking myprogram v0.1.0 (file:///projects/myprogram)
|
||
Fixing src/main.rs (1 fix)
|
||
Finished dev [unoptimized + debuginfo] target(s) in 0.59s
|
||
</code></pre>
|
||
<p>When we look at <em>src/main.rs</em> again, we’ll see that <code>cargo fix</code> has changed the
|
||
code:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn do_something() {}
|
||
|
||
fn main() {
|
||
for _i in 0..100 {
|
||
do_something();
|
||
}
|
||
}
|
||
</code></pre></pre>
|
||
<p>The <code>for</code> loop variable is now named <code>_i</code>, and the warning no longer appears.</p>
|
||
<p>You can also use the <code>cargo fix</code> command to transition your code between
|
||
different Rust editions. Editions are covered in Appendix E.</p>
|
||
<h3><a class="header" href="#more-lints-with-clippy" id="more-lints-with-clippy">More Lints with Clippy</a></h3>
|
||
<p>The Clippy tool is a collection of lints to analyze your code so you can catch
|
||
common mistakes and improve your Rust code.</p>
|
||
<p>To install Clippy, enter the following:</p>
|
||
<pre><code class="language-text">$ rustup component add clippy
|
||
</code></pre>
|
||
<p>To run Clippy’s lints on any Cargo project, enter the following:</p>
|
||
<pre><code class="language-text">$ cargo clippy
|
||
</code></pre>
|
||
<p>For example, say you write a program that uses an approximation of a
|
||
mathematical constant, such as pi, as this program does:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = 3.1415;
|
||
let r = 8.0;
|
||
println!("the area of the circle is {}", x * r * r);
|
||
}
|
||
</code></pre></pre>
|
||
<p>Running <code>cargo clippy</code> on this project results in this error:</p>
|
||
<pre><code class="language-text">error: approximate value of `f{32, 64}::consts::PI` found. Consider using it directly
|
||
--> src/main.rs:2:13
|
||
|
|
||
2 | let x = 3.1415;
|
||
| ^^^^^^
|
||
|
|
||
= note: #[deny(clippy::approx_constant)] on by default
|
||
= help: for further information visit https://rust-lang-nursery.github.io/rust-clippy/master/index.html#approx_constant
|
||
</code></pre>
|
||
<p>This error lets you know that Rust has this constant defined more precisely and
|
||
that your program would be more correct if you used the constant instead. You
|
||
would then change your code to use the <code>PI</code> constant. The following code
|
||
doesn’t result in any errors or warnings from Clippy:</p>
|
||
<p><span class="filename">Filename: src/main.rs</span></p>
|
||
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
||
let x = std::f64::consts::PI;
|
||
let r = 8.0;
|
||
println!("the area of the circle is {}", x * r * r);
|
||
}
|
||
</code></pre></pre>
|
||
<p>For more information on Clippy, see <a href="https://github.com/rust-lang/rust-clippy">its documentation</a>.</p>
|
||
<h3><a class="header" href="#ide-integration-using-the-rust-language-server" id="ide-integration-using-the-rust-language-server">IDE Integration Using the Rust Language Server</a></h3>
|
||
<p>To help IDE integration, the Rust project distributes the <em>Rust Language
|
||
Server</em> (<code>rls</code>). This tool speaks the <a href="https://langserver.org/">Language Server
|
||
Protocol</a>, which is a specification for IDEs and programming
|
||
languages to communicate with each other. Different clients can use the <code>rls</code>,
|
||
such as <a href="https://marketplace.visualstudio.com/items?itemName=rust-lang.rust">the Rust plug-in for Visual Studio Code</a>.</p>
|
||
<p>To install the <code>rls</code>, enter the following:</p>
|
||
<pre><code class="language-text">$ rustup component add rls
|
||
</code></pre>
|
||
<p>Then install the language server support in your particular IDE; you’ll gain
|
||
abilities such as autocompletion, jump to definition, and inline errors.</p>
|
||
<p>For more information on the <code>rls</code>, see <a href="https://github.com/rust-lang/rls">its documentation</a>.</p>
|
||
<h2><a class="header" href="#appendix-e---editions" id="appendix-e---editions">Appendix E - Editions</a></h2>
|
||
<p>In Chapter 1, you saw that <code>cargo new</code> adds a bit of metadata to your
|
||
<em>Cargo.toml</em> file about an edition. This appendix talks about what that means!</p>
|
||
<p>The Rust language and compiler have a six-week release cycle, meaning users get
|
||
a constant stream of new features. Other programming languages release larger
|
||
changes less often; Rust releases smaller updates more frequently. After a
|
||
while, all of these tiny changes add up. But from release to release, it can be
|
||
difficult to look back and say, “Wow, between Rust 1.10 and Rust 1.31, Rust has
|
||
changed a lot!”</p>
|
||
<p>Every two or three years, the Rust team produces a new Rust <em>edition</em>. Each
|
||
edition brings together the features that have landed into a clear package with
|
||
fully updated documentation and tooling. New editions ship as part of the usual
|
||
six-week release process.</p>
|
||
<p>Editions serve different purposes for different people:</p>
|
||
<ul>
|
||
<li>For active Rust users, a new edition brings together incremental changes into
|
||
an easy-to-understand package.</li>
|
||
<li>For non-users, a new edition signals that some major advancements have
|
||
landed, which might make Rust worth another look.</li>
|
||
<li>For those developing Rust, a new edition provides a rallying point for the
|
||
project as a whole.</li>
|
||
</ul>
|
||
<p>At the time of this writing, two Rust editions are available: Rust 2015 and
|
||
Rust 2018. This book is written using Rust 2018 edition idioms.</p>
|
||
<p>The <code>edition</code> key in <em>Cargo.toml</em> indicates which edition the compiler should
|
||
use for your code. If the key doesn’t exist, Rust uses <code>2015</code> as the edition
|
||
value for backward compatibility reasons.</p>
|
||
<p>Each project can opt in to an edition other than the default 2015 edition.
|
||
Editions can contain incompatible changes, such as including a new keyword that
|
||
conflicts with identifiers in code. However, unless you opt in to those
|
||
changes, your code will continue to compile even as you upgrade the Rust
|
||
compiler version you use.</p>
|
||
<p>All Rust compiler versions support any edition that existed prior to that
|
||
compiler’s release, and they can link crates of any supported editions
|
||
together. Edition changes only affect the way the compiler initially parses
|
||
code. Therefore, if you’re using Rust 2015 and one of your dependencies uses
|
||
Rust 2018, your project will compile and be able to use that dependency. The
|
||
opposite situation, where your project uses Rust 2018 and a dependency uses
|
||
Rust 2015, works as well.</p>
|
||
<p>To be clear: most features will be available on all editions. Developers using
|
||
any Rust edition will continue to see improvements as new stable releases are
|
||
made. However, in some cases, mainly when new keywords are added, some new
|
||
features might only be available in later editions. You will need to switch
|
||
editions if you want to take advantage of such features.</p>
|
||
<p>For more details, the <a href="https://doc.rust-lang.org/stable/edition-guide/"><em>Edition
|
||
Guide</em></a> is a complete book
|
||
about editions that enumerates the differences between editions and explains
|
||
how to automatically upgrade your code to a new edition via <code>cargo fix</code>.</p>
|
||
<h2><a class="header" href="#appendix-f-translations-of-the-book" id="appendix-f-translations-of-the-book">Appendix F: Translations of the Book</a></h2>
|
||
<p>This appendix lists resources in languages other than English. Most are still in progress; see
|
||
<a href="https://github.com/rust-lang/book/issues?q=is%3Aopen+is%3Aissue+label%3ATranslations">the Translations label</a> to help or let us know about a new translation!</p>
|
||
<ul>
|
||
<li><a href="https://github.com/rust-br/rust-book-pt-br">Português</a> (BR)</li>
|
||
<li><a href="https://github.com/nunojesus/rust-book-pt-pt">Português</a> (PT)</li>
|
||
<li><a href="https://github.com/KaiserY/trpl-zh-cn">简体中文</a></li>
|
||
<li><a href="https://github.com/pavloslav/rust-book-uk-ua">Українська</a></li>
|
||
<li><a href="https://github.com/thecodix/book">Español</a>, <a href="https://github.com/ManRR/rust-book-es">alternate</a></li>
|
||
<li><a href="https://github.com/AgeOfWar/rust-book-it">Italiano</a></li>
|
||
<li><a href="https://github.com/ruRust/rust_book_2ed">Русский</a></li>
|
||
<li><a href="https://github.com/rinthel/rust-lang-book-ko">한국어</a></li>
|
||
<li><a href="https://github.com/hazama-yuinyan/book">日本語</a></li>
|
||
<li><a href="https://github.com/quadrifoglio/rust-book-fr">Français</a></li>
|
||
<li><a href="https://github.com/paytchoo/book-pl">Polski</a></li>
|
||
<li><a href="https://github.com/idanmel/rust-book-heb">עברית</a></li>
|
||
<li><a href="https://github.com/agentzero1/book">Cebuano</a></li>
|
||
<li><a href="https://github.com/josephace135/book">Tagalog</a></li>
|
||
<li><a href="https://github.com/psychoslave/Rust-libro">Esperanto</a></li>
|
||
<li><a href="https://github.com/TChatzigiannakis/rust-book-greek">ελληνική</a></li>
|
||
<li><a href="https://github.com/sebras/book">Svenska</a></li>
|
||
</ul>
|
||
<h2><a class="header" href="#appendix-g---how-rust-is-made-and-nightly-rust" id="appendix-g---how-rust-is-made-and-nightly-rust">Appendix G - How Rust is Made and “Nightly Rust”</a></h2>
|
||
<p>This appendix is about how Rust is made and how that affects you as a Rust
|
||
developer.</p>
|
||
<h3><a class="header" href="#stability-without-stagnation" id="stability-without-stagnation">Stability Without Stagnation</a></h3>
|
||
<p>As a language, Rust cares a <em>lot</em> about the stability of your code. We want
|
||
Rust to be a rock-solid foundation you can build on, and if things were
|
||
constantly changing, that would be impossible. At the same time, if we can’t
|
||
experiment with new features, we may not find out important flaws until after
|
||
their release, when we can no longer change things.</p>
|
||
<p>Our solution to this problem is what we call “stability without stagnation”,
|
||
and our guiding principle is this: you should never have to fear upgrading to a
|
||
new version of stable Rust. Each upgrade should be painless, but should also
|
||
bring you new features, fewer bugs, and faster compile times.</p>
|
||
<h3><a class="header" href="#choo-choo-release-channels-and-riding-the-trains" id="choo-choo-release-channels-and-riding-the-trains">Choo, Choo! Release Channels and Riding the Trains</a></h3>
|
||
<p>Rust development operates on a <em>train schedule</em>. That is, all development is
|
||
done on the <code>master</code> branch of the Rust repository. Releases follow a software
|
||
release train model, which has been used by Cisco IOS and other software
|
||
projects. There are three <em>release channels</em> for Rust:</p>
|
||
<ul>
|
||
<li>Nightly</li>
|
||
<li>Beta</li>
|
||
<li>Stable</li>
|
||
</ul>
|
||
<p>Most Rust developers primarily use the stable channel, but those who want to
|
||
try out experimental new features may use nightly or beta.</p>
|
||
<p>Here’s an example of how the development and release process works: let’s
|
||
assume that the Rust team is working on the release of Rust 1.5. That release
|
||
happened in December of 2015, but it will provide us with realistic version
|
||
numbers. A new feature is added to Rust: a new commit lands on the <code>master</code>
|
||
branch. Each night, a new nightly version of Rust is produced. Every day is a
|
||
release day, and these releases are created by our release infrastructure
|
||
automatically. So as time passes, our releases look like this, once a night:</p>
|
||
<pre><code class="language-text">nightly: * - - * - - *
|
||
</code></pre>
|
||
<p>Every six weeks, it’s time to prepare a new release! The <code>beta</code> branch of the
|
||
Rust repository branches off from the <code>master</code> branch used by nightly. Now,
|
||
there are two releases:</p>
|
||
<pre><code class="language-text">nightly: * - - * - - *
|
||
|
|
||
beta: *
|
||
</code></pre>
|
||
<p>Most Rust users do not use beta releases actively, but test against beta in
|
||
their CI system to help Rust discover possible regressions. In the meantime,
|
||
there’s still a nightly release every night:</p>
|
||
<pre><code class="language-text">nightly: * - - * - - * - - * - - *
|
||
|
|
||
beta: *
|
||
</code></pre>
|
||
<p>Let’s say a regression is found. Good thing we had some time to test the beta
|
||
release before the regression snuck into a stable release! The fix is applied
|
||
to <code>master</code>, so that nightly is fixed, and then the fix is backported to the
|
||
<code>beta</code> branch, and a new release of beta is produced:</p>
|
||
<pre><code class="language-text">nightly: * - - * - - * - - * - - * - - *
|
||
|
|
||
beta: * - - - - - - - - *
|
||
</code></pre>
|
||
<p>Six weeks after the first beta was created, it’s time for a stable release! The
|
||
<code>stable</code> branch is produced from the <code>beta</code> branch:</p>
|
||
<pre><code class="language-text">nightly: * - - * - - * - - * - - * - - * - * - *
|
||
|
|
||
beta: * - - - - - - - - *
|
||
|
|
||
stable: *
|
||
</code></pre>
|
||
<p>Hooray! Rust 1.5 is done! However, we’ve forgotten one thing: because the six
|
||
weeks have gone by, we also need a new beta of the <em>next</em> version of Rust, 1.6.
|
||
So after <code>stable</code> branches off of <code>beta</code>, the next version of <code>beta</code> branches
|
||
off of <code>nightly</code> again:</p>
|
||
<pre><code class="language-text">nightly: * - - * - - * - - * - - * - - * - * - *
|
||
| |
|
||
beta: * - - - - - - - - * *
|
||
|
|
||
stable: *
|
||
</code></pre>
|
||
<p>This is called the “train model” because every six weeks, a release “leaves the
|
||
station”, but still has to take a journey through the beta channel before it
|
||
arrives as a stable release.</p>
|
||
<p>Rust releases every six weeks, like clockwork. If you know the date of one Rust
|
||
release, you can know the date of the next one: it’s six weeks later. A nice
|
||
aspect of having releases scheduled every six weeks is that the next train is
|
||
coming soon. If a feature happens to miss a particular release, there’s no need
|
||
to worry: another one is happening in a short time! This helps reduce pressure
|
||
to sneak possibly unpolished features in close to the release deadline.</p>
|
||
<p>Thanks to this process, you can always check out the next build of Rust and
|
||
verify for yourself that it’s easy to upgrade to: if a beta release doesn’t
|
||
work as expected, you can report it to the team and get it fixed before the
|
||
next stable release happens! Breakage in a beta release is relatively rare, but
|
||
<code>rustc</code> is still a piece of software, and bugs do exist.</p>
|
||
<h3><a class="header" href="#unstable-features" id="unstable-features">Unstable Features</a></h3>
|
||
<p>There’s one more catch with this release model: unstable features. Rust uses a
|
||
technique called “feature flags” to determine what features are enabled in a
|
||
given release. If a new feature is under active development, it lands on
|
||
<code>master</code>, and therefore, in nightly, but behind a <em>feature flag</em>. If you, as a
|
||
user, wish to try out the work-in-progress feature, you can, but you must be
|
||
using a nightly release of Rust and annotate your source code with the
|
||
appropriate flag to opt in.</p>
|
||
<p>If you’re using a beta or stable release of Rust, you can’t use any feature
|
||
flags. This is the key that allows us to get practical use with new features
|
||
before we declare them stable forever. Those who wish to opt into the bleeding
|
||
edge can do so, and those who want a rock-solid experience can stick with
|
||
stable and know that their code won’t break. Stability without stagnation.</p>
|
||
<p>This book only contains information about stable features, as in-progress
|
||
features are still changing, and surely they’ll be different between when this
|
||
book was written and when they get enabled in stable builds. You can find
|
||
documentation for nightly-only features online.</p>
|
||
<h3><a class="header" href="#rustup-and-the-role-of-rust-nightly" id="rustup-and-the-role-of-rust-nightly">Rustup and the Role of Rust Nightly</a></h3>
|
||
<p>Rustup makes it easy to change between different release channels of Rust, on a
|
||
global or per-project basis. By default, you’ll have stable Rust installed. To
|
||
install nightly, for example:</p>
|
||
<pre><code class="language-text">$ rustup install nightly
|
||
</code></pre>
|
||
<p>You can see all of the <em>toolchains</em> (releases of Rust and associated
|
||
components) you have installed with <code>rustup</code> as well. Here’s an example on one
|
||
of your authors’ Windows computer:</p>
|
||
<pre><code class="language-powershell">> rustup toolchain list
|
||
stable-x86_64-pc-windows-msvc (default)
|
||
beta-x86_64-pc-windows-msvc
|
||
nightly-x86_64-pc-windows-msvc
|
||
</code></pre>
|
||
<p>As you can see, the stable toolchain is the default. Most Rust users use stable
|
||
most of the time. You might want to use stable most of the time, but use
|
||
nightly on a specific project, because you care about a cutting-edge feature.
|
||
To do so, you can use <code>rustup override</code> in that project’s directory to set the
|
||
nightly toolchain as the one <code>rustup</code> should use when you’re in that directory:</p>
|
||
<pre><code class="language-text">$ cd ~/projects/needs-nightly
|
||
$ rustup override set nightly
|
||
</code></pre>
|
||
<p>Now, every time you call <code>rustc</code> or <code>cargo</code> inside of
|
||
<em>~/projects/needs-nightly</em>, <code>rustup</code> will make sure that you are using nightly
|
||
Rust, rather than your default of stable Rust. This comes in handy when you
|
||
have a lot of Rust projects!</p>
|
||
<h3><a class="header" href="#the-rfc-process-and-teams" id="the-rfc-process-and-teams">The RFC Process and Teams</a></h3>
|
||
<p>So how do you learn about these new features? Rust’s development model follows
|
||
a <em>Request For Comments (RFC) process</em>. If you’d like an improvement in Rust,
|
||
you can write up a proposal, called an RFC.</p>
|
||
<p>Anyone can write RFCs to improve Rust, and the proposals are reviewed and
|
||
discussed by the Rust team, which is composed of many topic subteams. There’s
|
||
a full list of the teams <a href="https://www.rust-lang.org/governance">on Rust’s
|
||
website</a>, which includes teams for
|
||
each area of the project: language design, compiler implementation,
|
||
infrastructure, documentation, and more. The appropriate team reads the
|
||
proposal and the comments, writes some comments of their own, and eventually,
|
||
there’s consensus to accept or reject the feature.</p>
|
||
<p>If the feature is accepted, an issue is opened on the Rust repository, and
|
||
someone can implement it. The person who implements it very well may not be the
|
||
person who proposed the feature in the first place! When the implementation is
|
||
ready, it lands on the <code>master</code> branch behind a feature gate, as we discussed
|
||
in the <a href="appendix-07-nightly-rust.html#unstable-features">“Unstable Features”</a><!-- ignore --> section.</p>
|
||
<p>After some time, once Rust developers who use nightly releases have been able
|
||
to try out the new feature, team members will discuss the feature, how it’s
|
||
worked out on nightly, and decide if it should make it into stable Rust or not.
|
||
If the decision is to move forward, the feature gate is removed, and the
|
||
feature is now considered stable! It rides the trains into a new stable release
|
||
of Rust.</p>
|
||
|
||
</main>
|
||
|
||
<nav class="nav-wrapper" aria-label="Page navigation">
|
||
<!-- Mobile navigation buttons -->
|
||
|
||
|
||
|
||
|
||
<div style="clear: both"></div>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
|
||
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
||
|
||
|
||
|
||
</nav>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<script type="text/javascript">
|
||
window.playpen_copyable = true;
|
||
</script>
|
||
|
||
|
||
|
||
|
||
|
||
<script src="elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="mark.min.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="searcher.js" type="text/javascript" charset="utf-8"></script>
|
||
|
||
|
||
<script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="highlight.js" type="text/javascript" charset="utf-8"></script>
|
||
<script src="book.js" type="text/javascript" charset="utf-8"></script>
|
||
|
||
<!-- Custom JS scripts -->
|
||
|
||
<script type="text/javascript" src="ferris.js"></script>
|
||
|
||
|
||
|
||
|
||
<script type="text/javascript">
|
||
window.addEventListener('load', function() {
|
||
window.setTimeout(window.print, 100);
|
||
});
|
||
</script>
|
||
|
||
|
||
|
||
</body>
|
||
</html>
|