hadley
diff --git a/‎.Rbuildignore
Lines changed: 2 additions & 0 deletions b/‎.Rbuildignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎.vscode/extensions.json
Lines changed: 5 additions & 0 deletions b/‎.vscode/extensions.json
Lines changed: 5 additions & 0 deletions
diff --git a/‎.vscode/settings.json
Lines changed: 8 additions & 0 deletions b/‎.vscode/settings.json
Lines changed: 8 additions & 0 deletions
diff --git a/‎EDA.qmd
Lines changed: 1 addition & 27 deletions b/‎EDA.qmd
Lines changed: 1 addition & 27 deletions
diff --git a/‎_common.R
Lines changed: 11 additions & 5 deletions b/‎_common.R
Lines changed: 11 additions & 5 deletions
diff --git a/‎_quarto.yml
Lines changed: 1 addition & 2 deletions b/‎_quarto.yml
Lines changed: 1 addition & 2 deletions
diff --git a/‎air.toml b/‎air.toml
diff --git a/‎base-R.qmd
Lines changed: 1 addition & 12 deletions b/‎base-R.qmd
Lines changed: 1 addition & 12 deletions
@@ -3,3 +3,5 @@
 ^\.travis\.yml$
 ^\.github$
 ^CODE_OF_CONDUCT\.md$
+^[\.]?air\.toml$
+^\.vscode$
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "Posit.air-vscode"
+    ]
+}
@@ -0,0 +1,8 @@
+{
+    "[r]": {
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "Posit.air-vscode"
+    },
+    "editor.defaultFormatter": "Posit.air-vscode",
+    "quarto.visualEditor.markdownWrap": "sentence"
+}
@@ -2,7 +2,6 @@
 
 ```{r}
 #| echo: false
-
 source("_common.R")
 ```
 
@@ -35,7 +34,6 @@ In this chapter we'll combine what you've learned about dplyr and ggplot2 to int
 ```{r}
 #| label: setup
 #| message: false
-
 library(tidyverse)
 ```
 
@@ -87,7 +85,6 @@ Since `carat` is a numerical variable, we can use a histogram:
 #|   the bin centered at 0.5, approximately 15000 diamonds in the bin centered 
 #|   at 1, and much fewer, approximately 5000 diamonds in the bin centered at 
 #|   1.5. Beyond this, there's a trailing tail.
-
 ggplot(diamonds, aes(x = carat)) +
   geom_histogram(binwidth = 0.5)
 ```
@@ -122,7 +119,6 @@ Let's take a look at the distribution of `carat` for smaller diamonds.
 #|   (0.01), resulting in a very large number of skinny bars. The distribution 
 #|   is right skewed, with many peaks followed by bars in decreasing heights, 
 #|   until a sharp increase at the next peak.
-
 smaller <- diamonds |> 
   filter(carat < 3)
 
@@ -164,7 +160,6 @@ The only evidence of outliers is the unusually wide limits on the x-axis.
 #|   A histogram of lengths of diamonds. The x-axis ranges from 0 to 60 and 
 #|   the y-axis ranges from 0 to 12000. There is a peak around 5, and the 
 #|   data appear to be completely clustered around the peak.
-
 ggplot(diamonds, aes(x = y)) + 
   geom_histogram(binwidth = 0.5)
 ```
@@ -179,7 +174,6 @@ To make it easy to see the unusual values, we need to zoom to small values of th
 #|   appear to be completely clustered around the peak. Other than those data, 
 #|   there is one bin at 0 with a height of about 8, one a little over 30 with 
 #|   a height of 1 and another one a little below 60 with a height of 1.
-
 ggplot(diamonds, aes(x = y)) + 
   geom_histogram(binwidth = 0.5) +
   coord_cartesian(ylim = c(0, 50))
@@ -193,7 +187,6 @@ We pluck them out with dplyr:
 
 ```{r}
 #| include: false
-
 old <- options(tibble.print_max = 10, tibble.print_min = 10)
 ```
 
@@ -207,7 +200,6 @@ unusual
 
 ```{r}
 #| include: false
-
 options(old)
 ```
 
@@ -248,7 +240,6 @@ If you've encountered unusual values in your dataset, and simply want to move on
 
     ```{r}
     #| eval: false
-
     diamonds2 <- diamonds |> 
       filter(between(y, 3, 20))
     ```
@@ -274,7 +265,6 @@ It's not obvious where you should plot missing values, so ggplot2 doesn't includ
 #|   linear association between the two variables. All but one of the diamonds 
 #|   has length greater than 3. The one outlier has a length of 0 and a width 
 #|   of about 6.5. 
-
 ggplot(diamonds2, aes(x = x, y = y)) + 
   geom_point()
 ```
@@ -283,7 +273,6 @@ To suppress that warning, set `na.rm = TRUE`:
 
 ```{r}
 #| eval: false
-
 ggplot(diamonds2, aes(x = x, y = y)) + 
   geom_point(na.rm = TRUE)
 ```
@@ -301,7 +290,6 @@ You can do this by making a new variable, using `is.na()` to check if `dep_time`
 #|   represent flights that are cancelled and not cancelled. The x-axis ranges 
 #|   from 0 to 25 minutes and the y-axis ranges from 0 to 10000. The number of 
 #|   flights not cancelled is much higher than those cancelled.
-
 nycflights13::flights |> 
   mutate(
     cancelled = is.na(dep_time),
@@ -346,7 +334,6 @@ For example, let's explore how the price of a diamond varies with its quality (m
 #|   5000. The lines overlap a great deal, suggesting similar frequency 
 #|   distributions of prices of diamonds. One notable feature is that 
 #|   Ideal diamonds have the highest peak around 1500.
-
 ggplot(diamonds, aes(x = price)) + 
   geom_freqpoly(aes(color = cut), binwidth = 500, linewidth = 0.75)
 ```
@@ -367,7 +354,6 @@ Instead of displaying count, we'll display the **density**, which is the count s
 #|   a great deal, suggesting similar density distributions of prices of 
 #|   diamonds. One notable feature is that all but Fair diamonds have high peaks 
 #|   around a price of 1500 and Fair diamonds have a higher mean than others.
-
 ggplot(diamonds, aes(x = price, y = after_stat(density))) + 
   geom_freqpoly(aes(color = cut), binwidth = 500, linewidth = 0.75)
 ```
@@ -386,7 +372,6 @@ A visually simpler plot for exploring this relationship is using side-by-side bo
 #|   prices is right skewed for each cut (Fair, Good, Very Good, Premium, and 
 #|   Ideal). The medians are close to each other, with the median for Ideal 
 #|   diamonds lowest and that for Fair highest.
-
 ggplot(diamonds, aes(x = cut, y = price)) +
   geom_boxplot()
 ```
@@ -407,7 +392,6 @@ You might be interested to know how highway mileage varies across classes:
 #|   Side-by-side boxplots of highway mileages of cars by class. Classes are 
 #|   on the x-axis (2seater, compact, midsize, minivan, pickup, subcompact, 
 #|   and suv).
-
 ggplot(mpg, aes(x = class, y = hwy)) +
   geom_boxplot()
 ```
@@ -419,7 +403,6 @@ To make the trend easier to see, we can reorder `class` based on the median valu
 #|   Side-by-side boxplots of highway mileages of cars by class. Classes are 
 #|   on the x-axis and ordered by increasing median highway mileage (pickup, 
 #|   suv, minivan, 2seater, subcompact, compact, and midsize).
-
 ggplot(mpg, aes(x = fct_reorder(class, hwy, median), y = hwy)) +
   geom_boxplot()
 ```
@@ -431,7 +414,6 @@ You can do that by exchanging the x and y aesthetic mappings.
 #| fig-alt: |
 #|   Side-by-side boxplots of highway mileages of cars by class. Classes are 
 #|   on the y-axis and ordered by increasing median highway mileage.
-
 ggplot(mpg, aes(x = hwy, y = fct_reorder(class, hwy, median))) +
   geom_boxplot()
 ```
@@ -473,7 +455,6 @@ One way to do that is to rely on the built-in `geom_count()`:
 #|   and color (D, E, F, G, H, I, and J). The sizes of the points represent 
 #|   the number of observations for that combination. The legend indicates 
 #|   that these sizes range between 1000 and 4000.
-
 ggplot(diamonds, aes(x = cut, y = color)) +
   geom_count()
 ```
@@ -497,7 +478,6 @@ Then visualize with `geom_tile()` and the fill aesthetic:
 #|   observations in each tile. There are more Ideal diamonds than other cuts, 
 #|   with the highest number being Ideal diamonds with color G. Fair diamonds 
 #|   and diamonds with color I are the lowest in frequency.
-
 diamonds |> 
   count(color, cut) |>  
   ggplot(aes(x = color, y = cut)) +
@@ -530,7 +510,6 @@ The relationship is exponential.
 #| fig-alt: |
 #|   A scatterplot of price vs. carat. The relationship is positive, somewhat 
 #|   strong, and exponential.
-
 ggplot(smaller, aes(x = carat, y = price)) +
   geom_point()
 ```
@@ -547,7 +526,6 @@ You've already seen one way to fix the problem: using the `alpha` aesthetic to a
 #|   strong, and exponential. The points are transparent, showing clusters where 
 #|   the number of points is higher than other areas. The most obvious clusters 
 #|   are for diamonds with 1, 1.5, and 2 carats.
-
 ggplot(smaller, aes(x = carat, y = price)) + 
   geom_point(alpha = 1 / 100)
 ```
@@ -569,7 +547,6 @@ You will need to install the hexbin package to use `geom_hex()`.
 #|   Plot 1: A binned density plot of price vs. carat. Plot 2: A hexagonal bin 
 #|   plot of price vs. carat. Both plots show that the highest density of 
 #|   diamonds have low carats and low prices.
-
 ggplot(smaller, aes(x = carat, y = price)) +
   geom_bin2d()
 
@@ -591,7 +568,6 @@ For example, you could bin `carat` and then for each group, display a boxplot:
 #|   roughly symmetric price distributions, and diamonds that weigh more have 
 #|   left skewed distributions. Cheaper, smaller diamonds have outliers on the 
 #|   higher end, more expensive, bigger diamonds have outliers on the lower end.
-
 ggplot(smaller, aes(x = carat, y = price)) + 
   geom_boxplot(aes(group = cut_width(carat, 0.1)))
 ```
@@ -672,7 +648,6 @@ Then, we exponentiate the residuals to put them back in the scale of raw prices.
 #|   to 5, the y-axis ranges from 0 to almost 4. Much of the data are clustered 
 #|   around low values of carat and residuals. There is a clear, curved pattern 
 #|   showing decrease in residuals as carat increases.
-
 library(tidymodels)
 
 diamonds <- diamonds |>
@@ -699,7 +674,6 @@ Once you've removed the strong relationship between carat and price, you can see
 #|   cuts (Fair to Ideal), the y-axis ranges from 0 to almost 5. The medians are 
 #|   quite similar, between roughly 0.75 to 1.25. Each of the distributions of 
 #|   residuals is right skewed, with many outliers on the higher end.
-
 ggplot(diamonds_aug, aes(x = cut, y = .resid)) + 
   geom_boxplot()
 ```
@@ -712,4 +686,4 @@ In this chapter you've learned a variety of tools to help you understand the var
 You've seen techniques that work with a single variable at a time and with a pair of variables.
 This might seem painfully restrictive if you have tens or hundreds of variables in your data, but they're the foundation upon which all other techniques are built.
 
-In the next chapter, we'll focus on the tools we can use to communicate our results.
+In the next chapter, we'll focus on the tools we can use to communicate our results.
@@ -6,7 +6,7 @@ knitr::opts_chunk$set(
   # cache = TRUE,
   fig.retina = 2,
   fig.width = 6,
-  fig.asp = 2/3,
+  fig.asp = 2 / 3,
   fig.show = "hold"
 )
 
@@ -27,15 +27,17 @@ ggplot2::theme_set(ggplot2::theme_gray(12))
 
 # use results: "asis" when setting a status for a chapter
 status <- function(type) {
-  status <- switch(type,
+  status <- switch(
+    type,
     polishing = "should be readable but is currently undergoing final polishing",
     restructuring = "is undergoing heavy restructuring and may be confusing or incomplete",
     drafting = "is currently a dumping ground for ideas, and we don't recommend reading it",
     complete = "is largely complete and just needs final proof reading",
     stop("Invalid `type`", call. = FALSE)
   )
 
-  class <- switch(type,
+  class <- switch(
+    type,
     polishing = "note",
     restructuring = "important",
     drafting = "important",
@@ -45,9 +47,13 @@ status <- function(type) {
   cat(paste0(
     "\n",
     ":::: status\n",
-    "::: callout-", class, " \n",
+    "::: callout-",
+    class,
+    " \n",
     "You are reading the work-in-progress second edition of R for Data Science. ",
-    "This chapter ", status, ". ",
+    "This chapter ",
+    status,
+    ". ",
     "You can find the complete first edition at <https://r4ds.had.co.nz>.\n",
     ":::\n",
     "::::\n"
 
@@ -82,5 +82,4 @@ format:
     include-in-header: "plausible.html"
     callout-appearance: simple
 
-editor: visual
-
+editor: source
@@ -2,7 +2,6 @@
 
 ```{r}
 #| echo: false
-
 source("_common.R")
 ```
 
@@ -30,7 +29,6 @@ This package focuses on base R so doesn't have any real prerequisites, but we'll
 ```{r}
 #| label: setup
 #| message: false
-
 library(tidyverse)
 ```
 
@@ -152,7 +150,6 @@ Several dplyr verbs are special cases of `[`:
 
     ```{r}
     #| results: false
-
     df <- tibble(
       x = c(2, 3, 1, 1, NA), 
       y = letters[1:5], 
@@ -170,7 +167,6 @@ Several dplyr verbs are special cases of `[`:
 
     ```{r}
     #| results: false
-
     df |> arrange(x, y)
 
     # same as
@@ -183,7 +179,6 @@ Several dplyr verbs are special cases of `[`:
 
     ```{r}
     #| results: false
-
     df |> select(x, z)
 
     # same as
@@ -202,7 +197,6 @@ df |>
 
 ```{r}
 #| results: false
-
 # same as
 df |> subset(x > 1, c(y, z))
 ```
@@ -353,7 +347,6 @@ If this pepper shaker is your list `pepper`, then, `pepper[1]` is a pepper shake
 #|   the pepper shaker containing pepper, it contains a single packet of pepper.
 #|   In the middle is a photo of a single packet of pepper. On the right is a 
 #|   photo of the contents of a packet of pepper.
-
 knitr::include_graphics("diagrams/pepper.png")
 ```
 
@@ -434,7 +427,6 @@ The basic structure of a `for` loop looks like this:
 
 ```{r}
 #| eval: false
-
 for (element in vector) {
   # do something with element
 }
@@ -445,15 +437,13 @@ For example, in @sec-save-database instead of using `walk()`:
 
 ```{r}
 #| eval: false
-
 paths |> walk(append_file)
 ```
 
 We could have used a `for` loop:
 
 ```{r}
 #| eval: false
-
 for (path in paths) {
   append_file(path)
 }
@@ -525,7 +515,6 @@ Here's a quick example from the diamonds dataset:
 #|   that fans out as both price and carat increases. The scatter plot 
 #|   shows very few diamonds bigger than 3 carats compared to diamonds between 
 #|   0 to 3 carats.
-
 # Left
 hist(diamonds$carat)
 
@@ -543,4 +532,4 @@ This often makes life easier for programming and so becomes more important as yo
 
 This chapter concludes the programming section of the book.
 You've made a solid start on your journey to becoming not just a data scientist who uses R, but a data scientist who can *program* in R.
-We hope these chapters have sparked your interest in programming and that you're looking forward to learning more outside of this book.
+We hope these chapters have sparked your interest in programming and that you're looking forward to learning more outside of this book.
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +{
 +    "recommendations": [
 +        "Posit.air-vscode"
 +    ]
 +}