Quarto Workbook

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

You can add options to executable code like this

[1] 4

The echo: false option disables the printing of code (only output is displayed).

Fact

Spheniscus demersus is the Latin name for African penguins

library(palmerpenguins)
library(tidyverse)

Libraries

library(tidyverse)
library(readr)
library(knitr)

Exercises W3

diamonds %>% #uses the diamond dataset
  group_by(color, clarity) %>% #groups data by variables selected
  mutate(price200 = mean(price)) %>% #creates new variable in this case mean price
  ungroup() %>% #no longer groups data
  mutate(random10 = 10 + price) %>% #creates new variable, increases price by 10
  select(clarity, cut, price, color, price200, random10) %>% #keeps only the columns listed in line
  arrange(color) %>% #visualises data by colour
    group_by(cut) %>%  #groups data by cut
  mutate(dis = n_distinct(price),
         rowID = row_number()) %>% #counts number of unique values per cut, then numbers each row per cut
  ungroup()
# A tibble: 53,940 × 8
   clarity cut       price color price200 random10   dis rowID
   <ord>   <ord>     <int> <ord>    <dbl>    <dbl> <int> <int>
 1 VS2     Very Good   357 D        2587.      367  5840     1
 2 VS1     Very Good   402 D        3030.      412  5840     2
 3 VS2     Very Good   403 D        2587.      413  5840     3
 4 VS2     Good        403 D        2587.      413  3086     1
 5 VS1     Good        403 D        3030.      413  3086     2
 6 VS2     Premium     404 D        2587.      414  6014     1
 7 SI1     Premium     552 D        2976.      562  6014     2
 8 SI1     Ideal       552 D        2976.      562  7281     1
 9 SI1     Ideal       552 D        2976.      562  7281     2
10 VVS1    Very Good   553 D        2948.      563  5840     4
# ℹ 53,930 more rows

Diamonds cut, color, means and SD

diamonds %>% 
  group_by(cut, color) %>% 
  summarize(m = mean(price),
            s = sd(price)) %>% 
  ungroup()
`summarise()` has grouped output by 'cut'. You can override using the `.groups`
argument.
# A tibble: 35 × 4
   cut   color     m     s
   <ord> <ord> <dbl> <dbl>
 1 Fair  D     4291. 3286.
 2 Fair  E     3682. 2977.
 3 Fair  F     3827. 3223.
 4 Fair  G     4239. 3610.
 5 Fair  H     5136. 3886.
 6 Fair  I     4685. 3730.
 7 Fair  J     4976. 4050.
 8 Good  D     3405. 3175.
 9 Good  E     3424. 3331.
10 Good  F     3496. 3202.
# ℹ 25 more rows

Diamonds 70% Off

diamonds %>% 
  group_by(cut, color, clarity) %>% 
  summarize(m = mean(price),
            s = sd(price),
            msale = m * 0.70) %>% 
  ungroup()
`summarise()` has grouped output by 'cut', 'color'. You can override using the
`.groups` argument.
# A tibble: 276 × 6
   cut   color clarity     m     s msale
   <ord> <ord> <ord>   <dbl> <dbl> <dbl>
 1 Fair  D     I1      7383  5899. 5168.
 2 Fair  D     SI2     4355. 3260. 3049.
 3 Fair  D     SI1     4273. 3019. 2991.
 4 Fair  D     VS2     4513. 3383. 3159.
 5 Fair  D     VS1     2921. 2550. 2045.
 6 Fair  D     VVS2    3607  3629. 2525.
 7 Fair  D     VVS1    4473  5457. 3131.
 8 Fair  D     IF      1620.  525. 1134.
 9 Fair  E     I1      2095.  824. 1467.
10 Fair  E     SI2     4172. 3055. 2921.
# ℹ 266 more rows

Diamonds by Price (L-H)

diamonds %>% arrange(price)
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
# ℹ 53,930 more rows

Diamonds by Price (H-L)

diamonds %>% arrange(desc(price))
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  2.29 Premium   I     VS2      60.8    60 18823  8.5   8.47  5.16
 2  2    Very Good G     SI1      63.5    56 18818  7.9   7.97  5.04
 3  1.51 Ideal     G     IF       61.7    55 18806  7.37  7.41  4.56
 4  2.07 Ideal     G     SI2      62.5    55 18804  8.2   8.13  5.11
 5  2    Very Good H     SI1      62.8    57 18803  7.95  8     5.01
 6  2.29 Premium   I     SI1      61.8    59 18797  8.52  8.45  5.24
 7  2.04 Premium   H     SI1      58.1    60 18795  8.37  8.28  4.84
 8  2    Premium   I     VS1      60.8    59 18795  8.13  8.02  4.91
 9  1.71 Premium   F     VS2      62.3    59 18791  7.57  7.53  4.7 
10  2.15 Ideal     G     SI2      62.6    54 18791  8.29  8.35  5.21
# ℹ 53,930 more rows

Diamond Lowest Price and Cut

diamonds %>% arrange(price, cut) 
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
 2  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
 8  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
 9  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
# ℹ 53,930 more rows

Diamonds Highest Price and Cut

diamonds %>% 
  arrange(desc(price), cut)
# A tibble: 53,940 × 10
   carat cut       color clarity depth table price     x     y     z
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
 1  2.29 Premium   I     VS2      60.8    60 18823  8.5   8.47  5.16
 2  2    Very Good G     SI1      63.5    56 18818  7.9   7.97  5.04
 3  1.51 Ideal     G     IF       61.7    55 18806  7.37  7.41  4.56
 4  2.07 Ideal     G     SI2      62.5    55 18804  8.2   8.13  5.11
 5  2    Very Good H     SI1      62.8    57 18803  7.95  8     5.01
 6  2.29 Premium   I     SI1      61.8    59 18797  8.52  8.45  5.24
 7  2.04 Premium   H     SI1      58.1    60 18795  8.37  8.28  4.84
 8  2    Premium   I     VS1      60.8    59 18795  8.13  8.02  4.91
 9  1.71 Premium   F     VS2      62.3    59 18791  7.57  7.53  4.7 
10  2.15 Ideal     G     SI2      62.6    54 18791  8.29  8.35  5.21
# ℹ 53,930 more rows

Diamonds New Sale Price

diamonds %>% 
  mutate(salePrice = price - 250)
# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z salePrice
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>     <dbl>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43        76
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31        76
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31        77
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63        84
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75        85
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48        86
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47        86
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53        87
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49        87
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39        88
# ℹ 53,930 more rows

Number of Diamonds Per Cut

diamonds %>% 
  group_by(cut) %>% 
  summarize(count = n())
# A tibble: 5 × 2
  cut       count
  <ord>     <int>
1 Fair       1610
2 Good       4906
3 Very Good 12082
4 Premium   13791
5 Ideal     21551

Total Number of Diamonds Column

diamonds %>% 
  mutate(totalNum = n())
# A tibble: 53,940 × 11
   carat cut       color clarity depth table price     x     y     z totalNum
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>    <int>
 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43    53940
 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31    53940
 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31    53940
 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63    53940
 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75    53940
 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48    53940
 7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47    53940
 8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53    53940
 9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49    53940
10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39    53940
# ℹ 53,930 more rows