This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the cars data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
A. Load the tidyverse and ggplot2 libraries. The ggplot2 library includes a dataset named diamonds. Perform the following tasks:
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Attach ggplot2 explicitly; the tidyverse startup message above already
# lists it among the attached core packages.
library(ggplot2)
# Open the diamonds data set in the data viewer for inspection
# (view() is meant for interactive sessions).
view(diamonds)
# Order the diamonds from cheapest to most expensive and preview the
# first rows of the sorted table.
diamonds_low_to_high <- arrange(diamonds, price)
head(diamonds_low_to_high)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Order the diamonds from most expensive to cheapest (desc() reverses the
# sort direction) and preview the first rows.
diamonds_high_to_low <- arrange(diamonds, desc(price))
head(diamonds_high_to_low)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
## 2 2 Very Good G SI1 63.5 56 18818 7.9 7.97 5.04
## 3 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56
## 4 2.07 Ideal G SI2 62.5 55 18804 8.2 8.13 5.11
## 5 2 Very Good H SI1 62.8 57 18803 7.95 8 5.01
## 6 2.29 Premium I SI1 61.8 59 18797 8.52 8.45 5.24
# Sort by ascending price; ties in price are broken by ascending cut level.
diamonds_low_price_cut <- arrange(diamonds, price, cut)
head(diamonds_low_price_cut)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 2 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Sort by descending price; ties in price are broken by descending cut level.
diamonds_high_price_cut <- arrange(diamonds, desc(price), desc(cut))
head(diamonds_high_price_cut)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
## 2 2 Very Good G SI1 63.5 56 18818 7.9 7.97 5.04
## 3 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56
## 4 2.07 Ideal G SI2 62.5 55 18804 8.2 8.13 5.11
## 5 2 Very Good H SI1 62.8 57 18803 7.95 8 5.01
## 6 2.29 Premium I SI1 61.8 59 18797 8.52 8.45 5.24
# Sort by ascending price; ties in price are broken by ascending clarity level.
diamonds_low_to_high_clarity <- arrange(diamonds, price, clarity)
head(diamonds_low_to_high_clarity)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Add a salePrice column holding each diamond's price reduced by a flat 250.
diamonds_discount <- mutate(diamonds, salePrice = price - 250)
head(diamonds_discount)
## # A tibble: 6 × 11
## carat cut color clarity depth table price x y z salePrice
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 76
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 76
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 77
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 84
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 85
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 86
# Drop the three dimension columns (x, y, z) and keep every other column.
diamonds_without_xyz <- select(diamonds, -c(x, y, z))
head(diamonds_without_xyz)
## # A tibble: 6 × 7
## carat cut color clarity depth table price
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int>
## 1 0.23 Ideal E SI2 61.5 55 326
## 2 0.21 Premium E SI1 59.8 61 326
## 3 0.23 Good E VS1 56.9 65 327
## 4 0.29 Premium I VS2 62.4 58 334
## 5 0.31 Good J SI2 63.3 58 335
## 6 0.24 Very Good J VVS2 62.8 57 336
# Tally how many diamonds fall into each cut category; the count is stored
# in the num_diamonds column, one row per cut level.
diamonds_cut_summary <- count(diamonds, cut, name = "num_diamonds")
diamonds_cut_summary
## # A tibble: 5 × 2
## cut num_diamonds
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
# Append a totalNum column in which every row carries the total number of
# rows in the (ungrouped) diamonds table, then print the result.
totalNum <- mutate(diamonds, totalNum = n())
totalNum
## # A tibble: 53,940 × 11
## carat cut color clarity depth table price x y z totalNum
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <int>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 53940
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 53940
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 53940
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 53940
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 53940
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 53940
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 53940
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 53940
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 53940
## 10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 53940
## # ℹ 53,930 more rows