library(tidyverse)
data("diamonds")
## INSERT YOUR CODE HERE ##
Make a new data set that has the average depth and price of the diamonds in the data set.
# Overall mean depth and mean price across every diamond (NAs ignored),
# returned as a one-row summary tibble.
q1 <- summarise(
  diamonds,
  avgD = mean(depth, na.rm = TRUE),
  avgP = mean(price, na.rm = TRUE)
)
q1
## # A tibble: 1 × 2
## avgD avgP
## <dbl> <dbl>
## 1 61.7 3933.
Add a new column to the data set that records each diamond’s price per carat.
## INSERT YOUR CODE HERE ##
# Append price per carat as column `ppc`. This reassigns `diamonds`, so every
# later chunk sees the extra column.
diamonds <- mutate(diamonds, ppc = price / carat)
str(diamonds)
## tibble [53,940 × 11] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
## $ ppc : num [1:53940] 1417 1552 1422 1152 1081 ...
Create a new data set that groups diamonds by their cut and displays the average price of each group.
## INSERT YOUR CODE HERE ##
# Mean price within each cut grade (one row per cut level).
q3 <- summarise(
  group_by(diamonds, cut),
  avgP = mean(price, na.rm = TRUE)
)
q3
## # A tibble: 5 × 2
## cut avgP
## <ord> <dbl>
## 1 Fair 4359.
## 2 Good 3929.
## 3 Very Good 3982.
## 4 Premium 4584.
## 5 Ideal 3458.
Create a new data set that groups diamonds by color and displays the average depth and average table for each group.
## INSERT YOUR CODE HERE ##
# Mean depth and mean table within each color grade (one row per color).
q4 <- diamonds %>%
  group_by(color) %>%
  summarise(
    avgD = mean(depth, na.rm = TRUE),
    avgT = mean(table, na.rm = TRUE)
  )
q4
## # A tibble: 7 × 3
## color avgD avgT
## <ord> <dbl> <dbl>
## 1 D 61.7 57.4
## 2 E 61.7 57.5
## 3 F 61.7 57.4
## 4 G 61.8 57.3
## 5 H 61.8 57.5
## 6 I 61.8 57.6
## 7 J 61.9 57.8
Which color of diamonds seems to be the largest on average (in terms of carats)?
## INSERT YOUR CODE HERE ##
# Mean carat weight per color, largest first, so the top row answers the
# question directly.
q5 <- diamonds %>%
  group_by(color) %>%
  summarise(avgC = mean(carat, na.rm = TRUE)) %>%
  arrange(desc(avgC))
q5
## # A tibble: 7 × 2
## color avgC
## <ord> <dbl>
## 1 J 1.16
## 2 I 1.03
## 3 H 0.912
## 4 G 0.771
## 5 F 0.737
## 6 E 0.658
## 7 D 0.658
What color of diamonds occurs the most frequently among diamonds with ideal cuts?
## INSERT YOUR CODE HERE ##
# Count diamonds per color among Ideal cuts only. `sort = TRUE` orders the
# counts from most to least frequent, so the first row is the answer; this
# matches how the other "which is most" questions in this file are answered.
q6 <- diamonds %>%
  filter(cut == "Ideal") %>%
  count(color, sort = TRUE)
q6
## # A tibble: 7 × 2
## color n
## <ord> <int>
## 1 G 4884
## 2 E 3903
## 3 F 3826
## 4 H 3115
## 5 D 2834
## 6 I 2093
## 7 J 896
Which clarity of diamonds has the largest average table per carat?
## INSERT YOUR CODE HERE ##
# Mean of the per-diamond table/carat ratio within each clarity grade,
# ordered from largest to smallest. The ratio is computed inside summarise()
# rather than via a temporary column.
q7 <- diamonds %>%
  group_by(clarity) %>%
  summarise(avgTPC = mean(table / carat, na.rm = TRUE)) %>%
  arrange(desc(avgTPC))
q7
## # A tibble: 8 × 2
## clarity avgTPC
## <ord> <dbl>
## 1 VVS1 141.
## 2 IF 140.
## 3 VVS2 127.
## 4 VS1 107.
## 5 VS2 103.
## 6 SI1 89.6
## 7 SI2 69.1
## 8 I1 56.3
What is the average price per carat of diamonds that cost more than $10,000?
## INSERT YOUR CODE HERE ##
# Mean price per carat among diamonds priced strictly above 10000.
# The ratio is computed per diamond inside summarise(), then averaged.
q8 <- diamonds %>%
  filter(price > 10000) %>%
  summarise(avgPPC = mean(price / carat, na.rm = TRUE))
q8
## # A tibble: 1 × 1
## avgPPC
## <dbl>
## 1 8044.
Of the diamonds that cost more than $10,000, what is the most common clarity?
## INSERT YOUR CODE HERE ##
# Clarity frequencies among diamonds priced strictly above 10000, most
# common first (count's sort = TRUE orders by descending n).
q9 <- diamonds %>%
  filter(price > 10000) %>%
  count(clarity, sort = TRUE)
q9
## # A tibble: 8 × 2
## clarity n
## <ord> <int>
## 1 SI2 1239
## 2 SI1 1184
## 3 VS2 1155
## 4 VS1 747
## 5 VVS2 452
## 6 VVS1 247
## 7 IF 168
## 8 I1 30
Add two columns to the diamonds data set. The first column should display the average depth of diamonds in the diamond’s color group. The second column should display the average table of diamonds in the diamond’s color group.
Hint: Use left_join()
# Per-color lookup table: mean depth and mean table for each color grade.
# na.rm = TRUE matches the other summaries in this file.
avgDiamond <- diamonds %>%
  group_by(color) %>%
  summarise(
    avgDepth = mean(depth, na.rm = TRUE),
    avgTable = mean(table, na.rm = TRUE)
  )
# Attach the per-color averages to every diamond. The join key is named
# explicitly so the join does not depend on whichever columns happen to be
# shared, and so dplyr does not emit its "Joining, by = ..." message.
joinDiamond <- diamonds %>%
  left_join(avgDiamond, by = "color")
str(joinDiamond)
## tibble [53,940 × 13] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity : Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
## $ ppc : num [1:53940] 1417 1552 1422 1152 1081 ...
## $ avgDepth: num [1:53940] 61.7 61.7 61.7 61.8 61.9 ...
## $ avgTable: num [1:53940] 57.5 57.5 57.5 57.6 57.8 ...