library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(dplyr)
#view(diamonds)

Part II

1.

# Overall mean depth and mean price across all diamonds (one-row summary).
avg_depth_price <- summarise(
  diamonds,
  meandepth = mean(depth),
  meanprice = mean(price)
)
avg_depth_price
## # A tibble: 1 x 2
##   meandepth meanprice
##       <dbl>     <dbl>
## 1      61.7     3933.

2.

# Add a price_per_carat column (price divided by carat) to every diamond row.
ppc <- mutate(diamonds, price_per_carat = price / carat)
ppc
## # A tibble: 53,940 x 11
##    carat cut   color clarity depth table price     x     y     z price_per_carat
##    <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>           <dbl>
##  1 0.23  Ideal E     SI2      61.5    55   326  3.95  3.98  2.43           1417.
##  2 0.21  Prem… E     SI1      59.8    61   326  3.89  3.84  2.31           1552.
##  3 0.23  Good  E     VS1      56.9    65   327  4.05  4.07  2.31           1422.
##  4 0.290 Prem… I     VS2      62.4    58   334  4.2   4.23  2.63           1152.
##  5 0.31  Good  J     SI2      63.3    58   335  4.34  4.35  2.75           1081.
##  6 0.24  Very… J     VVS2     62.8    57   336  3.94  3.96  2.48           1400 
##  7 0.24  Very… I     VVS1     62.3    57   336  3.95  3.98  2.47           1400 
##  8 0.26  Very… H     SI1      61.9    55   337  4.07  4.11  2.53           1296.
##  9 0.22  Fair  E     VS2      65.1    61   337  3.87  3.78  2.49           1532.
## 10 0.23  Very… H     VS1      59.4    61   338  4     4.05  2.39           1470.
## # … with 53,930 more rows

3.

# Mean price for each cut; the result is returned ungrouped.
groupbycut <- summarise(
  group_by(diamonds, cut),
  meanprice = mean(price),
  .groups = "drop"
)
groupbycut
## # A tibble: 5 x 2
##   cut       meanprice
##   <ord>         <dbl>
## 1 Fair          4359.
## 2 Good          3929.
## 3 Very Good     3982.
## 4 Premium       4584.
## 5 Ideal         3458.

5.

# Mean depth and mean table for each color; the result is returned ungrouped.
groupbycolor <- summarise(
  group_by(diamonds, color),
  meandepth = mean(depth),
  meantable = mean(table),
  .groups = "drop"
)
groupbycolor
## # A tibble: 7 x 3
##   color meandepth meantable
##   <ord>     <dbl>     <dbl>
## 1 D          61.7      57.4
## 2 E          61.7      57.5
## 3 F          61.7      57.4
## 4 G          61.8      57.3
## 5 H          61.8      57.5
## 6 I          61.8      57.6
## 7 J          61.9      57.8

Extra Credit.

# Attach each color's mean depth and mean table (from groupbycolor) to every
# diamond row. The join key is named explicitly so the join cannot silently
# pick up any other column the two tables happen to share, and so dplyr does
# not have to guess the key and print a "Joining, by" message.
diamondcolor <- diamonds %>%
  left_join(groupbycolor, by = "color")
diamondcolor
## # A tibble: 53,940 x 12
##    carat cut   color clarity depth table price     x     y     z meandepth
##    <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>     <dbl>
##  1 0.23  Ideal E     SI2      61.5    55   326  3.95  3.98  2.43      61.7
##  2 0.21  Prem… E     SI1      59.8    61   326  3.89  3.84  2.31      61.7
##  3 0.23  Good  E     VS1      56.9    65   327  4.05  4.07  2.31      61.7
##  4 0.290 Prem… I     VS2      62.4    58   334  4.2   4.23  2.63      61.8
##  5 0.31  Good  J     SI2      63.3    58   335  4.34  4.35  2.75      61.9
##  6 0.24  Very… J     VVS2     62.8    57   336  3.94  3.96  2.48      61.9
##  7 0.24  Very… I     VVS1     62.3    57   336  3.95  3.98  2.47      61.8
##  8 0.26  Very… H     SI1      61.9    55   337  4.07  4.11  2.53      61.8
##  9 0.22  Fair  E     VS2      65.1    61   337  3.87  3.78  2.49      61.7
## 10 0.23  Very… H     VS1      59.4    61   338  4     4.05  2.39      61.8
## # … with 53,930 more rows, and 1 more variable: meantable <dbl>

6.

# Mean carat weight for each color; the result is returned ungrouped.
colorcarat <- summarise(
  group_by(diamonds, color),
  meancarat = mean(carat),
  .groups = "drop"
)
colorcarat
## # A tibble: 7 x 2
##   color meancarat
##   <ord>     <dbl>
## 1 D         0.658
## 2 E         0.658
## 3 F         0.737
## 4 G         0.771
## 5 H         0.912
## 6 I         1.03 
## 7 J         1.16

Color J diamonds are the largest on average, with a mean weight of 1.16 carats.

7.

# Number of Ideal-cut diamonds of each color.
# count() tallies the rows per color directly, so the result does not depend
# on a hand-maintained list of color levels.
idealcut <- diamonds %>%
  filter(cut == "Ideal") %>%
  count(color, name = "countcolor")
idealcut
## # A tibble: 7 x 2
##   color countcolor
##   <ord>      <int>
## 1 D           2834
## 2 E           3903
## 3 F           3826
## 4 G           4884
## 5 H           3115
## 6 I           2093
## 7 J            896

The color of diamond that occurs most frequently among ideal cuts is color G.

8.

# Mean of the per-diamond table/carat ratio for each clarity grade; the
# result is returned ungrouped.
clarityofdiamonds <- summarise(
  group_by(diamonds, clarity),
  meantabpcar = mean(table / carat),
  .groups = "drop"
)
clarityofdiamonds
## # A tibble: 8 x 2
##   clarity meantabpcar
##   <ord>         <dbl>
## 1 I1             56.3
## 2 SI2            69.1
## 3 SI1            89.6
## 4 VS2           103. 
## 5 VS1           107. 
## 6 VVS2          127. 
## 7 VVS1          141. 
## 8 IF            140.

The clarity of diamonds that has the largest average table per carat is VVS1.

9.

# Mean price per carat among diamonds whose price_per_carat exceeds 10000.
# NOTE(review): the filter is on price_per_carat, not on price -- confirm that
# this is the subset the question is asking about.
avgppc <- summarise(
  filter(ppc, price_per_carat > 10000),
  meanprice_per_carat = mean(price_per_carat)
)

avgppc
## # A tibble: 1 x 1
##   meanprice_per_carat
##                 <dbl>
## 1              11132.

Among diamonds whose price per carat exceeds $10000, the average price per carat is $11132.05.

10.

# Number of diamonds of each clarity grade among those whose price_per_carat
# exceeds 10000. count() tallies the rows per clarity directly, so a grade is
# never reported as 0 merely because it is missing from a hard-coded list.
# NOTE(review): the filter is on price_per_carat, not on price -- confirm that
# this is the subset the question is asking about.
commonclarity <- ppc %>%
  filter(price_per_carat > 10000) %>%
  count(clarity, name = "claritycount")

commonclarity
## # A tibble: 5 x 2
##   clarity claritycount
##   <ord>          <int>
## 1 VS2               77
## 2 VS1              115
## 3 VVS2             167
## 4 VVS1             145
## 5 IF               113

Among the diamonds whose price per carat exceeds $10000, the most common clarity is VVS2.

Part III

# Load the built-in ToothGrowth data set, open its help page, and print its
# structure.
data("ToothGrowth")
help("ToothGrowth")
str(ToothGrowth)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
#view(ToothGrowth)

1.

Each row of this data set represents one guinea pig (one observation), recording its tooth length, the supplement type it received, and the dose.

2.

The columns in this data set represent the variables recorded for each observation. The tooth length is numerical; it is continuous because the tooth length can take any value. The supplement type is categorical; it is nominal rather than ordinal because its two levels, orange juice and ascorbic acid, have no natural order. The last variable, dose amount, is numeric; it is discrete because the guinea pigs were given only a fixed set of dose amounts.

3.

The response variable is the tooth length of the guinea pigs, and the explanatory variables are the supplement type (orange juice or ascorbic acid) and the dose amount.

4.

H0: There is no difference in tooth length between the supplement treatments (orange juice and ascorbic acid) or between the dosage levels.

HA: There is a difference in tooth length between the supplement treatments (orange juice and ascorbic acid) or between the dosage levels.

5.

# Boxplots of tooth length at each dose, one box per supplement type
# (distinguished by fill color).
ggplot(
  data = ToothGrowth,
  mapping = aes(x = as.factor(dose), y = len, fill = supp)
) +
  geom_boxplot() +
  labs(x = "Dosage Levels", y = "Tooth Length")

6.

# Boxplots of tooth length by supplement type, with one panel per dose.
# Dose is both the x variable and the facet variable, so with a shared x scale
# every panel would reserve space for all three doses and leave two of those
# positions empty. scales = "free_x" lets each panel show only its own dose.
ggplot(ToothGrowth, aes(x = as.factor(dose), y = len, fill = supp)) +
  geom_boxplot() +
  facet_grid(. ~ dose, scales = "free_x") +
  xlab("Dosage Levels") +
  ylab("Tooth Length")

7.

We can see from the data that orange juice is more effective in helping with tooth growth. We can also see a positive association between dose level and tooth length: for both orange juice and ascorbic acid, the higher the dose, the longer the teeth.

8.

Based on the data, it appears that we can reject the null hypothesis. Looking at the boxplots, we can see that there is a difference in tooth length between the supplement treatments and between the dosage levels. When the guinea pigs were given 0.5 and 1 mg/day, orange juice was the more effective supplement. The 2 mg/day dose was interesting because the median values of the two supplements were about the same, although ascorbic acid appeared to work slightly better. It was also interesting that the median for 1 mg/day of orange juice was almost the same as the median for 2 mg/day of either orange juice or ascorbic acid.