library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
#view(diamonds)
# Overall mean depth and mean price across every diamond in the data set,
# returned as a one-row tibble.
avg_depth_price <- summarise(
  diamonds,
  meandepth = mean(depth),
  meanprice = mean(price)
)
avg_depth_price
## # A tibble: 1 x 2
## meandepth meanprice
## <dbl> <dbl>
## 1 61.7 3933.
# Add a price_per_carat column: each diamond's price divided by its carat.
ppc <- mutate(diamonds, price_per_carat = price / carat)
ppc
## # A tibble: 53,940 x 11
## carat cut color clarity depth table price x y z price_per_carat
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417.
## 2 0.21 Prem… E SI1 59.8 61 326 3.89 3.84 2.31 1552.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422.
## 4 0.290 Prem… I VS2 62.4 58 334 4.2 4.23 2.63 1152.
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 1081.
## 6 0.24 Very… J VVS2 62.8 57 336 3.94 3.96 2.48 1400
## 7 0.24 Very… I VVS1 62.3 57 336 3.95 3.98 2.47 1400
## 8 0.26 Very… H SI1 61.9 55 337 4.07 4.11 2.53 1296.
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 1532.
## 10 0.23 Very… H VS1 59.4 61 338 4 4.05 2.39 1470.
## # … with 53,930 more rows
# Mean price within each cut grade; the summary is returned ungrouped.
groupbycut <- group_by(diamonds, cut) %>%
  summarise(meanprice = mean(price), .groups = "drop")
groupbycut
## # A tibble: 5 x 2
## cut meanprice
## <ord> <dbl>
## 1 Fair 4359.
## 2 Good 3929.
## 3 Very Good 3982.
## 4 Premium 4584.
## 5 Ideal 3458.
# Mean depth and mean table for each diamond color, one row per color.
groupbycolor <- diamonds %>%
  group_by(color) %>%
  summarise(
    meandepth = mean(depth),
    meantable = mean(table),
    .groups = "drop"
  )
groupbycolor
## # A tibble: 7 x 3
## color meandepth meantable
## <ord> <dbl> <dbl>
## 1 D 61.7 57.4
## 2 E 61.7 57.5
## 3 F 61.7 57.4
## 4 G 61.8 57.3
## 5 H 61.8 57.5
## 6 I 61.8 57.6
## 7 J 61.9 57.8
# Attach the per-color summary columns of groupbycolor to every diamond.
# The join key is named explicitly so the join cannot silently switch keys
# if another shared column name is ever added to either table; naming it
# also suppresses the "Joining, by" message.
diamondcolor <- diamonds %>%
  left_join(groupbycolor, by = "color")
diamondcolor
## # A tibble: 53,940 x 12
## carat cut color clarity depth table price x y z meandepth
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 61.7
## 2 0.21 Prem… E SI1 59.8 61 326 3.89 3.84 2.31 61.7
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 61.7
## 4 0.290 Prem… I VS2 62.4 58 334 4.2 4.23 2.63 61.8
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 61.9
## 6 0.24 Very… J VVS2 62.8 57 336 3.94 3.96 2.48 61.9
## 7 0.24 Very… I VVS1 62.3 57 336 3.95 3.98 2.47 61.8
## 8 0.26 Very… H SI1 61.9 55 337 4.07 4.11 2.53 61.8
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 61.7
## 10 0.23 Very… H VS1 59.4 61 338 4 4.05 2.39 61.8
## # … with 53,930 more rows, and 1 more variable: meantable <dbl>
# Mean carat for each diamond color, used to see which color is largest
# on average.
colorcarat <- group_by(diamonds, color) %>%
  summarise(meancarat = mean(carat), .groups = "drop")
colorcarat
## # A tibble: 7 x 2
## color meancarat
## <ord> <dbl>
## 1 D 0.658
## 2 E 0.658
## 3 F 0.737
## 4 G 0.771
## 5 H 0.912
## 6 I 1.03
## 7 J 1.16
The diamond color that is largest on average is J, with a mean weight of 1.16 carats.
# Number of Ideal-cut diamonds of each color.
# count() tallies the rows per color directly, so no hard-coded list of
# color levels is needed to obtain the group sizes.
idealcut <- diamonds %>%
  filter(cut == "Ideal") %>%
  count(color, name = "countcolor")
idealcut
## # A tibble: 7 x 2
## color countcolor
## <ord> <int>
## 1 D 2834
## 2 E 3903
## 3 F 3826
## 4 G 4884
## 5 H 3115
## 6 I 2093
## 7 J 896
The diamond color that occurs most frequently among Ideal cuts is G, with 4,884 diamonds.
# Mean of table divided by carat within each clarity grade.
clarityofdiamonds <- diamonds %>%
  group_by(clarity) %>%
  summarise(
    meantabpcar = mean(table / carat),
    .groups = "drop"
  )
clarityofdiamonds
## # A tibble: 8 x 2
## clarity meantabpcar
## <ord> <dbl>
## 1 I1 56.3
## 2 SI2 69.1
## 3 SI1 89.6
## 4 VS2 103.
## 5 VS1 107.
## 6 VVS2 127.
## 7 VVS1 141.
## 8 IF 140.
The clarity grade with the largest average table per carat is VVS1 (about 141), narrowly ahead of IF (about 140).
# Mean price per carat among diamonds whose price per carat exceeds 10000.
# NOTE(review): this filters on price per carat, not on total price; use
# `price > 10000` instead if the question concerns diamonds whose total
# price exceeds $10,000.
avgppc <- filter(ppc, price_per_carat > 10000) %>%
  summarise(meanprice_per_carat = mean(price_per_carat))
avgppc
## # A tibble: 1 x 1
## meanprice_per_carat
## <dbl>
## 1 11132.
The average price per carat of diamonds that cost more than $10,000 per carat is $11,132.05.
# Number of diamonds in each clarity grade among those whose price per
# carat exceeds 10000.
# count() tallies every clarity grade that survives the filter, so a grade
# can no longer be reported as 0 merely because it was left out of a
# hand-written list of levels.
commonclarity <- ppc %>%
  filter(price_per_carat > 10000) %>%
  count(clarity, name = "claritycount")
commonclarity
## # A tibble: 5 x 2
## clarity claritycount
## <ord> <int>
## 1 VS2 77
## 2 VS1 115
## 3 VVS2 167
## 4 VVS1 145
## 5 IF 113
Among the diamonds that cost more than $10,000 per carat, the most common clarity is VVS2.
# Load the ToothGrowth data set into the workspace.
data("ToothGrowth")
# Open its help page for the description of the variables.
help("ToothGrowth")
# Show the structure: number of observations and the type of each column.
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
#view(ToothGrowth)
Each row of this data set represents one observation (one guinea pig): its tooth length, the supplement type it received, and the dose.
Each column of this data set represents one variable, recorded for all 60 observations. The tooth length is numerical. It is continuous because tooth length can take any value within a range. The supplement type is categorical. It is nominal rather than ordinal because its two levels, orange juice (OJ) and ascorbic acid (VC), have no natural order. The last variable, dose amount, is numeric. This variable is discrete because only a fixed set of dose amounts was given to the guinea pigs.
The response variable is the tooth length of the guinea pigs, and the explanatory variables are the supplement type (orange juice or ascorbic acid) and the dose amount.
H0: There is no difference in mean tooth length across the supplement types (orange juice and ascorbic acid) and dosage levels.
HA: There is a difference in mean tooth length across the supplement types (orange juice and ascorbic acid) and dosage levels.
# Boxplots of tooth length at each dose level, with one box per supplement
# type (fill color) placed side by side within every dose.
ggplot(
  data = ToothGrowth,
  mapping = aes(x = as.factor(dose), y = len, fill = supp)
) +
  geom_boxplot() +
  labs(x = "Dosage Levels", y = "Tooth Length")
# Same boxplots of tooth length by dose and supplement type, but with each
# dose level drawn in its own facet column.
ggplot(
  data = ToothGrowth,
  mapping = aes(x = as.factor(dose), y = len, fill = supp)
) +
  geom_boxplot() +
  facet_grid(. ~ dose) +
  labs(x = "Dosage Levels", y = "Tooth Length")
We can see from the plots that orange juice is generally more effective in helping with tooth growth. We can also see a positive relationship between dose level and tooth length: the higher the dose of orange juice or ascorbic acid, the longer the teeth.
From the boxplots, it appears that we can reject the null hypothesis. Looking at the boxplots, we can see that there is a difference between the supplement treatments and dosage levels. When the guinea pigs were given 0.5 and 1 mg/day, orange juice was the more effective supplement. The result at 2 mg/day was interesting: the median tooth length was about the same for both supplements, but ascorbic acid appeared to work slightly better. It was also interesting that the median for 1 mg/day of orange juice was almost the same as the median for 2 mg/day of either orange juice or ascorbic acid.