library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("dplyr")
library(ggplot2)
#question 1
# Overall mean depth and mean price across all rows of diamonds.
mean_diamonds <- diamonds %>%
  summarise(
    mean_depth = mean(depth),
    mean_price = mean(price)
  )
mean_diamonds
## # A tibble: 1 x 2
## mean_depth mean_price
## <dbl> <dbl>
## 1 61.7 3933.
#question 2
# Append a cost_over_carat column (price divided by carat) to every row.
diamonds_cost_over_carat <- mutate(diamonds, cost_over_carat = price / carat)
diamonds_cost_over_carat
## # A tibble: 53,940 x 11
## carat cut color clarity depth table price x y z cost_over_carat
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417.
## 2 0.21 Prem~ E SI1 59.8 61 326 3.89 3.84 2.31 1552.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422.
## 4 0.290 Prem~ I VS2 62.4 58 334 4.2 4.23 2.63 1152.
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 1081.
## 6 0.24 Very~ J VVS2 62.8 57 336 3.94 3.96 2.48 1400
## 7 0.24 Very~ I VVS1 62.3 57 336 3.95 3.98 2.47 1400
## 8 0.26 Very~ H SI1 61.9 55 337 4.07 4.11 2.53 1296.
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 1532.
## 10 0.23 Very~ H VS1 59.4 61 338 4 4.05 2.39 1470.
## # ... with 53,930 more rows
#question 3
# Mean price within each cut grade (one output row per cut).
diamonds_group_by_cut <- diamonds %>%
  group_by(cut) %>%
  summarise(mean_price = mean(price))
## `summarise()` ungrouping output (override with `.groups` argument)
diamonds_group_by_cut
## # A tibble: 5 x 2
## cut mean_price
## <ord> <dbl>
## 1 Fair 4359.
## 2 Good 3929.
## 3 Very Good 3982.
## 4 Premium 4584.
## 5 Ideal 3458.
#question 4
# Store the question 4 answer string, then echo it.
answer <- "free bingo"
answer
## [1] "free bingo"
#question 5
# Mean price and mean table value within each color grade.
diamonds_group_by_color <- diamonds %>%
  group_by(color) %>%
  summarise(
    mean_price = mean(price),
    mean_table = mean(table)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
diamonds_group_by_color
## # A tibble: 7 x 3
## color mean_price mean_table
## <ord> <dbl> <dbl>
## 1 D 3170. 57.4
## 2 E 3077. 57.5
## 3 F 3725. 57.4
## 4 G 3999. 57.3
## 5 H 4487. 57.5
## 6 I 5092. 57.6
## 7 J 5324. 57.8
#question 6
# Mean carat (used here as the measure of size) within each color grade.
diamonds_group_by_color_for_size <- diamonds %>%
  group_by(color) %>%
  summarise(mean_size = mean(carat))
## `summarise()` ungrouping output (override with `.groups` argument)
diamonds_group_by_color_for_size
## # A tibble: 7 x 2
## color mean_size
## <ord> <dbl>
## 1 D 0.658
## 2 E 0.658
## 3 F 0.737
## 4 G 0.771
## 5 H 0.912
## 6 I 1.03
## 7 J 1.16
# On average, color J has the largest diamonds (highest mean carat, 1.16).
#question 7
# Count the Ideal-cut diamonds in each color grade.
# Note: despite the variable name, the result has one row per color, not
# only color J.
diamonds_type_J_cut_ideal <- diamonds %>%
  filter(cut == "Ideal") %>%
  group_by(color) %>%
  summarise(count = n())
## `summarise()` ungrouping output (override with `.groups` argument)
diamonds_type_J_cut_ideal
## # A tibble: 7 x 2
## color count
## <ord> <int>
## 1 D 2834
## 2 E 3903
## 3 F 3826
## 4 G 4884
## 5 H 3115
## 6 I 2093
## 7 J 896
# From this we know that color G has the most Ideal-cut diamonds (4,884).
#problem 8
# Mean of the per-diamond table/carat ratio within each clarity grade.
# The ratio is computed row by row, so it can be evaluated directly inside
# summarise() without a separate intermediate column.
diamonds_group_by_clarity <- diamonds %>%
  group_by(clarity) %>%
  summarise(mean_table_per_carat = mean(table / carat))
## `summarise()` ungrouping output (override with `.groups` argument)
diamonds_group_by_clarity
## # A tibble: 8 x 2
## clarity mean_table_per_carat
## <ord> <dbl>
## 1 I1 56.3
## 2 SI2 69.1
## 3 SI1 89.6
## 4 VS2 103.
## 5 VS1 107.
## 6 VVS2 127.
## 7 VVS1 141.
## 8 IF 140.
# The clarity "VVS1" has the highest average table over carat (about 141), just ahead of "IF" (about 140).
#problem 9
# Mean price of the diamonds priced at 10000 or more (>= keeps exactly 10000).
diamonds_price_G_10000 <- diamonds %>%
  filter(price >= 10000) %>%
  summarise(average_price = mean(price))
diamonds_price_G_10000
## # A tibble: 1 x 1
## average_price
## <dbl>
## 1 13640.
# The average price is about 13,640 dollars.
#problem 10
# Among diamonds priced at 10000 or more, count how many fall in each
# clarity grade.
diamonds_price_G_10000_with_clarity <- diamonds %>%
  filter(price >= 10000) %>%
  group_by(clarity) %>%
  summarise(count = n())
## `summarise()` ungrouping output (override with `.groups` argument)
diamonds_price_G_10000_with_clarity
## # A tibble: 8 x 2
## clarity count
## <ord> <int>
## 1 I1 30
## 2 SI2 1239
## 3 SI1 1184
## 4 VS2 1156
## 5 VS1 747
## 6 VVS2 452
## 7 VVS1 247
## 8 IF 168
# The most common clarity among diamonds priced at 10,000 dollars or more is SI2 (1,239 diamonds).
#part 3
# Load in the data
data("ToothGrowth")
# Learn about the data
?ToothGrowth
## starting httpd help server ...
## done
# Structure of the dataset
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Look at the data.
# View() opens a spreadsheet-style viewer and needs a GUI; guard it so the
# script does not abort when run non-interactively (Rscript, knitr, CI).
if (interactive()) {
  View(ToothGrowth)
}
#problem 1
# This data represents the length of odontoblasts in 60 guinea pigs. The data varies by how much vitamin C each animal was given and by which delivery method.
#problem 2
# len is the length of the cells; this is a numerical column. supp is the supplement they were given, either OJ or VC; this column is categorical. Finally, dose is the amount of the supplement they were given; this is also numerical.
#problem 3
# len is the response variable and the others (supp and dose) are explanatory.
#problem 4
# I think that vitamin C will increase tooth growth, and I think the maximum vitamin dose will increase it the most, i.e. VC at 2 mg/day.
#problem 5
# Box plot of len for each supplement type (OJ vs VC), colored by supplement.
# Title spelling corrected ("lenght" -> "length", "treaments" -> "treatments").
ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
  geom_boxplot() +
  labs(y = "length of teeth cells") +
  ggtitle("tooth length across both treatments")

#problem 6
# Box plot of len by supplement type, with one panel per dose level.
# The x axis shows the supplement (supp); dose is the facet variable, so the
# x label names the supplement and points to the panel strip for the dose.
# Title spelling is corrected as well.
ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
  geom_boxplot() +
  facet_wrap(~ dose) +
  labs(
    x = "supplement type (dose in mg/day shown at top of each panel)",
    y = "length of teeth cells"
  ) +
  ggtitle("tooth length across both treatments faceted across all levels of dosage")

#problem 7
# Well, to say I'm surprised is an understatement: it looks like OJ is, in most cases, a far more reliable and effective supplement than VC. As the dosage increases we see an increase in tooth length.
# We also see OJ outperform VC consistently until the dose hits 2 mg. Even then a case can be made that OJ is still the better supplement, as it has less variance than VC at 2 mg.
#problem 8
# As stated before, I'm surprised OJ did so well, although I'd like to argue that in some sense my hypothesis was correct, because the largest tooth length was achieved by VC at 2 mg.
# But I will admit OJ performed much better than I anticipated.