mid-term-1.R

library(tidyverse)

## -- Attaching packages -------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts ----------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library("dplyr")
library(ggplot2)
# Question 1: overall mean of the depth and price columns of diamonds.
mean_diamonds <- summarise(
  diamonds,
  mean_depth = mean(depth),
  mean_price = mean(price)
)
mean_diamonds

## # A tibble: 1 x 2
##   mean_depth mean_price
##        <dbl>      <dbl>
## 1       61.7      3933.

# Question 2: append a cost_over_carat column (price divided by carat).
diamonds_cost_over_carat <- mutate(diamonds, cost_over_carat = price / carat)
diamonds_cost_over_carat

## # A tibble: 53,940 x 11
##    carat cut   color clarity depth table price     x     y     z cost_over_carat
##    <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>           <dbl>
##  1 0.23  Ideal E     SI2      61.5    55   326  3.95  3.98  2.43           1417.
##  2 0.21  Prem~ E     SI1      59.8    61   326  3.89  3.84  2.31           1552.
##  3 0.23  Good  E     VS1      56.9    65   327  4.05  4.07  2.31           1422.
##  4 0.290 Prem~ I     VS2      62.4    58   334  4.2   4.23  2.63           1152.
##  5 0.31  Good  J     SI2      63.3    58   335  4.34  4.35  2.75           1081.
##  6 0.24  Very~ J     VVS2     62.8    57   336  3.94  3.96  2.48           1400 
##  7 0.24  Very~ I     VVS1     62.3    57   336  3.95  3.98  2.47           1400 
##  8 0.26  Very~ H     SI1      61.9    55   337  4.07  4.11  2.53           1296.
##  9 0.22  Fair  E     VS2      65.1    61   337  3.87  3.78  2.49           1532.
## 10 0.23  Very~ H     VS1      59.4    61   338  4     4.05  2.39           1470.
## # ... with 53,930 more rows

# Question 3: mean price within each cut.
diamonds_group_by_cut <- diamonds %>%
  group_by(cut) %>%
  summarise(mean_price = mean(price))

## `summarise()` ungrouping output (override with `.groups` argument)

diamonds_group_by_cut

## # A tibble: 5 x 2
##   cut       mean_price
##   <ord>          <dbl>
## 1 Fair           4359.
## 2 Good           3929.
## 3 Very Good      3982.
## 4 Premium        4584.
## 5 Ideal          3458.

# Question 4: store the literal answer string and show it.
answer <- "free bingo"
answer

## [1] "free bingo"

# Question 5: mean of the price and table columns within each color.
diamonds_group_by_color <- diamonds %>%
  group_by(color) %>%
  summarise(
    mean_price = mean(price),
    mean_table = mean(table)
  )

## `summarise()` ungrouping output (override with `.groups` argument)

diamonds_group_by_color

## # A tibble: 7 x 3
##   color mean_price mean_table
##   <ord>      <dbl>      <dbl>
## 1 D          3170.       57.4
## 2 E          3077.       57.5
## 3 F          3725.       57.4
## 4 G          3999.       57.3
## 5 H          4487.       57.5
## 6 I          5092.       57.6
## 7 J          5324.       57.8

# Question 6: mean carat (used here as the size measure) within each color.
diamonds_group_by_color_for_size <- diamonds %>%
  group_by(color) %>%
  summarise(mean_size = mean(carat))

## `summarise()` ungrouping output (override with `.groups` argument)

diamonds_group_by_color_for_size

## # A tibble: 7 x 2
##   color mean_size
##   <ord>     <dbl>
## 1 D         0.658
## 2 E         0.658
## 3 F         0.737
## 4 G         0.771
## 5 H         0.912
## 6 I         1.03 
## 7 J         1.16

#on average J has the largest diamonds

# Question 7: keep only the "Ideal" cut, then count the rows for each color.
diamonds_type_J_cut_ideal <- diamonds %>%
  filter(cut == "Ideal") %>%
  group_by(color) %>%
  summarise(count = n())

## `summarise()` ungrouping output (override with `.groups` argument)

diamonds_type_J_cut_ideal

## # A tibble: 7 x 2
##   color count
##   <ord> <int>
## 1 D      2834
## 2 E      3903
## 3 F      3826
## 4 G      4884
## 5 H      3115
## 6 I      2093
## 7 J       896

#from this we know the color G has the most Ideal cut diamonds


# Problem 8: mean of table / carat within each clarity.
# The ratio is computed directly inside summarise(); every row still
# contributes its own table / carat value to its clarity group's mean.
diamonds_group_by_clarity <- diamonds %>%
  group_by(clarity) %>%
  summarise(mean_table_per_carat = mean(table / carat))

## `summarise()` ungrouping output (override with `.groups` argument)

diamonds_group_by_clarity

## # A tibble: 8 x 2
##   clarity mean_table_per_carat
##   <ord>                  <dbl>
## 1 I1                      56.3
## 2 SI2                     69.1
## 3 SI1                     89.6
## 4 VS2                    103. 
## 5 VS1                    107. 
## 6 VVS2                   127. 
## 7 VVS1                   141. 
## 8 IF                     140.

# For this, the clarity "VVS1" has the highest average table over carat.


# Problem 9: mean price of the diamonds whose price is at least 10000.
diamonds_price_G_10000 <- diamonds %>%
  filter(price >= 10000) %>%
  summarise(average_price = mean(price))
diamonds_price_G_10000

## # A tibble: 1 x 1
##   average_price
##           <dbl>
## 1        13640.

# the average price is 13640 dollars


# Problem 10: among diamonds priced at least 10000, count rows per clarity.
diamonds_price_G_10000_with_clarity <- diamonds %>%
  filter(price >= 10000) %>%
  group_by(clarity) %>%
  summarise(count = n())

## `summarise()` ungrouping output (override with `.groups` argument)

diamonds_price_G_10000_with_clarity

## # A tibble: 8 x 2
##   clarity count
##   <ord>   <int>
## 1 I1         30
## 2 SI2      1239
## 3 SI1      1184
## 4 VS2      1156
## 5 VS1       747
## 6 VVS2      452
## 7 VVS1      247
## 8 IF        168

#the most common clarity is SI2
#part 3


# Load in the data
data("ToothGrowth")


# Learn about the data.
# The help page is only opened in an interactive session, so batch runs
# (Rscript, knitting) are not side-tracked by the help viewer.
if (interactive()) {
  ?ToothGrowth
}

## starting httpd help server ...

##  done

# Structure of the dataset
str(ToothGrowth)

## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...

# Look at the data.
# View() opens a GUI data viewer, which is not available in a
# non-interactive run, so it is skipped there instead of aborting the script.
if (interactive()) {
  View(ToothGrowth)
}

#problem 1
# This data represents the length of odontoblasts in 60 guinea pigs. The data varies by how much vitamin C each animal was given and by which delivery method.

#problem 2
# len is the length of the cells; this is a numerical column. supp is the supplement they were given, either OJ or VC; this column is categorical. Finally, dose is the amount of the supplement they were given; this is also numerical.

#problem 3
# len is the response variable and the others (supp and dose) are explanatory.

#problem 4
# I think that vitamin C will increase tooth growth, and I think the maximum vitamin dose will increase it the most, i.e. VC at 2 mg/day.

# Problem 5: box plots of len for each supplement type (supp), with the
# boxes colored by supp.
ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
  geom_boxplot() +
  labs(y = "length of teeth cells") +
  # Title spelling corrected ("lenght" -> "length", "treaments" -> "treatments").
  ggtitle("tooth length across both treatments")

# Problem 6: the same box plots of len by supp, drawn in one panel per dose.
ggplot(ToothGrowth, aes(x = supp, y = len, color = supp)) +
  geom_boxplot() +
  # label_both prints "dose: <value>" on every strip, so each panel names the
  # variable it was split on.
  facet_wrap(~ dose, labeller = label_both) +
  # The x axis shows supp, not dose, so it is labelled as the supplement; the
  # dose belonging to each panel is given by the strip above it.
  labs(
    x = "supplement type (panels: dose in mg/day)",
    y = "length of teeth cells"
  ) +
  # Title spelling corrected ("lenght" -> "length", "treaments" -> "treatments").
  ggtitle("tooth length across both treatments faceted across all levels of dosage")

#problem 7
# Well, to say I'm surprised is an understatement: it looks like OJ is, in most cases, a far more reliable and effective supplement than VC. As the dosage increases we see an increase in tooth length.
# We also see OJ outperform VC consistently until the dose hits 2 mg. Even then a case can be made that OJ is still the better supplement, as it has less variance than VC at 2 mg.

#problem 8
# As stated before, I'm surprised OJ did so well, although I'd like to argue that in some sense my hypothesis was correct, because the largest tooth length was achieved by VC at 2 mg.
# But I will admit OJ performed much better than I anticipated.

mid-term-1.R

jadri

2020-09-28