#Chunk 1 Load Libraries and Data

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the two built-in example datasets into the workspace, then keep
# working copies under the names used throughout the rest of this script.
data(list = c("InsectSprays", "iris"))
Buggies <- InsectSprays
purpleeyeball <- iris
# this should be invisible

#Chunk 2 Data Exploration

# Open the built-in help page for the iris data.
help("iris")
## starting httpd help server ... done
# The three species explored in this data set are Iris setosa, versicolor,
# and virginica.
# Open the built-in help page for the insect-spray data.
help("InsectSprays")
# The variable `spray` has 6 levels, one per insecticide used.
# First six rows of the insect-spray data.
head(Buggies, n = 6L)
##   count spray
## 1    10     A
## 2     7     A
## 3    20     A
## 4    14     A
## 5    14     A
## 6    12     A
# Structure: number of rows, and the type of each column.
str(object = Buggies)
## 'data.frame':    72 obs. of  2 variables:
##  $ count: num  10 7 20 14 14 12 10 23 17 20 ...
##  $ spray: Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary: quartiles and mean for count, tallies for spray.
summary(object = Buggies)
##      count       spray 
##  Min.   : 0.00   A:12  
##  1st Qu.: 3.00   B:12  
##  Median : 7.00   C:12  
##  Mean   : 9.50   D:12  
##  3rd Qu.:14.25   E:12  
##  Max.   :26.00   F:12
# First six rows of the iris data.
head(purpleeyeball, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Structure: number of rows, and the type of each column.
str(object = purpleeyeball)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary: quartiles and mean for the measurements, tallies for Species.
summary(object = purpleeyeball)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Read the leaf data CSV from the course folder under the home directory.
LeafData_corrected <- read.csv(file = "~/Biostats 2024/Data/LeafData_corrected.csv")
#These functions help you understand what the data contain: head() shows the first rows of the data, str() lists each column's type along with a few example values, and summary() reports essential statistics (such as the median and quartiles) that help you build box plots and graphs later on in your code.

#Chunk 3 My First Boxplot

# Boxplot of insect counts for each spray. Column names are passed to aes()
# unquoted; an earlier attempt with quoted names (aes(x="spray", y="count"))
# was abandoned.
ggplot(Buggies, aes(spray, count)) +
  geom_boxplot()

#The black line inside each box is the median, the box contains the middle 50% of the data, and the whiskers cover the remaining 50% of the data above and below the box, with each end marking the highest or lowest value recorded that is not a technical outlier.
# Violin plot of sepal length for each iris species, filled by species.
# The iris measurements are recorded in centimetres (see ?iris), so the
# axis label uses cm rather than mm.
ggplot(data = purpleeyeball, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_violin() +
  ylab("Sepal Length (cm)")

# Chunk 4 Histograms

# Histogram of iris sepal width with 12 bins. The iris measurements are
# recorded in centimetres (see ?iris), so every axis label below uses cm.
ggplot(data = purpleeyeball, aes(x = Sepal.Width)) +
  geom_histogram(bins = 12, color = "black", fill = "white") +
  xlab("Sepal Width (cm)")

# Same data with 10 bins and a different colour scheme; stored, then printed.
differentOne <- ggplot(data = purpleeyeball, aes(x = Sepal.Width)) +
  geom_histogram(bins = 10, color = "blue", fill = "green") +
  xlab("Sepal Width (cm)")
differentOne

# Same data with 20 bins and a third colour scheme; stored, then printed.
# (A stray apostrophe at the start of this axis label has been removed.)
differentTwo <- ggplot(data = purpleeyeball, aes(x = Sepal.Width)) +
  geom_histogram(bins = 20, color = "purple", fill = "pink") +
  xlab("Sepal Width (cm)")
differentTwo

# Base-graphics histogram of sepal length, using hist()'s default title
# and axis labels.
hist(purpleeyeball$Sepal.Length)

#Chunk 5 Dplyr to manipulate data

library(dplyr)
# Keep only the pine rows of the leaf data, then print the subset.
# dplyr::filter is spelled out because dplyr's filter masks stats::filter.
pine <- dplyr::filter(LeafData_corrected, Species == "pine")
pine
##    Leaf.ID Name Species Deciduous Length Width Damage
## 1        1   MG    pine        no    8.2   0.1      2
## 2        2   MG    pine        no    8.4   0.1      2
## 3        3   MG    pine        no    6.0   0.1      1
## 4        4   MG    pine        no    6.5   0.1      1
## 5        5   MG    pine        no    5.9   0.1      2
## 6        1   SA    pine        no    8.8   0.1      0
## 7        2   SA    pine        no    8.8   0.1      1
## 8        3   SA    pine        no    8.5   0.1      0
## 9        4   SA    pine        no    8.6   0.1      0
## 10       5   SA    pine        no    8.8   0.1      0
## 11       1   MH    pine        no    8.0   0.1      0
## 12       2   MH    pine        no    9.0   0.1      0
## 13       3   MH    pine        no    7.3   0.1      0
## 14       4   MH    pine        no    8.0   0.1      0
## 15       5   MH    pine        no    9.4   0.1      0
## 16       1   IS    pine        no    6.2   0.1      0
## 17       2   IS    pine        no    5.2   0.1      0
## 18       3   IS    pine        no    5.5   0.1      0
## 19       4   IS    pine        no    6.1   0.1      0
## 20       5   IS    pine        no    4.2   0.1      0
## 21       1   AR    pine        no    6.2   0.1      0
## 22       2   AR    pine        no    6.7   0.1      0
## 23       3   AR    pine        no    7.5   0.1      0
## 24       4   AR    pine        no    6.8   0.1      0
## 25       5   AR    pine        no    6.9   0.1      0
## 26       6   JW    pine        no    6.0   0.1      0
## 27       7   JW    pine        no    6.0   0.1      0
## 28       8   JW    pine        no    5.8   0.1      0
## 29       9   JW    pine        no    5.6   0.1      0
## 30      10   JW    pine        no    5.9   0.1      0
## 31       1  AVB    pine        no    7.6   0.1      1
## 32       2  AVB    pine        no    8.0   0.1      1
## 33       3  AVB    pine        no    8.5   0.1      0
## 34       4  AVB    pine        no    7.4   0.1      1
## 35       5  AVB    pine        no    7.6   0.1      1
## 36       1   EO    pine        no    8.9   0.1      0
## 37       2   EO    pine        no    8.9   0.1      0
## 38       3   EO    pine        no    8.9   0.1      0
## 39       4   EO    pine        no    8.8   0.1      0
## 40       5   EO    pine        no    8.8   0.1      0
## 41       6  STA    pine        no    7.8   0.1      0
## 42       7  STA    pine        no    7.9   0.1      0
## 43       8  STA    pine        no    7.9   0.1      0
## 44       9  STA    pine        no    7.7   0.1      0
## 45      10  STA    pine        no    7.8   0.1      0
## 46       6   BS    pine        no    8.4   0.1      0
## 47       7   BS    pine        no    8.1   0.1      0
## 48       8   BS    pine        no    7.3   0.1      0
## 49       9   BS    pine        no    5.2   0.1      0
## 50      10   BS    pine        no    5.5   0.1      0
## 51       1  AEW    pine        no    8.5   0.1      0
## 52       2  AEW    pine        no    8.7   0.1      0
## 53       3  AEW    pine        no    8.6   0.1      0
## 54       4  AEW    pine        no    8.6   0.1      0
## 55       5  AEW    pine        no    8.8   0.1      0
# Keep only the buckeye rows of the leaf data, then print the subset.
# dplyr::filter is spelled out because dplyr's filter masks stats::filter.
notPine <- dplyr::filter(LeafData_corrected, Species == "buckeye")
notPine
##    Leaf.ID Name Species Deciduous Length Width Damage
## 1        6   MH buckeye       yes   19.2   6.1      2
## 2        7   MH buckeye       yes   16.0   5.5      2
## 3        8   MH buckeye       yes   13.7   4.3      1
## 4        9   MH buckeye       yes   21.4   7.3      2
## 5       10   MH buckeye       yes   19.6   6.7      1
## 6        6   IS buckeye       yes   13.7   4.8      3
## 7        7   IS buckeye       yes   26.4   9.9      4
## 8        8   IS buckeye       yes   19.1   6.5      1
## 9        9   IS buckeye       yes   18.2   6.8      2
## 10      10   IS buckeye       yes   22.8   7.8      3
## 11       6   AR buckeye       yes   22.6   8.4      1
## 12       7   AR buckeye       yes   20.5   7.6      3
## 13       8   AR buckeye       yes   16.0   5.9      1
## 14       9   AR buckeye       yes   21.5   7.9      1
## 15      10   AR buckeye       yes   16.7   5.8      1
## 16       1   JW buckeye       yes   14.7   5.8      2
## 17       2   JW buckeye       yes   10.9   3.9      0
## 18       3   JW buckeye       yes   11.5   4.2      2
## 19       4   JW buckeye       yes   27.5   9.0      1
## 20       5   JW buckeye       yes   24.0   8.5      1
## 21       6  AVB buckeye       yes   24.0   9.5      1
## 22       7  AVB buckeye       yes   26.5  10.5      3
## 23       8  AVB buckeye       yes   25.0   9.0      4
## 24       9  AVB buckeye       yes   14.0   6.0      3
## 25      10  AVB buckeye       yes   15.3   5.5      2
## 26       6   EO buckeye       yes   19.5   6.5      2
## 27       7   EO buckeye       yes   18.5   6.8      1
## 28       8   EO buckeye       yes   23.0   8.0      0
## 29       9   EO buckeye       yes   26.0   8.5      0
## 30      10   EO buckeye       yes   25.5   8.0      1
## 31       1  STA buckeye       yes   27.7   8.8      5
## 32       2  STC buckeye       yes   26.5   8.0      1
## 33       3  STA buckeye       yes   28.3   8.5      1
## 34       4  STA buckeye       yes   26.6   8.2      2
## 35       5  STA buckeye       yes   24.3   7.3      1
## 36       1   BS buckeye       yes   24.9   7.6      1
## 37       2   BS buckeye       yes   16.6   6.7      2
## 38       3   BS buckeye       yes   18.0   6.7      0
## 39       4   BS buckeye       yes   16.2   5.6      2
## 40       5   BS buckeye       yes   25.2   8.3      0
## 41       6  AEW buckeye       yes   24.9   8.2      1
## 42       7  AEW buckeye       yes   18.0   6.5      1
## 43       8  AEW buckeye       yes   25.1   8.1      1
## 44       9  AEW buckeye       yes   23.2   8.5      1
## 45      10  AEW buckeye       yes   21.7   8.8      3
# Leaf-length histograms for the two subsets, drawn with the same settings
# (12 bins, black outline, white fill) so their shapes can be compared.
ggplot(pine, aes(Length)) +
  geom_histogram(bins = 12, color = "black", fill = "white") +
  xlab("Pine Leaf Length (cm)")

ggplot(notPine, aes(Length)) +
  geom_histogram(bins = 12, color = "black", fill = "white") +
  xlab("Buckeye Leaf Length (cm)")

#Buckeye is more variable because its values are less consistent than the pine values. Its histogram also shows a less consistent pattern than the pine histogram.

#Chunk 6 Summary Stats

# Centre and spread of iris sepal width: mean, standard deviation, median, IQR.
mean(purpleeyeball[["Sepal.Width"]])
## [1] 3.057333
sd(purpleeyeball[["Sepal.Width"]])
## [1] 0.4358663
median(purpleeyeball[["Sepal.Width"]])
## [1] 3
IQR(purpleeyeball[["Sepal.Width"]])
## [1] 0.5
# The same four statistics for pine leaf length.
mean(pine[["Length"]])
## [1] 7.472727
sd(pine[["Length"]])
## [1] 1.287037
median(pine[["Length"]])
## [1] 7.8
IQR(pine[["Length"]])
## [1] 2.4
# The same four statistics for buckeye leaf length.
mean(notPine[["Length"]])
## [1] 20.9
sd(notPine[["Length"]])
## [1] 4.781261
median(notPine[["Length"]])
## [1] 21.5
IQR(notPine[["Length"]])
## [1] 8.3

#Bonus!

# Mean of each iris measurement column, each returned as a one-row data
# frame with a single column named `avg`.
dplyr::summarise(
  purpleeyeball,
  avg = mean(Sepal.Length)
)
##        avg
## 1 5.843333
dplyr::summarise(
  purpleeyeball,
  avg = mean(Sepal.Width)
)
##        avg
## 1 3.057333
dplyr::summarise(
  purpleeyeball,
  avg = mean(Petal.Length)
)
##     avg
## 1 3.758
dplyr::summarise(
  purpleeyeball,
  avg = mean(Petal.Width)
)
##        avg
## 1 1.199333