#Load Libraries and Data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Read Fungi.csv (IsolateID, FungusID, Temperature, AveDaily) and print every row.
Fungi <- read.csv(file = "~/Biostats 2024/Data/Fungi.csv")
print(Fungi)
## IsolateID FungusID Temperature AveDaily
## 1 A1028 Fusarium B 20 7.700000
## 2 A1028 Fusarium B 25 9.686667
## 3 A1030 Fusarium A 20 7.043333
## 4 A1030 Fusarium A 25 9.200000
## 5 A1048 Epicoccum 20 8.930000
## 6 A1048 Epicoccum 25 9.156667
## 7 A1101 Fusarium D 20 7.586667
## 8 A1101 Fusarium D 25 10.653333
## 9 A1113 Clonostachys 20 5.500000
## 10 A1113 Clonostachys 25 7.400000
## 11 A1130 Fusarium C 20 6.976667
## 12 A1130 Fusarium C 25 9.856667
## 13 A2007 Pleosporales 20 7.133333
## 14 A2007 Pleosporales 25 7.756667
## 15 A2012 Epicoccum 20 9.765000
## 16 A2012 Epicoccum 25 9.930000
## 17 A2020 Chalastospora 20 7.333333
## 18 A2020 Chalastospora 25 8.363333
## 19 A2033 Nigrospora 20 10.622222
## 20 A2033 Nigrospora 25 9.588889
## 21 A2040 Didymella 20 6.800000
## 22 A2040 Didymella 25 7.533333
## 23 A2048 Fusarium A 20 6.888889
## 24 A2048 Fusarium A 25 9.811111
## 25 A2078 Fusarium B 20 8.876667
## 26 A2078 Fusarium B 25 10.530000
## 27 A2084 Fusarium D 20 7.756667
## 28 A2084 Fusarium D 25 10.090000
## 29 A2096 Fusarium C 20 6.600000
## 30 A2096 Fusarium C 25 9.356667
## 31 A4024 Alternaria 20 4.956667
## 32 A4024 Alternaria 25 6.123333
## 33 A4025 Nigrospora 20 9.120000
## 34 A4025 Nigrospora 25 4.410000
## 35 A4089 Fusarium B 20 8.122222
## 36 A4089 Fusarium B 25 10.588889
## 37 A5002 Fusarium A 20 6.690000
## 38 A5002 Fusarium A 25 9.546667
## 39 A5010 Fusarium B 20 8.536667
## 40 A5010 Fusarium B 25 10.433333
## 41 A5015 Didymella 20 6.566667
## 42 A5015 Didymella 25 7.413333
## 43 A5022 Alternaria 20 7.233333
## 44 A5022 Alternaria 25 8.050000
## 45 A5026 Nigrospora 20 10.610000
## 46 A5026 Nigrospora 25 6.700000
## 47 A5108 Fusarium D 20 8.633333
## 48 A5108 Fusarium D 25 10.800000
## 49 B1031 Fusarium C 20 6.823333
## 50 B1031 Fusarium C 25 9.756667
## 51 B1032 Chalastospora 20 4.510000
## 52 B1032 Chalastospora 25 5.490000
## 53 B1033 Alternaria 20 7.313333
## 54 B1033 Alternaria 25 8.710000
## 55 B1040 Fusarium B 20 7.611111
## 56 B1040 Fusarium B 25 9.355556
## 57 B1066 Fusarium B 20 9.315000
## 58 B1066 Fusarium B 25 10.915000
## 59 B1078 Fusarium A 20 6.896667
## 60 B1078 Fusarium A 25 9.456667
## 61 B1111 Fusarium B 20 7.411111
## 62 B1111 Fusarium B 25 10.111111
## 63 B1150 Fusarium D 20 7.543333
## 64 B1150 Fusarium D 25 9.623333
## 65 B2076 Fusarium C 20 7.203333
## 66 B2076 Fusarium C 25 9.776667
## 67 B3027 Fusarium B 20 8.320000
## 68 B3027 Fusarium B 25 10.650000
## 69 B3028 Didymella 20 6.713333
## 70 B3028 Didymella 25 7.963333
## 71 B3030 Fusarium A 20 7.123333
## 72 B3030 Fusarium A 25 9.846667
## 73 B3031 Chalastospora 20 7.823333
## 74 B3031 Chalastospora 25 9.110000
## 75 B3087 Fusarium A 20 6.188889
## 76 B3087 Fusarium A 25 9.433333
## 77 B3098 Chalastospora 20 7.976667
## 78 B3098 Chalastospora 25 8.933333
## 79 B3099 Clonostachys 20 8.603333
## 80 B3099 Clonostachys 25 9.336667
## 81 B5016 Alternaria 20 8.086667
## 82 B5016 Alternaria 25 9.390000
## 83 B5018 Fusarium A 20 6.933333
## 84 B5018 Fusarium A 25 9.315000
## 85 B5046 Alternaria 20 6.813333
## 86 B5046 Alternaria 25 6.100000
## 87 B5085 Nigrospora 20 9.410000
## 88 B5085 Nigrospora 25 5.956667
## 89 B5086 Epicoccum 20 9.376667
## 90 B5086 Epicoccum 25 10.123333
## 91 B5102 Clonostachys 20 3.233333
## 92 B5102 Clonostachys 25 5.430000
## 93 B5114 Clonostachys 20 7.633333
## 94 B5114 Clonostachys 25 9.200000
## 95 C3038 Nigrospora 20 8.823333
## 96 C3038 Nigrospora 25 6.386667
## 97 C3053 Fusarium C 20 6.556667
## 98 C3053 Fusarium C 25 8.153333
## 99 C4005 Epicoccum 20 8.188889
## 100 C4005 Epicoccum 25 10.066667
## 101 C4007 Epicoccum 20 9.435000
## 102 C4007 Epicoccum 25 9.866667
## 103 C4011 Fusarium B 20 7.322222
## 104 C4011 Fusarium B 25 10.111111
## 105 C4012 Fusarium A 20 6.688889
## 106 C4012 Fusarium A 25 9.677778
## 107 C4014 Fusarium C 20 6.833333
## 108 C4014 Fusarium C 25 9.590000
## 109 C4016 Fusarium D 20 6.133333
## 110 C4016 Fusarium D 25 8.676667
## 111 C4052 Alternaria 20 7.590000
## 112 C4052 Alternaria 25 7.933333
## 113 C4062 Didymella 20 6.413333
## 114 C4062 Didymella 25 6.870000
## 115 C4079 Fusarium A 20 7.136667
## 116 C4079 Fusarium A 25 9.550000
## 117 C4080 Chalastospora 20 8.070000
## 118 C4080 Chalastospora 25 8.533333
## 119 C5011 Nigrospora 20 11.043333
## 120 C5011 Nigrospora 25 5.576667
## 121 C5013 Epicoccum 20 9.390000
## 122 C5013 Epicoccum 25 10.233333
## 123 C5038 Didymella 20 6.670000
## 124 C5038 Didymella 25 7.963333
## 125 C5039 Chalastospora 20 6.843333
## 126 C5039 Chalastospora 25 7.790000
## 127 C5062 Fusarium D 20 7.443333
## 128 C5062 Fusarium D 25 10.513333
## 129 D4006 Fusarium B 20 8.613333
## 130 D4006 Fusarium B 25 10.720000
## 131 D4017 Didymella 20 6.176667
## 132 D4017 Didymella 25 6.900000
## 133 D4033 Clonostachys 20 3.310000
## 134 D4033 Clonostachys 25 4.433333
## 135 D4051 Fusarium A 20 7.000000
## 136 D4051 Fusarium A 25 9.688889
# Read BlackbirdTesto.csv (blackbird, Antibody.production, Treatment) and
# print every row.
BlackBird <- read.csv(file = "~/Biostats 2024/Data/BlackbirdTesto.csv")
print(BlackBird)
## blackbird Antibody.production Treatment
## 1 1 105 Before
## 2 2 50 Before
## 3 3 136 Before
## 4 4 90 Before
## 5 5 122 Before
## 6 6 132 Before
## 7 7 131 Before
## 8 8 119 Before
## 9 9 145 Before
## 10 10 130 Before
## 11 11 116 Before
## 12 12 110 Before
## 13 13 138 Before
## 14 1 85 After
## 15 2 74 After
## 16 3 145 After
## 17 4 86 After
## 18 5 148 After
## 19 6 148 After
## 20 7 150 After
## 21 8 142 After
## 22 9 151 After
## 23 10 113 After
## 24 11 118 After
## 25 12 99 After
## 26 13 150 After
# Read Horns.csv (HornLength, Survival) and print every row.
Horns <- read.csv(file = "~/Biostats 2024/Data/Horns.csv")
print(Horns)
## HornLength Survival
## 1 15.2 killed
## 2 15.5 killed
## 3 17.2 killed
## 4 19.0 killed
## 5 19.3 killed
## 6 20.2 killed
## 7 20.7 killed
## 8 21.0 killed
## 9 21.4 killed
## 10 21.4 killed
## 11 21.6 killed
## 12 21.7 killed
## 13 21.7 killed
## 14 22.0 killed
## 15 22.0 killed
## 16 22.5 killed
## 17 22.6 killed
## 18 22.9 killed
## 19 23.1 killed
## 20 23.2 killed
## 21 23.4 killed
## 22 23.5 killed
## 23 23.9 killed
## 24 23.9 killed
## 25 23.9 killed
## 26 24.6 killed
## 27 25.0 killed
## 28 25.2 killed
## 29 25.3 killed
## 30 26.7 killed
## 31 13.1 living
## 32 15.7 living
## 33 17.7 living
## 34 18.5 living
## 35 19.2 living
## 36 19.2 living
## 37 19.2 living
## 38 19.7 living
## 39 19.9 living
## 40 20.0 living
## 41 20.5 living
## 42 20.6 living
## 43 20.6 living
## 44 20.6 living
## 45 20.7 living
## 46 20.7 living
## 47 20.8 living
## 48 20.9 living
## 49 21.1 living
## 50 21.3 living
## 51 21.4 living
## 52 21.4 living
## 53 21.5 living
## 54 21.5 living
## 55 21.6 living
## 56 21.7 living
## 57 21.7 living
## 58 21.9 living
## 59 22.4 living
## 60 22.4 living
## 61 22.4 living
## 62 22.4 living
## 63 22.5 living
## 64 22.5 living
## 65 22.7 living
## 66 22.8 living
## 67 22.9 living
## 68 23.0 living
## 69 23.0 living
## 70 23.0 living
## 71 23.0 living
## 72 23.1 living
## 73 23.2 living
## 74 23.2 living
## 75 23.2 living
## 76 23.2 living
## 77 23.3 living
## 78 23.3 living
## 79 23.3 living
## 80 23.4 living
## 81 23.5 living
## 82 23.5 living
## 83 23.5 living
## 84 23.5 living
## 85 23.6 living
## 86 23.6 living
## 87 23.7 living
## 88 23.7 living
## 89 23.8 living
## 90 23.8 living
## 91 23.8 living
## 92 23.9 living
## 93 23.9 living
## 94 24.0 living
## 95 24.0 living
## 96 24.0 living
## 97 24.1 living
## 98 24.2 living
## 99 24.2 living
## 100 24.2 living
## 101 24.3 living
## 102 24.4 living
## 103 24.5 living
## 104 24.5 living
## 105 24.5 living
## 106 24.5 living
## 107 24.5 living
## 108 24.6 living
## 109 24.7 living
## 110 24.7 living
## 111 24.8 living
## 112 24.8 living
## 113 24.9 living
## 114 25.0 living
## 115 25.0 living
## 116 25.0 living
## 117 25.1 living
## 118 25.1 living
## 119 25.1 living
## 120 25.2 living
## 121 25.3 living
## 122 25.3 living
## 123 25.3 living
## 124 25.4 living
## 125 25.5 living
## 126 25.5 living
## 127 25.5 living
## 128 25.5 living
## 129 25.5 living
## 130 25.5 living
## 131 25.6 living
## 132 25.6 living
## 133 25.6 living
## 134 25.6 living
## 135 25.6 living
## 136 25.6 living
## 137 25.6 living
## 138 25.7 living
## 139 25.7 living
## 140 25.8 living
## 141 25.8 living
## 142 25.8 living
## 143 25.9 living
## 144 25.9 living
## 145 26.0 living
## 146 26.0 living
## 147 26.0 living
## 148 26.1 living
## 149 26.1 living
## 150 26.3 living
## 151 26.3 living
## 152 26.3 living
## 153 26.4 living
## 154 26.5 living
## 155 26.5 living
## 156 26.5 living
## 157 26.6 living
## 158 26.6 living
## 159 26.6 living
## 160 26.7 living
## 161 26.8 living
## 162 26.9 living
## 163 27.0 living
## 164 27.0 living
## 165 27.1 living
## 166 27.1 living
## 167 27.1 living
## 168 27.2 living
## 169 27.3 living
## 170 27.3 living
## 171 27.3 living
## 172 27.4 living
## 173 27.4 living
## 174 27.4 living
## 175 27.5 living
## 176 27.6 living
## 177 27.7 living
## 178 28.0 living
## 179 28.1 living
## 180 28.2 living
## 181 28.8 living
## 182 29.1 living
## 183 29.5 living
## 184 30.3 living
# Show the structure (row count, column names and types) of each data set,
# in the order Fungi, BlackBird, Horns.
invisible(lapply(list(Fungi, BlackBird, Horns), str))
## 'data.frame': 136 obs. of 4 variables:
## $ IsolateID : chr "A1028" "A1028" "A1030" "A1030" ...
## $ FungusID : chr "Fusarium B" "Fusarium B" "Fusarium A" "Fusarium A" ...
## $ Temperature: int 20 25 20 25 20 25 20 25 20 25 ...
## $ AveDaily : num 7.7 9.69 7.04 9.2 8.93 ...
## 'data.frame': 26 obs. of 3 variables:
## $ blackbird : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Antibody.production: int 105 50 136 90 122 132 131 119 145 130 ...
## $ Treatment : chr "Before" "Before" "Before" "Before" ...
## 'data.frame': 184 obs. of 2 variables:
## $ HornLength: num 15.2 15.5 17.2 19 19.3 20.2 20.7 21 21.4 21.4 ...
## $ Survival : chr "killed" "killed" "killed" "killed" ...
#Define the Hypothesis and Plot the Data
#2.1 Null hypotheses: for the fungi data, the mean daily growth (AveDaily) of an isolate does not differ between the two temperatures (20 and 25). For the blackbird data, the treatment does not affect antibody production (the mean Before-After difference is 0). For the horn data, mean horn length does not differ between killed and living lizards.
# Mean and standard error of antibody production for each Treatment group.
# The SE is computed inside each group (sd / sqrt(group size)) so that every
# error bar reflects the spread and sample size of the group it is drawn on.
bird.summary <- BlackBird %>%
  group_by(Treatment) %>%
  summarize(
    mean.antibody = mean(Antibody.production),
    se.antibody = sd(Antibody.production) / sqrt(n())
  )

# Mean and standard error of horn length for each Survival group.
horn.summary <- Horns %>%
  group_by(Survival) %>%
  summarize(
    mean.hornLength = mean(HornLength),
    se.hornLength = sd(HornLength) / sqrt(n())
  )

# Group means with +/- 1 SE error bars; the SE columns come from the summary
# tables above, so each group gets its own bar length.
ggplot(bird.summary, aes(x = Treatment, y = mean.antibody)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = mean.antibody - se.antibody,
      ymax = mean.antibody + se.antibody
    ),
    width = 0.25
  )
ggplot(horn.summary, aes(x = Survival, y = mean.hornLength)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = mean.hornLength - se.hornLength,
      ymax = mean.hornLength + se.hornLength
    ),
    width = 0.25
  )
# Check Statistical Assumptions for BlackBird Data Set
# Split the measurements by treatment. Each half is sorted by bird ID so that
# row i of `Before` and row i of `After` always belong to the same blackbird,
# independent of the row order in the CSV.
Before <- BlackBird %>%
  filter(Treatment == "Before") %>%
  arrange(blackbird)
After <- BlackBird %>%
  filter(Treatment == "After") %>%
  arrange(blackbird)
# Fail loudly if a bird is missing from either half; an element-wise
# subtraction would otherwise pair up different birds without any error.
stopifnot(identical(Before$blackbird, After$blackbird))
# Per-bird paired difference (Before minus After); the normality assumption of
# the paired t-test applies to these differences.
Diff <- Before$Antibody.production - After$Antibody.production
shapiro.test(Diff)
##
## Shapiro-Wilk normality test
##
## data: Diff
## W = 0.93148, p-value = 0.3563
#3.2 The data meet the normality assumption for a paired t-test: the Shapiro-Wilk test on the paired differences gives W = 0.93148, p = 0.3563. Because the p-value is greater than 0.05, we fail to reject the null hypothesis of normality, so the differences are consistent with a normal distribution, which is a requirement for conducting a paired t-test.
# Shapiro-Wilk test on the Before measurements alone, for comparison with the
# test on the paired differences (`Diff`).
shapiro.test(Before$Antibody.production)
##
## Shapiro-Wilk normality test
##
## data: Before$Antibody.production
## W = 0.84747, p-value = 0.02644
# Same test on the After measurements alone.
shapiro.test(After$Antibody.production)
##
## Shapiro-Wilk normality test
##
## data: After$Antibody.production
## W = 0.82586, p-value = 0.01406
#3.4 Both Shapiro-Wilk tests on the individual Before and After samples give p-values below 0.05 (p = 0.02644 and p = 0.01406), unlike the test on the differences (p = 0.3563). This means that for a paired t-test it is important to always test the paired differences rather than the individual samples in order to obtain accurate statistical results.
# Paired t-test of antibody production, Before vs After.
# The test pairs the two vectors element by element, so first confirm that
# both halves list the same birds in the same order.
stopifnot(identical(Before$blackbird, After$blackbird))
t.test(Before$Antibody.production, After$Antibody.production, paired = TRUE)
##
## Paired t-test
##
## data: Before$Antibody.production and After$Antibody.production
## t = -1.5079, df = 12, p-value = 0.1575
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -15.98626 2.90934
## sample estimates:
## mean difference
## -6.538462
#4.2 The output gives the t statistic (t = -1.5079), the degrees of freedom (df = 12) and the p-value (p = 0.1575). It also shows the 95% confidence interval for the mean difference (-15.99 to 2.91) and the mean difference itself (-6.54). A paired t-test was the best option because the two samples are not independent: each blackbird was measured both Before and After, and we want to know what changed between the two states. Because the measurements go hand-in-hand, we look at each pair together, rather than at the two groups separately, to get a clearer idea of what is changing.
# Paired plot: one grey point per measurement and one line per blackbird.
# Treatment is turned into a factor with explicit levels so that "Before" is
# drawn on the left; the default alphabetical order would put "After" first.
ggplot(
  BlackBird,
  aes(
    x = factor(Treatment, levels = c("Before", "After")),
    y = Antibody.production
  )
) +
  geom_point(size = 6, color = "darkgrey") +
  geom_line(aes(group = blackbird)) +
  labs(x = "Treatment") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18, face = "bold")
  )
# Complete Unpaired t-test (a.k.a two-sample)
#5.1 The null hypothesis is that the mean horn length of the killed group is equal to the mean horn length of the living group. The alternative hypothesis is that the two means are not equal to each other.
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
# Shapiro-Wilk normality test of HornLength within each Survival group.
check <- shapiro_test(group_by(Horns, Survival), HornLength)
print(check)
## # A tibble: 2 × 4
## Survival variable statistic p
## <chr> <chr> <dbl> <dbl>
## 1 killed HornLength 0.935 0.0648
## 2 living HornLength 0.961 0.000223
# Levene's test for equal variance of horn length between the Survival groups.
# Survival is a character column, so it is converted to a factor explicitly
# instead of relying on leveneTest() to coerce it with a warning.
leveneTest(HornLength ~ factor(Survival), data = Horns)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0035 0.953
## 182
# Two-sample t-test of horn length by survival. var.equal = TRUE requests the
# pooled-variance test, which the Levene result above (p = 0.953) supports.
t.test(HornLength ~ Survival, data = Horns, var.equal = TRUE)
##
## Two Sample t-test
##
## data: HornLength by Survival
## t = -4.3494, df = 182, p-value = 2.27e-05
## alternative hypothesis: true difference in means between group killed and group living is not equal to 0
## 95 percent confidence interval:
## -3.335402 -1.253602
## sample estimates:
## mean in group killed mean in group living
## 21.98667 24.28117
#5.5 We can reject the null hypothesis that the difference between the mean horn lengths of the killed group and the living group is 0 (t = -4.3494, df = 182, p = 2.27e-05).
#5.6 Yes, all observations in each of these data sets are independent of one another because each lizard was only measured once.
# Keep only the rows whose FungusID is "Chalastospora" and print them.
Chal <- Fungi %>%
  filter(FungusID == "Chalastospora")
print(Chal)
## IsolateID FungusID Temperature AveDaily
## 1 A2020 Chalastospora 20 7.333333
## 2 A2020 Chalastospora 25 8.363333
## 3 B1032 Chalastospora 20 4.510000
## 4 B1032 Chalastospora 25 5.490000
## 5 B3031 Chalastospora 20 7.823333
## 6 B3031 Chalastospora 25 9.110000
## 7 B3098 Chalastospora 20 7.976667
## 8 B3098 Chalastospora 25 8.933333
## 9 C4080 Chalastospora 20 8.070000
## 10 C4080 Chalastospora 25 8.533333
## 11 C5039 Chalastospora 20 6.843333
## 12 C5039 Chalastospora 25 7.790000
# Chalastospora measurements taken at Temperature 20.
# Temperature is an integer column, so it is compared with a number rather
# than a string (which would only match through implicit type coercion).
lowTemp <- filter(Chal, Temperature == 20)
lowTemp
## IsolateID FungusID Temperature AveDaily
## 1 A2020 Chalastospora 20 7.333333
## 2 B1032 Chalastospora 20 4.510000
## 3 B3031 Chalastospora 20 7.823333
## 4 B3098 Chalastospora 20 7.976667
## 5 C4080 Chalastospora 20 8.070000
## 6 C5039 Chalastospora 20 6.843333
# Chalastospora measurements taken at Temperature 25.
# Temperature is an integer column, so it is compared with a number rather
# than a string (which would only match through implicit type coercion).
highTemp <- filter(Chal, Temperature == 25)
highTemp
## IsolateID FungusID Temperature AveDaily
## 1 A2020 Chalastospora 25 8.363333
## 2 B1032 Chalastospora 25 5.490000
## 3 B3031 Chalastospora 25 9.110000
## 4 B3098 Chalastospora 25 8.933333
## 5 C4080 Chalastospora 25 8.533333
## 6 C5039 Chalastospora 25 7.790000
# Paired t-test of daily growth at Temperature 20 vs 25.
# The test pairs the two vectors element by element, so first confirm that
# both tables list the same isolates in the same order.
stopifnot(identical(lowTemp$IsolateID, highTemp$IsolateID))
t.test(lowTemp$AveDaily, highTemp$AveDaily, paired = TRUE)
##
## Paired t-test
##
## data: lowTemp$AveDaily and highTemp$AveDaily
## t = -8.6487, df = 5, p-value = 0.0003414
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.2244321 -0.6633457
## sample estimates:
## mean difference
## -0.9438889
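# 6.3 The paired t-test assumptions are taken to be met: the paired differences are normally distributed, each isolate is independent of the others, and there are no extreme outliers. (Note: no Shapiro-Wilk test on the Chalastospora differences is shown above to confirm normality.)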
# 6.4 We were able to reject the null hypothesis that the true mean difference in growth between the lower temperature (20) and the higher temperature (25) is 0 (t = -8.6487, df = 5, p = 0.0003).
# Mean daily growth of the Chalastospora isolates at each temperature.
Chal.summary <- Chal %>%
  group_by(Temperature) %>%
  summarize(mean.AveDaily = mean(AveDaily))
# Standard error of the mean at each temperature: sd / sqrt(group size),
# computed within the group so it uses that group's own spread and row count.
Chal.se <- Chal %>%
  group_by(Temperature) %>%
  summarize(se.AveDaily = sd(AveDaily) / sqrt(n()))
Chal.summary
## # A tibble: 2 × 2
## Temperature mean.AveDaily
## <int> <dbl>
## 1 20 7.09
## 2 25 8.04
# Group means with +/- 1 SE error bars; the SE is joined on by Temperature so
# each mean is drawn with the error bar of its own group.
ggplot(
  left_join(Chal.summary, Chal.se, by = "Temperature"),
  aes(x = Temperature, y = mean.AveDaily)
) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = mean.AveDaily - se.AveDaily,
      ymax = mean.AveDaily + se.AveDaily
    ),
    width = 0.25
  )
#6.6 Each fungal isolate was measured twice: once at 20 degrees and once at 25. The two measurements on the same isolate are therefore not independent of one another, which is why we use a paired t-test to represent the trend.
# Two-sample (unpaired) t-test on the same Chalastospora data, treating the
# two temperatures as independent groups. The formula names the columns
# directly and lets `data = Chal` supply them.
t.test(AveDaily ~ Temperature, data = Chal, var.equal = TRUE)
##
## Two Sample t-test
##
## data: AveDaily by Temperature
## t = -1.2214, df = 10, p-value = 0.25
## alternative hypothesis: true difference in means between group 20 and group 25 is not equal to 0
## 95 percent confidence interval:
## -2.6657931 0.7780154
## sample estimates:
## mean in group 20 mean in group 25
## 7.092778 8.036667
#6.7 The p-value is a lot higher (0.25 versus 0.0003) when the same data are analysed with a two-sample t-test. I believe that this is because the two-sample test treats the 20 and 25 degree measurements as separate, independent groups, so the large variation between isolates is included in the variation of each group. Using a paired test allows us to look at the difference within each isolate, which removes that between-isolate variation.