#Load Libraries and Data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Fungal growth data: one row per isolate per incubation temperature.
Fungi <- read.csv(file = file.path("~", "Biostats 2024", "Data", "Fungi.csv"))
Fungi
##     IsolateID      FungusID Temperature  AveDaily
## 1       A1028    Fusarium B          20  7.700000
## 2       A1028    Fusarium B          25  9.686667
## 3       A1030    Fusarium A          20  7.043333
## 4       A1030    Fusarium A          25  9.200000
## 5       A1048     Epicoccum          20  8.930000
## 6       A1048     Epicoccum          25  9.156667
## 7       A1101    Fusarium D          20  7.586667
## 8       A1101    Fusarium D          25 10.653333
## 9       A1113  Clonostachys          20  5.500000
## 10      A1113  Clonostachys          25  7.400000
## 11      A1130    Fusarium C          20  6.976667
## 12      A1130    Fusarium C          25  9.856667
## 13      A2007  Pleosporales          20  7.133333
## 14      A2007  Pleosporales          25  7.756667
## 15      A2012     Epicoccum          20  9.765000
## 16      A2012     Epicoccum          25  9.930000
## 17      A2020 Chalastospora          20  7.333333
## 18      A2020 Chalastospora          25  8.363333
## 19      A2033    Nigrospora          20 10.622222
## 20      A2033    Nigrospora          25  9.588889
## 21      A2040     Didymella          20  6.800000
## 22      A2040     Didymella          25  7.533333
## 23      A2048    Fusarium A          20  6.888889
## 24      A2048    Fusarium A          25  9.811111
## 25      A2078    Fusarium B          20  8.876667
## 26      A2078    Fusarium B          25 10.530000
## 27      A2084    Fusarium D          20  7.756667
## 28      A2084    Fusarium D          25 10.090000
## 29      A2096    Fusarium C          20  6.600000
## 30      A2096    Fusarium C          25  9.356667
## 31      A4024    Alternaria          20  4.956667
## 32      A4024    Alternaria          25  6.123333
## 33      A4025    Nigrospora          20  9.120000
## 34      A4025    Nigrospora          25  4.410000
## 35      A4089    Fusarium B          20  8.122222
## 36      A4089    Fusarium B          25 10.588889
## 37      A5002    Fusarium A          20  6.690000
## 38      A5002    Fusarium A          25  9.546667
## 39      A5010    Fusarium B          20  8.536667
## 40      A5010    Fusarium B          25 10.433333
## 41      A5015     Didymella          20  6.566667
## 42      A5015     Didymella          25  7.413333
## 43      A5022    Alternaria          20  7.233333
## 44      A5022    Alternaria          25  8.050000
## 45      A5026    Nigrospora          20 10.610000
## 46      A5026    Nigrospora          25  6.700000
## 47      A5108    Fusarium D          20  8.633333
## 48      A5108    Fusarium D          25 10.800000
## 49      B1031    Fusarium C          20  6.823333
## 50      B1031    Fusarium C          25  9.756667
## 51      B1032 Chalastospora          20  4.510000
## 52      B1032 Chalastospora          25  5.490000
## 53      B1033    Alternaria          20  7.313333
## 54      B1033    Alternaria          25  8.710000
## 55      B1040    Fusarium B          20  7.611111
## 56      B1040    Fusarium B          25  9.355556
## 57      B1066    Fusarium B          20  9.315000
## 58      B1066    Fusarium B          25 10.915000
## 59      B1078    Fusarium A          20  6.896667
## 60      B1078    Fusarium A          25  9.456667
## 61      B1111    Fusarium B          20  7.411111
## 62      B1111    Fusarium B          25 10.111111
## 63      B1150    Fusarium D          20  7.543333
## 64      B1150    Fusarium D          25  9.623333
## 65      B2076    Fusarium C          20  7.203333
## 66      B2076    Fusarium C          25  9.776667
## 67      B3027    Fusarium B          20  8.320000
## 68      B3027    Fusarium B          25 10.650000
## 69      B3028     Didymella          20  6.713333
## 70      B3028     Didymella          25  7.963333
## 71      B3030    Fusarium A          20  7.123333
## 72      B3030    Fusarium A          25  9.846667
## 73      B3031 Chalastospora          20  7.823333
## 74      B3031 Chalastospora          25  9.110000
## 75      B3087    Fusarium A          20  6.188889
## 76      B3087    Fusarium A          25  9.433333
## 77      B3098 Chalastospora          20  7.976667
## 78      B3098 Chalastospora          25  8.933333
## 79      B3099  Clonostachys          20  8.603333
## 80      B3099  Clonostachys          25  9.336667
## 81      B5016    Alternaria          20  8.086667
## 82      B5016    Alternaria          25  9.390000
## 83      B5018    Fusarium A          20  6.933333
## 84      B5018    Fusarium A          25  9.315000
## 85      B5046    Alternaria          20  6.813333
## 86      B5046    Alternaria          25  6.100000
## 87      B5085    Nigrospora          20  9.410000
## 88      B5085    Nigrospora          25  5.956667
## 89      B5086     Epicoccum          20  9.376667
## 90      B5086     Epicoccum          25 10.123333
## 91      B5102  Clonostachys          20  3.233333
## 92      B5102  Clonostachys          25  5.430000
## 93      B5114  Clonostachys          20  7.633333
## 94      B5114  Clonostachys          25  9.200000
## 95      C3038    Nigrospora          20  8.823333
## 96      C3038    Nigrospora          25  6.386667
## 97      C3053    Fusarium C          20  6.556667
## 98      C3053    Fusarium C          25  8.153333
## 99      C4005     Epicoccum          20  8.188889
## 100     C4005     Epicoccum          25 10.066667
## 101     C4007     Epicoccum          20  9.435000
## 102     C4007     Epicoccum          25  9.866667
## 103     C4011    Fusarium B          20  7.322222
## 104     C4011    Fusarium B          25 10.111111
## 105     C4012    Fusarium A          20  6.688889
## 106     C4012    Fusarium A          25  9.677778
## 107     C4014    Fusarium C          20  6.833333
## 108     C4014    Fusarium C          25  9.590000
## 109     C4016    Fusarium D          20  6.133333
## 110     C4016    Fusarium D          25  8.676667
## 111     C4052    Alternaria          20  7.590000
## 112     C4052    Alternaria          25  7.933333
## 113     C4062     Didymella          20  6.413333
## 114     C4062     Didymella          25  6.870000
## 115     C4079    Fusarium A          20  7.136667
## 116     C4079    Fusarium A          25  9.550000
## 117     C4080 Chalastospora          20  8.070000
## 118     C4080 Chalastospora          25  8.533333
## 119     C5011    Nigrospora          20 11.043333
## 120     C5011    Nigrospora          25  5.576667
## 121     C5013     Epicoccum          20  9.390000
## 122     C5013     Epicoccum          25 10.233333
## 123     C5038     Didymella          20  6.670000
## 124     C5038     Didymella          25  7.963333
## 125     C5039 Chalastospora          20  6.843333
## 126     C5039 Chalastospora          25  7.790000
## 127     C5062    Fusarium D          20  7.443333
## 128     C5062    Fusarium D          25 10.513333
## 129     D4006    Fusarium B          20  8.613333
## 130     D4006    Fusarium B          25 10.720000
## 131     D4017     Didymella          20  6.176667
## 132     D4017     Didymella          25  6.900000
## 133     D4033  Clonostachys          20  3.310000
## 134     D4033  Clonostachys          25  4.433333
## 135     D4051    Fusarium A          20  7.000000
## 136     D4051    Fusarium A          25  9.688889
# Blackbird antibody data in long format: each bird appears once per Treatment.
BlackBird <- read.csv(
  file = file.path("~", "Biostats 2024", "Data", "BlackbirdTesto.csv")
)
BlackBird
##    blackbird Antibody.production Treatment
## 1          1                 105    Before
## 2          2                  50    Before
## 3          3                 136    Before
## 4          4                  90    Before
## 5          5                 122    Before
## 6          6                 132    Before
## 7          7                 131    Before
## 8          8                 119    Before
## 9          9                 145    Before
## 10        10                 130    Before
## 11        11                 116    Before
## 12        12                 110    Before
## 13        13                 138    Before
## 14         1                  85     After
## 15         2                  74     After
## 16         3                 145     After
## 17         4                  86     After
## 18         5                 148     After
## 19         6                 148     After
## 20         7                 150     After
## 21         8                 142     After
## 22         9                 151     After
## 23        10                 113     After
## 24        11                 118     After
## 25        12                  99     After
## 26        13                 150     After
# Horn length data: one measurement per row, labelled by survival status.
Horns <- read.csv(file = file.path("~", "Biostats 2024", "Data", "Horns.csv"))
Horns
##     HornLength Survival
## 1         15.2   killed
## 2         15.5   killed
## 3         17.2   killed
## 4         19.0   killed
## 5         19.3   killed
## 6         20.2   killed
## 7         20.7   killed
## 8         21.0   killed
## 9         21.4   killed
## 10        21.4   killed
## 11        21.6   killed
## 12        21.7   killed
## 13        21.7   killed
## 14        22.0   killed
## 15        22.0   killed
## 16        22.5   killed
## 17        22.6   killed
## 18        22.9   killed
## 19        23.1   killed
## 20        23.2   killed
## 21        23.4   killed
## 22        23.5   killed
## 23        23.9   killed
## 24        23.9   killed
## 25        23.9   killed
## 26        24.6   killed
## 27        25.0   killed
## 28        25.2   killed
## 29        25.3   killed
## 30        26.7   killed
## 31        13.1   living
## 32        15.7   living
## 33        17.7   living
## 34        18.5   living
## 35        19.2   living
## 36        19.2   living
## 37        19.2   living
## 38        19.7   living
## 39        19.9   living
## 40        20.0   living
## 41        20.5   living
## 42        20.6   living
## 43        20.6   living
## 44        20.6   living
## 45        20.7   living
## 46        20.7   living
## 47        20.8   living
## 48        20.9   living
## 49        21.1   living
## 50        21.3   living
## 51        21.4   living
## 52        21.4   living
## 53        21.5   living
## 54        21.5   living
## 55        21.6   living
## 56        21.7   living
## 57        21.7   living
## 58        21.9   living
## 59        22.4   living
## 60        22.4   living
## 61        22.4   living
## 62        22.4   living
## 63        22.5   living
## 64        22.5   living
## 65        22.7   living
## 66        22.8   living
## 67        22.9   living
## 68        23.0   living
## 69        23.0   living
## 70        23.0   living
## 71        23.0   living
## 72        23.1   living
## 73        23.2   living
## 74        23.2   living
## 75        23.2   living
## 76        23.2   living
## 77        23.3   living
## 78        23.3   living
## 79        23.3   living
## 80        23.4   living
## 81        23.5   living
## 82        23.5   living
## 83        23.5   living
## 84        23.5   living
## 85        23.6   living
## 86        23.6   living
## 87        23.7   living
## 88        23.7   living
## 89        23.8   living
## 90        23.8   living
## 91        23.8   living
## 92        23.9   living
## 93        23.9   living
## 94        24.0   living
## 95        24.0   living
## 96        24.0   living
## 97        24.1   living
## 98        24.2   living
## 99        24.2   living
## 100       24.2   living
## 101       24.3   living
## 102       24.4   living
## 103       24.5   living
## 104       24.5   living
## 105       24.5   living
## 106       24.5   living
## 107       24.5   living
## 108       24.6   living
## 109       24.7   living
## 110       24.7   living
## 111       24.8   living
## 112       24.8   living
## 113       24.9   living
## 114       25.0   living
## 115       25.0   living
## 116       25.0   living
## 117       25.1   living
## 118       25.1   living
## 119       25.1   living
## 120       25.2   living
## 121       25.3   living
## 122       25.3   living
## 123       25.3   living
## 124       25.4   living
## 125       25.5   living
## 126       25.5   living
## 127       25.5   living
## 128       25.5   living
## 129       25.5   living
## 130       25.5   living
## 131       25.6   living
## 132       25.6   living
## 133       25.6   living
## 134       25.6   living
## 135       25.6   living
## 136       25.6   living
## 137       25.6   living
## 138       25.7   living
## 139       25.7   living
## 140       25.8   living
## 141       25.8   living
## 142       25.8   living
## 143       25.9   living
## 144       25.9   living
## 145       26.0   living
## 146       26.0   living
## 147       26.0   living
## 148       26.1   living
## 149       26.1   living
## 150       26.3   living
## 151       26.3   living
## 152       26.3   living
## 153       26.4   living
## 154       26.5   living
## 155       26.5   living
## 156       26.5   living
## 157       26.6   living
## 158       26.6   living
## 159       26.6   living
## 160       26.7   living
## 161       26.8   living
## 162       26.9   living
## 163       27.0   living
## 164       27.0   living
## 165       27.1   living
## 166       27.1   living
## 167       27.1   living
## 168       27.2   living
## 169       27.3   living
## 170       27.3   living
## 171       27.3   living
## 172       27.4   living
## 173       27.4   living
## 174       27.4   living
## 175       27.5   living
## 176       27.6   living
## 177       27.7   living
## 178       28.0   living
## 179       28.1   living
## 180       28.2   living
## 181       28.8   living
## 182       29.1   living
## 183       29.5   living
## 184       30.3   living
# Inspect the structure (row count, column names and types) of each data set.
# Fungi: grouping columns are character, Temperature is integer.
str(Fungi)
## 'data.frame':    136 obs. of  4 variables:
##  $ IsolateID  : chr  "A1028" "A1028" "A1030" "A1030" ...
##  $ FungusID   : chr  "Fusarium B" "Fusarium B" "Fusarium A" "Fusarium A" ...
##  $ Temperature: int  20 25 20 25 20 25 20 25 20 25 ...
##  $ AveDaily   : num  7.7 9.69 7.04 9.2 8.93 ...
# BlackBird: blackbird is the bird ID, Treatment is a character column.
str(BlackBird)
## 'data.frame':    26 obs. of  3 variables:
##  $ blackbird          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Antibody.production: int  105 50 136 90 122 132 131 119 145 130 ...
##  $ Treatment          : chr  "Before" "Before" "Before" "Before" ...
# Horns: Survival is a character column (not yet a factor).
str(Horns)
## 'data.frame':    184 obs. of  2 variables:
##  $ HornLength: num  15.2 15.5 17.2 19 19.3 20.2 20.7 21 21.4 21.4 ...
##  $ Survival  : chr  "killed" "killed" "killed" "killed" ...

#Define the Hypothesis and Plot the Data

#2.1 The null hypothesis for the fungi data is that mean daily growth does not differ between the two temperatures (20 and 25). For the blackbird data, it is that the treatment does not change mean antibody production. For the horn data, it is that mean horn length does not differ between killed and living lizards.
# Per-group mean and standard error of antibody production. The SE is computed
# inside summarize() so that each Treatment group uses its own sd and its own
# sample size (n()), rather than one pooled value with a hard-coded n.
bird.summary <- BlackBird %>%
  group_by(Treatment) %>%
  summarize(
    mean.antibody = mean(Antibody.production),
    se.antibody = sd(Antibody.production) / sqrt(n())
  )

# Same summary for horn length within each Survival group.
horn.summary <- Horns %>%
  group_by(Survival) %>%
  summarize(
    mean.hornLength = mean(HornLength),
    se.hornLength = sd(HornLength) / sqrt(n())
  )

# Group means with +/- 1 SE error bars; the se.* columns come from the
# summary tables above.
ggplot(bird.summary, aes(x = Treatment, y = mean.antibody)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = mean.antibody - se.antibody, ymax = mean.antibody + se.antibody),
    width = 0.25
  )

ggplot(horn.summary, aes(x = Survival, y = mean.hornLength)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = mean.hornLength - se.hornLength,
      ymax = mean.hornLength + se.hornLength
    ),
    width = 0.25
  )

# Check Statistical Assumptions for BlackBird Data Set

# Split the long-format data into the two measurement occasions. Each subset
# is sorted by bird ID so that row i of Before and row i of After refer to the
# same bird; the paired difference is only meaningful under that alignment.
Before <- BlackBird %>%
  filter(Treatment == "Before") %>%
  arrange(blackbird)
After <- BlackBird %>%
  filter(Treatment == "After") %>%
  arrange(blackbird)
# Fail loudly if a bird is missing from either occasion.
stopifnot(identical(Before$blackbird, After$blackbird))

# A paired t-test assumes the within-bird differences are normally distributed,
# so Shapiro-Wilk is run on the differences.
Diff <- Before$Antibody.production - After$Antibody.production
shapiro.test(Diff)
## 
##  Shapiro-Wilk normality test
## 
## data:  Diff
## W = 0.93148, p-value = 0.3563
#3.2 The differences meet the normality assumption for a paired t-test (Shapiro-Wilk W = 0.93148, p = 0.3563). Because the p-value is greater than 0.05, we fail to reject the null hypothesis of normality, so the differences are consistent with a normal distribution, which is a requirement for conducting a paired t-test.
# Shapiro-Wilk on the raw Before values alone, for comparison with the test on
# the paired differences.
shapiro.test(Before$Antibody.production)
## 
##  Shapiro-Wilk normality test
## 
## data:  Before$Antibody.production
## W = 0.84747, p-value = 0.02644
# Same test on the raw After values alone.
shapiro.test(After$Antibody.production)
## 
##  Shapiro-Wilk normality test
## 
## data:  After$Antibody.production
## W = 0.82586, p-value = 0.01406
#3.4 The Shapiro-Wilk tests on the Before and After samples individually both give p-values below 0.05 (p = 0.02644 and p = 0.01406), unlike the test on the differences (p = 0.3563). For a paired t-test the normality assumption applies to the paired differences, so it is important to test the differences rather than the individual samples in order to obtain accurate statistical results.

# Complete a Paired t-test and Plot a Paired Point Plot

# Paired t-test of antibody production before vs. after treatment. It relies on
# Before and After listing the birds in the same row order. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
t.test(Before$Antibody.production, After$Antibody.production, paired = TRUE)
## 
##  Paired t-test
## 
## data:  Before$Antibody.production and After$Antibody.production
## t = -1.5079, df = 12, p-value = 0.1575
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -15.98626   2.90934
## sample estimates:
## mean difference 
##       -6.538462
#4.2 RStudio gives the t statistic as well as the degrees of freedom and the p-value. It also gives the 95% confidence interval for the mean difference (its lower and upper bounds) and the estimated mean difference. A paired t-test was the best option because the two samples are not independent: each bird was measured both before and after the treatment, and we want to know what changed between the two states. Analyzing the within-bird differences, rather than treating the two sets of measurements as separate groups, gives a clearer idea of what is changing.
# Paired point plot: one grey point per measurement, with a line joining each
# bird's Before and After values.
ggplot(BlackBird, aes(x = Treatment, y = Antibody.production)) +
  geom_point(size = 6, color = "darkgrey") +
  geom_line(aes(group = blackbird)) +
  # Treatment is a character column, so ggplot would order the axis
  # alphabetically (After, Before); set the chronological order explicitly.
  scale_x_discrete(limits = c("Before", "After")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18, face = "bold")
  )

# Complete Unpaired t-test (a.k.a two-sample)

#5.1 the null hypothesis is that the two means will be equal to each other. the alternative hypothesis is that the two means will not be equal to each other. 
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
# Shapiro-Wilk normality test on horn length, run separately within each
# survival group; the result is a tibble with one row per group.
check <- shapiro_test(group_by(Horns, Survival), HornLength)
check
## # A tibble: 2 × 4
##   Survival variable   statistic        p
##   <chr>    <chr>          <dbl>    <dbl>
## 1 killed   HornLength     0.935 0.0648  
## 2 living   HornLength     0.961 0.000223
# Levene's test for equal variances between the survival groups. Survival is a
# character column, so it is wrapped in factor() to avoid the "coerced to
# factor" warning; the formula interface mirrors the t.test call below.
leveneTest(HornLength ~ factor(Survival), data = Horns)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0035  0.953
##       182
# Two-sample t-test with pooled variance (var.equal = TRUE), which Levene's
# test above supports (p = 0.953).
t.test(HornLength ~ Survival, data = Horns, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  HornLength by Survival
## t = -4.3494, df = 182, p-value = 2.27e-05
## alternative hypothesis: true difference in means between group killed and group living is not equal to 0
## 95 percent confidence interval:
##  -3.335402 -1.253602
## sample estimates:
## mean in group killed mean in group living 
##             21.98667             24.28117
#5.5 We can reject the null hypothesis that the mean horn lengths of the killed and living groups are equal, because the p-value is below 0.05 (t = -4.3494, df = 182, p = 2.27e-05).
#5.6 yes, all observations in each of these data sets are independent of one another because each lizard was only measured once

# The True Test!

# Keep only the rows for the Chalastospora isolates.
Chal <- Fungi %>%
  filter(FungusID == "Chalastospora")
Chal
##    IsolateID      FungusID Temperature AveDaily
## 1      A2020 Chalastospora          20 7.333333
## 2      A2020 Chalastospora          25 8.363333
## 3      B1032 Chalastospora          20 4.510000
## 4      B1032 Chalastospora          25 5.490000
## 5      B3031 Chalastospora          20 7.823333
## 6      B3031 Chalastospora          25 9.110000
## 7      B3098 Chalastospora          20 7.976667
## 8      B3098 Chalastospora          25 8.933333
## 9      C4080 Chalastospora          20 8.070000
## 10     C4080 Chalastospora          25 8.533333
## 11     C5039 Chalastospora          20 6.843333
## 12     C5039 Chalastospora          25 7.790000
# Split the Chalastospora rows by incubation temperature. Temperature is an
# integer column, so it is compared against numbers rather than strings. Each
# subset is sorted by IsolateID so that row i of lowTemp and row i of highTemp
# belong to the same isolate, which the paired test below requires.
lowTemp <- Chal %>%
  filter(Temperature == 20) %>%
  arrange(IsolateID)
lowTemp
##   IsolateID      FungusID Temperature AveDaily
## 1     A2020 Chalastospora          20 7.333333
## 2     B1032 Chalastospora          20 4.510000
## 3     B3031 Chalastospora          20 7.823333
## 4     B3098 Chalastospora          20 7.976667
## 5     C4080 Chalastospora          20 8.070000
## 6     C5039 Chalastospora          20 6.843333
highTemp <- Chal %>%
  filter(Temperature == 25) %>%
  arrange(IsolateID)
highTemp
##   IsolateID      FungusID Temperature AveDaily
## 1     A2020 Chalastospora          25 8.363333
## 2     B1032 Chalastospora          25 5.490000
## 3     B3031 Chalastospora          25 9.110000
## 4     B3098 Chalastospora          25 8.933333
## 5     C4080 Chalastospora          25 8.533333
## 6     C5039 Chalastospora          25 7.790000
# Fail loudly if the two subsets do not contain the same isolates in the same
# order; otherwise the paired differences would mix different isolates.
stopifnot(identical(lowTemp$IsolateID, highTemp$IsolateID))
# Paired t-test of daily growth at 20 vs. 25 within each isolate.
t.test(lowTemp$AveDaily, highTemp$AveDaily, paired = TRUE)
## 
##  Paired t-test
## 
## data:  lowTemp$AveDaily and highTemp$AveDaily
## t = -8.6487, df = 5, p-value = 0.0003414
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.2244321 -0.6633457
## sample estimates:
## mean difference 
##      -0.9438889
# 6.3 This passes all of the assumptions of a paired t-test: the paired differences are normally distributed, the pairs (isolates) are independent of one another, and there are no extreme outliers.
# 6.4 We were able to reject the null hypothesis that the true mean difference in growth between the lower (20) and higher (25) temperatures is 0 (t = -8.6487, df = 5, p = 0.0003).
# Mean and standard error of daily growth at each temperature. The SE is
# computed within each temperature group from that group's own sd and sample
# size (n()); the Chalastospora subset has 6 isolates per temperature, not the
# 136 rows of the full Fungi table.
Chal.summary <- Chal %>%
  group_by(Temperature) %>%
  summarize(
    mean.AveDaily = mean(AveDaily),
    se.AveDaily = sd(AveDaily) / sqrt(n())
  )
Chal.summary
## # A tibble: 2 × 3
##   Temperature mean.AveDaily se.AveDaily
##         <int>         <dbl>       <dbl>
## 1          20          7.09       0.550
## 2          25          8.04       0.543
# Group means with +/- 1 SE error bars taken from the se.AveDaily column.
ggplot(Chal.summary, aes(x = Temperature, y = mean.AveDaily)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = mean.AveDaily - se.AveDaily, ymax = mean.AveDaily + se.AveDaily),
    width = 0.25
  )

#6.6 Each fungal isolate was measured twice, once at 20 degrees and once at 25, so the two measurements from the same isolate are not independent of one another; this is why we use a paired t-test to represent the trend.
# Two-sample (unpaired, pooled-variance) t-test on the same Chalastospora data,
# run for comparison with the paired test. Columns are named bare in the
# formula so that `data = Chal` is what actually supplies them.
t.test(AveDaily ~ Temperature, data = Chal, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  AveDaily by Temperature
## t = -1.2214, df = 10, p-value = 0.25
## alternative hypothesis: true difference in means between group 20 and group 25 is not equal to 0
## 95 percent confidence interval:
##  -2.6657931  0.7780154
## sample estimates:
## mean in group 20 mean in group 25 
##         7.092778         8.036667
#6.7 The p-value is much higher when the same data are analyzed with a two-sample t-test (p = 0.25 versus p = 0.0003). I believe this is because the two-sample test treats the 20 and 25 groups as independent, so the variation among isolates within each group inflates the standard error. A paired test looks at the difference within each isolate, which removes that between-isolate variation.