Final_Project_Part_6

Author

Aileen Membreno, Chii Kojima, and Yuxin Ni

#Make the table look better install.packages(“apaTables”) library(apaTables) summary<-summary(aov_hatch) apa.aov.table(summary, caption=“ANOVA Summary Table of pH level vs Mean Time to Hatch”)

Road Packages

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.3.0      ✔ stringr 1.5.0 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(performance) #check stat/model assumptions
library(patchwork)
library(ggsci) #for color scales
library(see)

Attaching package: 'see'

The following objects are masked from 'package:ggsci':

    scale_color_material, scale_colour_material, scale_fill_material
library(rstatix) #test for outliers, welch_anova_test

Attaching package: 'rstatix'

The following object is masked from 'package:stats':

    filter
library(rstatix) #test for outliers, welch_anova_test
library(data.table)

Attaching package: 'data.table'

The following objects are masked from 'package:dplyr':

    between, first, last

The following object is masked from 'package:purrr':

    transpose
library(lubridate)

Attaching package: 'lubridate'

The following objects are masked from 'package:data.table':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year

The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(rstatix)
library(performance)

#H0:There will be no relationship between the mean hatching time of Walleye Pollock and CO2 level.

#Ha: The hatching time of Walleye Pollock differs among environments with different CO2 levels.

ph_hatch<-read_csv("pH_Hatch.csv")
Rows: 500 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (2): expstage, trtlabel
dbl  (27): trial, tank, dic, ta, ph, pco2, casat, arsat, h2odq, dph, replica...
dttm  (1): date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#trtlabel vs dpfmean(mean time to hatch)

#Select trtlabel and dpfmean
ph_hatch1<-ph_hatch %>%
 select(trtlabel,dpfmean)

ph_hatch1
# A tibble: 500 × 2
   trtlabel dpfmean
   <chr>      <dbl>
 1 low           NA
 2 low           NA
 3 low           NA
 4 low           NA
 5 low           NA
 6 low           NA
 7 low           NA
 8 low           NA
 9 low           NA
10 low           NA
# … with 490 more rows
#Drop NA in dpfmean
ph_hatch2<-ph_hatch1 %>%
  drop_na(dpfmean)

ph_hatch2
# A tibble: 68 × 2
   trtlabel dpfmean
   <chr>      <dbl>
 1 high        9.84
 2 high       10.1 
 3 high        9.51
 4 high       10.1 
 5 medium      9.67
 6 medium      9.70
 7 medium      9.77
 8 medium      9.73
 9 low         9.66
10 low         9.62
# … with 58 more rows
#Get mean for each category
ph_hatch3<-ph_hatch2 %>%
  group_by(trtlabel) %>%
  dplyr::summarize(mean_dpfmean=mean(dpfmean),sd=sd(dpfmean),n=n(),se=sd/sqrt(n))

ph_hatch3
# A tibble: 4 × 5
  trtlabel mean_dpfmean    sd     n    se
  <chr>           <dbl> <dbl> <int> <dbl>
1 ambient          9.53 0.579    17 0.140
2 high             9.71 0.545    17 0.132
3 low              9.28 0.596    17 0.145
4 medium           9.43 0.446    17 0.108
ggplot(data=ph_hatch3,aes(x=trtlabel,y=mean_dpfmean,color=trtlabel))+
  geom_point()+
  geom_errorbar(data=ph_hatch3,aes(x=trtlabel,ymin=mean_dpfmean-se,ymax=mean_dpfmean+se))+
   geom_point(data=ph_hatch2, aes(x=trtlabel, y=dpfmean,color=trtlabel),position = position_jitterdodge(dodge.width = 0.75), alpha = 0.3)+
  labs(y="Mean time CO2 Treatment",x="pH level",title="Mean Time to hatch by CO2 Treatment")+
  ylim(0,NA)+
  theme_classic()

#ANOVA
aov_hatch <- aov(dpfmean~trtlabel, data = ph_hatch2)
summary(aov_hatch) #P_value>0.05 shows that there is no significant difference among the groups.
            Df Sum Sq Mean Sq F value Pr(>F)
trtlabel     3  1.606  0.5354   1.806  0.155
Residuals   64 18.969  0.2964               
#Check model
check_model(aov_hatch) #Linearity. Homogeneity, Normality, Posterioi predictive check, all look good.

#H0: CO2 level will not effect body condition of Walleye Pollock larvae.

#Ha: CO2 level will effect body condition of Walleye Pollock larvae.

fish1<-filter(ph_hatch, expstage == "larval")
fish1
# A tibble: 341 × 30
   expst…¹ trial trtla…² date                 tank   dic    ta    ph  pco2 casat
   <chr>   <dbl> <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 larval      1 low     2011-03-25 00:00:00     1  2080 2206   8.00  436.  2.44
 2 larval      1 med     2011-03-25 00:00:00     2  2158 2216.  7.78  751.  1.56
 3 larval      1 high    2011-03-25 00:00:00     3  2269 2214.  7.38 1961   0.65
 4 larval      1 ambient 2011-03-25 00:00:00     4  2013 2207.  8.17  277   3.46
 5 larval      1 low     2011-03-28 00:00:00     1  2072 2207.  8.02  408.  2.57
 6 larval      1 med     2011-03-28 00:00:00     2  2151 2215.  7.80  713.  1.63
 7 larval      1 high    2011-03-28 00:00:00     3  2251 2212.  7.44 1730.  0.73
 8 larval      1 ambient 2011-03-28 00:00:00     4  2041 2210.  8.11  325   3.08
 9 larval      1 low     2011-03-30 00:00:00     1  2060 2201   8.04  390.  2.65
10 larval      1 med     2011-03-30 00:00:00     2  2186 2217.  7.69  948   1.27
# … with 331 more rows, 20 more variables: arsat <dbl>, h2odq <dbl>, dph <dbl>,
#   replicate <dbl>, slmean <dbl>, slserr <dbl>, htmean <dbl>, htserr <dbl>,
#   idmean <dbl>, idserr <dbl>, yamean <dbl>, yaserr <dbl>, kmean <dbl>,
#   kserr <dbl>, size_n <dbl>, sizedq <dbl>, dpfmean <dbl>, dpfserr <dbl>,
#   hatch_n <dbl>, hatchdq <dbl>, and abbreviated variable names ¹​expstage,
#   ²​trtlabel
fish2 <- fish1 %>%
  select("trtlabel","kmean" ) %>%
  drop_na()
summary(fish2)
   trtlabel             kmean           
 Length:170         Min.   :-0.0673530  
 Class :character   1st Qu.:-0.0129025  
 Mode  :character   Median : 0.0004040  
                    Mean   :-0.0005172  
                    3rd Qu.: 0.0128855  
                    Max.   : 0.0513350  
fish2
# A tibble: 170 × 2
   trtlabel     kmean
   <chr>        <dbl>
 1 high      0.0119  
 2 high      0.00879 
 3 high      0.0205  
 4 med       0.0159  
 5 med       0.0273  
 6 med       0.0160  
 7 low      -0.000413
 8 low       0.0106  
 9 low       0.0108  
10 ambient   0.0133  
# … with 160 more rows
fish3<-fish2 %>%
  group_by(trtlabel) %>%
  dplyr::summarize(mean_kmean=mean(kmean),sd=sd(kmean),n=n(),se=sd/sqrt(n))
fish3
# A tibble: 4 × 5
  trtlabel mean_kmean     sd     n      se
  <chr>         <dbl>  <dbl> <int>   <dbl>
1 ambient    0.000568 0.0234    42 0.00360
2 high      -0.00242  0.0183    43 0.00279
3 low        0.000164 0.0200    42 0.00309
4 med       -0.000337 0.0186    43 0.00283
ggplot(data=fish3,aes(x=trtlabel,y=mean_kmean,color=trtlabel))+
  geom_point()+
  geom_errorbar(data=fish3,aes(x=trtlabel,ymin=mean_kmean-se,ymax=mean_kmean+se))+
  geom_point(data=fish2, aes(x=trtlabel, y=kmean,color=trtlabel),position = position_jitterdodge(dodge.width = 0.75), alpha = 0.3)+
  labs(y="Body Condition",x="CO2 Treatment",title="Body condition by CO2 Treatment")+
  theme_classic()

#ANOVA
aov_kmean<-aov(kmean~trtlabel, data=fish2)
summary(aov_kmean) #P-value > 0.05 shows that there are no significant relationship between CO2 level and body condition of larvae.
             Df  Sum Sq   Mean Sq F value Pr(>F)
trtlabel      3 0.00023 0.0000755   0.186  0.906
Residuals   166 0.06734 0.0004056               
#Check Model
check_model(aov_kmean)

#Larval condition index (K) was calculated as the deviation from a best-fit relationship between log10 (MH) and log10 (SL). SL refers to standard length, and MH stands for myotome height at the anus.

#H0: There will be no relationship between the hatch number and treatment levels of CO2.

#Ha: There is a relationship between the hatch number and treatment levels of CO2

#Filtering and summarize data
hatch <- ph_hatch %>%
  select(trtlabel,hatch_n) %>%
  drop_na(hatch_n)
hatch
# A tibble: 68 × 2
   trtlabel hatch_n
   <chr>      <dbl>
 1 high         172
 2 high         320
 3 high         109
 4 high         190
 5 medium       193
 6 medium       281
 7 medium       258
 8 medium       126
 9 low          160
10 low          260
# … with 58 more rows
meanhatch<-hatch %>%
  group_by(trtlabel) %>%
  dplyr::summarize(meanhatch=mean(hatch_n),sd=sd(hatch_n),n=n(),se=sd/sqrt(n))

#Make a graph
ggplot(data=meanhatch,aes(x=trtlabel,y=meanhatch,color=trtlabel))+
  geom_point()+
  geom_errorbar(data=meanhatch,aes(x=trtlabel,ymin=meanhatch-se,ymax=meanhatch+se))+
  geom_point(data=hatch, aes(x=trtlabel, y=hatch_n,color=trtlabel),position = position_jitterdodge(dodge.width = 0.75), alpha = 0.3)+
  labs(y="CO2 Treatment",x="Hatch Number",title="Hatch number by CO2 treatment")+
  theme_classic()+
  ylim(0,NA)

#ANOVA
modelhatch <-aov(hatch_n~trtlabel, data=hatch)
check_model(modelhatch)

#In this model, I checked the relationship between the different levels of CO2 treatments (trtlabel) and the number of eggs that hatched based on the treatments. Looking at the assumptions, it seems like this is a good model. The reference line is flat and horizontal showing that this model does have linearity. Meanwhile, the reference line for the homogeneity of variance is nearly flat and it is horizontal which indicates that the model does contain homogeneity. Overall, for the most part all assumptions are followed!

summary(modelhatch) #However, looking at the summary below the p-value is greater than 0.05 which means that there is no statistical significance.
            Df  Sum Sq Mean Sq F value Pr(>F)
trtlabel     3   14195    4732   0.196  0.899
Residuals   64 1547666   24182