Final Project

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.3.0      ✔ stringr 1.5.0 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ph_hatch<-read_csv("pH_Hatch.csv")
Rows: 500 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (2): expstage, trtlabel
dbl  (27): trial, tank, dic, ta, ph, pco2, casat, arsat, h2odq, dph, replica...
dttm  (1): date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#trtlabel vs dpfmean(mean time to hatch)

#Select trtlabel and dpfmean
ph_hatch1<-ph_hatch %>%
 select(trtlabel,dpfmean)

ph_hatch1
# A tibble: 500 × 2
   trtlabel dpfmean
   <chr>      <dbl>
 1 low           NA
 2 low           NA
 3 low           NA
 4 low           NA
 5 low           NA
 6 low           NA
 7 low           NA
 8 low           NA
 9 low           NA
10 low           NA
# … with 490 more rows
#Drop NA in dpfmean
ph_hatch2<-ph_hatch1 %>%
  drop_na(dpfmean)

ph_hatch2
# A tibble: 68 × 2
   trtlabel dpfmean
   <chr>      <dbl>
 1 high        9.84
 2 high       10.1 
 3 high        9.51
 4 high       10.1 
 5 medium      9.67
 6 medium      9.70
 7 medium      9.77
 8 medium      9.73
 9 low         9.66
10 low         9.62
# … with 58 more rows
#Get mean for each category
ph_hatch3<-ph_hatch2%>%
  group_by(trtlabel)%>%
  summarize(mean_dpfmean=mean(dpfmean),sd=sd(dpfmean),n=n(),se=sd/sqrt(n))

ph_hatch3
# A tibble: 4 × 5
  trtlabel mean_dpfmean    sd     n    se
  <chr>           <dbl> <dbl> <int> <dbl>
1 ambient          9.53 0.579    17 0.140
2 high             9.71 0.545    17 0.132
3 low              9.28 0.596    17 0.145
4 medium           9.43 0.446    17 0.108
ggplot(data=ph_hatch3,aes(x=trtlabel,y=mean_dpfmean))+
  geom_point()+
  geom_errorbar(data=ph_hatch3,aes(x=trtlabel,ymin=mean_dpfmean-se,ymax=mean_dpfmean+se))+
  labs(y="Mean time to hatch",x="pH level",title="pH level vs mean time to hatch")

#Same graph but with the y axis starts from 0
ggplot(data=ph_hatch3,aes(x=trtlabel,y=mean_dpfmean))+
  geom_point()+
  geom_errorbar(data=ph_hatch3,aes(x=trtlabel,ymin=mean_dpfmean-se,ymax=mean_dpfmean+se))+
  labs(y="Mean time to hatch",x="pH level",title="pH level vs mean time to hatch")+
  ylim(0,NA)

#Patchwaork them
library(patchwork)

p1<-ggplot(data=ph_hatch3,aes(x=trtlabel,y=mean_dpfmean))+
  geom_point()+
  geom_errorbar(data=ph_hatch3,aes(x=trtlabel,ymin=mean_dpfmean-se,ymax=mean_dpfmean+se))+
  labs(y="Mean time to hatch",x="pH level",title="pH level vs mean time to hatch")

p2<-ggplot(data=ph_hatch3,aes(x=trtlabel,y=mean_dpfmean))+
  geom_point()+
  geom_errorbar(data=ph_hatch3,aes(x=trtlabel,ymin=mean_dpfmean-se,ymax=mean_dpfmean+se))+
  labs(y="Mean time to hatch",x="pH level",title="pH level vs mean time to hatch")+
  ylim(0,NA)

p1+p2

#trtlabel vs hatch_n(number of fish hatching from incubation beaker)

#Select trtlabel and hatch_n
ph_hatch_count<-ph_hatch %>%
 select(trtlabel,hatch_n)

ph_hatch_count
# A tibble: 500 × 2
   trtlabel hatch_n
   <chr>      <dbl>
 1 low           NA
 2 low           NA
 3 low           NA
 4 low           NA
 5 low           NA
 6 low           NA
 7 low           NA
 8 low           NA
 9 low           NA
10 low           NA
# … with 490 more rows
#Drop NA in hatch_n
ph_hatch_count1<-ph_hatch_count %>%
  drop_na(hatch_n)

ph_hatch_count1
# A tibble: 68 × 2
   trtlabel hatch_n
   <chr>      <dbl>
 1 high         172
 2 high         320
 3 high         109
 4 high         190
 5 medium       193
 6 medium       281
 7 medium       258
 8 medium       126
 9 low          160
10 low          260
# … with 58 more rows
#Get mean for each category
ph_hatch_count2<-ph_hatch_count1%>%
  group_by(trtlabel)%>%
  summarize(mean_hatch=mean(hatch_n),sd=sd(hatch_n),n=n(),se=sd/sqrt(n))

ph_hatch_count2
# A tibble: 4 × 5
  trtlabel mean_hatch    sd     n    se
  <chr>         <dbl> <dbl> <int> <dbl>
1 ambient        308.  156.    17  37.7
2 high           269.  152.    17  36.8
3 low            285.  162.    17  39.3
4 medium         279.  152.    17  36.9
ggplot(data=ph_hatch_count2,aes(x=trtlabel,y=mean_hatch))+
  geom_point()+
  geom_errorbar(data=ph_hatch_count2,aes(x=trtlabel,ymin=mean_hatch-se,ymax=mean_hatch+se))+
  labs(y="Number of fish hatching from incubation beaker",x="pH level",title="pH level vs number of fish hatching from incubation beaker")+
  ylim(0,NA)

#trtlabel vs yamean(Mean yolk area)

#Select trtlabel and yamean
ph_yolk<-ph_hatch %>%
 select(trtlabel,yamean)

ph_yolk
# A tibble: 500 × 2
   trtlabel yamean
   <chr>     <dbl>
 1 low          NA
 2 low          NA
 3 low          NA
 4 low          NA
 5 low          NA
 6 low          NA
 7 low          NA
 8 low          NA
 9 low          NA
10 low          NA
# … with 490 more rows
#Drop NA in yamean
ph_yolk1<-ph_yolk %>%
  drop_na(yamean)

ph_yolk1
# A tibble: 68 × 2
   trtlabel yamean
   <chr>     <dbl>
 1 high      0.925
 2 high      0.983
 3 high      1.01 
 4 high      0.979
 5 medium    0.825
 6 medium    0.929
 7 medium    0.807
 8 medium    0.846
 9 low       0.814
10 low       0.938
# … with 58 more rows
#Get mean for each category
ph_yolk2<-ph_yolk1%>%
  group_by(trtlabel)%>%
  summarize(mean_yolk=mean(yamean),sd=sd(yamean),n=n(),se=sd/sqrt(n))

ph_yolk2
# A tibble: 4 × 5
  trtlabel mean_yolk    sd     n     se
  <chr>        <dbl> <dbl> <int>  <dbl>
1 ambient      0.859 0.164    17 0.0399
2 high         0.951 0.179    17 0.0434
3 low          0.921 0.139    17 0.0337
4 medium       0.926 0.179    17 0.0435
ggplot(data=ph_yolk2,aes(x=trtlabel,y=(mean_yolk)))+
  geom_point()+
  geom_errorbar(data=ph_yolk2,aes(x=trtlabel,ymin=mean_yolk-se,ymax=mean_yolk+se))+
  labs(y="Mean yolk area",x="pH level",title="pH level vs Mean yolk area")

# Yamean=Mean yolk area vs Htmean = Mean body height
yolk_height<-ph_hatch %>%
 select(yamean,htmean,trtlabel)

yolk_height
# A tibble: 500 × 3
   yamean htmean trtlabel
    <dbl>  <dbl> <chr>   
 1     NA     NA low     
 2     NA     NA low     
 3     NA     NA low     
 4     NA     NA low     
 5     NA     NA low     
 6     NA     NA low     
 7     NA     NA low     
 8     NA     NA low     
 9     NA     NA low     
10     NA     NA low     
# … with 490 more rows
#Drop NA
yolk_height1<-yolk_height %>%
  drop_na(yamean,htmean,trtlabel)
yolk_height1
# A tibble: 68 × 3
   yamean htmean trtlabel
    <dbl>  <dbl> <chr>   
 1  0.925  0.258 high    
 2  0.983  0.255 high    
 3  1.01   0.256 high    
 4  0.979  0.261 high    
 5  0.825  0.256 medium  
 6  0.929  0.255 medium  
 7  0.807  0.259 medium  
 8  0.846  0.256 medium  
 9  0.814  0.255 low     
10  0.938  0.256 low     
# … with 58 more rows
#Scatter Plot
ggplot(data=yolk_height1,aes(x=yamean,y=htmean,color=trtlabel))+
  geom_point()+
  labs(y="Mean body height",x="Mean yolk area",title="Mean yolk area vs Mean body height")

#Add the fitted line to the scatter plot using ggplot2
library(ggplot2)
ggplot(data=yolk_height1,aes(x=yamean,y=htmean,color=trtlabel))+
  geom_point()+
  labs(y="Mean body height",x="Mean yolk area",title="Mean yolk area vs Mean body height")+
  scale_color_discrete(name = "trtlabel") +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE)

#Patchwork them
p3=ggplot(data=yolk_height1,aes(x=yamean,y=htmean,color=trtlabel))+
  geom_point()+
  labs(y="Mean body height",x="Mean yolk area",title="Mean yolk area vs Mean body height")

p4=ggplot(data=yolk_height1,aes(x=yamean,y=htmean,color=trtlabel))+
  geom_point()+
  labs(y="Mean body height",x="Mean yolk area",title="Mean yolk area vs Mean body height")+
  scale_color_discrete(name = "trtlabel") +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE)

p3+p4

#trtlabel vs htmean = Mean body height

#Select trtlabel and htmean
ph_height<-ph_hatch %>%
 select(trtlabel,htmean)

ph_height
# A tibble: 500 × 2
   trtlabel htmean
   <chr>     <dbl>
 1 low          NA
 2 low          NA
 3 low          NA
 4 low          NA
 5 low          NA
 6 low          NA
 7 low          NA
 8 low          NA
 9 low          NA
10 low          NA
# … with 490 more rows
#Drop NA in htmean
ph_height1<-ph_height %>%
  drop_na(htmean)

ph_height1
# A tibble: 238 × 2
   trtlabel htmean
   <chr>     <dbl>
 1 high      0.258
 2 high      0.255
 3 high      0.256
 4 high      0.261
 5 medium    0.256
 6 medium    0.255
 7 medium    0.259
 8 medium    0.256
 9 low       0.255
10 low       0.256
# … with 228 more rows
#Get mean for each category
ph_height2<-ph_height1%>%
  group_by(trtlabel)%>%
  summarize(mean_height=mean(htmean),sd=sd(htmean),n=n(),se=sd/sqrt(n))

ph_height2
# A tibble: 5 × 5
  trtlabel mean_height     sd     n      se
  <chr>          <dbl>  <dbl> <int>   <dbl>
1 ambient        0.329 0.0874    59 0.0114 
2 high           0.343 0.0975    60 0.0126 
3 low            0.344 0.107     59 0.0139 
4 med            0.371 0.0968    43 0.0148 
5 medium         0.256 0.0112    17 0.00271
ggplot(data=ph_height2,aes(x=trtlabel,y=(mean_height)))+
  geom_point()+
  geom_errorbar(data=ph_height2,aes(x=trtlabel,ymin=mean_height-se,ymax=mean_height+se))+
  labs(y="Mean body height",x="pH level",title="pH level vs Mean body height")

#Why this graph has med??

#Same graph with yaxis starts from 0
ggplot(data=ph_height2,aes(x=trtlabel,y=(mean_height)))+
  geom_point()+
  geom_errorbar(data=ph_height2,aes(x=trtlabel,ymin=mean_height-se,ymax=mean_height+se))+
  labs(y="Mean body height",x="pH level",title="pH level vs Mean body height")+
  ylim(0,NA)

#Patchwork them
p5<-ggplot(data=ph_height2,aes(x=trtlabel,y=(mean_height)))+
  geom_point()+
  geom_errorbar(data=ph_height2,aes(x=trtlabel,ymin=mean_height-se,ymax=mean_height+se))+
  labs(y="Mean body height",x="pH level",title="pH level vs Mean body height")

p6<-ggplot(data=ph_height2,aes(x=trtlabel,y=(mean_height)))+
  geom_point()+
  geom_errorbar(data=ph_height2,aes(x=trtlabel,ymin=mean_height-se,ymax=mean_height+se))+
  labs(y="Mean body height",x="pH level",title="pH level vs Mean body height")+
  ylim(0,NA)

p5+p6