# Set the knitr chunk default so each chunk's source code is echoed with its output.
knitr::opts_chunk$set(echo = TRUE)
#install.packages("dplyr")
#install.packages("ggplot2")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Read the three snail datasets used below (paths are relative to the
# working directory). Each becomes a data frame with its own name.
snailRegression <- read.csv(file = "snails regression for R.csv")
marshSnails <- read.csv(file = "Marsh_Snail_Data_5.csv")
invasiveData <- read.csv(file = "snail_invasive_data.csv")
# Scatter plot of egg count (EGGS) against DISTANCE for the regression data.
ggplot(data = snailRegression, mapping = aes(DISTANCE, EGGS)) +
  geom_point() +
  labs(
    x = "Distance from High Tide (m)",
    y = "Number of Eggs"
  ) +
  theme_classic()

# A): Null hypothesis: the distance from the high-tide mark has no effect on the number of eggs in a capsule.
# Simple linear regression of EGGS on DISTANCE; print the coefficient table
# and fit statistics.
modl <- lm(formula = EGGS ~ DISTANCE, data = snailRegression)
summary(object = modl)
## 
## Call:
## lm(formula = EGGS ~ DISTANCE, data = snailRegression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2442 -1.3498 -0.0587  1.2352  6.4465 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  21.1204     1.1016  19.172  < 2e-16 ***
## DISTANCE     -3.2989     0.4076  -8.093 2.16e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.477 on 46 degrees of freedom
## Multiple R-squared:  0.5874, Adjusted R-squared:  0.5785 
## F-statistic:  65.5 on 1 and 46 DF,  p-value: 2.156e-10
# B):
## EQUATION: y = -3.299x + 21.12
## R^2 VALUE: 0.587 (adjusted R^2: 0.579)
## F-STATISTIC: 65.5 on 1 and 46 degrees of freedom
## p-value: 2.156 x 10^(-10), which is a pretty small number

#C): We should reject the null hypothesis, because the p-value (2.156 x 10^(-10)) is far below 0.05.

# IT'S TIME TO MAKE A PLOT.
# Same eggs-vs-distance scatter, now with a fitted linear-model line drawn
# on top of the points.
ggplot(data = snailRegression, mapping = aes(DISTANCE, EGGS)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Distance from high-tide mark (m)",
    y = "Number of Eggs"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

#5:
## In the document.

#PART II

#The current thought is that body size has a correlation with latitude. (All of the latitudes given are above the equator, so higher value = closer to the North Pole)

# Linear regression of snail Height on Latitude across all marsh snails
# (species is not in the model here); print the summary.
marshModl <- lm(formula = Height ~ Latitude, data = marshSnails)
summary(object = marshModl)
## 
## Call:
## lm(formula = Height ~ Latitude, data = marshSnails)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3020 -1.5387 -0.2064  1.6549  6.6886 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -9.6213     4.3186  -2.228 0.028770 *  
## Latitude      0.5164     0.1326   3.894 0.000207 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.298 on 78 degrees of freedom
## Multiple R-squared:  0.1628, Adjusted R-squared:  0.152 
## F-statistic: 15.16 on 1 and 78 DF,  p-value: 0.0002068
#The graph.
# Scatter plot of Height against Latitude for the marsh snail data.
ggplot(data = marshSnails, mapping = aes(x = Latitude, y = Height)) +
  geom_point() +
  labs(
    x = "Latitude",
    y = "Snail height (mm)"
  ) +
  theme_classic()

#The same graph with a LM line
# Height-vs-latitude scatter with a fitted linear-model line drawn over the
# points.
ggplot(data = marshSnails, mapping = aes(x = Latitude, y = Height)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Latitude",
    y = "Snail height (mm)"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

## 

#PART III

#I already read in the invasive snail data during the first chunk.
# Eggs vs. distance, coloured by the SNAIL column, with a separate
# linear-model line for each SNAIL group.
ggplot(
  data = invasiveData,
  mapping = aes(x = DISTANCE, y = EGGS, color = SNAIL)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Distance from High Tide (m)",
    y = "Number of Eggs"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# Regression of EGGS on DISTANCE, SNAIL, and their interaction, so the
# slope on DISTANCE is allowed to differ between SNAIL groups.
invasiveModl <- lm(formula = EGGS ~ DISTANCE * SNAIL, data = invasiveData)
summary(object = invasiveModl)
## 
## Call:
## lm(formula = EGGS ~ DISTANCE * SNAIL, data = invasiveData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6400 -1.8837 -0.2113  1.9823  6.4465 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           30.6239     1.3324  22.983  < 2e-16 ***
## DISTANCE              -2.9866     0.4930  -6.058 2.98e-08 ***
## SNAILNative           -9.5035     1.8843  -5.043 2.29e-06 ***
## DISTANCE:SNAILNative  -0.3123     0.6972  -0.448    0.655    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.996 on 92 degrees of freedom
## Multiple R-squared:  0.7988, Adjusted R-squared:  0.7922 
## F-statistic: 121.7 on 3 and 92 DF,  p-value: < 2.2e-16
# Print the analysis-of-variance table for the interaction model, giving an
# F test for DISTANCE, SNAIL, and the DISTANCE:SNAIL term.
anova(invasiveModl)
## Analysis of Variance Table
## 
## Response: EGGS
##                Df  Sum Sq Mean Sq  F value  Pr(>F)    
## DISTANCE        1  729.67  729.67  81.2683 2.7e-14 ***
## SNAIL           1 2547.11 2547.11 283.6874 < 2e-16 ***
## DISTANCE:SNAIL  1    1.80    1.80   0.2007  0.6552    
## Residuals      92  826.03    8.98                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
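##DISTANCE:SNAIL has a high Pr(>F) (0.6552), indicating that there's not a statistically significant difference between the two snail types in the slope of the eggs-distance relationship. This makes sense; in the graph above, the two lines look nearly parallel. The SNAIL term itself is highly significant, though, so the two types do differ in their overall number of eggs.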

#PART IV

#Increases in latitude are associated with increases in body size.
# The null hypotheses are: 1.) latitude has no effect on height, and 2.) species has no impact on the regression line of height-latitude.

# Regression of Height on Latitude, Snail, and their interaction, so each
# Snail group gets its own intercept and Latitude slope.
marshModl2 <- lm(formula = Height ~ Latitude * Snail, data = marshSnails)
summary(object = marshModl2)
## 
## Call:
## lm(formula = Height ~ Latitude * Snail, data = marshSnails)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4725 -1.1743 -0.2343  1.3067  4.6076 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -27.4977     5.0937  -5.398 7.39e-07 ***
## Latitude                 1.0866     0.1564   6.947 1.10e-09 ***
## SnailMelampus           35.7529     7.2036   4.963 4.14e-06 ***
## Latitude:SnailMelampus  -1.1405     0.2212  -5.156 1.94e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.916 on 76 degrees of freedom
## Multiple R-squared:  0.4326, Adjusted R-squared:  0.4102 
## F-statistic: 19.31 on 3 and 76 DF,  p-value: 2.093e-09
# Print the analysis-of-variance table for the interaction model, giving an
# F test for Latitude, Snail, and the Latitude:Snail term.
anova(marshModl2)
## Analysis of Variance Table
## 
## Response: Height
##                Df  Sum Sq Mean Sq F value    Pr(>F)    
## Latitude        1  80.065  80.065 21.7986 1.278e-05 ***
## Snail           1  35.074  35.074  9.5492  0.002795 ** 
## Latitude:Snail  1  97.650  97.650 26.5864 1.941e-06 ***
## Residuals      76 279.143   3.673                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#It looks like we can reject both of the null hypotheses...? However, the Melampus slope is 1.0866 - 1.1405 = -0.054 (roughly zero), so maybe latitude has no effect on the size of Melampus. We'll see.

# Height vs. latitude, coloured by the Snail column, with a separate
# linear-model line for each Snail group.
ggplot(
  data = marshSnails,
  mapping = aes(x = Latitude, y = Height, color = Snail)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Latitude",
    y = "Snail Height (mm)"
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'