# Week 7 Lab

# Part 1

# libraries 
library(ggplot2)
library(readr)

# Import the snail regression CSV as a data frame, then echo every row
# to the console so the import can be checked by eye.
snail_regression <- utils::read.csv(file = "snails_regression_for_R.csv")

print(x = snail_regression)
##    DISTANCE     ZONE EGGS ZONE_NUM
## 1       4.0  lowtide    9        1
## 2       4.0  lowtide    8        1
## 3       3.9  lowtide   12        1
## 4       3.9  lowtide    9        1
## 5       3.8  lowtide   11        1
## 6       3.8  lowtide   11        1
## 7       3.7  lowtide   10        1
## 8       3.6  lowtide    5        1
## 9       3.5  lowtide    7        1
## 10      3.5  lowtide    9        1
## 11      3.4  lowtide   10        1
## 12      3.4  lowtide    9        1
## 13      3.3  lowtide    6        1
## 14      3.2  lowtide   12        1
## 15      3.2  lowtide   11        1
## 16      3.1  lowtide    8        1
## 17      3.0  midtide   11        2
## 18      2.9  midtide    8        2
## 19      2.9  midtide   18        2
## 20      2.8  midtide   10        2
## 21      2.7  midtide    9        2
## 22      2.7  midtide   13        2
## 23      2.6  midtide   15        2
## 24      2.5  midtide   12        2
## 25      2.5  midtide    9        2
## 26      2.5  midtide   15        2
## 27      2.4  midtide   11        2
## 28      2.3  midtide   13        2
## 29      2.3  midtide   10        2
## 30      2.2  midtide   13        2
## 31      2.1  midtide   14        2
## 32      2.1  midtide   17        2
## 33      2.0 hightide   15        3
## 34      2.0 hightide   14        3
## 35      1.9 hightide   14        3
## 36      1.9 hightide   20        3
## 37      1.8 hightide   17        3
## 38      1.7 hightide   16        3
## 39      1.7 hightide   19        3
## 40      1.6 hightide   15        3
## 41      1.5 hightide   15        3
## 42      1.5 hightide   17        3
## 43      1.4 hightide   20        3
## 44      1.3 hightide   18        3
## 45      1.3 hightide   18        3
## 46      1.2 hightide   14        3
## 47      1.1 hightide   15        3
## 48      1.0 hightide   17        3
# Scatterplot of EGGS against DISTANCE with a linear-model smooth
# (black line, red band fill) layered over the points.
p1 <- ggplot(
  data = snail_regression,
  mapping = aes(x = DISTANCE, y = EGGS)
) +
  # points are added first, so the smooth is drawn on top of them
  geom_point(shape = 21, size = 3, fill = "lightblue") +
  geom_smooth(method = lm, color = "black", fill = "red") +
  # title and axis text
  labs(
    title = "Snail Scatter Plot",
    x = "Distance from High Tide (m)",
    y = "Number of Eggs Produced"
  ) +
  # classic theme, then enlarge the title and both axis titles
  theme_classic() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )

p1
## `geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of EGGS on DISTANCE and print the
# coefficient table and fit statistics.
modl <- lm(formula = EGGS ~ DISTANCE, data = snail_regression)

summary(modl)
## 
## Call:
## lm(formula = EGGS ~ DISTANCE, data = snail_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2442 -1.3498 -0.0587  1.2352  6.4465 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  21.1204     1.1016  19.172  < 2e-16 ***
## DISTANCE     -3.2989     0.4076  -8.093 2.16e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.477 on 46 degrees of freedom
## Multiple R-squared:  0.5874, Adjusted R-squared:  0.5785 
## F-statistic:  65.5 on 1 and 46 DF,  p-value: 2.156e-10

# Part 2

# libraries 
library(ggplot2)
library(readr)

# Import the marsh snail CSV as a data frame, then echo every row to the
# console so the import can be checked by eye.
marsh_snail2 <- utils::read.csv(file = "marsh-snail-data-two.csv")

print(x = marsh_snail2)
##            State Latitude    Height
## 1        Florida 29.97636  5.843091
## 2        Florida 30.43704  7.417985
## 3        Florida 30.32216  5.416990
## 4        Florida 29.96138  9.533414
## 5        Florida 30.65199  5.922261
## 6        Florida 30.65167  7.034838
## 7        Florida 29.92675  7.496119
## 8        Florida 30.55372  5.324035
## 9        Florida 30.22841  3.730801
## 10       Florida 30.30138  9.441726
## 11       Georgia 30.71591 10.440276
## 12       Georgia 30.92205  4.048434
## 13       Georgia 31.78489 10.421373
## 14       Georgia 31.86330 11.223543
## 15       Georgia 31.38938  6.888101
## 16       Georgia 31.54030  2.113660
## 17       Georgia 31.83065  8.238671
## 18       Georgia 31.08275  5.403010
## 19       Georgia 31.59380  8.650702
## 20       Georgia 30.90311  7.010132
## 21 SouthCarolina 33.43508  7.568949
## 22 SouthCarolina 33.64718  5.503450
## 23 SouthCarolina 33.17228  6.713329
## 24 SouthCarolina 32.51065  6.791677
## 25 SouthCarolina 33.59657  8.415815
## 26 SouthCarolina 32.83199  7.438263
## 27 SouthCarolina 32.74251  4.204901
## 28 SouthCarolina 32.87554  6.562090
## 29 SouthCarolina 32.43516 11.647446
## 30 SouthCarolina 32.15816  8.830496
## 31 NorthCarolina 36.49926  9.902741
## 32 NorthCarolina 34.92810 14.266457
## 33 NorthCarolina 34.88198  8.966930
## 34 NorthCarolina 35.37086 13.092618
## 35 NorthCarolina 35.10911  8.205240
## 36 NorthCarolina 34.38543  5.702840
## 37 NorthCarolina 35.00921  9.562529
## 38 NorthCarolina 34.10425 11.247638
## 39 NorthCarolina 34.16465 11.709933
## 40 NorthCarolina 35.04468 13.584341
# Scatterplot of Height against Latitude with a linear-model smooth
# (black line, red band fill) layered over the points.
p2 <- ggplot(
  data = marsh_snail2,
  mapping = aes(x = Latitude, y = Height)
) +
  # points are added first, so the smooth is drawn on top of them
  geom_point(shape = 21, size = 3, fill = "lightblue") +
  geom_smooth(method = lm, color = "black", fill = "red") +
  # title and axis text
  labs(
    title = "Body Size of Marsh Periwinkles by Location",
    x = "U.S. State (Latitude)",
    y = "Body Height"
  ) +
  # classic theme, then enlarge the title and both axis titles
  theme_classic() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )

p2
## `geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of Height on Latitude and print the
# coefficient table and fit statistics.
modl2 <- lm(formula = Height ~ Latitude, data = marsh_snail2)

summary(modl2)
## 
## Call:
## lm(formula = Height ~ Latitude, data = marsh_snail2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3039 -1.4490 -0.4249  2.0594  4.3711 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -15.6503     6.9834  -2.241  0.03094 * 
## Latitude      0.7314     0.2153   3.397  0.00161 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.47 on 38 degrees of freedom
## Multiple R-squared:  0.233,  Adjusted R-squared:  0.2128 
## F-statistic: 11.54 on 1 and 38 DF,  p-value: 0.001608