library(tidyverse)

Example: Climate Change and Fish Habitats

As the climate grows warmer, we expect many animal species to move toward the poles in an attempt to maintain their preferred temperature range.

Do data on fish in the North Sea confirm this suspicion?

The data are 25 years of mean winter temperatures at the bottom of the North Sea (degrees Celsius) and the center of the distribution of anglerfish (sometimes called monkfish) in degrees of north latitude.

Step 0: Load the data into R. Create a data frame.

# EXAMPLE: Climate Change and Fish Habitats
# Anglerfish distribution data, one value per year:
#   year - calendar year of the observation (1977 through 2001)
#   temp - explanatory variable: mean winter temperature at the bottom of
#          the North Sea, in degrees Celsius
#   lat  - response variable: latitude of the center of the anglerfish
#          distribution

year <- 1977:2001

temp <- c(
  6.26, 6.26, 6.27, 6.31, 6.34,
  6.32, 6.37, 6.39, 6.42, 6.52,
  6.68, 6.76, 6.78, 6.89, 6.90,
  6.93, 6.98, 7.02, 7.09, 7.13,
  7.15, 7.29, 7.34, 7.57, 7.65
)

lat <- c(
  57.20, 57.96, 57.65, 57.59, 58.01,
  59.06, 56.85, 56.87, 57.43, 57.72,
  57.83, 57.87, 57.48, 58.13, 58.52,
  58.48, 57.89, 58.71, 58.07, 58.49,
  58.28, 58.49, 58.01, 58.57, 58.90
)

# Bundle the three vectors into one data frame for plotting
fish <- data.frame(year = year, temp = temp, lat = lat)

Step 1: Look at the data! Create a scatterplot!

library(tidyverse)
# Scatterplot of latitude against temperature; the pattern looks linear
ggplot(fish, aes(x = temp, y = lat)) +
  geom_point() +
  labs(
    title = "Scatterplot of Temp vs Fish Latitude",
    x = "Temperature (C)",
    y = "Fish Latitude"
  ) +
  theme_bw()

Step 2: Create a simple linear model

# Simple linear regression: latitude (response) on temperature (explanatory)
mod2 <- lm(formula = lat ~ temp)

# Print the residual summary, coefficient table, and fit statistics
summary(object = mod2)
## 
## Call:
## lm(formula = lat ~ temp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81309 -0.27207 -0.02401  0.20523  1.43781 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  52.4524     1.5324  34.229  < 2e-16 ***
## temp          0.8180     0.2254   3.629  0.00141 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4734 on 23 degrees of freedom
## Multiple R-squared:  0.3641, Adjusted R-squared:  0.3364 
## F-statistic: 13.17 on 1 and 23 DF,  p-value: 0.001408
# List the named components stored in the fitted model object
names(mod2)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"

… and add the fitted line to the scatterplot

# Same scatterplot, now overlaid with the fitted line from mod2
ggplot(fish, aes(x = temp, y = lat)) +
  geom_point() +
  # Blue line drawn from the model's estimated intercept and slope
  geom_abline(
    intercept = coef(mod2)[["(Intercept)"]],
    slope = coef(mod2)[["temp"]],
    color = "blue", lty = 2, lwd = 1
  ) +
  labs(
    title = "Scatterplot of Temp vs Fish Latitude",
    x = "Temperature (C)",
    y = "Fish Latitude"
  ) +
  theme_bw()

Step 3: Check the model conditions

  1. Are the data independent?
  2. Is there a linear relationship between x and y?
  3. Are the residuals normally distributed?
  4. Are the residuals centered around zero and do they have constant spread?
# Histogram of the model residuals (base graphics)
hist(mod2$residuals)

# Normal Q-Q plot of the residuals, with a reference line added by qqline()
qqnorm(mod2$residuals)
qqline(mod2$residuals)

# Store each observation's fitted value and residual alongside the data.
# The columns are assigned by name (rather than appended with cbind()) so
# that re-running this step overwrites them instead of adding duplicate
# `fit` / `residual` columns to `fish`.
fish$fit <- unname(fitted(mod2))
fish$residual <- unname(residuals(mod2))

# Distribution of the residuals, drawn with 8 bins
ggplot(fish, aes(x = residual)) +
  geom_histogram(bins = 8) +
  labs(title = "Histogram of Residuals") +
  theme_bw()

# Residual plot: residuals against temperature, with a reference line at zero
ggplot(fish, aes(x = temp, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "blue", lty = 2, lwd = 1) +
  labs(
    title = "Residual Plot",
    x = "Temperature (C)",
    y = "Residuals"
  ) +
  theme_bw()

Step 4: Perform a hypothesis test for the slope

# Show the model summary again; the `temp` row of the coefficient table
# reports the slope estimate, its standard error, t value, and p-value
summary(mod2)
## 
## Call:
## lm(formula = lat ~ temp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81309 -0.27207 -0.02401  0.20523  1.43781 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  52.4524     1.5324  34.229  < 2e-16 ***
## temp          0.8180     0.2254   3.629  0.00141 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4734 on 23 degrees of freedom
## Multiple R-squared:  0.3641, Adjusted R-squared:  0.3364 
## F-statistic: 13.17 on 1 and 23 DF,  p-value: 0.001408
  • Write the null and alternative hypotheses for the test performed in R:

  • Report the test statistic:

  • What is the reference distribution? (Be sure to include the degrees of freedom.)

  • Find the p-value and write a five-part conclusion for the hypothesis test.

Step 5: Construct a 95% confidence interval for the slope

  • What is the critical value used for this confidence interval?
# t critical value for a 95% interval (the 0.975 quantile). The degrees of
# freedom are read from the fitted model rather than typed in by hand
# (23 for this model), so the value stays correct if the data change.
qt(0.975, df = df.residual(mod2))
## [1] 2.068658
  • Find the confidence interval using the equation $b_1 \pm t^{*} \cdot SE_{b_1}$.

  • Report your results.

Step 6: ANOVA

# Analysis of variance table for the fitted model mod2
anova(mod2)
## Analysis of Variance Table
## 
## Response: lat
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## temp       1 2.9515 2.95151  13.167 0.001408 **
## Residuals 23 5.1555 0.22415                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1