Project 2

Load Data

# Read the two input CSV files (bird surveys and bird observations)
# from the current working directory into data frames
surveys <- read.csv(file = "46_bird_surveys.csv")
observations <- read.csv(file = "46_bird_observations.csv")

Data Cleaning and Merging

# Parse survey_date into Date objects in both tables so the join key has the
# same type on each side and the year can be extracted afterwards
observations$survey_date <- as.Date(observations$survey_date)
surveys$survey_date <- as.Date(surveys$survey_date)

# Coerce bird_count to numeric so it can be summed during aggregation
observations$bird_count <- as.numeric(observations$bird_count)

# Build the analysis table in one pipeline:
#   1. left-join the survey columns onto every observation row
#   2. keep only the columns used in later steps
#   3. drop rows whose bird_count is NA
#   4. derive the numeric survey year from survey_date
merged_df <- observations %>%
  left_join(
    surveys,
    by = c(
      "survey_id", "site_code", "survey_date",
      "time_start", "time_end", "observer"
    )
  ) %>%
  select(
    survey_id, site_code, survey_date,
    location_type, common_name, bird_count
  ) %>%
  drop_na(bird_count) %>%
  mutate(year = as.numeric(format(survey_date, "%Y")))

Summarizing Data

# One row per (year, location_type) combination, holding:
#   species_richness - number of distinct common_name values in the group
#   bird_abundance   - total of bird_count in the group
# The result is returned ungrouped.
summary_df <- merged_df %>%
  group_by(year, location_type) %>%
  summarise(
    species_richness = n_distinct(common_name),
    bird_abundance = sum(bird_count),
    .groups = "drop"
  )

Statistical Tests

ANOVA Tests

# One-way ANOVA: does yearly species richness differ between location types?
anova_richness <- aov(
  formula = species_richness ~ location_type,
  data = summary_df
)

# One-way ANOVA: does yearly bird abundance differ between location types?
anova_abundance <- aov(
  formula = bird_abundance ~ location_type,
  data = summary_df
)

# Print the ANOVA table for species richness
summary(anova_richness)

##               Df Sum Sq Mean Sq F value Pr(>F)    
## location_type  5  54802   10960   61.16 <2e-16 ***
## Residuals     80  14336     179                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Print the ANOVA table for bird abundance by location type
summary(anova_abundance)

##               Df    Sum Sq   Mean Sq F value  Pr(>F)    
## location_type  5 1.713e+09 342637404   22.76 3.9e-14 ***
## Residuals     80 1.204e+09  15051871                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Linear Regression

# Simple linear regression of species richness on year, fitted to all
# year/location rows of summary_df together
lm_richness <- lm(
  formula = species_richness ~ year,
  data = summary_df
)

# Simple linear regression of bird abundance on year, fitted to the same rows
lm_abundance <- lm(
  formula = bird_abundance ~ year,
  data = summary_df
)

# Print coefficients and fit statistics for the species richness model
summary(lm_richness)

## 
## Call:
## lm(formula = species_richness ~ year, data = summary_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.287 -20.973   7.477  19.045  44.987 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 1549.2365   893.2535   1.734   0.0865 .
## year          -0.7264     0.4437  -1.637   0.1053  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28.24 on 84 degrees of freedom
## Multiple R-squared:  0.03093,    Adjusted R-squared:  0.01939 
## F-statistic: 2.681 on 1 and 84 DF,  p-value: 0.1053

# Print coefficients and fit statistics for the bird abundance model
summary(lm_abundance)

## 
## Call:
## lm(formula = bird_abundance ~ year, data = summary_df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -8027  -3709  -2124   3964  14841 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 504209.94  178289.88   2.828  0.00585 **
## year          -247.46      88.55  -2.794  0.00644 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5637 on 84 degrees of freedom
## Multiple R-squared:  0.08506,    Adjusted R-squared:  0.07417 
## F-statistic: 7.809 on 1 and 84 DF,  p-value: 0.006439

Visualization

Species Richness Over Time

# Plot species richness against year, coloured by location type;
# lines are drawn first and the individual data points are layered on top
ggplot(
  data = summary_df,
  mapping = aes(x = year, y = species_richness, colour = location_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Bird Species Richness Over Time",
    x = "Year",
    y = "Species Richness"
  ) +
  theme_minimal()

Bird Abundance Over Time

# Plot bird abundance against year, coloured by location type;
# lines are drawn first and the individual data points are layered on top
ggplot(
  data = summary_df,
  mapping = aes(x = year, y = bird_abundance, colour = location_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Bird Abundance Over Time",
    x = "Year",
    y = "Bird Abundance"
  ) +
  theme_minimal()

```

Project 2

Nicholas Steinhoff

2025-02-22

Load Data

Data Cleaning and Merging

Summarizing Data

Statistical Tests

ANOVA Tests

Linear Regression

Visualization

Species Richness Over Time

Bird Abundance Over Time