# Load the two input tables from CSV files in the current working directory.
surveys <- read.csv("46_bird_surveys.csv")
observations <- read.csv("46_bird_observations.csv")
# Parse survey_date as Date in both tables. It is one of the join keys used
# further down, so both sides need the same type.
surveys[["survey_date"]] <- as.Date(surveys[["survey_date"]])
observations[["survey_date"]] <- as.Date(observations[["survey_date"]])
# Coerce bird_count to numeric so it can be summed during aggregation.
observations[["bird_count"]] <- as.numeric(observations[["bird_count"]])
# Build the analysis table in a single pipeline:
#   1. left_join keeps every row of `observations` and attaches the columns of
#      `surveys` wherever all six key columns match;
#   2. keep only the columns needed downstream;
#   3. drop rows whose bird_count is NA;
#   4. derive a numeric `year` column from survey_date.
# Assumes dplyr (%>%, left_join, select, mutate) and tidyr (drop_na) are
# already attached; no library() call is visible in this section.
merged_df <- observations %>%
  left_join(
    surveys,
    by = c(
      "survey_id", "site_code", "survey_date",
      "time_start", "time_end", "observer"
    )
  ) %>%
  select(
    survey_id, site_code, survey_date,
    location_type, common_name, bird_count
  ) %>%
  drop_na(bird_count) %>%
  mutate(year = as.numeric(format(survey_date, "%Y")))
# Collapse merged_df to one row per (year, location_type) combination:
#   species_richness - number of distinct common_name values in the group
#   bird_abundance   - sum of bird_count in the group
# .groups = "drop" returns an ungrouped result.
summary_df <- summarise(
  group_by(merged_df, year, location_type),
  species_richness = n_distinct(common_name),
  bird_abundance = sum(bird_count),
  .groups = "drop"
)
# One-way ANOVA of species richness on location type. Each row of summary_df
# (one year x location_type combination) enters as a single observation.
anova_richness <- aov(formula = species_richness ~ location_type,
                      data = summary_df)
summary(anova_richness)
## Df Sum Sq Mean Sq F value Pr(>F)
## location_type 5 54802 10960 61.16 <2e-16 ***
## Residuals 80 14336 179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same one-way ANOVA, with total bird abundance as the response.
anova_abundance <- aov(formula = bird_abundance ~ location_type,
                       data = summary_df)
summary(anova_abundance)
## Df Sum Sq Mean Sq F value Pr(>F)
## location_type 5 1.713e+09 342637404 22.76 3.9e-14 ***
## Residuals 80 1.204e+09 15051871
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simple linear regression of species richness on year (temporal trend).
# NOTE(review): location_type is not a term in this model, so rows from all
# location types are pooled. The ANOVA output recorded in this file shows
# large differences between location types; confirm that pooling is intended.
lm_richness <- lm(formula = species_richness ~ year, data = summary_df)
summary(lm_richness)
##
## Call:
## lm(formula = species_richness ~ year, data = summary_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.287 -20.973 7.477 19.045 44.987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1549.2365 893.2535 1.734 0.0865 .
## year -0.7264 0.4437 -1.637 0.1053
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.24 on 84 degrees of freedom
## Multiple R-squared: 0.03093, Adjusted R-squared: 0.01939
## F-statistic: 2.681 on 1 and 84 DF, p-value: 0.1053
# Same trend model, with total bird abundance as the response.
lm_abundance <- lm(formula = bird_abundance ~ year, data = summary_df)
summary(lm_abundance)
##
## Call:
## lm(formula = bird_abundance ~ year, data = summary_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8027 -3709 -2124 3964 14841
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 504209.94 178289.88 2.828 0.00585 **
## year -247.46 88.55 -2.794 0.00644 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5637 on 84 degrees of freedom
## Multiple R-squared: 0.08506, Adjusted R-squared: 0.07417
## F-statistic: 7.809 on 1 and 84 DF, p-value: 0.006439
# Species richness by year: one coloured line, with a point marker at each
# year, per location type.
ggplot(
  data = summary_df,
  mapping = aes(x = year, y = species_richness, colour = location_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Bird Species Richness Over Time",
    x = "Year",
    y = "Species Richness"
  ) +
  theme_minimal()
# Total bird abundance by year: one coloured line, with a point marker at each
# year, per location type.
ggplot(
  data = summary_df,
  mapping = aes(x = year, y = bird_abundance, colour = location_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Bird Abundance Over Time",
    x = "Year",
    y = "Bird Abundance"
  ) +
  theme_minimal()
```