knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
# Load the forest-birds data by building the full path to the CSV instead of
# calling setwd(): changing the working directory is a global side effect, and
# knitr restores the directory after each chunk, so it cannot be relied on.
# NOTE(review): this path only exists on the author's machine; consider
# keeping the CSV next to the .Rmd and using a relative path.
data_dir <- "~/Documents/School/Data 101/Data project/Birds"
birds <- read.csv(file.path(data_dir, "forest.birds.csv"))

R Markdown

Research Question: Does the number of years a forest patch has been isolated really affect bird abundance? (using the variables abundance and yrs.isolation)

Introduction: In this project I will demonstrate how environmental factors affect forest bird populations in Australia. I will focus on the number of years a forest patch has been isolated and how that relates to bird abundance. These ongoing environmental issues are important because they severely affect bird species. The data come from the OpenIntro forest birds dataset, which includes the following variables: abundance, patch.area, year.of.isolation, dist.nearest, dist.larger, grazing.intensity, altitude, and yrs.isolation.

# Cleaning the data: check for missing values and clean the column names.

# Count the missing values in every column.
birds |>
  is.na() |>
  colSums()
##         abundance        patch.area year.of.isolation      dist.nearest 
##                 0                 0                 0                 0 
##       dist.larger grazing.intensity          altitude     yrs.isolation 
##                 0                 0                 0                 0
# Standardise the column names: lower-case, with every dot turned into an
# underscore.
names(birds) <- names(birds) |>
  tolower() |>
  gsub(pattern = "\\.", replacement = "_")

# Preview the first rows of the cleaned data.
birds |>
  head()
##   abundance patch_area year_of_isolation dist_nearest dist_larger
## 1       5.3        0.1              1968           39          39
## 2       2.0        0.5              1920          234         234
## 3       1.5        0.5              1900          104         311
## 4      17.1        1.0              1966           66          66
## 5      13.8        1.0              1918          246         246
## 6      14.1        1.0              1965          234         285
##   grazing_intensity altitude yrs_isolation
## 1 less than average      160            15
## 2             heavy       60            63
## 3             heavy      140            83
## 4           average      160            17
## 5             heavy      140            65
## 6           average      130            18
# Per-column summary statistics.
birds |>
  summary()
##    abundance       patch_area      year_of_isolation  dist_nearest   
##  Min.   : 1.50   Min.   :   0.10   Min.   :1890      Min.   :  26.0  
##  1st Qu.:12.40   1st Qu.:   2.00   1st Qu.:1928      1st Qu.:  93.0  
##  Median :21.05   Median :   7.50   Median :1962      Median : 234.0  
##  Mean   :19.51   Mean   :  69.27   Mean   :1950      Mean   : 240.4  
##  3rd Qu.:28.30   3rd Qu.:  29.75   3rd Qu.:1966      3rd Qu.: 333.2  
##  Max.   :39.60   Max.   :1771.00   Max.   :1976      Max.   :1427.0  
##   dist_larger     grazing_intensity     altitude     yrs_isolation  
##  Min.   :  26.0   Length:56          Min.   : 60.0   Min.   : 7.00  
##  1st Qu.: 158.2   Class :character   1st Qu.:120.0   1st Qu.:17.00  
##  Median : 338.5   Mode  :character   Median :140.0   Median :20.50  
##  Mean   : 733.3                      Mean   :146.2   Mean   :33.25  
##  3rd Qu.: 913.8                      3rd Qu.:182.5   3rd Qu.:55.50  
##  Max.   :4426.0                      Max.   :260.0   Max.   :93.00
# Show the column types and the first few values of each column.
birds |>
  str()
## 'data.frame':    56 obs. of  8 variables:
##  $ abundance        : num  5.3 2 1.5 17.1 13.8 14.1 3.8 2.2 3.3 3 ...
##  $ patch_area       : num  0.1 0.5 0.5 1 1 1 1 1 1 1 ...
##  $ year_of_isolation: int  1968 1920 1900 1966 1918 1965 1955 1920 1965 1900 ...
##  $ dist_nearest     : int  39 234 104 66 246 234 467 284 156 311 ...
##  $ dist_larger      : int  39 234 311 66 246 285 467 1829 156 571 ...
##  $ grazing_intensity: chr  "less than average" "heavy" "heavy" "average" ...
##  $ altitude         : int  160 60 140 160 140 130 90 60 130 130 ...
##  $ yrs_isolation    : int  15 63 83 17 65 18 28 63 18 83 ...

Utilizing functions

# Keep only the three columns relevant to the research question.
birds_chosen <- select(birds, abundance, patch_area, yrs_isolation)

Finding the mean and summary

# Overall mean abundance across all rows.
mean(birds[["abundance"]])
## [1] 19.51429
# Mean abundance for each distinct value of yrs_isolation.
summarise(
  group_by(birds, yrs_isolation),
  abundance = mean(abundance)
)
## # A tibble: 25 × 2
##    yrs_isolation abundance
##            <int>     <dbl>
##  1             7      34.4
##  2             9      29  
##  3            10      22.8
##  4            11      27.1
##  5            13      29.5
##  6            15      11.0
##  7            16      27.4
##  8            17      24.5
##  9            18      15.1
## 10            19      28.9
## # ℹ 15 more rows

#Creating a new variable using mutate

# Add size_available: "Big" when patch_area is above the median patch_area,
# otherwise "Small" (rows equal to the median fall under "Small").
birds_df <- mutate(
  birds,
  size_available = ifelse(patch_area > median(patch_area), "Big", "Small")
)

#Scatterplot (Visualization)

library(ggplot2)

# Scatterplot of abundance against years of isolation. Point size encodes
# patch_area, and the smoother is drawn without its confidence band.
ggplot(data = birds, mapping = aes(x = yrs_isolation, y = abundance)) +
  geom_point(mapping = aes(size = patch_area), alpha = 1 / 3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

This scatterplot shows the relationship between bird abundance and years of isolation, with patch area shown as point size. Bird abundance tends to be lower in patches that have been isolated for more years. The plot also shows that larger patches tend to have a higher bird abundance than smaller patches. Together, this suggests that both patch size and isolation play a role in bird population outcomes.

# Conclusion and future directions

In conclusion, this suggests that patch size and years of isolation affect the bird population (abundance) in Australian forests. This demonstrates the importance of preserving natural habitat, and shows how habitat isolation directly impacts animal populations. Future research could include more variables and make fuller use of the data to better understand how these factors affect bird populations.

# References

This data comes from the OpenIntro forest birds dataset.

Previous assignments and past chapters: Scatterplot formats: https://r4ds.had.co.nz/transform.html and Descriptive Statistics.Rmd. Dataset cleaning under “cleaning data headings and variable names”: Loading Datasets.Rmd. Future directions: help from https://www.sciencedirect.com/science/article/abs/pii/S0006320700000173