R Markdown
# Install packages if not already installed
required_packages <- c("tidyverse", "sf", "ggplot2", "rnaturalearth", "rnaturalearthdata", "knitr", "rlang", "gt", "webshot2", "rsconnect", "devtools", "raster", "geodata")
for (pkg in required_packages) {
if (!require(pkg, character.only = TRUE)) {
install.packages(pkg)
library(pkg, character.only = TRUE)
}
}
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: sf
##
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
##
## Loading required package: rnaturalearth
##
## Loading required package: rnaturalearthdata
##
##
## Attaching package: 'rnaturalearthdata'
##
##
## The following object is masked from 'package:rnaturalearth':
##
## countries110
##
##
## Loading required package: knitr
##
## Loading required package: rlang
##
##
## Attaching package: 'rlang'
##
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
##
##
## Loading required package: gt
##
## Loading required package: webshot2
##
## Loading required package: rsconnect
##
## Loading required package: devtools
##
## Loading required package: usethis
##
##
## Attaching package: 'devtools'
##
##
## The following object is masked from 'package:rsconnect':
##
## lint
##
##
## Loading required package: raster
##
## Loading required package: sp
##
##
## Attaching package: 'raster'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Loading required package: geodata
##
## Loading required package: terra
##
## terra 1.8.5
##
##
## Attaching package: 'terra'
##
##
## The following object is masked from 'package:knitr':
##
## spin
##
##
## The following object is masked from 'package:tidyr':
##
## extract
# Load required libraries
# This shoudl work but didnt so did local installation devtools::install_github("ropensci/rnaturalearthhires", force = TRUE)
devtools::install_local("rnaturalearthhires-master/")
## Skipping install of 'rnaturalearthhires' from a local remote, the SHA1 (1.0.0.90) has not changed since last install.
## Use `force = TRUE` to force installation
library(tidyverse) # Data manipulation and visualization
library(sf) # Handling spatial data
library(ggplot2) # Visualization
library(rnaturalearth) # To get world map data
library(rnaturalearthdata)
library(knitr) # For dynamic reporting
library(rlang) # Utilities for programming
library(gt) # For creating tables
library(webshot2) # For saving tables as images
library(rsconnect) # For RPubs md
library(devtools)
library(rnaturalearthhires)
library(raster)
library(geodata)
## 1. Load the dataset
data <- read.csv("MyEBirdData 2.csv", stringsAsFactors = FALSE)
glimpse(data)
## Rows: 48,395
## Columns: 23
## $ Submission.ID <chr> "S33291577", "S33291584", "S33291523", "S944804…
## $ Common.Name <chr> "Black-bellied Whistling-Duck", "Black-bellied …
## $ Scientific.Name <chr> "Dendrocygna autumnalis", "Dendrocygna autumnal…
## $ Taxonomic.Order <int> 233, 233, 239, 248, 248, 248, 248, 248, 248, 24…
## $ Count <chr> "20", "10", "6", "1", "1", "10", "2", "2", "2",…
## $ State.Province <chr> "NI-RI", "NI-RI", "NI-RI", "IN-MP", "IN-MP", "I…
## $ County <chr> "", "", "", "Anuppur", "Anuppur", "Bengaluru Ru…
## $ Location.ID <chr> "L1004596", "L1004596", "L3670273", "L16346038"…
## $ Location <chr> "Isla Ometepe--Charco Verde ", "Isla Ometepe--C…
## $ Latitude <dbl> 11.48486, 11.48486, 11.51136, 23.11376, 23.0426…
## $ Longitude <dbl> -85.62478, -85.62478, -85.71801, 81.72472, 81.4…
## $ Date <chr> "2016-12-30", "2016-12-30", "2016-12-29", "2021…
## $ Time <chr> "08:58 AM", "09:13 AM", "04:37 PM", "07:32 AM",…
## $ Protocol <chr> "eBird - Stationary Count", "eBird - Traveling …
## $ Duration..Min. <int> 20, 97, 54, 22, 9, 150, 92, 14, 90, 46, 0, 21, …
## $ All.Obs.Reported <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,…
## $ Distance.Traveled..km. <dbl> NA, 2.000, 0.300, 2.128, 3.836, 0.500, 1.500, N…
## $ Area.Covered..ha. <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ Number.of.Observers <int> 2, 2, 2, 1, 1, 3, 2, 1, 5, 5, 12, 10, 4, 2, 5, …
## $ Breeding.Code <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ Observation.Details <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
## $ Checklist.Comments <chr> "Charco verde lagoon", "From charco verde lagoo…
## $ ML.Catalog.Numbers <chr> "127451251", "", "", "", "", "", "", "", "", ""…
## 2. Clean the data
# Remove missing values for Latitude, Longitude, and Common Name
data_clean <- data %>%
filter(!is.na(Latitude), !is.na(Longitude), !is.na(Common.Name))
missing_values <- data %>%
summarise(across(everything(), ~ sum(is.na(.)), .names = "missing_{col}")) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "Missing_Count")
print("Missing values by variable:")
## [1] "Missing values by variable:"
print(missing_values)
## # A tibble: 23 × 2
## Variable Missing_Count
## <chr> <int>
## 1 missing_Submission.ID 0
## 2 missing_Common.Name 0
## 3 missing_Scientific.Name 0
## 4 missing_Taxonomic.Order 0
## 5 missing_Count 0
## 6 missing_State.Province 0
## 7 missing_County 0
## 8 missing_Location.ID 0
## 9 missing_Location 0
## 10 missing_Latitude 0
## # ℹ 13 more rows
# Summarize numeric variables
numeric_summary <- data %>%
dplyr::select(where(is.numeric)) %>%
summarise(across(everything(), list(mean = mean, median = median, sd = sd, min = min, max = max), na.rm = TRUE))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(...)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
print("Summary of numeric variables:")
## [1] "Summary of numeric variables:"
print(numeric_summary)
## Taxonomic.Order_mean Taxonomic.Order_median Taxonomic.Order_sd
## 1 17277.36 21210 9951.142
## Taxonomic.Order_min Taxonomic.Order_max Latitude_mean Latitude_median
## 1 233 35564 13.59148 12.31475
## Latitude_sd Latitude_min Latitude_max Longitude_mean Longitude_median
## 1 5.779635 -33.95959 56.34958 73.01723 77.14248
## Longitude_sd Longitude_min Longitude_max Duration..Min._mean
## 1 26.27715 -123.9558 140.0584 27.85706
## Duration..Min._median Duration..Min._sd Duration..Min._min Duration..Min._max
## 1 17 42.00343 0 1220
## All.Obs.Reported_mean All.Obs.Reported_median All.Obs.Reported_sd
## 1 0.9147433 1 0.2792663
## All.Obs.Reported_min All.Obs.Reported_max Distance.Traveled..km._mean
## 1 0 1 2.024088
## Distance.Traveled..km._median Distance.Traveled..km._sd
## 1 0.6 4.165263
## Distance.Traveled..km._min Distance.Traveled..km._max Area.Covered..ha._mean
## 1 0.08 70 4.0469
## Area.Covered..ha._median Area.Covered..ha._sd Area.Covered..ha._min
## 1 4.0469 0 4.0469
## Area.Covered..ha._max Number.of.Observers_mean Number.of.Observers_median
## 1 4.0469 2.524811 2
## Number.of.Observers_sd Number.of.Observers_min Number.of.Observers_max
## 1 2.178464 1 34
## 3. Group data by location (Latitude and Longitude)
# Calculate the number of unique species at each location
species_counts <- data_clean %>%
group_by(Latitude, Longitude, Location) %>%
summarize(Species_Count = n_distinct(Common.Name), .groups = "drop")
# Convert to spatial data
species_sf <- st_as_sf(species_counts, coords = c("Longitude", "Latitude"), crs = 4326)
## 4. Get world map data
world_map <- ne_countries(scale = "medium", returnclass = "sf")
## 5. Plot the number of unique species observed globally
ggplot() +
geom_sf(data = world_map, fill = "lightgrey", color = "black") +
geom_sf(data = species_sf, aes(size = Species_Count), color = "darkgreen", alpha = 0.6) +
scale_size_continuous(name = "Unique Species Count", range = c(1, 8)) +
labs(title = "PNS eBird",
subtitle = "Based on Uploaded eBird Data",
x = "Longitude", y = "Latitude") +
theme_minimal()

## 6. Get the top 10 most observed species
top_species <- data %>%
count(`Common.Name`, sort = TRUE) %>%
slice_head(n = 10)
print("Top 10 most observed species:")
## [1] "Top 10 most observed species:"
print(top_species)
## Common.Name n
## 1 Red-whiskered Bulbul 1558
## 2 Rock Pigeon (Feral Pigeon) 1408
## 3 White-cheeked Barbet (Small Green Barbet) 1407
## 4 Large-billed Crow 1276
## 5 Black Kite 1254
## 6 Common Tailorbird 1226
## 7 Common Myna 1203
## 8 Ashy Prinia 1086
## 9 Spotted Dove 1073
## 10 Rose-ringed Parakeet 1069
# Display as a table with caption
print("Top 10 Most Observed Species:")
## [1] "Top 10 Most Observed Species:"
top_species %>%
kable(caption = "Top 10 Most Observed Bird Species observed by Prashanth N S on eBird")
Top 10 Most Observed Bird Species observed by Prashanth N S on
eBird
| Red-whiskered Bulbul |
1558 |
| Rock Pigeon (Feral Pigeon) |
1408 |
| White-cheeked Barbet (Small Green Barbet) |
1407 |
| Large-billed Crow |
1276 |
| Black Kite |
1254 |
| Common Tailorbird |
1226 |
| Common Myna |
1203 |
| Ashy Prinia |
1086 |
| Spotted Dove |
1073 |
| Rose-ringed Parakeet |
1069 |
# Create a table and save as an image
species_table <- top_species %>%
gt() %>%
tab_header(
title = "Top 10 Most Observed Bird Species by Prashanth N S",
subtitle = "Based on his eBird Dataset"
) %>%
fmt_number(
columns = "n",
decimals = 0
) %>%
cols_label(
`Common.Name` = "Bird Species",
n = "Observation Count"
)
# Save the table as an image
gtsave(species_table, "top_species_table.png")
## file:////var/folders/0q/ys17wz2548z1x7tlclsfw7kr0000gn/T//RtmpZeUdVa/file18161603ab741.html screenshot completed
## 7. Get the most observed locations
top_locations <- data %>%
count(Location, sort = TRUE) %>%
slice_head(n = 10)
print("Top 10 most observed locations:")
## [1] "Top 10 most observed locations:"
print(top_locations)
## Location
## 1 BR Hills--IPH field station
## 2 Judicial layout--BBMP Park
## 3 Near Zuari/Hulikere
## 4 BR Hills ಬಿ ಆರ್ ಹಿಲ್ಸ್--Biligiri Rangaswamy Temple Tiger Reserve ಬಿಳಿಗಿರಿರಂಗನಾಥಸ್ವಾಮಿ ಹುಲಿ ಅಭಯಾರಣ್ಯ
## 5 Judicial Layout--Adithya Homes
## 6 BR Hills—Shaheen
## 7 Mysore home
## 8 Kumaraswamy Layout Water Tank
## 9 Muntipalya
## 10 BR Hills--Someshwara Kere
## n
## 1 6226
## 2 3546
## 3 3204
## 4 2646
## 5 1501
## 6 1465
## 7 1265
## 8 1137
## 9 805
## 10 745
# Create a table and save as an image
locations_table <- top_locations %>%
gt() %>%
tab_header(
title = "Top 10 Most Observed Locations",
subtitle = "Based on the eBird Dataset"
) %>%
fmt_number(
columns = "n",
decimals = 0
) %>%
cols_label(
Location = "Location",
n = "Observation Count"
)
# Save the table as an image
gtsave(locations_table, "top_locations_table.png")
## file:////var/folders/0q/ys17wz2548z1x7tlclsfw7kr0000gn/T//RtmpZeUdVa/file18161618617cc.html screenshot completed
top_locations %>%
kable(caption = "Top 10 locations from which Bird Species observed by Prashanth N S on eBird")
Top 10 locations from which Bird Species observed by Prashanth
N S on eBird
| BR Hills–IPH field station |
6226 |
| Judicial layout–BBMP Park |
3546 |
| Near Zuari/Hulikere |
3204 |
| BR Hills ಬಿ ಆರ್ ಹಿಲ್ಸ್–Biligiri Rangaswamy Temple Tiger
Reserve ಬಿಳಿಗಿರಿರಂಗನಾಥಸ್ವಾಮಿ ಹುಲಿ ಅಭಯಾರಣ್ಯ |
2646 |
| Judicial Layout–Adithya Homes |
1501 |
| BR Hills—Shaheen |
1465 |
| Mysore home |
1265 |
| Kumaraswamy Layout Water Tank |
1137 |
| Muntipalya |
805 |
| BR Hills–Someshwara Kere |
745 |
## 8. Birding in Karnataka
# Filter data for Karnataka state
data_karnataka <- data %>%
filter(State.Province == "IN-KA") %>%
filter(!is.na(Latitude), !is.na(Longitude))
# Convert to spatial data
data_karnataka_sf <- st_as_sf(data_karnataka, coords = c("Longitude", "Latitude"), crs = 4326)
# Download and load India shapefile from GADM
# india <- gadm("India", level = 1, path = tempdir()) # Level 1: State boundaries
india1 <- st_read("southern-zone-latest-free/gis_osm_landuse_a_free_1.shp") # base map from https://download.geofabrik.de/asia/india.html
## Reading layer `gis_osm_landuse_a_free_1' from data source
## `/Users/prashanthns/Documents/R/Learning/eBird/eBird/southern-zone-latest-free/gis_osm_landuse_a_free_1.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 167393 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 72.18424 ymin: 6.756267 xmax: 94.27662 ymax: 19.91997
## Geodetic CRS: WGS 84
india <- st_read("maps-master/Country/india-osm.geojson") # base map from https://projects.datameet.org/maps/states/#repository
## Reading layer `india-osm' from data source
## `/Users/prashanthns/Documents/R/Learning/eBird/eBird/maps-master/Country/india-osm.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 68.2056 ymin: 6.755997 xmax: 97.39556 ymax: 37.08411
## Geodetic CRS: WGS 84
# Convert to sf object
india_sf <- st_as_sf(india)
india_sf1 <- st_as_sf(india1)
# Plot the map
ggplot() +
geom_sf(data = india_sf, fill = "lightgrey", color = "black") +
geom_sf(data = data_karnataka_sf, aes(color = Location), alpha = 0.7, size = 2) +
labs(
title = "Bird Lists from Karnataka by Prashanth N S",
subtitle = "Locations of eBird Observations in Karnataka (2000-2024)",
x = "Longitude", y = "Latitude"
) +
theme_minimal() +
theme(legend.position = "none")

ggplot() +
geom_sf(data = india_sf1, fill = "lightgrey", color = "black") +
geom_sf(data = data_karnataka_sf, aes(color = Location), alpha = 0.7, size = 2) +
labs(
title = "Bird Lists from Karnataka by Prashanth N S",
subtitle = "Locations of eBird Observations in Karnataka (2000-2024)",
x = "Longitude", y = "Latitude"
) +
theme_minimal() +
theme(legend.position = "none")

# 9. type of lists
# Count the types of lists
list_counts <- data %>%
count(Protocol, sort = TRUE) # Group by 'Protocol' column and count occurrences
# Display the counts in a table
print(list_counts)
## Protocol n
## 1 eBird - Stationary Count 27249
## 2 eBird - Traveling Count 18702
## 3 eBird - Casual Observation 2314
## 4 Historical 130
# Plot the types of lists as a bar chart
ggplot(list_counts, aes(x = reorder(Protocol, n), y = n)) +
geom_bar(stat = "identity", fill = "steelblue", color = "black") +
coord_flip() + # Flip the coordinates for better readability
labs(
title = "Type of Lists by Number",
x = "List Type (Protocol)",
y = "Number of Lists"
) +
theme_minimal()

# 10.Over time
# Load required libraries
library(lubridate)
# Convert Count to numeric (replacing invalid entries with NA)
data <- data %>%
mutate(
Count = as.numeric(Count), # Convert Count to numeric
Date = as.Date(Date), # Ensure Date is in the correct format
Year = year(Date), # Extract Year
Month = month(Date, label = TRUE) # Extract Month (labeled)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Count = as.numeric(Count)`.
## Caused by warning:
## ! NAs introduced by coercion
# Summarize bird observations over time
# Lists per month
monthly_observations <- data %>%
group_by(Year, Month) %>%
summarise(Observations = n(), .groups = "drop")
# Sum the bird counts (if Count is numeric and cleaned)
monthly_counts <- data %>%
filter(!is.na(Count)) %>% # Exclude rows with invalid Count values
group_by(Year, Month) %>%
summarise(Total_Birds = sum(Count, na.rm = TRUE), .groups = "drop")
# trend of observations over time
ggplot(monthly_observations, aes(x = interaction(Year, Month, sep = "-"), y = Observations)) +
geom_line(color = "blue", size = 1) +
geom_point(color = "red", size = 2) +
labs(
title = "Number of Bird Observations Over Time",
x = "Time (Year-Month)",
y = "Number of Observations"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Plot the trend of bird counts over time
ggplot(monthly_counts, aes(x = interaction(Year, Month, sep = "-"), y = Total_Birds)) +
geom_line(color = "green", size = 1) +
geom_point(color = "orange", size = 2) +
labs(
title = "Total Birds Observed Over Time",
x = "Time (Year-Month)",
y = "Total Bird Count"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Summarize by Quarter
quarterly_observations <- data %>%
mutate(Quarter = paste0("Q", quarter(Date), "-", Year)) %>%
group_by(Quarter) %>%
summarise(Observations = n(), .groups = "drop")
# Plot the quarterly trend
ggplot(quarterly_observations, aes(x = Quarter, y = Observations)) +
geom_line(color = "blue", size = 1) +
geom_point(color = "red", size = 2) +
labs(
title = "Number of Bird Observations Over Time (Quarterly)",
x = "Quarter",
y = "Number of Observations"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Summarize by Year instead of Year-Month
data <- data %>%
mutate(
Date = as.Date(Date), # Ensure Date is in the correct format
Year = year(Date) # Extract the Year
) %>%
filter(Year >= 2000 & Year <= 2024) # Filter for years 2000 to 2024
# Summarize by Year
yearly_observations <- data %>%
group_by(Year) %>%
summarise(Observations = n(), .groups = "drop") # Count the number of lists per year
# Plot the yearly trend from 2000-2024
ggplot(yearly_observations, aes(x = Year, y = Observations)) +
geom_line(color = "blue", size = 1) +
geom_point(color = "red", size = 2) +
labs(
title = "Number of Bird Observations Over Time (2000-2024)",
x = "Year",
y = "Number of Observations"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
