suppressPackageStartupMessages({
library(tidyverse) })Warning: package 'tidyverse' was built under R version 4.4.2
Warning: package 'lubridate' was built under R version 4.4.2
suppressPackageStartupMessages({
library(tidyverse) })Warning: package 'tidyverse' was built under R version 4.4.2
Warning: package 'lubridate' was built under R version 4.4.2
#Loading Libraries
library(dplyr) # For data manipulation
library(ggplot2) # For visualization
library(tidyverse)
# Analyze the dataset.
#Loading Dataset
cr_data <- read_csv("All CR Occurence.csv")Rows: 58187 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Common Name
dbl (2): Latitude, Longitude
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
en_data <- read_csv("All EN Occurance Data.csv")Rows: 62497 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Common Name
dbl (2): Latitude, Longitude
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vu_data <- read_csv("All VU Occurence Data.csv")Rows: 39125 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Common Name
dbl (2): Latitude, Longitude
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(cr_data) # Check structurespc_tbl_ [58,187 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Common Name: chr [1:58187] "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" ...
$ Latitude : num [1:58187] 6.35 6.35 6.49 6.5 6.35 ...
$ Longitude : num [1:58187] 81.5 81.5 80.9 81.5 81.5 ...
- attr(*, "spec")=
.. cols(
.. `Common Name` = col_character(),
.. Latitude = col_double(),
.. Longitude = col_double()
.. )
- attr(*, "problems")=<externalptr>
head(cr_data) # View first few rows# A tibble: 6 × 3
`Common Name` Latitude Longitude
<chr> <dbl> <dbl>
1 Black-necked Stork 6.35 81.5
2 Black-necked Stork 6.35 81.5
3 Black-necked Stork 6.49 80.9
4 Black-necked Stork 6.50 81.5
5 Black-necked Stork 6.35 81.5
6 Black-necked Stork 6.35 81.5
summary(cr_data) # Get summary statistics Common Name Latitude Longitude
Length:58187 Min. :5.887 Min. :79.52
Class :character 1st Qu.:6.353 1st Qu.:79.88
Mode :character Median :6.889 Median :80.03
Mean :7.038 Mean :80.35
3rd Qu.:7.179 3rd Qu.:80.81
Max. :9.831 Max. :81.86
cr_data# A tibble: 58,187 × 3
`Common Name` Latitude Longitude
<chr> <dbl> <dbl>
1 Black-necked Stork 6.35 81.5
2 Black-necked Stork 6.35 81.5
3 Black-necked Stork 6.49 80.9
4 Black-necked Stork 6.50 81.5
5 Black-necked Stork 6.35 81.5
6 Black-necked Stork 6.35 81.5
7 Black-necked Stork 6.39 81.5
8 Black-necked Stork 6.39 81.5
9 Black-necked Stork 6.58 81.7
10 Black-necked Stork 6.29 81.5
# ℹ 58,177 more rows
# Stack the CR, EN and VU occurrence tables into one data frame, tagging every
# row with the threat category of the table it came from.
bird_occurrence <- bind_rows(
  mutate(cr_data, Threat_Category = "CR"),
  mutate(en_data, Threat_Category = "EN"),
  mutate(vu_data, Threat_Category = "VU")
)
# Checking
head(bird_occurrence)# A tibble: 6 × 4
`Common Name` Latitude Longitude Threat_Category
<chr> <dbl> <dbl> <chr>
1 Black-necked Stork 6.35 81.5 CR
2 Black-necked Stork 6.35 81.5 CR
3 Black-necked Stork 6.49 80.9 CR
4 Black-necked Stork 6.50 81.5 CR
5 Black-necked Stork 6.35 81.5 CR
6 Black-necked Stork 6.35 81.5 CR
str(bird_occurrence)tibble [159,809 × 4] (S3: tbl_df/tbl/data.frame)
$ Common Name : chr [1:159809] "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" ...
$ Latitude : num [1:159809] 6.35 6.35 6.49 6.5 6.35 ...
$ Longitude : num [1:159809] 81.5 81.5 80.9 81.5 81.5 ...
$ Threat_Category: chr [1:159809] "CR" "CR" "CR" "CR" ...
#Summary count of the Bird Occurrence
bird_occurrence %>%
group_by(Threat_Category) %>%
summarise(Count = n())# A tibble: 3 × 2
Threat_Category Count
<chr> <int>
1 CR 58187
2 EN 62497
3 VU 39125
# Visualization of the summary counts:
# one bar per threat category, bar height = number of occurrence records.
ggplot(
  data = bird_occurrence,
  mapping = aes(x = Threat_Category, fill = Threat_Category)
) +
  geom_bar() +
  scale_fill_manual(values = c(CR = "red", EN = "orange", VU = "yellow")) +
  labs(
    title = "Number of Bird Occurrences by Threat Category",
    x = "Threat Category",
    y = "Count"
  ) +
  theme_minimal()
# Species counts per category
bird_occurrence %>%
group_by(Threat_Category) %>%
summarise(Unique_Species = n_distinct(`Common Name`))# A tibble: 3 × 2
Threat_Category Unique_Species
<chr> <int>
1 CR 19
2 EN 44
3 VU 15
#Top 10 Species
bird_occurrence %>%
count(`Common Name`, sort = TRUE) %>%
head(10)# A tibble: 10 × 2
`Common Name` n
<chr> <int>
1 Rock Pigeon 23166
2 Blue-tailed Bee-eater 21397
3 Indian Swiftlet 9203
4 Gull-billed Tern 5222
5 Sri Lanka White-eye 4809
6 Oriental Honey-buzzard 4519
7 Sri Lanka Orange-billed Babbler 4209
8 Little Ringed Plover 3657
9 Sri Lanka Crested Drongo 3544
10 Yellow Bittern 3522
# General map: every occurrence record plotted at its coordinates and coloured
# by threat category.
ggplot(
  bird_occurrence,
  aes(x = Longitude, y = Latitude, color = Threat_Category)
) +
  # A few records have no coordinates; na.rm = TRUE drops them explicitly
  # instead of raising a "Removed rows" warning on every render.
  geom_point(alpha = 0.6, na.rm = TRUE) +
  # Axes are degrees of longitude/latitude, so keep an approximately correct
  # aspect ratio rather than stretching the map to the plot window.
  coord_quickmap() +
  theme_minimal() +
  labs(
    title = " Endangered Bird Occurrence Distribution in Sri Lanka",
    x = "Longitude", y = "Latitude"
  )
library(patchwork) # For arranging multiple plots togetherWarning: package 'patchwork' was built under R version 4.4.2
#' Bar plot of occurrence counts per species for one threat category
#'
#' Keeps the rows of `data` whose `Threat_Category` equals `category`, counts
#' the rows per `Common Name`, and draws one bar per species ordered from the
#' most to the least recorded.
#'
#' @param data Data frame with `Threat_Category` and `Common Name` columns.
#' @param category Value of `Threat_Category` to keep, e.g. "CR".
#' @param color Fill colour applied to every bar.
#' @return A ggplot object.
plot_species_occurrence <- function(data, category, color) {
  species_counts <- data %>%
    # .env$ pins `category` to the function argument; a bare `category` would
    # be looked up in `data` first if it ever had a column of that name.
    filter(Threat_Category == .env$category) %>%
    count(`Common Name`, name = "Count")

  # reorder() on -Count determines the bar order, so no arrange() is needed.
  ggplot(species_counts, aes(x = reorder(`Common Name`, -Count), y = Count)) +
    geom_col(fill = color) +
    labs(
      title = paste("Occurrences of", category, "Birds"),
      x = "Species",
      y = "Occurrence Count"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) # Rotate x labels
}
# Generate plots for CR, EN, and VU
plot_CR <- plot_species_occurrence(bird_occurrence, "CR", "red")
plot_EN <- plot_species_occurrence(bird_occurrence, "EN", "orange")
plot_VU <- plot_species_occurrence(bird_occurrence, "VU", "yellow")
# Display plots side by side
# patchwork's `|` operator places the three plots next to each other in a row.
plot_CR | plot_EN | plot_VU
# Read the CSV file
en_birds <- read_csv("EN_Inisde_Outside.csv")Rows: 62497 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check the first few rows
head(en_birds)# A tibble: 6 × 26
`Common Name` Latitude Longitude fid NatName IntName SitLat SitLong GISArea
<chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 Yellow Bittern 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
2 Cinnamon Bitt… 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
3 Long-tailed S… 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
4 Lesser Adjuta… 8.36 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
5 Cinnamon Bitt… 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
6 Eurasian Kest… 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
# ℹ 17 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
# WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
# DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
# GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
# Inside_Outside_PA <chr>
# Check the structure of the dataset
str(en_birds)spc_tbl_ [62,497 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Common Name : chr [1:62497] "Yellow Bittern" "Cinnamon Bittern" "Long-tailed Shrike" "Lesser Adjutant" ...
$ Latitude : num [1:62497] 8.88 8.88 8.88 8.36 8.35 ...
$ Longitude : num [1:62497] 80 80 80 80.4 80.4 ...
$ fid : num [1:62497] 1 1 1 2 2 2 2 2 2 2 ...
$ NatName : chr [1:62497] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
$ IntName : chr [1:62497] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
$ SitLat : num [1:62497] 8.86 8.86 8.86 8.35 8.35 ...
$ SitLong : num [1:62497] 80.1 80.1 80.1 80.4 80.4 ...
$ GISArea : num [1:62497] 4024 4024 4024 3541 3541 ...
$ PA_TYPE : chr [1:62497] "IBA" "IBA" "IBA" "IBA" ...
$ Source : chr [1:62497] NA NA NA NA ...
$ WDPAID : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
$ WDPA_PID : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
$ PA_DEF : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
$ NAME : chr [1:62497] NA NA NA NA ...
$ ORIG_NAME : chr [1:62497] NA NA NA NA ...
$ DESIG : chr [1:62497] NA NA NA NA ...
$ DESIG_ENG : chr [1:62497] NA NA NA NA ...
$ DESIG_TYPE : chr [1:62497] NA NA NA NA ...
$ IUCN_CAT : chr [1:62497] "IBA" "IBA" "IBA" "IBA" ...
$ INT_CRIT : chr [1:62497] NA NA NA NA ...
$ GIS_M_AREA : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
$ GIS_AREA : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
$ layer : chr [1:62497] "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" ...
$ path : chr [1:62497] "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" ...
$ Inside_Outside_PA: chr [1:62497] "Inside" "Inside" "Inside" "Inside" ...
- attr(*, "spec")=
.. cols(
.. `Common Name` = col_character(),
.. Latitude = col_double(),
.. Longitude = col_double(),
.. fid = col_double(),
.. NatName = col_character(),
.. IntName = col_character(),
.. SitLat = col_double(),
.. SitLong = col_double(),
.. GISArea = col_double(),
.. PA_TYPE = col_character(),
.. Source = col_character(),
.. WDPAID = col_double(),
.. WDPA_PID = col_double(),
.. PA_DEF = col_double(),
.. NAME = col_character(),
.. ORIG_NAME = col_character(),
.. DESIG = col_character(),
.. DESIG_ENG = col_character(),
.. DESIG_TYPE = col_character(),
.. IUCN_CAT = col_character(),
.. INT_CRIT = col_character(),
.. GIS_M_AREA = col_double(),
.. GIS_AREA = col_double(),
.. layer = col_character(),
.. path = col_character(),
.. Inside_Outside_PA = col_character()
.. )
- attr(*, "problems")=<externalptr>
# View summary statistics
summary(en_birds) Common Name Latitude Longitude fid
Length:62497 Min. :5.887 Min. :79.61 Min. : 1.0
Class :character 1st Qu.:6.409 1st Qu.:80.42 1st Qu.: 28.0
Mode :character Median :6.451 Median :80.45 Median : 59.0
Mean :6.779 Mean :80.57 Mean :179.1
3rd Qu.:6.967 3rd Qu.:80.81 3rd Qu.:498.0
Max. :9.831 Max. :84.75 Max. :922.0
NA's :25359
NatName IntName SitLat SitLong
Length:62497 Length:62497 Min. :6.07 Min. :79.82
Class :character Class :character 1st Qu.:6.40 1st Qu.:80.48
Mode :character Mode :character Median :6.40 Median :80.48
Mean :6.61 Mean :80.67
3rd Qu.:6.83 3rd Qu.:80.82
Max. :9.68 Max. :81.59
NA's :35557 NA's :35557
GISArea PA_TYPE Source WDPAID
Min. : 108.7 Length:62497 Length:62497 Min. : 899
1st Qu.: 8037.1 Class :character Class :character 1st Qu.: 902
Median :12007.2 Mode :character Mode :character Median : 2462
Mean :10671.3 Mean : 45227442
3rd Qu.:12007.2 3rd Qu.: 23126
Max. :57007.7 Max. :555592541
NA's :35557 NA's :52299
WDPA_PID PA_DEF NAME ORIG_NAME
Min. : 899 Min. :1 Length:62497 Length:62497
1st Qu.: 902 1st Qu.:1 Class :character Class :character
Median : 2462 Median :1 Mode :character Mode :character
Mean : 45227442 Mean :1
3rd Qu.: 23126 3rd Qu.:1
Max. :555592541 Max. :1
NA's :52299 NA's :52299
DESIG DESIG_ENG DESIG_TYPE IUCN_CAT
Length:62497 Length:62497 Length:62497 Length:62497
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
INT_CRIT GIS_M_AREA GIS_AREA layer
Length:62497 Min. : 0.00 Min. : 0.32 Length:62497
Class :character 1st Qu.: 0.00 1st Qu.: 43.20 Class :character
Mode :character Median : 0.00 Median : 95.81 Mode :character
Mean : 0.73 Mean : 345.47
3rd Qu.: 1.79 3rd Qu.: 979.49
Max. :25.01 Max. :1313.49
NA's :52299 NA's :52299
path Inside_Outside_PA
Length:62497 Length:62497
Class :character Class :character
Mode :character Mode :character
# Number of EN occurrence records inside vs. outside protected areas
en_counts <- count(en_birds, Inside_Outside_PA, name = "count")
print(en_counts)# A tibble: 2 × 2
Inside_Outside_PA count
<chr> <int>
1 Inside 37138
2 Outside 25359
ggplot(en_counts, aes(x = Inside_Outside_PA, y = count, fill = Inside_Outside_PA)) +
geom_bar(stat = "identity") +
labs(title = "Endangered Bird Occurrences Inside vs. Outside Protected Areas",
x = "Location",
y = "Number of Bird Occurrences") +
theme_minimal()# Count occurrences per species inside vs. outside
# These counts come from en_birds, so they get an EN-specific name; that keeps
# them distinct from the CR species counts that are computed from cr_birds.
en_species_counts <- en_birds %>%
  group_by(`Common Name`, Inside_Outside_PA) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
`summarise()` has grouped output by 'Common Name'. You can override using the
`.groups` argument.
print(en_species_counts)
# A tibble: 87 × 3
# Groups: Common Name [44]
`Common Name` Inside_Outside_PA count
<chr> <chr> <int>
1 Great Crested Tern Outside 2523
2 Sri Lanka Orange-billed Babbler Inside 2438
3 Sri Lanka Crested Drongo Inside 2414
4 Sri Lanka Blue-Magpie Inside 2139
5 Little Ringed Plover Outside 1913
6 Yellow-eared Bulbul Inside 1909
7 Yellow Bittern Outside 1903
8 Kentish Plover Inside 1899
9 Sri Lanka Orange-billed Babbler Outside 1771
10 Little Ringed Plover Inside 1744
# ℹ 77 more rows
# Visualization: per-species bars, inside and outside drawn next to each other
ggplot(
  en_species_counts,
  aes(x = `Common Name`, y = count, fill = Inside_Outside_PA)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Endangered Bird Species Distribution Inside VS Outside",
       x = "Species",
       y = "Number of Observations") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Read the CSV file
vu_birds <- read_csv("VU_Birds_Iniside_Outside.csv")Rows: 39125 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check the first few rows
head(vu_birds)# A tibble: 6 × 26
`Common Name` Latitude Longitude fid NatName IntName SitLat SitLong GISArea
<chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 Cotton Pygmy-… 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
2 Sri Lanka Spu… 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
3 Cotton Pygmy-… 8.36 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
4 Cotton Pygmy-… 8.34 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
5 Cotton Pygmy-… 8.36 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
6 Cotton Pygmy-… 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
# ℹ 17 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
# WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
# DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
# GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
# inside_outside <chr>
# Check the structure of the dataset
str(vu_birds)spc_tbl_ [39,125 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Common Name : chr [1:39125] "Cotton Pygmy-Goose" "Sri Lanka Spurfowl" "Cotton Pygmy-Goose" "Cotton Pygmy-Goose" ...
$ Latitude : num [1:39125] 8.88 8.35 8.36 8.34 8.36 ...
$ Longitude : num [1:39125] 80 80.4 80.4 80.4 80.4 ...
$ fid : num [1:39125] 1 2 2 2 2 2 2 2 2 2 ...
$ NatName : chr [1:39125] "Giants Tank" "Anuradhapura" "Anuradhapura" "Anuradhapura" ...
$ IntName : chr [1:39125] "Giants Tank" "Anuradhapura" "Anuradhapura" "Anuradhapura" ...
$ SitLat : num [1:39125] 8.86 8.35 8.35 8.35 8.35 ...
$ SitLong : num [1:39125] 80.1 80.4 80.4 80.4 80.4 ...
$ GISArea : num [1:39125] 4024 3541 3541 3541 3541 ...
$ PA_TYPE : chr [1:39125] "IBA" "IBA" "IBA" "IBA" ...
$ Source : chr [1:39125] NA NA NA NA ...
$ WDPAID : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
$ WDPA_PID : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
$ PA_DEF : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
$ NAME : chr [1:39125] NA NA NA NA ...
$ ORIG_NAME : chr [1:39125] NA NA NA NA ...
$ DESIG : chr [1:39125] NA NA NA NA ...
$ DESIG_ENG : chr [1:39125] NA NA NA NA ...
$ DESIG_TYPE : chr [1:39125] NA NA NA NA ...
$ IUCN_CAT : chr [1:39125] "IBA" "IBA" "IBA" "IBA" ...
$ INT_CRIT : chr [1:39125] NA NA NA NA ...
$ GIS_M_AREA : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
$ GIS_AREA : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
$ layer : chr [1:39125] "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" ...
$ path : chr [1:39125] "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" ...
$ inside_outside: chr [1:39125] "Inside" "Inside" "Inside" "Inside" ...
- attr(*, "spec")=
.. cols(
.. `Common Name` = col_character(),
.. Latitude = col_double(),
.. Longitude = col_double(),
.. fid = col_double(),
.. NatName = col_character(),
.. IntName = col_character(),
.. SitLat = col_double(),
.. SitLong = col_double(),
.. GISArea = col_double(),
.. PA_TYPE = col_character(),
.. Source = col_character(),
.. WDPAID = col_double(),
.. WDPA_PID = col_double(),
.. PA_DEF = col_double(),
.. NAME = col_character(),
.. ORIG_NAME = col_character(),
.. DESIG = col_character(),
.. DESIG_ENG = col_character(),
.. DESIG_TYPE = col_character(),
.. IUCN_CAT = col_character(),
.. INT_CRIT = col_character(),
.. GIS_M_AREA = col_double(),
.. GIS_AREA = col_double(),
.. layer = col_character(),
.. path = col_character(),
.. inside_outside = col_character()
.. )
- attr(*, "problems")=<externalptr>
# View summary statistics
summary(vu_birds) Common Name Latitude Longitude fid
Length:39125 Min. :5.887 Min. :79.59 Min. : 1.0
Class :character 1st Qu.:6.428 1st Qu.:80.41 1st Qu.: 28.0
Mode :character Median :6.802 Median :80.52 Median : 59.0
Mean :6.849 Mean :80.57 Mean :208.5
3rd Qu.:6.991 3rd Qu.:80.81 3rd Qu.:498.0
Max. :9.823 Max. :81.86 Max. :922.0
NA's :3 NA's :3 NA's :20014
NatName IntName SitLat SitLong
Length:39125 Length:39125 Min. :6.070 Min. :79.82
Class :character Class :character 1st Qu.:6.397 1st Qu.:80.48
Mode :character Mode :character Median :6.490 Median :80.64
Mean :6.705 Mean :80.66
3rd Qu.:6.826 3rd Qu.:80.82
Max. :9.678 Max. :81.59
NA's :26293 NA's :26293
GISArea PA_TYPE Source WDPAID
Min. : 108.7 Length:39125 Length:39125 Min. : 899
1st Qu.: 8037.1 Class :character Class :character 1st Qu.: 902
Median : 8037.1 Mode :character Mode :character Median : 16383
Mean :10998.1 Mean : 63099043
3rd Qu.:12007.2 3rd Qu.: 27809
Max. :57007.7 Max. :555592541
NA's :26293 NA's :32846
WDPA_PID PA_DEF NAME ORIG_NAME
Min. : 899 Min. :1 Length:39125 Length:39125
1st Qu.: 902 1st Qu.:1 Class :character Class :character
Median : 16383 Median :1 Mode :character Mode :character
Mean : 63099043 Mean :1
3rd Qu.: 27809 3rd Qu.:1
Max. :555592541 Max. :1
NA's :32846 NA's :32846
DESIG DESIG_ENG DESIG_TYPE IUCN_CAT
Length:39125 Length:39125 Length:39125 Length:39125
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
INT_CRIT GIS_M_AREA GIS_AREA layer
Length:39125 Min. :0.00 Min. : 0.31 Length:39125
Class :character 1st Qu.:0.00 1st Qu.: 26.91 Class :character
Mode :character Median :0.00 Median : 95.81 Mode :character
Mean :0.47 Mean : 278.39
3rd Qu.:0.00 3rd Qu.: 343.15
Max. :6.53 Max. :1313.49
NA's :32846 NA's :32846
path inside_outside
Length:39125 Length:39125
Class :character Class :character
Mode :character Mode :character
# Number of VU occurrence records inside vs. outside protected areas
vu_counts <- count(vu_birds, inside_outside, name = "count")
print(vu_counts)# A tibble: 2 × 2
inside_outside count
<chr> <int>
1 Inside 19111
2 Outside 20014
ggplot(vu_counts, aes(x = inside_outside, y = count, fill = inside_outside)) +
geom_bar(stat = "identity") +
labs(title = "Vulnerable Bird Occurrences Inside vs. Outside Protected Areas",
x = "Location",
y = "Number of Bird Occurrences") +
theme_minimal()#Species Specific Analyze
# Count occurrences per species inside vs. outside
vu_species_counts <- vu_birds %>%
group_by(`Common Name`,inside_outside ) %>%
summarise(count = n()) %>%
arrange(desc(count))`summarise()` has grouped output by 'Common Name'. You can override using the
`.groups` argument.
print(vu_species_counts)# A tibble: 30 × 3
# Groups: Common Name [15]
`Common Name` inside_outside count
<chr> <chr> <int>
1 Indian Swiftlet Outside 6343
2 Indian Swiftlet Inside 2860
3 Oriental Honey-buzzard Outside 2837
4 Sri Lanka White-eye Inside 2634
5 Spot-winged Thrush Inside 2237
6 Sri Lanka White-eye Outside 2175
7 Sri Lanka Myna Inside 2080
8 Little Tern Outside 1812
9 Oriental Honey-buzzard Inside 1682
10 Little Tern Inside 1589
# ℹ 20 more rows
# Visualization
ggplot(vu_species_counts, aes(x =`Common Name` , y = count, fill = inside_outside)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Vulnerable Bird Species Distribution Inside VS Outside",
x = "Species",
y = "Number of Observations") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))# Read the CSV file
cr_birds <- read_csv("CR_Inside_Outside_Protected Areas.csv")Rows: 58187 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check the first few rows
head(cr_birds)# A tibble: 6 × 26
`Common Name` Latitude Longitude fid NatName IntName SitLat SitLong GISArea
<chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 Gull-billed T… 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
2 Caspian Tern 8.86 80.0 1 Giants… Giants… 8.86 80.1 4024.
3 Caspian Tern 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
4 Gull-billed T… 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
5 Gull-billed T… 8.33 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
6 Gull-billed T… 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
# ℹ 17 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
# WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
# DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
# GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
# Inside_Outside <chr>
# Check the structure of the dataset
str(cr_birds)spc_tbl_ [58,187 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Common Name : chr [1:58187] "Gull-billed Tern" "Caspian Tern" "Caspian Tern" "Gull-billed Tern" ...
$ Latitude : num [1:58187] 8.88 8.86 8.88 8.35 8.33 ...
$ Longitude : num [1:58187] 80 80 80 80.4 80.4 ...
$ fid : num [1:58187] 1 1 1 2 2 2 2 2 2 2 ...
$ NatName : chr [1:58187] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
$ IntName : chr [1:58187] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
$ SitLat : num [1:58187] 8.86 8.86 8.86 8.35 8.35 ...
$ SitLong : num [1:58187] 80.1 80.1 80.1 80.4 80.4 ...
$ GISArea : num [1:58187] 4024 4024 4024 3541 3541 ...
$ PA_TYPE : chr [1:58187] "IBA" "IBA" "IBA" "IBA" ...
$ Source : chr [1:58187] NA NA NA NA ...
$ WDPAID : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
$ WDPA_PID : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
$ PA_DEF : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
$ NAME : chr [1:58187] NA NA NA NA ...
$ ORIG_NAME : chr [1:58187] NA NA NA NA ...
$ DESIG : chr [1:58187] NA NA NA NA ...
$ DESIG_ENG : chr [1:58187] NA NA NA NA ...
$ DESIG_TYPE : chr [1:58187] NA NA NA NA ...
$ IUCN_CAT : chr [1:58187] "IBA" "IBA" "IBA" "IBA" ...
$ INT_CRIT : chr [1:58187] NA NA NA NA ...
$ GIS_M_AREA : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
$ GIS_AREA : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
$ layer : chr [1:58187] "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" ...
$ path : chr [1:58187] "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" ...
$ Inside_Outside: chr [1:58187] "Inside" "Inside" "Inside" "Inside" ...
- attr(*, "spec")=
.. cols(
.. `Common Name` = col_character(),
.. Latitude = col_double(),
.. Longitude = col_double(),
.. fid = col_double(),
.. NatName = col_character(),
.. IntName = col_character(),
.. SitLat = col_double(),
.. SitLong = col_double(),
.. GISArea = col_double(),
.. PA_TYPE = col_character(),
.. Source = col_character(),
.. WDPAID = col_double(),
.. WDPA_PID = col_double(),
.. PA_DEF = col_double(),
.. NAME = col_character(),
.. ORIG_NAME = col_character(),
.. DESIG = col_character(),
.. DESIG_ENG = col_character(),
.. DESIG_TYPE = col_character(),
.. IUCN_CAT = col_character(),
.. INT_CRIT = col_character(),
.. GIS_M_AREA = col_double(),
.. GIS_AREA = col_double(),
.. layer = col_character(),
.. path = col_character(),
.. Inside_Outside = col_character()
.. )
- attr(*, "problems")=<externalptr>
# View summary statistics
summary(cr_birds) Common Name Latitude Longitude fid
Length:58187 Min. :5.887 Min. :79.52 Min. : 1.0
Class :character 1st Qu.:6.353 1st Qu.:79.88 1st Qu.: 18.0
Mode :character Median :6.889 Median :80.03 Median : 59.0
Mean :7.038 Mean :80.35 Mean :254.9
3rd Qu.:7.179 3rd Qu.:80.81 3rd Qu.:501.0
Max. :9.831 Max. :81.86 Max. :861.0
NA's :40733
NatName IntName SitLat SitLong
Length:58187 Length:58187 Min. :6.07 Min. :79.81
Class :character Class :character 1st Qu.:6.18 1st Qu.:80.41
Mode :character Mode :character Median :6.46 Median :80.87
Mean :6.96 Mean :80.74
3rd Qu.:7.71 3rd Qu.:81.22
Max. :9.68 Max. :81.59
NA's :48809 NA's :48809
GISArea PA_TYPE Source WDPAID
Min. : 108.7 Length:58187 Length:58187 Min. : 899
1st Qu.: 5282.9 Class :character Class :character 1st Qu.: 899
Median : 5664.3 Mode :character Mode :character Median : 2462
Mean :10760.9 Mean : 42867571
3rd Qu.:12007.2 3rd Qu.: 23126
Max. :99703.9 Max. :555592541
NA's :48809 NA's :50111
WDPA_PID PA_DEF NAME ORIG_NAME
Min. : 899 Min. :1 Length:58187 Length:58187
1st Qu.: 899 1st Qu.:1 Class :character Class :character
Median : 2462 Median :1 Mode :character Mode :character
Mean : 42867571 Mean :1
3rd Qu.: 23126 3rd Qu.:1
Max. :555592541 Max. :1
NA's :50111 NA's :50111
DESIG DESIG_ENG DESIG_TYPE IUCN_CAT
Length:58187 Length:58187 Length:58187 Length:58187
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
INT_CRIT GIS_M_AREA GIS_AREA layer
Length:58187 Min. : 0.00 Min. : 0.31 Length:58187
Class :character 1st Qu.: 0.00 1st Qu.: 8.00 Class :character
Mode :character Median : 0.00 Median : 95.81 Mode :character
Mean : 0.98 Mean : 423.98
3rd Qu.: 1.79 3rd Qu.: 979.49
Max. :25.01 Max. :1313.49
NA's :50111 NA's :50111
path Inside_Outside
Length:58187 Length:58187
Class :character Class :character
Mode :character Mode :character
# Number of CR occurrence records inside vs. outside protected areas
cr_counts <- count(cr_birds, Inside_Outside, name = "count")
print(cr_counts)# A tibble: 2 × 2
Inside_Outside count
<chr> <int>
1 Inside 17454
2 Outside 40733
# Visualization
ggplot(cr_counts, aes(x = Inside_Outside, y = count, fill = Inside_Outside)) +
geom_bar(stat = "identity") +
labs(title = "Critically Endangered Bird Occurrences Inside vs. Outside Protected Areas",
x = "Location",
y = "Number of Bird Occurrences") +
theme_minimal()#Species Specific Analyze
# Count occurrences per species inside vs. outside
cr_species_counts <- cr_birds %>%
group_by(`Common Name`,Inside_Outside ) %>%
summarise(count = n()) %>%
arrange(desc(count))`summarise()` has grouped output by 'Common Name'. You can override using the
`.groups` argument.
print(cr_species_counts)# A tibble: 37 × 3
# Groups: Common Name [19]
`Common Name` Inside_Outside count
<chr> <chr> <int>
1 Rock Pigeon Outside 19194
2 Blue-tailed Bee-eater Outside 14016
3 Blue-tailed Bee-eater Inside 7381
4 Rock Pigeon Inside 3972
5 Gull-billed Tern Outside 2650
6 Gull-billed Tern Inside 2572
7 Caspian Tern Inside 1700
8 Caspian Tern Outside 1483
9 Common Tern Outside 1299
10 Bridled Tern Outside 1086
# ℹ 27 more rows
# Visualization
ggplot(cr_species_counts, aes(x =`Common Name` , y = count, fill =Inside_Outside)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Critically Endangered Bird Species Distribution Inside VS Outside",
x = "Species",
y = "Number of Observations") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))# Load necessary libraries
library(dplyr)
library(readr)
# Load each dataset
cr_data <- read_csv("CR_Inside_Outside_Protected Areas.csv") %>% mutate(Threat_Category = "CR")Rows: 58187 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
en_data <- read_csv("EN_Inisde_Outside.csv") %>% mutate(Threat_Category = "EN")Rows: 62497 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vu_data <- read_csv("VU_Birds_Iniside_Outside.csv") %>% mutate(Threat_Category = "VU")Rows: 39125 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine all into one dataset
# NOTE(review): the inside/outside flag is named differently in each input
# (`Inside_Outside` in the CR file, `Inside_Outside_PA` in the EN file,
# `inside_outside` in the VU file), so bind_rows() keeps all three as separate
# columns, each NA for the rows that came from the other two files. Consider
# renaming them to one common name before binding.
endangered_birds <- bind_rows(cr_data, en_data, vu_data)
# Save the combined dataset
# The path is relative, so the file lands in the current working directory.
write_csv(endangered_birds, "endangered_birds_sri_lanka.csv")
# View summary
print(endangered_birds)# A tibble: 159,809 × 29
`Common Name` Latitude Longitude fid NatName IntName SitLat SitLong GISArea
<chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 Gull-billed … 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
2 Caspian Tern 8.86 80.0 1 Giants… Giants… 8.86 80.1 4024.
3 Caspian Tern 8.88 80.0 1 Giants… Giants… 8.86 80.1 4024.
4 Gull-billed … 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
5 Gull-billed … 8.33 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
6 Gull-billed … 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
7 Gull-billed … 8.35 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
8 Gull-billed … 8.34 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
9 Rock Pigeon 8.34 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
10 Rock Pigeon 8.34 80.4 2 Anurad… Anurad… 8.35 80.4 3541.
# ℹ 159,799 more rows
# ℹ 20 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
# WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
# DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
# GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
# Inside_Outside <chr>, Threat_Category <chr>, Inside_Outside_PA <chr>,
# inside_outside <chr>
all_endangered_birds <- read_csv("All Endangered Birds Occurence Sri Lanka.csv")Rows: 159809 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Species_Name, IUCN_CAT, inside_outside
dbl (2): Latitude, Longitude
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#All Endangered Birds percentage Inside vs Outside
all_endangered_birds %>%
count(inside_outside) %>%
mutate(percentage = n / sum(n) * 100)# A tibble: 2 × 3
inside_outside n percentage
<chr> <int> <dbl>
1 Inside 73703 46.1
2 Outside 86106 53.9
# Records inside vs. outside protected areas, plus each group's share (in %)
# of all records.
En_bird_count <- all_endangered_birds %>%
  count(inside_outside, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)
print(En_bird_count)# A tibble: 2 × 3
inside_outside count percentage
<chr> <int> <dbl>
1 Inside 73703 46.1
2 Outside 86106 53.9
# Share of occurrence records inside vs. outside protected areas; each bar is
# labelled with its percentage rounded to one decimal place.
ggplot(
  data = En_bird_count,
  mapping = aes(x = inside_outside, y = percentage, fill = inside_outside)
) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste0(round(percentage, 1), "%")),
    vjust = -0.5
  ) +
  scale_fill_manual(values = c(Inside = "#1f78b4", Outside = "#e31a1c")) +
  labs(
    title = "Bird Occurrences Inside vs. Outside Protected Areas",
    x = "Location",
    y = "Percentage"
  ) +
  theme_minimal()