Research Progress

suppressPackageStartupMessages({
  library(tidyverse) })
Warning: package 'tidyverse' was built under R version 4.4.2
Warning: package 'lubridate' was built under R version 4.4.2
#Loading Libraries
library(dplyr)    # For data manipulation
library(ggplot2)  # For visualization
library(tidyverse)

Analysis of the data set

#Loading Dataset
cr_data <- read_csv("All CR Occurence.csv")
Rows: 58187 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Common Name
dbl (2): Latitude, Longitude

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
en_data <- read_csv("All EN Occurance Data.csv")
Rows: 62497 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Common Name
dbl (2): Latitude, Longitude

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vu_data <- read_csv("All VU Occurence Data.csv")
Rows: 39125 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Common Name
dbl (2): Latitude, Longitude

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(cr_data)   # Check structure
spc_tbl_ [58,187 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Common Name: chr [1:58187] "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" ...
 $ Latitude   : num [1:58187] 6.35 6.35 6.49 6.5 6.35 ...
 $ Longitude  : num [1:58187] 81.5 81.5 80.9 81.5 81.5 ...
 - attr(*, "spec")=
  .. cols(
  ..   `Common Name` = col_character(),
  ..   Latitude = col_double(),
  ..   Longitude = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
head(cr_data)  # View first few rows
# A tibble: 6 × 3
  `Common Name`      Latitude Longitude
  <chr>                 <dbl>     <dbl>
1 Black-necked Stork     6.35      81.5
2 Black-necked Stork     6.35      81.5
3 Black-necked Stork     6.49      80.9
4 Black-necked Stork     6.50      81.5
5 Black-necked Stork     6.35      81.5
6 Black-necked Stork     6.35      81.5
summary(cr_data)  # Get summary statistics
 Common Name           Latitude       Longitude    
 Length:58187       Min.   :5.887   Min.   :79.52  
 Class :character   1st Qu.:6.353   1st Qu.:79.88  
 Mode  :character   Median :6.889   Median :80.03  
                    Mean   :7.038   Mean   :80.35  
                    3rd Qu.:7.179   3rd Qu.:80.81  
                    Max.   :9.831   Max.   :81.86  
cr_data
# A tibble: 58,187 × 3
   `Common Name`      Latitude Longitude
   <chr>                 <dbl>     <dbl>
 1 Black-necked Stork     6.35      81.5
 2 Black-necked Stork     6.35      81.5
 3 Black-necked Stork     6.49      80.9
 4 Black-necked Stork     6.50      81.5
 5 Black-necked Stork     6.35      81.5
 6 Black-necked Stork     6.35      81.5
 7 Black-necked Stork     6.39      81.5
 8 Black-necked Stork     6.39      81.5
 9 Black-necked Stork     6.58      81.7
10 Black-necked Stork     6.29      81.5
# ℹ 58,177 more rows
# Tag every occurrence table with its threat category, then stack the
# three tables into a single data frame
bird_occurrence <- bind_rows(
  mutate(cr_data, Threat_Category = "CR"),
  mutate(en_data, Threat_Category = "EN"),
  mutate(vu_data, Threat_Category = "VU")
)
#Checking
head(bird_occurrence)
# A tibble: 6 × 4
  `Common Name`      Latitude Longitude Threat_Category
  <chr>                 <dbl>     <dbl> <chr>          
1 Black-necked Stork     6.35      81.5 CR             
2 Black-necked Stork     6.35      81.5 CR             
3 Black-necked Stork     6.49      80.9 CR             
4 Black-necked Stork     6.50      81.5 CR             
5 Black-necked Stork     6.35      81.5 CR             
6 Black-necked Stork     6.35      81.5 CR             
str(bird_occurrence)
tibble [159,809 × 4] (S3: tbl_df/tbl/data.frame)
 $ Common Name    : chr [1:159809] "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" "Black-necked Stork" ...
 $ Latitude       : num [1:159809] 6.35 6.35 6.49 6.5 6.35 ...
 $ Longitude      : num [1:159809] 81.5 81.5 80.9 81.5 81.5 ...
 $ Threat_Category: chr [1:159809] "CR" "CR" "CR" "CR" ...
# Number of occurrence records in each threat category
bird_occurrence %>%
  count(Threat_Category, name = "Count")
# A tibble: 3 × 2
  Threat_Category Count
  <chr>           <int>
1 CR              58187
2 EN              62497
3 VU              39125
# Bar chart of the summary counts: one bar per threat category, with the
# bar height equal to the number of occurrence records
ggplot(data = bird_occurrence,
       mapping = aes(x = Threat_Category, fill = Threat_Category)) +
  geom_bar() +
  scale_fill_manual(
    values = c("CR" = "red", "EN" = "orange", "VU" = "yellow")
  ) +
  theme_minimal() +
  labs(title = "Number of Bird Occurrences by Threat Category",
       x = "Threat Category",
       y = "Count")

# Number of distinct species (by common name) in each threat category
bird_occurrence %>%
  distinct(Threat_Category, `Common Name`) %>%
  count(Threat_Category, name = "Unique_Species")
# A tibble: 3 × 2
  Threat_Category Unique_Species
  <chr>                    <int>
1 CR                          19
2 EN                          44
3 VU                          15
# Ten species with the most occurrence records across all categories
bird_occurrence %>%
  count(`Common Name`, sort = TRUE) %>%
  slice_head(n = 10)
# A tibble: 10 × 2
   `Common Name`                       n
   <chr>                           <int>
 1 Rock Pigeon                     23166
 2 Blue-tailed Bee-eater           21397
 3 Indian Swiftlet                  9203
 4 Gull-billed Tern                 5222
 5 Sri Lanka White-eye              4809
 6 Oriental Honey-buzzard           4519
 7 Sri Lanka Orange-billed Babbler  4209
 8 Little Ringed Plover             3657
 9 Sri Lanka Crested Drongo         3544
10 Yellow Bittern                   3522
# General map: every occurrence record plotted at its coordinates and
# coloured by threat category
ggplot(bird_occurrence, aes(x = Longitude, y = Latitude, color = Threat_Category)) +
  geom_point(alpha = 0.6) +
  # Longitude/latitude axes need a fixed aspect ratio; without it the
  # shape of the point cloud depends on the size of the plotting device
  coord_quickmap() +
  theme_minimal() +
  labs(title = " Endangered Bird Occurrence Distribution   in Sri Lanka",
       x = "Longitude", y = "Latitude")
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).

library(patchwork)  # For arranging multiple plots together
Warning: package 'patchwork' was built under R version 4.4.2
#' Bar chart of per-species occurrence counts for one threat category
#'
#' Keeps the rows of `data` whose `Threat_Category` equals `category`,
#' counts the rows per `Common Name`, and draws one bar per species with
#' the species ordered from most to fewest records.
#'
#' @param data Data frame with `Threat_Category` and `Common Name` columns.
#' @param category Threat category value to keep (e.g. "CR").
#' @param color Fill colour used for every bar.
#' @return A ggplot object.
plot_species_occurrence <- function(data, category, color) {
  data %>%
    filter(Threat_Category == category) %>%
    count(`Common Name`, name = "Count") %>%
    # reorder() on -Count fixes the bar order, so no arrange() is needed
    ggplot(aes(x = reorder(`Common Name`, -Count), y = Count)) +
    geom_col(fill = color) +
    labs(title = paste("Occurrences of", category, "Birds"),
         x = "Species",
         y = "Occurrence Count") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))  # Rotate x labels
}
# Generate plots for CR, EN, and VU (one fill colour per category)
plot_CR <- plot_species_occurrence(bird_occurrence, "CR", "red")
plot_EN <- plot_species_occurrence(bird_occurrence, "EN", "orange")
plot_VU <- plot_species_occurrence(bird_occurrence, "VU", "yellow")
# Display the plots one after another; each is printed as its own figure
# (they are not combined into a single side-by-side layout here)
plot_CR

plot_EN

plot_VU

Inside vs Outside for EN, CR, VU Birds

EN Birds Inside vs Outside

# Read the CSV file
en_birds <- read_csv("EN_Inisde_Outside.csv")
Rows: 62497 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check the first few rows
head(en_birds)
# A tibble: 6 × 26
  `Common Name`  Latitude Longitude   fid NatName IntName SitLat SitLong GISArea
  <chr>             <dbl>     <dbl> <dbl> <chr>   <chr>    <dbl>   <dbl>   <dbl>
1 Yellow Bittern     8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
2 Cinnamon Bitt…     8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
3 Long-tailed S…     8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
4 Lesser Adjuta…     8.36      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
5 Cinnamon Bitt…     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
6 Eurasian Kest…     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
# ℹ 17 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
#   WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
#   DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
#   GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
#   Inside_Outside_PA <chr>
# Check the structure of the dataset
str(en_birds)
spc_tbl_ [62,497 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Common Name      : chr [1:62497] "Yellow Bittern" "Cinnamon Bittern" "Long-tailed Shrike" "Lesser Adjutant" ...
 $ Latitude         : num [1:62497] 8.88 8.88 8.88 8.36 8.35 ...
 $ Longitude        : num [1:62497] 80 80 80 80.4 80.4 ...
 $ fid              : num [1:62497] 1 1 1 2 2 2 2 2 2 2 ...
 $ NatName          : chr [1:62497] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
 $ IntName          : chr [1:62497] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
 $ SitLat           : num [1:62497] 8.86 8.86 8.86 8.35 8.35 ...
 $ SitLong          : num [1:62497] 80.1 80.1 80.1 80.4 80.4 ...
 $ GISArea          : num [1:62497] 4024 4024 4024 3541 3541 ...
 $ PA_TYPE          : chr [1:62497] "IBA" "IBA" "IBA" "IBA" ...
 $ Source           : chr [1:62497] NA NA NA NA ...
 $ WDPAID           : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
 $ WDPA_PID         : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
 $ PA_DEF           : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
 $ NAME             : chr [1:62497] NA NA NA NA ...
 $ ORIG_NAME        : chr [1:62497] NA NA NA NA ...
 $ DESIG            : chr [1:62497] NA NA NA NA ...
 $ DESIG_ENG        : chr [1:62497] NA NA NA NA ...
 $ DESIG_TYPE       : chr [1:62497] NA NA NA NA ...
 $ IUCN_CAT         : chr [1:62497] "IBA" "IBA" "IBA" "IBA" ...
 $ INT_CRIT         : chr [1:62497] NA NA NA NA ...
 $ GIS_M_AREA       : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
 $ GIS_AREA         : num [1:62497] NA NA NA NA NA NA NA NA NA NA ...
 $ layer            : chr [1:62497] "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" ...
 $ path             : chr [1:62497] "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" ...
 $ Inside_Outside_PA: chr [1:62497] "Inside" "Inside" "Inside" "Inside" ...
 - attr(*, "spec")=
  .. cols(
  ..   `Common Name` = col_character(),
  ..   Latitude = col_double(),
  ..   Longitude = col_double(),
  ..   fid = col_double(),
  ..   NatName = col_character(),
  ..   IntName = col_character(),
  ..   SitLat = col_double(),
  ..   SitLong = col_double(),
  ..   GISArea = col_double(),
  ..   PA_TYPE = col_character(),
  ..   Source = col_character(),
  ..   WDPAID = col_double(),
  ..   WDPA_PID = col_double(),
  ..   PA_DEF = col_double(),
  ..   NAME = col_character(),
  ..   ORIG_NAME = col_character(),
  ..   DESIG = col_character(),
  ..   DESIG_ENG = col_character(),
  ..   DESIG_TYPE = col_character(),
  ..   IUCN_CAT = col_character(),
  ..   INT_CRIT = col_character(),
  ..   GIS_M_AREA = col_double(),
  ..   GIS_AREA = col_double(),
  ..   layer = col_character(),
  ..   path = col_character(),
  ..   Inside_Outside_PA = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# View summary statistics
summary(en_birds)
 Common Name           Latitude       Longitude          fid       
 Length:62497       Min.   :5.887   Min.   :79.61   Min.   :  1.0  
 Class :character   1st Qu.:6.409   1st Qu.:80.42   1st Qu.: 28.0  
 Mode  :character   Median :6.451   Median :80.45   Median : 59.0  
                    Mean   :6.779   Mean   :80.57   Mean   :179.1  
                    3rd Qu.:6.967   3rd Qu.:80.81   3rd Qu.:498.0  
                    Max.   :9.831   Max.   :84.75   Max.   :922.0  
                                                    NA's   :25359  
   NatName            IntName              SitLat         SitLong     
 Length:62497       Length:62497       Min.   :6.07    Min.   :79.82  
 Class :character   Class :character   1st Qu.:6.40    1st Qu.:80.48  
 Mode  :character   Mode  :character   Median :6.40    Median :80.48  
                                       Mean   :6.61    Mean   :80.67  
                                       3rd Qu.:6.83    3rd Qu.:80.82  
                                       Max.   :9.68    Max.   :81.59  
                                       NA's   :35557   NA's   :35557  
    GISArea          PA_TYPE             Source              WDPAID         
 Min.   :  108.7   Length:62497       Length:62497       Min.   :      899  
 1st Qu.: 8037.1   Class :character   Class :character   1st Qu.:      902  
 Median :12007.2   Mode  :character   Mode  :character   Median :     2462  
 Mean   :10671.3                                         Mean   : 45227442  
 3rd Qu.:12007.2                                         3rd Qu.:    23126  
 Max.   :57007.7                                         Max.   :555592541  
 NA's   :35557                                           NA's   :52299      
    WDPA_PID             PA_DEF          NAME            ORIG_NAME        
 Min.   :      899   Min.   :1       Length:62497       Length:62497      
 1st Qu.:      902   1st Qu.:1       Class :character   Class :character  
 Median :     2462   Median :1       Mode  :character   Mode  :character  
 Mean   : 45227442   Mean   :1                                            
 3rd Qu.:    23126   3rd Qu.:1                                            
 Max.   :555592541   Max.   :1                                            
 NA's   :52299       NA's   :52299                                        
    DESIG            DESIG_ENG          DESIG_TYPE          IUCN_CAT        
 Length:62497       Length:62497       Length:62497       Length:62497      
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
   INT_CRIT           GIS_M_AREA       GIS_AREA          layer          
 Length:62497       Min.   : 0.00   Min.   :   0.32   Length:62497      
 Class :character   1st Qu.: 0.00   1st Qu.:  43.20   Class :character  
 Mode  :character   Median : 0.00   Median :  95.81   Mode  :character  
                    Mean   : 0.73   Mean   : 345.47                     
                    3rd Qu.: 1.79   3rd Qu.: 979.49                     
                    Max.   :25.01   Max.   :1313.49                     
                    NA's   :52299   NA's   :52299                       
     path           Inside_Outside_PA 
 Length:62497       Length:62497      
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# Tally EN records by their inside/outside protected-area flag
en_counts <- summarise(
  group_by(en_birds, Inside_Outside_PA),
  count = n()
)

print(en_counts)
# A tibble: 2 × 2
  Inside_Outside_PA count
  <chr>             <int>
1 Inside            37138
2 Outside           25359
# Bar chart comparing EN record counts inside and outside protected areas
ggplot(data = en_counts,
       mapping = aes(x = Inside_Outside_PA, y = count,
                     fill = Inside_Outside_PA)) +
  geom_col() +
  theme_minimal() +
  labs(title = "Endangered Bird Occurrences Inside vs. Outside Protected Areas",
       x = "Location",
       y = "Number of Bird Occurrences")

Species-Specific Analysis

# Count occurrences per species inside vs. outside
# (built from the EN data, so it is named en_species_counts, matching the
# vu_species_counts naming used for the VU data)
en_species_counts <- en_birds %>%
  group_by(`Common Name`,Inside_Outside_PA ) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
`summarise()` has grouped output by 'Common Name'. You can override using the
`.groups` argument.
print(en_species_counts)
# A tibble: 87 × 3
# Groups:   Common Name [44]
   `Common Name`                   Inside_Outside_PA count
   <chr>                           <chr>             <int>
 1 Great Crested Tern              Outside            2523
 2 Sri Lanka Orange-billed Babbler Inside             2438
 3 Sri Lanka Crested Drongo        Inside             2414
 4 Sri Lanka Blue-Magpie           Inside             2139
 5 Little Ringed Plover            Outside            1913
 6 Yellow-eared Bulbul             Inside             1909
 7 Yellow Bittern                  Outside            1903
 8 Kentish Plover                  Inside             1899
 9 Sri Lanka Orange-billed Babbler Outside            1771
10 Little Ringed Plover            Inside             1744
# ℹ 77 more rows
#Visualization

ggplot(en_species_counts, aes(x =`Common Name` , y = count, fill = Inside_Outside_PA)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Endangered Bird Species Distribution Inside VS Outside",
       x = "Species",
       y = "Number of Observations") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

VU Birds Inside vs Outside

# Read the CSV file 
vu_birds <- read_csv("VU_Birds_Iniside_Outside.csv")
Rows: 39125 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check the first few rows
head(vu_birds)
# A tibble: 6 × 26
  `Common Name`  Latitude Longitude   fid NatName IntName SitLat SitLong GISArea
  <chr>             <dbl>     <dbl> <dbl> <chr>   <chr>    <dbl>   <dbl>   <dbl>
1 Cotton Pygmy-…     8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
2 Sri Lanka Spu…     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
3 Cotton Pygmy-…     8.36      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
4 Cotton Pygmy-…     8.34      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
5 Cotton Pygmy-…     8.36      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
6 Cotton Pygmy-…     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
# ℹ 17 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
#   WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
#   DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
#   GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
#   inside_outside <chr>
# Check the structure of the dataset
str(vu_birds)
spc_tbl_ [39,125 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Common Name   : chr [1:39125] "Cotton Pygmy-Goose" "Sri Lanka Spurfowl" "Cotton Pygmy-Goose" "Cotton Pygmy-Goose" ...
 $ Latitude      : num [1:39125] 8.88 8.35 8.36 8.34 8.36 ...
 $ Longitude     : num [1:39125] 80 80.4 80.4 80.4 80.4 ...
 $ fid           : num [1:39125] 1 2 2 2 2 2 2 2 2 2 ...
 $ NatName       : chr [1:39125] "Giants Tank" "Anuradhapura" "Anuradhapura" "Anuradhapura" ...
 $ IntName       : chr [1:39125] "Giants Tank" "Anuradhapura" "Anuradhapura" "Anuradhapura" ...
 $ SitLat        : num [1:39125] 8.86 8.35 8.35 8.35 8.35 ...
 $ SitLong       : num [1:39125] 80.1 80.4 80.4 80.4 80.4 ...
 $ GISArea       : num [1:39125] 4024 3541 3541 3541 3541 ...
 $ PA_TYPE       : chr [1:39125] "IBA" "IBA" "IBA" "IBA" ...
 $ Source        : chr [1:39125] NA NA NA NA ...
 $ WDPAID        : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
 $ WDPA_PID      : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
 $ PA_DEF        : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
 $ NAME          : chr [1:39125] NA NA NA NA ...
 $ ORIG_NAME     : chr [1:39125] NA NA NA NA ...
 $ DESIG         : chr [1:39125] NA NA NA NA ...
 $ DESIG_ENG     : chr [1:39125] NA NA NA NA ...
 $ DESIG_TYPE    : chr [1:39125] NA NA NA NA ...
 $ IUCN_CAT      : chr [1:39125] "IBA" "IBA" "IBA" "IBA" ...
 $ INT_CRIT      : chr [1:39125] NA NA NA NA ...
 $ GIS_M_AREA    : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
 $ GIS_AREA      : num [1:39125] NA NA NA NA NA NA NA NA NA NA ...
 $ layer         : chr [1:39125] "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" ...
 $ path          : chr [1:39125] "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" ...
 $ inside_outside: chr [1:39125] "Inside" "Inside" "Inside" "Inside" ...
 - attr(*, "spec")=
  .. cols(
  ..   `Common Name` = col_character(),
  ..   Latitude = col_double(),
  ..   Longitude = col_double(),
  ..   fid = col_double(),
  ..   NatName = col_character(),
  ..   IntName = col_character(),
  ..   SitLat = col_double(),
  ..   SitLong = col_double(),
  ..   GISArea = col_double(),
  ..   PA_TYPE = col_character(),
  ..   Source = col_character(),
  ..   WDPAID = col_double(),
  ..   WDPA_PID = col_double(),
  ..   PA_DEF = col_double(),
  ..   NAME = col_character(),
  ..   ORIG_NAME = col_character(),
  ..   DESIG = col_character(),
  ..   DESIG_ENG = col_character(),
  ..   DESIG_TYPE = col_character(),
  ..   IUCN_CAT = col_character(),
  ..   INT_CRIT = col_character(),
  ..   GIS_M_AREA = col_double(),
  ..   GIS_AREA = col_double(),
  ..   layer = col_character(),
  ..   path = col_character(),
  ..   inside_outside = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# View summary statistics
summary(vu_birds)
 Common Name           Latitude       Longitude          fid       
 Length:39125       Min.   :5.887   Min.   :79.59   Min.   :  1.0  
 Class :character   1st Qu.:6.428   1st Qu.:80.41   1st Qu.: 28.0  
 Mode  :character   Median :6.802   Median :80.52   Median : 59.0  
                    Mean   :6.849   Mean   :80.57   Mean   :208.5  
                    3rd Qu.:6.991   3rd Qu.:80.81   3rd Qu.:498.0  
                    Max.   :9.823   Max.   :81.86   Max.   :922.0  
                    NA's   :3       NA's   :3       NA's   :20014  
   NatName            IntName              SitLat         SitLong     
 Length:39125       Length:39125       Min.   :6.070   Min.   :79.82  
 Class :character   Class :character   1st Qu.:6.397   1st Qu.:80.48  
 Mode  :character   Mode  :character   Median :6.490   Median :80.64  
                                       Mean   :6.705   Mean   :80.66  
                                       3rd Qu.:6.826   3rd Qu.:80.82  
                                       Max.   :9.678   Max.   :81.59  
                                       NA's   :26293   NA's   :26293  
    GISArea          PA_TYPE             Source              WDPAID         
 Min.   :  108.7   Length:39125       Length:39125       Min.   :      899  
 1st Qu.: 8037.1   Class :character   Class :character   1st Qu.:      902  
 Median : 8037.1   Mode  :character   Mode  :character   Median :    16383  
 Mean   :10998.1                                         Mean   : 63099043  
 3rd Qu.:12007.2                                         3rd Qu.:    27809  
 Max.   :57007.7                                         Max.   :555592541  
 NA's   :26293                                           NA's   :32846      
    WDPA_PID             PA_DEF          NAME            ORIG_NAME        
 Min.   :      899   Min.   :1       Length:39125       Length:39125      
 1st Qu.:      902   1st Qu.:1       Class :character   Class :character  
 Median :    16383   Median :1       Mode  :character   Mode  :character  
 Mean   : 63099043   Mean   :1                                            
 3rd Qu.:    27809   3rd Qu.:1                                            
 Max.   :555592541   Max.   :1                                            
 NA's   :32846       NA's   :32846                                        
    DESIG            DESIG_ENG          DESIG_TYPE          IUCN_CAT        
 Length:39125       Length:39125       Length:39125       Length:39125      
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
   INT_CRIT           GIS_M_AREA       GIS_AREA          layer          
 Length:39125       Min.   :0.00    Min.   :   0.31   Length:39125      
 Class :character   1st Qu.:0.00    1st Qu.:  26.91   Class :character  
 Mode  :character   Median :0.00    Median :  95.81   Mode  :character  
                    Mean   :0.47    Mean   : 278.39                     
                    3rd Qu.:0.00    3rd Qu.: 343.15                     
                    Max.   :6.53    Max.   :1313.49                     
                    NA's   :32846   NA's   :32846                       
     path           inside_outside    
 Length:39125       Length:39125      
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# Tally VU records by their inside/outside protected-area flag
vu_counts <- summarise(
  group_by(vu_birds, inside_outside),
  count = n()
)

print(vu_counts)
# A tibble: 2 × 2
  inside_outside count
  <chr>          <int>
1 Inside         19111
2 Outside        20014
# Bar chart comparing VU record counts inside and outside protected areas
ggplot(data = vu_counts,
       mapping = aes(x = inside_outside, y = count,
                     fill = inside_outside)) +
  geom_col() +
  theme_minimal() +
  labs(title = "Vulnerable Bird Occurrences Inside vs. Outside Protected Areas",
       x = "Location",
       y = "Number of Bird Occurrences")

# Species-specific analysis

# Count occurrences per species inside vs. outside, largest counts first.
# summarise() drops only the last grouping level, so the result is still
# grouped by `Common Name`.
vu_species_counts <- vu_birds %>%
  group_by(`Common Name`,inside_outside ) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
`summarise()` has grouped output by 'Common Name'. You can override using the
`.groups` argument.
print(vu_species_counts)
# A tibble: 30 × 3
# Groups:   Common Name [15]
   `Common Name`          inside_outside count
   <chr>                  <chr>          <int>
 1 Indian Swiftlet        Outside         6343
 2 Indian Swiftlet        Inside          2860
 3 Oriental Honey-buzzard Outside         2837
 4 Sri Lanka White-eye    Inside          2634
 5 Spot-winged Thrush     Inside          2237
 6 Sri Lanka White-eye    Outside         2175
 7 Sri Lanka Myna         Inside          2080
 8 Little Tern            Outside         1812
 9 Oriental Honey-buzzard Inside          1682
10 Little Tern            Inside          1589
# ℹ 20 more rows
# Visualization: side-by-side bars per species, split by Inside / Outside
ggplot(
  data = vu_species_counts,
  mapping = aes(x = `Common Name`, y = count, fill = inside_outside)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Vulnerable Bird Species Distribution Inside VS Outside",
    x = "Species",
    y = "Number of Observations"
  ) +
  # Rotate species names so they do not overlap on the x axis
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

CR Birds Inside vs Outside

# Load the critically endangered (CR) occurrence records from CSV
cr_birds <- read_csv(file = "CR_Inside_Outside_Protected Areas.csv")
Rows: 58187 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows
head(cr_birds, n = 6L)
# A tibble: 6 × 26
  `Common Name`  Latitude Longitude   fid NatName IntName SitLat SitLong GISArea
  <chr>             <dbl>     <dbl> <dbl> <chr>   <chr>    <dbl>   <dbl>   <dbl>
1 Gull-billed T…     8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
2 Caspian Tern       8.86      80.0     1 Giants… Giants…   8.86    80.1   4024.
3 Caspian Tern       8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
4 Gull-billed T…     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
5 Gull-billed T…     8.33      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
6 Gull-billed T…     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
# ℹ 17 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
#   WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
#   DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
#   GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
#   Inside_Outside <chr>
# Inspect column types and leading values of every column
cr_birds %>% str()
spc_tbl_ [58,187 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Common Name   : chr [1:58187] "Gull-billed Tern" "Caspian Tern" "Caspian Tern" "Gull-billed Tern" ...
 $ Latitude      : num [1:58187] 8.88 8.86 8.88 8.35 8.33 ...
 $ Longitude     : num [1:58187] 80 80 80 80.4 80.4 ...
 $ fid           : num [1:58187] 1 1 1 2 2 2 2 2 2 2 ...
 $ NatName       : chr [1:58187] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
 $ IntName       : chr [1:58187] "Giants Tank" "Giants Tank" "Giants Tank" "Anuradhapura" ...
 $ SitLat        : num [1:58187] 8.86 8.86 8.86 8.35 8.35 ...
 $ SitLong       : num [1:58187] 80.1 80.1 80.1 80.4 80.4 ...
 $ GISArea       : num [1:58187] 4024 4024 4024 3541 3541 ...
 $ PA_TYPE       : chr [1:58187] "IBA" "IBA" "IBA" "IBA" ...
 $ Source        : chr [1:58187] NA NA NA NA ...
 $ WDPAID        : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
 $ WDPA_PID      : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
 $ PA_DEF        : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
 $ NAME          : chr [1:58187] NA NA NA NA ...
 $ ORIG_NAME     : chr [1:58187] NA NA NA NA ...
 $ DESIG         : chr [1:58187] NA NA NA NA ...
 $ DESIG_ENG     : chr [1:58187] NA NA NA NA ...
 $ DESIG_TYPE    : chr [1:58187] NA NA NA NA ...
 $ IUCN_CAT      : chr [1:58187] "IBA" "IBA" "IBA" "IBA" ...
 $ INT_CRIT      : chr [1:58187] NA NA NA NA ...
 $ GIS_M_AREA    : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
 $ GIS_AREA      : num [1:58187] NA NA NA NA NA NA NA NA NA NA ...
 $ layer         : chr [1:58187] "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" "IBAs Sri Lanka — ibasglobal_2024_september_pol_03" ...
 $ path          : chr [1:58187] "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" "C:\\Users\\ASUS\\Documents\\IBAs Sri Lanka.gpkg|layername=ibasglobal_2024_september_pol_03" ...
 $ Inside_Outside: chr [1:58187] "Inside" "Inside" "Inside" "Inside" ...
 - attr(*, "spec")=
  .. cols(
  ..   `Common Name` = col_character(),
  ..   Latitude = col_double(),
  ..   Longitude = col_double(),
  ..   fid = col_double(),
  ..   NatName = col_character(),
  ..   IntName = col_character(),
  ..   SitLat = col_double(),
  ..   SitLong = col_double(),
  ..   GISArea = col_double(),
  ..   PA_TYPE = col_character(),
  ..   Source = col_character(),
  ..   WDPAID = col_double(),
  ..   WDPA_PID = col_double(),
  ..   PA_DEF = col_double(),
  ..   NAME = col_character(),
  ..   ORIG_NAME = col_character(),
  ..   DESIG = col_character(),
  ..   DESIG_ENG = col_character(),
  ..   DESIG_TYPE = col_character(),
  ..   IUCN_CAT = col_character(),
  ..   INT_CRIT = col_character(),
  ..   GIS_M_AREA = col_double(),
  ..   GIS_AREA = col_double(),
  ..   layer = col_character(),
  ..   path = col_character(),
  ..   Inside_Outside = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# Per-column summary statistics (including NA counts for numeric columns)
cr_birds %>% summary()
 Common Name           Latitude       Longitude          fid       
 Length:58187       Min.   :5.887   Min.   :79.52   Min.   :  1.0  
 Class :character   1st Qu.:6.353   1st Qu.:79.88   1st Qu.: 18.0  
 Mode  :character   Median :6.889   Median :80.03   Median : 59.0  
                    Mean   :7.038   Mean   :80.35   Mean   :254.9  
                    3rd Qu.:7.179   3rd Qu.:80.81   3rd Qu.:501.0  
                    Max.   :9.831   Max.   :81.86   Max.   :861.0  
                                                    NA's   :40733  
   NatName            IntName              SitLat         SitLong     
 Length:58187       Length:58187       Min.   :6.07    Min.   :79.81  
 Class :character   Class :character   1st Qu.:6.18    1st Qu.:80.41  
 Mode  :character   Mode  :character   Median :6.46    Median :80.87  
                                       Mean   :6.96    Mean   :80.74  
                                       3rd Qu.:7.71    3rd Qu.:81.22  
                                       Max.   :9.68    Max.   :81.59  
                                       NA's   :48809   NA's   :48809  
    GISArea          PA_TYPE             Source              WDPAID         
 Min.   :  108.7   Length:58187       Length:58187       Min.   :      899  
 1st Qu.: 5282.9   Class :character   Class :character   1st Qu.:      899  
 Median : 5664.3   Mode  :character   Mode  :character   Median :     2462  
 Mean   :10760.9                                         Mean   : 42867571  
 3rd Qu.:12007.2                                         3rd Qu.:    23126  
 Max.   :99703.9                                         Max.   :555592541  
 NA's   :48809                                           NA's   :50111      
    WDPA_PID             PA_DEF          NAME            ORIG_NAME        
 Min.   :      899   Min.   :1       Length:58187       Length:58187      
 1st Qu.:      899   1st Qu.:1       Class :character   Class :character  
 Median :     2462   Median :1       Mode  :character   Mode  :character  
 Mean   : 42867571   Mean   :1                                            
 3rd Qu.:    23126   3rd Qu.:1                                            
 Max.   :555592541   Max.   :1                                            
 NA's   :50111       NA's   :50111                                        
    DESIG            DESIG_ENG          DESIG_TYPE          IUCN_CAT        
 Length:58187       Length:58187       Length:58187       Length:58187      
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
   INT_CRIT           GIS_M_AREA       GIS_AREA          layer          
 Length:58187       Min.   : 0.00   Min.   :   0.31   Length:58187      
 Class :character   1st Qu.: 0.00   1st Qu.:   8.00   Class :character  
 Mode  :character   Median : 0.00   Median :  95.81   Mode  :character  
                    Mean   : 0.98   Mean   : 423.98                     
                    3rd Qu.: 1.79   3rd Qu.: 979.49                     
                    Max.   :25.01   Max.   :1313.49                     
                    NA's   :50111   NA's   :50111                       
     path           Inside_Outside    
 Length:58187       Length:58187      
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# Tally critically endangered records by protected-area status
cr_counts <- count(cr_birds, Inside_Outside, name = "count")

print(cr_counts)
# A tibble: 2 × 2
  Inside_Outside count
  <chr>          <int>
1 Inside         17454
2 Outside        40733
# Visualization: bar chart of the pre-computed Inside / Outside counts
ggplot(
  data = cr_counts,
  mapping = aes(x = Inside_Outside, y = count, fill = Inside_Outside)
) +
  geom_col() +
  labs(
    title = "Critically Endangered Bird Occurrences Inside vs. Outside Protected Areas",
    x = "Location",
    y = "Number of Bird Occurrences"
  ) +
  theme_minimal()

# Species-specific analysis

# Count occurrences per species inside vs. outside protected areas,
# sorted from the most to the least frequent combination.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `Common Name` (and dplyr emits a message about it), which would
# make any later mutate()/summarise() silently operate per species.
cr_species_counts <- cr_birds %>%
  group_by(`Common Name`, Inside_Outside) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count))
`summarise()` has grouped output by 'Common Name'. You can override using the
`.groups` argument.
cr_species_counts %>% print()
# A tibble: 37 × 3
# Groups:   Common Name [19]
   `Common Name`         Inside_Outside count
   <chr>                 <chr>          <int>
 1 Rock Pigeon           Outside        19194
 2 Blue-tailed Bee-eater Outside        14016
 3 Blue-tailed Bee-eater Inside          7381
 4 Rock Pigeon           Inside          3972
 5 Gull-billed Tern      Outside         2650
 6 Gull-billed Tern      Inside          2572
 7 Caspian Tern          Inside          1700
 8 Caspian Tern          Outside         1483
 9 Common Tern           Outside         1299
10 Bridled Tern          Outside         1086
# ℹ 27 more rows
# Visualization: side-by-side bars per species, split by Inside / Outside
ggplot(
  data = cr_species_counts,
  mapping = aes(x = `Common Name`, y = count, fill = Inside_Outside)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Critically Endangered Bird Species Distribution Inside VS Outside",
    x = "Species",
    y = "Number of Observations"
  ) +
  # Rotate species names so they do not overlap on the x axis
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Building the Combined Table: Inside vs. Outside Protected Areas for Endangered Birds of Sri Lanka (CR + EN + VU)

# Load necessary libraries
library(dplyr)
library(readr)

# Load each dataset and tag every row with its threat category
cr_data <- mutate(
  read_csv("CR_Inside_Outside_Protected Areas.csv"),
  Threat_Category = "CR"
)
Rows: 58187 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Endangered (EN) records, tagged with their threat category
en_data <- mutate(
  read_csv("EN_Inisde_Outside.csv"),
  Threat_Category = "EN"
)
Rows: 62497 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Vulnerable (VU) records, tagged with their threat category
vu_data <- mutate(
  read_csv("VU_Birds_Iniside_Outside.csv"),
  Threat_Category = "VU"
)
Rows: 39125 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): Common Name, NatName, IntName, PA_TYPE, Source, NAME, ORIG_NAME, D...
dbl (11): Latitude, Longitude, fid, SitLat, SitLong, GISArea, WDPAID, WDPA_P...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine all into one dataset.
# The three source files name the protected-area status column differently
# (`Inside_Outside`, `Inside_Outside_PA`, `inside_outside`), so a plain
# bind_rows() keeps three separate status columns, each NA for the rows that
# came from the other two files. Merge them into a single `inside_outside`
# column so every row carries its status, then drop the redundant columns.
endangered_birds <- bind_rows(cr_data, en_data, vu_data) %>%
  mutate(
    inside_outside = coalesce(inside_outside, Inside_Outside, Inside_Outside_PA)
  ) %>%
  select(-Inside_Outside, -Inside_Outside_PA)

# Save the combined dataset
write_csv(endangered_birds, "endangered_birds_sri_lanka.csv")

# View summary
print(endangered_birds)
# A tibble: 159,809 × 29
   `Common Name` Latitude Longitude   fid NatName IntName SitLat SitLong GISArea
   <chr>            <dbl>     <dbl> <dbl> <chr>   <chr>    <dbl>   <dbl>   <dbl>
 1 Gull-billed …     8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
 2 Caspian Tern      8.86      80.0     1 Giants… Giants…   8.86    80.1   4024.
 3 Caspian Tern      8.88      80.0     1 Giants… Giants…   8.86    80.1   4024.
 4 Gull-billed …     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
 5 Gull-billed …     8.33      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
 6 Gull-billed …     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
 7 Gull-billed …     8.35      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
 8 Gull-billed …     8.34      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
 9 Rock Pigeon       8.34      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
10 Rock Pigeon       8.34      80.4     2 Anurad… Anurad…   8.35    80.4   3541.
# ℹ 159,799 more rows
# ℹ 20 more variables: PA_TYPE <chr>, Source <chr>, WDPAID <dbl>,
#   WDPA_PID <dbl>, PA_DEF <dbl>, NAME <chr>, ORIG_NAME <chr>, DESIG <chr>,
#   DESIG_ENG <chr>, DESIG_TYPE <chr>, IUCN_CAT <chr>, INT_CRIT <chr>,
#   GIS_M_AREA <dbl>, GIS_AREA <dbl>, layer <chr>, path <chr>,
#   Inside_Outside <chr>, Threat_Category <chr>, Inside_Outside_PA <chr>,
#   inside_outside <chr>
# Load the combined occurrence table (species, IUCN category, coordinates,
# inside/outside status) for all endangered birds
all_endangered_birds <- read_csv(
  file = "All Endangered Birds Occurence Sri Lanka.csv"
)
Rows: 159809 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Species_Name, IUCN_CAT, inside_outside
dbl (2): Latitude, Longitude

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Share of all endangered-bird records inside vs. outside protected areas
all_endangered_birds %>%
  group_by(inside_outside) %>%
  summarise(n = n()) %>%
  mutate(percentage = n / sum(n) * 100)
# A tibble: 2 × 3
  inside_outside     n percentage
  <chr>          <int>      <dbl>
1 Inside         73703       46.1
2 Outside        86106       53.9
# Record counts per status plus each status' share of the total, kept for
# plotting
En_bird_count <- all_endangered_birds %>%
  count(inside_outside, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)

print(En_bird_count)
# A tibble: 2 × 3
  inside_outside count percentage
  <chr>          <int>      <dbl>
1 Inside         73703       46.1
2 Outside        86106       53.9
# Percentage bar chart with the rounded value printed above each bar
ggplot(
  data = En_bird_count,
  mapping = aes(x = inside_outside, y = percentage, fill = inside_outside)
) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste0(round(percentage, 1), "%")),
    vjust = -0.5
  ) +
  theme_minimal() +
  labs(
    title = "Bird Occurrences Inside vs. Outside Protected Areas",
    x = "Location",
    y = "Percentage"
  ) +
  # Fixed colours so Inside / Outside are consistent across figures
  scale_fill_manual(values = c("Inside" = "#1f78b4", "Outside" = "#e31a1c"))