1. Load libraries

2. Inits

3. Define WI Region & Season of interest

    max_lat <- 47
    max_lon <- -86.5
    min_lat <- 42.4
    min_lon <- -93
    day_max <- 210
    day_min <- 150

4. Load 2023 ERD & SRD & Solar data

dim(erd)
## [1] 174084    527
    dim(srd)
## [1] 24912   122
    dim(solar)
## [1] 93 13

5. Identify which ERD checklists (in B) are within a specified distance of any Solar installations (in A)

    A <- data.frame(
      lat = solar$latitude,
      lon = solar$longitude )
    A <- A[ complete.cases(A), ]
    

    # In any column in the data frame, remove rows with missing values.
    B <- data.frame(
      lat = erd$latitude,
      lon = erd$longitude, 
      checklist_id = erd$checklist_id )

6. Compute distance matrix

    # (each element is the distance between one point in A and one in B)

    dist_matrix <- distm(
        cbind(B$lon, B$lat), 
        cbind(A$lon, A$lat), 
        fun = distHaversine)
#head(dist_matrix)

7. Find points in B within the specified distance from any point in A

    distance_threshold <- 10000  # 10k meters or 10 km

    within_distance <- apply(
        dist_matrix, 1, 
        function(distances) any(distances <= distance_threshold))

8. Extract the subset of points in B that are within the specified distance

    B_within_distance <- B[within_distance, ]

9. Prepare data for isolated analysis

a. filter solar data frame

to only have latitude, longitude, and project name and remove projects without lat/long data

solar_f = solar %>% select("latitude", "longitude", "Customer.ProjectName")
solar_f = solar_f %>% filter(!is.na(latitude))

# turn customer into a factor
solar_f$Customer.ProjectName = factor(solar_f$Customer.ProjectName)

b. determine max/min lat for 10 km

# 1 deg = 110.574 km

solar_f$ lat.plus.10k = solar_f$ latitude + ((1/110)*10)
solar_f$ lat.less.10k = solar_f$ latitude - ((1/110)*10)

c. determine max/min long for 10 km

# 1. convert degrees to radians

## function that converts degree to radians

deg2rad <- function(x) {(x * pi) / (180)}

# create variable of converted values

solar_f$ lat.deg2radians = deg2rad(solar_f$ latitude)
solar_f$ long.deg2radians = deg2rad(solar_f$ longitude)


# 1 deg = 111.320*cos(latitude) km
# 1 degree = 111.320*cos(lat in radians) km

solar_f$ km_per_1deglong = 111.32*cos(solar_f$ lat.deg2radians)


# calculate 1 degree longitude with formula
# 1 degree = 111.320*cos(lat in radians) km

solar_f$ deg.in.10km = (1/(solar_f$ km_per_1deglong))*10

solar_f$ long.plus.10k = solar_f$ longitude + solar_f$ deg.in.10km
solar_f$ long.less.10k = solar_f$ longitude - solar_f$ deg.in.10km

#head(solar_f)

# rad2deg <- function(rad) {(rad * 180) / (pi)}
# deg2rad <- function(deg) {(deg * pi) / (180)}

10. Isolate a given array

a. Choose array: Hill Construction

df = Hill_constrxn_erd

installed 2016, 46.007367, -91.492534

Hill_install = 2016
Hill_constrxn_erd = erd %>% select (latitude, longitude, checklist_id, year, comrav, rewbla, norpin, gnwtea, mallar3, grhowl, rethaw, reshaw, leasan, wlswar, swathr, marwre, norfli, horlar, amekes, y00475, coohaw, comloo, comyel, leabit, orcwar, linspa, sonspa, bnhcow, rudduc, savspa, doccor, pibgre, vesspa, sora, virrai, yerwar, yelwar, easblu, norsho, buwtea, nrwswa, wesmea, treswa, houwre, yehbla, moudov, whcspa, sancra)

b. select checklists between lat.less.10k / lat.plus.10k and long.plus.10k / long.less.10k

Hill_constrxn_erd <- filter(Hill_constrxn_erd, latitude < solar_f$lat.plus.10k[solar_f$Customer.ProjectName == "Hill Construction"])

Hill_constrxn_erd <- filter(Hill_constrxn_erd, latitude > solar_f$lat.less.10k[solar_f$Customer.ProjectName == "Hill Construction"])

range(Hill_constrxn_erd$latitude)
## [1] 45.91646 46.09816
Hill_constrxn_erd <- filter(Hill_constrxn_erd, longitude < solar_f$long.plus.10k[solar_f$Customer.ProjectName == "Hill Construction"])

Hill_constrxn_erd <- filter(Hill_constrxn_erd, longitude > solar_f$long.less.10k[solar_f$Customer.ProjectName == "Hill Construction"])

range(Hill_constrxn_erd$longitude)
## [1] -91.62039 -91.36499

c. look at number of checklists at this array over time

tibble: count_list_Hill

table with # of checklists per year

count_list_Hill = Hill_constrxn_erd %>% count(checklist_id, year)

count_list_Hill <- count_list_Hill %>% group_by(year) %>%
  summarise(no.check=sum(n))
count_list_Hill
## # A tibble: 13 × 2
##     year no.check
##    <dbl>    <int>
##  1  2010        8
##  2  2012        4
##  3  2013        2
##  4  2014        4
##  5  2015        1
##  6  2016       15
##  7  2017       14
##  8  2018       38
##  9  2019       54
## 10  2020       15
## 11  2021       23
## 12  2022       28
## 13  2023       20

d. determine counts for species on list of birds for analysis

tibble: detections_Hill

data frame with species in one column

detections_Hill = gather(Hill_constrxn_erd, key="species", value="no.detect", comrav:sancra)
#head(detections_Hill)
tibble: detections_Hill_spp

table with detections by species with years combined

detections_Hill_spp <- detections_Hill %>% group_by(species) %>%
  summarise(total.det.spp=sum(no.detect))
detections_Hill_spp
## # A tibble: 44 × 2
##    species total.det.spp
##    <chr>           <int>
##  1 amekes              4
##  2 bnhcow             43
##  3 buwtea              0
##  4 comloo             35
##  5 comrav             36
##  6 comyel            172
##  7 coohaw              1
##  8 doccor              0
##  9 easblu             18
## 10 gnwtea              0
## # ℹ 34 more rows
ggplot(detections_Hill_spp,aes(x=species,y=total.det.spp)) + 
  geom_jitter() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Total detections per species for Hill Construction, years combined")

tibble: detections_Hill_yr

table with detections by species by year

detections_Hill_yr <- detections_Hill %>% group_by(species, year) %>%
  summarise(total.det.yr=sum(no.detect))
## `summarise()` has grouped output by 'species'. You can override using the
## `.groups` argument.
detections_Hill_yr
## # A tibble: 572 × 3
## # Groups:   species [44]
##    species  year total.det.yr
##    <chr>   <dbl>        <int>
##  1 amekes   2010            0
##  2 amekes   2012            0
##  3 amekes   2013            0
##  4 amekes   2014            0
##  5 amekes   2015            0
##  6 amekes   2016            0
##  7 amekes   2017            0
##  8 amekes   2018            0
##  9 amekes   2019            2
## 10 amekes   2020            0
## # ℹ 562 more rows
ggplot(detections_Hill_yr, aes(x=year, y=total.det.yr)) +
  geom_jitter() +
  geom_point(aes(colour = factor(species))) +
  geom_vline(xintercept = Hill_install)

e. plot average detections over year

prop.detections_Hill <- detections_Hill %>% group_by(checklist_id, year) %>%
  summarise(ave.det=mean(no.detect))
## `summarise()` has grouped output by 'checklist_id'. You can override using the
## `.groups` argument.
prop.detections_Hill
## # A tibble: 226 × 3
## # Groups:   checklist_id [226]
##    checklist_id  year ave.det
##           <dbl> <dbl>   <dbl>
##  1      7004559  2010  0     
##  2      8617023  2010  0.0227
##  3      8617024  2010  0.0455
##  4      8617025  2010  0.114 
##  5      8617026  2010  0.0682
##  6      8617027  2010  0.227 
##  7      8617028  2010  0.136 
##  8      8617029  2010  0.25  
##  9     11243474  2012  0.0455
## 10     11243475  2012  0.0455
## # ℹ 216 more rows
ggplot(prop.detections_Hill, aes(x=year, y=ave.det)) +
  geom_jitter() +
  geom_vline(xintercept = Hill_install)

detections_Hill_table <- detections_Hill %>% group_by(species) %>%
  summarise(tot.det=sum(no.detect))
detections_Hill_table
## # A tibble: 44 × 2
##    species tot.det
##    <chr>     <int>
##  1 amekes        4
##  2 bnhcow       43
##  3 buwtea        0
##  4 comloo       35
##  5 comrav       36
##  6 comyel      172
##  7 coohaw        1
##  8 doccor        0
##  9 easblu       18
## 10 gnwtea        0
## # ℹ 34 more rows

ignore below this point

3. plot change in detections over time

Need to account for number of checklists, therefore look at average number of detections per checklist?

After gather, create habitat variable
table that shows number of detections per species across all arrays
plot count by species
for a single array
has many variables including year; each species has it’s own column with each row returning a 0 or 1 for detection
here I made a table but it shows total observations per year, pooling spp.
What I want is a table with each row a year and each column a species with the values inside the sum of the total colunm for that year