max_lat <- 47
max_lon <- -86.5
min_lat <- 42.4
min_lon <- -93
day_max <- 210
day_min <- 150
dim(erd)
## [1] 174084 527
dim(srd)
## [1] 24912 122
dim(solar)
## [1] 93 13
A <- data.frame(
lat = solar$latitude,
lon = solar$longitude )
A <- A[ complete.cases(A), ]
# In any column in the data frame, remove rows with missing values.
B <- data.frame(
lat = erd$latitude,
lon = erd$longitude,
checklist_id = erd$checklist_id )
# (each element is the distance between one point in A and one in B)
dist_matrix <- distm(
cbind(B$lon, B$lat),
cbind(A$lon, A$lat),
fun = distHaversine)
#head(dist_matrix)
distance_threshold <- 10000 # 10k meters or 10 km
within_distance <- apply(
dist_matrix, 1,
function(distances) any(distances <= distance_threshold))
B_within_distance <- B[within_distance, ]
to only have latitude, longitude, and project name and remove projects without lat/long data
solar_f = solar %>% select("latitude", "longitude", "Customer.ProjectName")
solar_f = solar_f %>% filter(!is.na(latitude))
# turn customer into a factor
solar_f$Customer.ProjectName = factor(solar_f$Customer.ProjectName)
# 1 deg = 110.574 km
solar_f$ lat.plus.10k = solar_f$ latitude + ((1/110)*10)
solar_f$ lat.less.10k = solar_f$ latitude - ((1/110)*10)
# 1. convert degrees to radians
## function that converts degree to radians
deg2rad <- function(x) {(x * pi) / (180)}
# create variable of converted values
solar_f$ lat.deg2radians = deg2rad(solar_f$ latitude)
solar_f$ long.deg2radians = deg2rad(solar_f$ longitude)
# 1 deg = 111.320*cos(latitude) km
# 1 degree = 111.320*cos(lat in radians) km
solar_f$ km_per_1deglong = 111.32*cos(solar_f$ lat.deg2radians)
# calculate 1 degree longitude with formula
# 1 degree = 111.320*cos(lat in radians) km
solar_f$ deg.in.10km = (1/(solar_f$ km_per_1deglong))*10
solar_f$ long.plus.10k = solar_f$ longitude + solar_f$ deg.in.10km
solar_f$ long.less.10k = solar_f$ longitude - solar_f$ deg.in.10km
#head(solar_f)
# rad2deg <- function(rad) {(rad * 180) / (pi)}
# deg2rad <- function(deg) {(deg * pi) / (180)}
installed 2016, 46.007367, -91.492534
Hill_install = 2016
Hill_constrxn_erd = erd %>% select (latitude, longitude, checklist_id, year, comrav, rewbla, norpin, gnwtea, mallar3, grhowl, rethaw, reshaw, leasan, wlswar, swathr, marwre, norfli, horlar, amekes, y00475, coohaw, comloo, comyel, leabit, orcwar, linspa, sonspa, bnhcow, rudduc, savspa, doccor, pibgre, vesspa, sora, virrai, yerwar, yelwar, easblu, norsho, buwtea, nrwswa, wesmea, treswa, houwre, yehbla, moudov, whcspa, sancra)
Hill_constrxn_erd <- filter(Hill_constrxn_erd, latitude < solar_f$lat.plus.10k[solar_f$Customer.ProjectName == "Hill Construction"])
Hill_constrxn_erd <- filter(Hill_constrxn_erd, latitude > solar_f$lat.less.10k[solar_f$Customer.ProjectName == "Hill Construction"])
range(Hill_constrxn_erd$latitude)
## [1] 45.91646 46.09816
Hill_constrxn_erd <- filter(Hill_constrxn_erd, longitude < solar_f$long.plus.10k[solar_f$Customer.ProjectName == "Hill Construction"])
Hill_constrxn_erd <- filter(Hill_constrxn_erd, longitude > solar_f$long.less.10k[solar_f$Customer.ProjectName == "Hill Construction"])
range(Hill_constrxn_erd$longitude)
## [1] -91.62039 -91.36499
table with # of checklists per year
count_list_Hill = Hill_constrxn_erd %>% count(checklist_id, year)
count_list_Hill <- count_list_Hill %>% group_by(year) %>%
summarise(no.check=sum(n))
count_list_Hill
## # A tibble: 13 × 2
## year no.check
## <dbl> <int>
## 1 2010 8
## 2 2012 4
## 3 2013 2
## 4 2014 4
## 5 2015 1
## 6 2016 15
## 7 2017 14
## 8 2018 38
## 9 2019 54
## 10 2020 15
## 11 2021 23
## 12 2022 28
## 13 2023 20
data frame with species in one column
detections_Hill = gather(Hill_constrxn_erd, key="species", value="no.detect", comrav:sancra)
#head(detections_Hill)
table with detections by species with years combined
detections_Hill_spp <- detections_Hill %>% group_by(species) %>%
summarise(total.det.spp=sum(no.detect))
detections_Hill_spp
## # A tibble: 44 × 2
## species total.det.spp
## <chr> <int>
## 1 amekes 4
## 2 bnhcow 43
## 3 buwtea 0
## 4 comloo 35
## 5 comrav 36
## 6 comyel 172
## 7 coohaw 1
## 8 doccor 0
## 9 easblu 18
## 10 gnwtea 0
## # ℹ 34 more rows
ggplot(detections_Hill_spp,aes(x=species,y=total.det.spp)) +
geom_jitter() +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Total detections per species for Hill Construction, years combined")
table with detections by species by year
detections_Hill_yr <- detections_Hill %>% group_by(species, year) %>%
summarise(total.det.yr=sum(no.detect))
## `summarise()` has grouped output by 'species'. You can override using the
## `.groups` argument.
detections_Hill_yr
## # A tibble: 572 × 3
## # Groups: species [44]
## species year total.det.yr
## <chr> <dbl> <int>
## 1 amekes 2010 0
## 2 amekes 2012 0
## 3 amekes 2013 0
## 4 amekes 2014 0
## 5 amekes 2015 0
## 6 amekes 2016 0
## 7 amekes 2017 0
## 8 amekes 2018 0
## 9 amekes 2019 2
## 10 amekes 2020 0
## # ℹ 562 more rows
ggplot(detections_Hill_yr, aes(x=year, y=total.det.yr)) +
geom_jitter() +
geom_point(aes(colour = factor(species))) +
geom_vline(xintercept = Hill_install)
prop.detections_Hill <- detections_Hill %>% group_by(checklist_id, year) %>%
summarise(ave.det=mean(no.detect))
## `summarise()` has grouped output by 'checklist_id'. You can override using the
## `.groups` argument.
prop.detections_Hill
## # A tibble: 226 × 3
## # Groups: checklist_id [226]
## checklist_id year ave.det
## <dbl> <dbl> <dbl>
## 1 7004559 2010 0
## 2 8617023 2010 0.0227
## 3 8617024 2010 0.0455
## 4 8617025 2010 0.114
## 5 8617026 2010 0.0682
## 6 8617027 2010 0.227
## 7 8617028 2010 0.136
## 8 8617029 2010 0.25
## 9 11243474 2012 0.0455
## 10 11243475 2012 0.0455
## # ℹ 216 more rows
ggplot(prop.detections_Hill, aes(x=year, y=ave.det)) +
geom_jitter() +
geom_vline(xintercept = Hill_install)
detections_Hill_table <- detections_Hill %>% group_by(species) %>%
summarise(tot.det=sum(no.detect))
detections_Hill_table
## # A tibble: 44 × 2
## species tot.det
## <chr> <int>
## 1 amekes 4
## 2 bnhcow 43
## 3 buwtea 0
## 4 comloo 35
## 5 comrav 36
## 6 comyel 172
## 7 coohaw 1
## 8 doccor 0
## 9 easblu 18
## 10 gnwtea 0
## # ℹ 34 more rows
Need to account for number of checklists, therefore look at average number of detections per checklist?