Basic setup and data prep

For all code, I will use tidyverse and dplyr…

and the data file, “Overall data” calling the dataframe “bsdata.”

To use the data, I will:

create a variable for seasons

bsdata$season <- as.character(bsdata$Date)
bsdata$season [bsdata$Date %in% 0:60000] <- "spring"
bsdata$season [bsdata$Date %in% 60001:123026] <- "fall"
bsdata$season = factor(bsdata$season)

fix building names; i.e., turn Sage.High into Sage and Polk.ctyd into Polk.

Note: This causes an overrepresentation of samples at Polk and Sage… but I can’t remember why…

bsdata$Building = factor(bsdata$Building)

levels(bsdata$Building)[match("Sage.High",levels(bsdata$Building))] <- "Sage"
levels(bsdata$Building)[match("Polk.ctyd",levels(bsdata$Building))] <- "Polk"
levels(bsdata$Building)
##  [1] "ACT"        "ALBEE"      "BLACKHAWK"  "CCE&D"      "CFWC"      
##  [6] "Clow"       "DEMPSEY"    "FDL"        "FLETCHER"   "GRUENHAGEN"
## [11] "Halsey"     "Harrington" "HORIZON"    "LINCOLN"    "Polk"      
## [16] "PRKGRMP"    "RADFORD"    "Reeve"      "Sage"       "SRWC"      
## [21] "unknown"

1. Summary of proportion of systematic samples that observed strikes

strikes.bldg <- 
  bsdata %>% filter (Syst.Opp=="systematic") %>% 
  # filter (sample.confirmed == "yes") %>% 
  select("Building","Year","season", "strike") %>% 
  droplevels()
names(strikes.bldg)
## [1] "Building" "Year"     "season"   "strike"
# "strike" is whether or not that sampling session saw a strike (yes = 1, no = 0)

# To exclude unconfirmed, keep the line
  #    >   filter (sample.confirmed == "yes") %>%   
# To include unconfirmed, put a # at the beginning of that line

1A. Proportions of samples with strikes by building, season, and year

3-way ANOVA with building, year, and season

(https://www.statology.org/three-way-anova-in-r/)

summary(lm(total.table$proportion ~ total.table$Building + total.table$Year + total.table$season))
## 
## Call:
## lm(formula = total.table$proportion ~ total.table$Building + 
##     total.table$Year + total.table$season)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20823 -0.04627 -0.01102  0.04404  0.34659 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.20448    0.06309   3.241 0.002333 ** 
## total.table$BuildingCFWC       -0.03109    0.05589  -0.556 0.581039    
## total.table$BuildingClow        0.03583    0.05589   0.641 0.524919    
## total.table$BuildingHalsey     -0.05592    0.05589  -1.000 0.322805    
## total.table$BuildingHarrington -0.06152    0.11113  -0.554 0.582772    
## total.table$BuildingPolk       -0.03799    0.05400  -0.704 0.485538    
## total.table$BuildingReeve       0.05638    0.05067   1.113 0.272170    
## total.table$BuildingSage        0.01962    0.05067   0.387 0.700518    
## total.table$BuildingSRWC       -0.03969    0.05589  -0.710 0.481537    
## total.table$Year2020            0.02002    0.06946   0.288 0.774540    
## total.table$Year2021            0.05337    0.04769   1.119 0.269410    
## total.table$Year2022            0.02958    0.04718   0.627 0.534129    
## total.table$Year2023            0.02420    0.04671   0.518 0.607059    
## total.table$Year2024            0.02654    0.04961   0.535 0.595477    
## total.table$seasonspring       -0.13825    0.03666  -3.771 0.000503 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09922 on 42 degrees of freedom
## Multiple R-squared:  0.4473, Adjusted R-squared:  0.2631 
## F-statistic: 2.428 on 14 and 42 DF,  p-value: 0.01344

1B. Proportions of samples with strikes by building

summary(aov(total.table$proportion ~ total.table$Building))
##                      Df Sum Sq Mean Sq F value Pr(>F)
## total.table$Building  8 0.1292 0.01615   1.252  0.291
## Residuals            48 0.6189 0.01289

1C. Proportions of samples with strikes by year

summary(aov(total.table$proportion ~ total.table$Year))
##                  Df Sum Sq Mean Sq F value Pr(>F)
## total.table$Year  5 0.0713 0.01427   1.075  0.385
## Residuals        51 0.6767 0.01327

1C. Proportions of samples with strikes by season

t.test(proportion ~ season, data = total.table)
## 
##  Welch Two Sample t-test
## 
## data:  proportion by season
## t = 3.1628, df = 12.966, p-value = 0.007507
## alternative hypothesis: true difference in means between group fall and group spring is not equal to 0
## 95 percent confidence interval:
##  0.04823658 0.25633114
## sample estimates:
##   mean in group fall mean in group spring 
##           0.23911652           0.08683266

2. Pre v post dots

This is a difficult thing to measure.

2A. Only Sage and door 1

First, we have to only look at Sage and then only at Door 1. (We could also pick a random door at other buildings for comparison but that’s a bit dodgy.). Unfortunately, the only Sage data that have door numbers are 2021 and later. This is when the dots were installed. We can, however, look at strikes in 2021 and beyond.

build.sage <- bsdata %>% filter (Building=="Sage") %>%  droplevels()
#levels(build.sage$Building)
build.sage$Door.no = factor(build.sage$Door.no)
build.sage$Year = factor(build.sage$Year)
build.sage = build.sage %>% filter(!is.na(Door.no))
build.sage = build.sage %>% filter(Door.no == 1) %>% droplevels()
levels(build.sage$Door.no)
## [1] "1"
names(build.sage)
##  [1] "Obs.no"              "Year"                "julian.day"         
##  [4] "Date.full"           "Time"                "Observer"           
##  [7] "Building"            "Location"            "Door.no"            
## [10] "Side"                "strike"              "Carcass"            
## [13] "Common.name"         "Abbr"                "Family"             
## [16] "Vegetation"          "Notes"               "No.photos"          
## [19] "link.to.photos"      "Syst.Opp"            "sample.confirmed"   
## [22] "sample.confirmed.by" "spp.confirm"         "spp.confirm.by"     
## [25] "Data.entry.notes"    "season"
build.sage = build.sage %>% select(Year, strike, season) 
names(build.sage)
## [1] "Year"   "strike" "season"
build.sage.count = build.sage %>% count(strike, Year, season)

#build.sage.count
ggplot(build.sage.count, aes(x=Year, y=n)) + 
  geom_col(aes(fill = season) ,position = "dodge2")

### 2B. Comparing all buildings pre and post dots

t-test for proportion as function of pre and post; though I haven’t tested any assumptions…

t.test(pre.v.post$proportion ~ pre.v.post$pre.post)
## 
##  Welch Two Sample t-test
## 
## data:  pre.v.post$proportion by pre.v.post$pre.post
## t = -2.1031, df = 32.219, p-value = 0.04335
## alternative hypothesis: true difference in means between group pre and group post is not equal to 0
## 95 percent confidence interval:
##  -0.107430519 -0.001731521
## sample estimates:
##  mean in group pre mean in group post 
##         0.07379889         0.12837991
wilcox.test(pre.v.post$proportion ~ pre.v.post$pre.post)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  pre.v.post$proportion by pre.v.post$pre.post
## W = 133, p-value = 0.1046
## alternative hypothesis: true location shift is not equal to 0

3. Heatmaps

The next three graphs are heat maps that might not be helpful but here they are. For each, the color represents proportion of samples that had strikes with the darker colors having a greater proportion.

4. Relationship between strikes and direction (“Side”)

5. Families represented in the systematic samples

I created a data frame, “family.data”, with only columns for building, year, strike, family, and season - omitting all non-strike samples.

I removed unknowns and needed from data frame.

##  [1] "Bombycillidae" "Cardinalidae"  "Columbidae"    "Fringillidae" 
##  [5] "Mimidae"       "Paridae"       "Parulidae"     "Passerelidae" 
##  [9] "Picidae"       "Regulidae"     "Sturnidae"     "Trochilidae"  
## [13] "Turdidae"
ggplot(family.data, aes(x=Family, y=strike.family)) + 
  geom_col() + 
  theme(axis.text.x = element_text(angle = 90)) 

5A. Families per building

##                
##                 ACT CFWC Clow Halsey Polk Reeve Sage SRWC
##   Bombycillidae   0    0    0      1    0     0    0    0
##   Cardinalidae    0    0    0      1    0     0    0    0
##   Columbidae      0    1    0      0    0     0    0    0
##   Fringillidae    0    2    0      0    0     0    0    0
##   Mimidae         0    1    0      0    0     0    0    1
##   Paridae         0    0    1      0    0     0    0    0
##   Parulidae       0    1    1      0    2     1    0    0
##   Passerelidae    1    0    0      0    0     1    0    0
##   Picidae         0    0    1      0    1     0    0    0
##   Regulidae       0    0    2      0    0     2    0    0
##   Sturnidae       0    0    0      0    0     1    0    0
##   Trochilidae     0    0    0      0    0     2    1    1
##   Turdidae        0    0    0      0    2     0    0    0

5B. The different families represented in the samples per year.

##                
##                 2021 2022 2023 2024
##   Bombycillidae    0    0    0    1
##   Cardinalidae     0    0    0    1
##   Columbidae       0    0    1    0
##   Fringillidae     0    0    2    0
##   Mimidae          0    0    0    2
##   Paridae          0    0    1    0
##   Parulidae        1    3    0    1
##   Passerelidae     0    1    0    1
##   Picidae          0    0    1    1
##   Regulidae        0    1    3    0
##   Sturnidae        0    1    0    0
##   Trochilidae      0    1    1    2
##   Turdidae         0    0    1    1

5C. The different families represented in the samples per season.

##                
##                 fall spring
##   Bombycillidae    0      1
##   Cardinalidae     0      1
##   Columbidae       1      0
##   Fringillidae     0      2
##   Mimidae          0      2
##   Paridae          1      0
##   Parulidae        0      5
##   Passerelidae     1      1
##   Picidae          1      1
##   Regulidae        0      4
##   Sturnidae        0      1
##   Trochilidae      1      3
##   Turdidae         1      1

6. Change in proportion of systematic samples over the course of a season

I used Julian day instead of date to make things easier to code.

6A. Spring

julian.day.data.spring <- bsdata %>% filter (julian.day != "NA") %>% select("Building","Year","season", "strike", "julian.day") %>% droplevels()

julian.day.data.spring$jd.season = factor(julian.day.data.spring$season)


julian.day.data.spring <- filter(julian.day.data.spring, jd.season == "spring") %>% droplevels()
#levels(julian.day.data.spring$jd.season)
julian.day.data.spring$strike = as.integer(julian.day.data.spring$strike)
## # A tibble: 179 × 4
## # Groups:   julian.day, Year [179]
##    julian.day  Year season JD.strikes
##         <int> <int> <fct>       <int>
##  1        104  2020 spring          0
##  2        106  2020 spring          0
##  3        106  2024 spring          2
##  4        107  2021 spring          1
##  5        107  2023 spring          2
##  6        107  2024 spring          1
##  7        108  2020 spring          2
##  8        108  2021 spring          0
##  9        108  2023 spring          0
## 10        109  2021 spring          0
## # ℹ 169 more rows
#  labs(x="Familes",y="strikes",title="Julian day")
JD.table.spring$julian.year = factor(JD.table.spring$Year)
ggplot(JD.table.spring,(aes(x=julian.day,y=JD.strikes,group=julian.year,color=julian.year)))+
  geom_line()

6B. Spring

julian.day.data.fall <- bsdata %>% filter (julian.day != "NA") %>% select("Building","Year","season", "strike", "julian.day") %>% droplevels()

julian.day.data.fall$jd.season = factor(julian.day.data.fall$season)


julian.day.data.fall <- filter(julian.day.data.fall, jd.season == "fall") %>% droplevels()
#levels(julian.day.data.fall$jd.season)
julian.day.data.fall$strike = as.integer(julian.day.data.fall$strike)

STUFF NOT DONE YET…

Table with totals strikes = JD.table Table with samples = JD.total.table

Table with proportions

Change in strikes on Sage based on door

  1. Filter bsdata so only Sage

  2. Filter strikes.Sage so it is only systematic data.

  • This section can also control whether unconfirmed samples are included.
  1. Filter strikes.Sage so it is only Door.no 1

  2. Filter strikes.Sage so it only has columns for year, strike, and season.

E. Create table based on strikes.Sage with number of strikes at Sage per year, season

Unused code

This time the strikes are separated out by year; again, the colors represent years and the shapes represent seasons.

From family data

family x season

ggplot(FxS, aes(FxS.family, FxS.season, fill= FxScounts)) + 
  geom_tile()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_gradient2(low="lavender", mid = "orchid", high="slateblue", guide="colorbar", midpoint = (max(FxYcounts)/2))+
  labs(x="Familes",y="Seasons",title="Number of strikes per season per taxonomic family")