###Preparation ####Loading the libraries

library(maptools)
## Loading required package: sp
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step

####Setting the working directory In order to establish where the program will look for data, I am going to set the working directory (WD) to a file I have created and labeled “Final” that will contain the .csv files and the requisite files for the map.

setwd("~/Desktop/ANT291WD/Final")

###1. Pretty map ####Provided chunk

library(maptools)
caCounties <- readShapePoly("CA_Counties_TIGER2016.shp") #change the path!
## Warning: readShapePoly is deprecated; use rgdal::readOGR or sf::st_read
# NOTE: the above command will only work if the .shp file is in the same
# folder as CA_Counties_TIGER2016.dbf, CA_Counties_TIGER2016.shx...etc You
# can ignore the message: 'Warning message:readShapePoly is deprecated'
length(caCounties) # 58 counties in CA
## [1] 58
plot(caCounties, col = 1:58) ##can take a few seconds

plot(caCounties, col = c("#cf368f",
"#63c346",
"#a651cb",
"#b9c024",
"#5565dc",
"#a4c43b",
"#6b4da9",
"#51cb6f",
"#dd6ccc",
"#4b9b35",
"#9975da",
"#80a028",
"#5586e3",
"#d0b737",
"#5865a6",
"#e8a532",
"#4e96cc",
"#e38421",
"#55c6ea",
"#e56a2b",
"#5bd6c4",
"#e23c5d",
"#3eab69",
"#b72e28",
"#4cb998",
"#e05438",
"#39afb6",
"#ad5715",
"#9d9bde",
"#a1902a",
"#c38dde",
"#588028",
"#964f95",
"#a3b75b",
"#b7395c",
"#79c688",
"#e7729d",
"#2d8346",
"#9e4974",
"#39703c",
"#d38cbc",
"#5d6713",
"#e5726b",
"#368d71",
"#e39552",
"#226a4d",
"#eb936c",
"#689356",
"#9d4d54",
"#acb371",
"#a1502e",
"#56642b",
"#da8485",
"#bb8226",
"#8d8444",
"#d5ae69",
"#7d5e20",
"#b37d4e"))

levels(caCounties$NAME)
##  [1] "Alameda"         "Alpine"          "Amador"         
##  [4] "Butte"           "Calaveras"       "Colusa"         
##  [7] "Contra Costa"    "Del Norte"       "El Dorado"      
## [10] "Fresno"          "Glenn"           "Humboldt"       
## [13] "Imperial"        "Inyo"            "Kern"           
## [16] "Kings"           "Lake"            "Lassen"         
## [19] "Los Angeles"     "Madera"          "Marin"          
## [22] "Mariposa"        "Mendocino"       "Merced"         
## [25] "Modoc"           "Mono"            "Monterey"       
## [28] "Napa"            "Nevada"          "Orange"         
## [31] "Placer"          "Plumas"          "Riverside"      
## [34] "Sacramento"      "San Benito"      "San Bernardino" 
## [37] "San Diego"       "San Francisco"   "San Joaquin"    
## [40] "San Luis Obispo" "San Mateo"       "Santa Barbara"  
## [43] "Santa Clara"     "Santa Cruz"      "Shasta"         
## [46] "Sierra"          "Siskiyou"        "Solano"         
## [49] "Sonoma"          "Stanislaus"      "Sutter"         
## [52] "Tehama"          "Trinity"         "Tulare"         
## [55] "Tuolumne"        "Ventura"         "Yolo"           
## [58] "Yuba"
plot(caCounties, col = c(rep("white", 56), "red"))

###2. Bicycles crossing the bridge ####Importing the data and creating an object

bb <- read.csv("~/Desktop/ANT291WD/Final/bb_bicyclists.csv")
ts(bb$BB_COUNT)
## Time Series:
## Start = 1 
## End = 214 
## Frequency = 1 
##   [1]  606 2021 2807 1674 2375 3324 3887 3353 2942 2253 3415 2520 2959 3679
##  [15] 2225 3084 3423 3342 3019 2886 2718 2810 2657 2640 2685 3666 3535 3190
##  [29] 2952 2612 3174 2609 2640 3468 3211 3253 3401 3066 2465 2854 3510 2054
##  [43] 3304 3368 2625 3386 3766 3356 2467 2296 3170 2718 3048 3506 2929 2860
##  [57] 2563 2853 2917 3264 3507 3114 2751 3191 3821 3123 2074 3331 3560 3492
##  [71] 3346 3130 3598 3893 3423 3274 3291 3685 3637 4693 2822 3088 3688 3144
##  [85] 2710 2676 3332 3279 2945 2866 3234 2609 4960 3657 3497 3344 2560 2676
##  [99] 2673 3296 3317 3297 2810 2543 3276 3157 3216 3421 2988 1903 2297 3387
## [113] 3386 3412 3312 2982 2750 3922 2839 2869 3264 3265 3169 2538 2744 3189
## [127] 3367 2565 3150 2245 2727 1222 2877 3152 1965 2544 3539 2161 3271 2589
## [141] 2882 2199 2687 3065 2840 3287 3148 2994 3468 3244 3249 2169 2751 2565
## [155] 1452 2171 3013 2470 2689 3407 2969 2283 3315 2301 1193 2321 2994 2721
## [169] 2411 2021 1805 2637 2590 1472 1318 4146  836 2400 1567 1892 3182  954
## [183] 2012 1235 1848 1428  898 1426 2596 3409 2983 1247  907 1232 2714 2149
## [197] 1876 2301  488  768  461 1576 1004  804 1064  611  723 2307  151 1648
## [211] 1399 1513  513  183
plot(bb$LOW_T~bb$HIGH_T)

bblm <- glm(bb$LOW_T~bb$HIGH_T)
summary(bblm)
## 
## Call:
## glm(formula = bb$LOW_T ~ bb$HIGH_T)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -11.3923   -2.7417    0.6765    2.6857    8.9105  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.95172    2.11082   1.398    0.163    
## bb$HIGH_T    0.79614    0.02817  28.259   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 18.25266)
## 
##     Null deviance: 18445.3  on 213  degrees of freedom
## Residual deviance:  3869.6  on 212  degrees of freedom
## AIC: 1232.8
## 
## Number of Fisher Scoring iterations: 2
par(mfrow=c(2,2))
plot(bb$BB_COUNT~ bb$HIGH_T)
plot(bb$BB_COUNT~bb$LOW_T)
plot(bb$BB_COUNT~bb$PRECIP)

summary(bb$didRain)
##  NO YES 
## 131  83
83/214
## [1] 0.3878505

I used excel to add another column to the data. Days where PRECIP = 0 are coded as NO, and days where PRECIP > 0 as YES

It rained 39% of the days.

I would imagine that, given Brooklyn is at a northern latitude, lower temperatures will have a greater impact on bike use than high temperatures

highbb <- glm(BB_COUNT~HIGH_T, data=bb)
summary(highbb)
## 
## Call:
## glm(formula = BB_COUNT ~ HIGH_T, data = bb)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2119.02   -479.57     80.14    545.11   2369.44  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -335.486    368.017  -0.912    0.363    
## HIGH_T        40.640      4.912   8.274 1.44e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 554829.8)
## 
##     Null deviance: 155603031  on 213  degrees of freedom
## Residual deviance: 117623914  on 212  degrees of freedom
## AIC: 3441.7
## 
## Number of Fisher Scoring iterations: 2
lowbb <- glm(BB_COUNT~LOW_T, data=bb)
summary(lowbb)
## 
## Call:
## glm(formula = BB_COUNT ~ LOW_T, data = bb)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2469.3   -394.4    150.8    558.6   2415.8  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1003.825    378.119   2.655  0.00854 ** 
## LOW_T         27.024      6.029   4.482 1.21e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 670436.4)
## 
##     Null deviance: 155603031  on 213  degrees of freedom
## Residual deviance: 142132526  on 212  degrees of freedom
## AIC: 3482.3
## 
## Number of Fisher Scoring iterations: 2
tempbb <-  glm(BB_COUNT~LOW_T + HIGH_T, data=bb)
summary(tempbb)
## 
## Call:
## glm(formula = BB_COUNT ~ LOW_T + HIGH_T, data = bb)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1714.24   -468.45     51.82    530.52   2161.67  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -148.17     345.17  -0.429    0.668    
## LOW_T         -63.46      11.18  -5.677 4.51e-08 ***
## HIGH_T         91.16      10.01   9.105  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 483605.6)
## 
##     Null deviance: 155603031  on 213  degrees of freedom
## Residual deviance: 102040790  on 211  degrees of freedom
## AIC: 3413.3
## 
## Number of Fisher Scoring iterations: 2
tempPreciphigh <-  glm(BB_COUNT~HIGH_T + didRain, data=bb)
summary(tempPreciphigh)
## 
## Call:
## glm(formula = BB_COUNT ~ HIGH_T + didRain, data = bb)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1693.71   -427.26     52.12    397.12   2036.79  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  559.107    329.951   1.695   0.0916 .  
## HIGH_T        32.835      4.286   7.661 6.58e-13 ***
## didRainYES  -813.376     91.178  -8.921 2.25e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 404790.7)
## 
##     Null deviance: 155603031  on 213  degrees of freedom
## Residual deviance:  85410845  on 211  degrees of freedom
## AIC: 3375.3
## 
## Number of Fisher Scoring iterations: 2
tpinterhigh <-  glm(BB_COUNT~HIGH_T * didRain, data=bb)
summary(tpinterhigh)
## 
## Call:
## glm(formula = BB_COUNT ~ HIGH_T * didRain, data = bb)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1775.12   -370.59     44.26    380.51   2218.38  
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1350.344    448.525   3.011 0.002926 ** 
## HIGH_T               22.408      5.866   3.820 0.000176 ***
## didRainYES        -2412.833    629.888  -3.831 0.000169 ***
## HIGH_T:didRainYES    21.724      8.467   2.566 0.010997 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 394357.5)
## 
##     Null deviance: 155603031  on 213  degrees of freedom
## Residual deviance:  82815078  on 210  degrees of freedom
## AIC: 3370.7
## 
## Number of Fisher Scoring iterations: 2
plot(tpinterhigh)

If we look at the variables individually, high temperature better explains how many bikers will cross the bridge each day than does low temperature. The resultsa are significant according to the pvalue, but the lower AIC for the high temperature demonstrates its superior explanatory power. The addition of low temperature to high temperature negatively impact the power of the model. When didRain is added to high temperature, it improves the predictive power of the model, and the interaction between high_t and didRain performs the best to predict bb_count. When we plot this, this model shows the best normal Q-Q graph, with tails that don’t stick out too much.

###3. Leaf Mass by Area

LMA <- read.csv("LMA data.csv")
View(LMA)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE):
## running command ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/
## Resources/modules/R_de.so'' had status 1
levels(LMA$ID)
##  [1] "FP11" "FP15" "FP16" "FP17" "FP21" "FP22" "FP23" "FP24" "FP25" "FP27"
## [11] "FP29" "FP30" "FP32" "FP34" "FP35" "FP37" "FP38" "FP39" "FW11" "FW12"
## [21] "FW15" "FW16" "FW18" "FW19" "FW21" "FW22" "FW24" "FW26" "FW27" "FW28"
## [31] "FW30" "FW38" "FW39" "FW42" "FW44"

If I view the data, I can see that there are 249 individual observations in the data, but with the levels function, I can see there are only 35 total trees.

ggplot(as.data.frame(LMA), aes(x=ID, y= frequency(ID),fill=ID)) + geom_col()

Crazy colors aside, this graph shows that frequency of observation by individual vary from 3 at the least to 8 at the most.

ggplot(LMA, aes(x=ID, y = LMA, fill = species)) + geom_boxplot()

tapply(LMA$LMA, LMA$species, mean)
## Pinus monticola Pinus ponderosa 
##        161.5583        294.5955

Here is a boxplot that shows the LMA per individual. A distinct difference is visible between species. The Average LMA is 161.6 for P. monticola, and 294.6 for P. ponderosa

Here is the mixed effects model I constructed to test the correlation between leaf mass and the distance from the top of the tree. I didn’t know what to use as a random variable, so I tried with height. To be honest, I couldn’t get the model to work no matter what I tried to use as the random variable.

LMAglm <- glm(LMA~dfromtop, data=LMA)
summary(LMAglm)
## 
## Call:
## glm(formula = LMA ~ dfromtop, data = LMA)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -116.14   -60.46   -17.46    62.22   190.57  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 241.4537     7.2035  33.519   <2e-16 ***
## dfromtop     -1.9178     0.7768  -2.469   0.0142 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 5316.346)
## 
##     Null deviance: 1345544  on 248  degrees of freedom
## Residual deviance: 1313138  on 247  degrees of freedom
## AIC: 2846.7
## 
## Number of Fisher Scoring iterations: 2

If I were to have gotten my model to work, it would have looked something like this. here we can see the y intercept of the index species is 241.5, and the intercept of the other species is 1.9 units below that. Because this is not a mixed effects model, the two species have the same slope. In this case, we can see that the difference between the two species is significant.