This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(ggplot2)
package 'ggplot2' was built under R version 3.4.3
library(pscl)
package 'pscl' was built under R version 3.4.3
Classes and Methods for R developed in the
Political Science Computational Laboratory
Department of Political Science
Stanford University
Simon Jackman
hurdle and zeroinfl functions by Achim Zeileis
library(MASS)
library(boot)
# Read the thesis regression data set and print per-column summary statistics.
df <- utils::read.csv(file = "R_file/Thesis_regression/regression_data.csv")
print(summary(df))
 Number_of_photos Area_sq_meters      Building_y        Impervious.Surface_y
 Min.   :   0.0   Min.   :     28   Min.   :0.0000000   Min.   :0.00000     
 1st Qu.:   0.0   1st Qu.:   1665   1st Qu.:0.0002561   1st Qu.:0.03031     
 Median :   0.0   Median :   5668   Median :0.0122056   Median :0.12311     
 Mean   :  23.6   Mean   :  88741   Mean   :0.0590268   Mean   :0.22096     
 3rd Qu.:   1.0   3rd Qu.:  19985   3rd Qu.:0.0586557   3rd Qu.:0.35174     
 Max.   :3673.0   Max.   :5314076   Max.   :0.9681596   Max.   :0.99255     
 Percentage_of_Water Percentage_of_Wood_Vegetation  Shape_index     num_busstop    
 Min.   :0.000000    Min.   :0.00000               Min.   :1.013   Min.   :  0.00  
 1st Qu.:0.000000    1st Qu.:0.07722               1st Qu.:1.172   1st Qu.:  9.00  
 Median :0.000000    Median :0.24603               Median :1.294   Median : 15.00  
 Mean   :0.004658    Mean   :0.33684               Mean   :1.579   Mean   : 18.75  
 3rd Qu.:0.000000    3rd Qu.:0.54497               3rd Qu.:1.627   3rd Qu.: 25.00  
 Max.   :0.414621    Max.   :1.00000               Max.   :8.143   Max.   :203.00  
  Bike_parking    bart_nearest_dist Sports_facilities num_landmarks      Shore_view    
 Min.   :0.0000   Min.   : 208.9    Min.   :0.0000    Min.   : 0.000   Min.   :0.0000  
 1st Qu.:0.0000   1st Qu.:1275.6    1st Qu.:0.0000    1st Qu.: 0.000   1st Qu.:0.0000  
 Median :0.0000   Median :2251.4    Median :1.0000    Median : 0.000   Median :0.0000  
 Mean   :0.4701   Mean   :2830.3    Mean   :0.5418    Mean   : 1.916   Mean   :0.1594  
 3rd Qu.:1.0000   3rd Qu.:4264.0    3rd Qu.:1.0000    3rd Qu.: 2.000   3rd Qu.:0.0000  
 Max.   :1.0000   Max.   :8853.0    Max.   :1.0000    Max.   :34.000   Max.   :1.0000  
 avg_hgt_blding   std_hgt_blding    area_bld_dens     count_bld_dens    
 Min.   : 2.284   Min.   : 0.7294   Min.   : 0.3245   Min.   :0.001965  
 1st Qu.: 6.628   1st Qu.: 1.6798   1st Qu.:19.1621   1st Qu.:0.131057  
 Median : 7.938   Median : 2.1177   Median :29.1700   Median :0.206534  
 Mean   : 8.917   Mean   : 3.3591   Mean   :28.7228   Mean   :0.217597  
 3rd Qu.: 9.623   3rd Qu.: 2.9181   3rd Qu.:37.2014   3rd Qu.:0.314367  
 Max.   :51.125   Max.   :52.1427   Max.   :62.5545   Max.   :0.515348  
 Percentage_of_Impervious_Surface
 Min.   :0.00000                 
 1st Qu.:0.05634                 
 Median :0.16148                 
 Mean   :0.27999                 
 3rd Qu.:0.44324                 
 Max.   :0.99288                 
# Look up the documentation entries that mention zeroinfl().
help.search("zeroinfl")

# Fit the two zero-inflated negative binomial models that the rest of the
# notebook summarises. Both share the same zero-inflation part (bus stops and
# distance to the nearest BART station) and differ only in the third
# count-model covariate.
# The objects must be named `ml` and `m2`: the later summary() calls refer to
# exactly those names.
ml <- zeroinfl(
  Number_of_photos ~ Sports_facilities + Shape_index +
    Percentage_of_Wood_Vegetation | num_busstop + bart_nearest_dist,
  data = df, dist = "negbin", EM = TRUE
)
m2 <- zeroinfl(
  Number_of_photos ~ Sports_facilities + Shape_index + count_bld_dens |
    num_busstop + bart_nearest_dist,
  data = df, dist = "negbin", EM = TRUE
)
glm.fit: fitted probabilities numerically 0 or 1 occurred
# Coefficient tables (count part and zero-inflation part) for model `ml`.
print(summary(ml))

Call:
zeroinfl(formula = Number_of_photos ~ Sports_facilities + Shape_index + Percentage_of_Wood_Vegetation | num_busstop + bart_nearest_dist, 
    data = df, dist = "negbin", EM = TRUE)

Pearson residuals:
    Min      1Q  Median      3Q     Max 
-0.3296 -0.2829 -0.2166 -0.1094 10.7113 

Count model coefficients (negbin with log link):
                              Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     0.2726     0.9799   0.278   0.7808    
Sports_facilities               0.7839     0.6009   1.304   0.1921    
Shape_index                     0.8988     0.5317   1.691   0.0909 .  
Percentage_of_Wood_Vegetation   2.8683     1.2866   2.229   0.0258 *  
Log(theta)                     -2.2197     0.2030 -10.936   <2e-16 ***

Zero-inflation model coefficients (binomial with logit link):
                    Estimate Std. Error z value Pr(>|z|)    
(Intercept)        4.9052832  0.9173214   5.347 8.92e-08 ***
num_busstop       -0.1875256  0.0412486  -4.546 5.46e-06 ***
bart_nearest_dist -0.0007636  0.0002122  -3.598  0.00032 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Theta = 0.1086 
Number of iterations in BFGS optimization: 2 
Log-likelihood: -394.5 on 8 Df
# Coefficient tables (count part and zero-inflation part) for model `m2`.
print(summary(m2))

Call:
zeroinfl(formula = Number_of_photos ~ Sports_facilities + Shape_index + count_bld_dens | num_busstop + bart_nearest_dist, data = df, dist = "negbin", 
    EM = TRUE)

Pearson residuals:
    Min      1Q  Median      3Q     Max 
-0.3301 -0.3011 -0.2344 -0.1034 19.6187 

Count model coefficients (negbin with log link):
                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)         0.8828     1.2093   0.730 0.465395    
Sports_facilities   1.9714     0.5310   3.713 0.000205 ***
Shape_index         1.5987     0.6769   2.362 0.018188 *  
count_bld_dens    -11.0427     2.1701  -5.089 3.61e-07 ***
Log(theta)         -2.2165     0.1778 -12.464  < 2e-16 ***

Zero-inflation model coefficients (binomial with logit link):
                    Estimate Std. Error z value Pr(>|z|)    
(Intercept)        5.2670491  1.0951466   4.809 1.51e-06 ***
num_busstop       -0.2226162  0.0563091  -3.953 7.70e-05 ***
bart_nearest_dist -0.0009487  0.0002095  -4.528 5.94e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Theta = 0.109 
Number of iterations in BFGS optimization: 2 
Log-likelihood: -386.8 on 8 Df
# Small-sample corrected AIC for model `ml`:
#   AICc = -2 * logLik + 2 * k + 2 * k * (k + 1) / (n - k - 1)
# The value has to be assigned before it is printed; printing it without the
# assignment fails with "object 'AIC_c1' not found".
loglik_ml <- -394.5 # log-likelihood reported by summary(ml)
k_ml <- 8 # number of estimated parameters: summary(ml) reports "on 8 Df"
# Sample size taken from the notebook's embedded Rmd source.
# TODO confirm that it equals nrow(df).
n_obs <- 251
# NOTE(review): the embedded Rmd source used k = 5 here. The zero-inflation
# coefficients are estimated as well, so the full 8 Df is used instead.
AIC_c1 <- -2 * loglik_ml + 2 * k_ml +
  2 * k_ml * (k_ml + 1) / (n_obs - k_ml - 1)
AIC_c1

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KHBzY2wpDQpsaWJyYXJ5KE1BU1MpDQpsaWJyYXJ5KGJvb3QpDQpgYGANCg0KYGBge3J9DQpkZiA8LSByZWFkLmNzdigiUl9maWxlL1RoZXNpc19yZWdyZXNzaW9uL3JlZ3Jlc3Npb25fZGF0YS5jc3YiKQ0KYGBgDQpgYGB7cn0NCnN1bW1hcnkoZGYpDQpgYGANCmBgYHtyfQ0KaGVscC5zZWFyY2goInplcm9pbmZsIikNCmBgYA0KDQpgYGB7cn0NCm1sIDwtIHplcm9pbmZsKE51bWJlcl9vZl9waG90b3MgfiBTcG9ydHNfZmFjaWxpdGllcytTaGFwZV9pbmRleCtQZXJjZW50YWdlX29mX1dvb2RfVmVnZXRhdGlvbiB8IG51bV9idXNzdG9wK2JhcnRfbmVhcmVzdF9kaXN0LCBkYXRhID0gZGYsIGRpc3QgPSAibmVnYmluIiwgRU0gPSBUUlVFKQ0KbTIgPC0gemVyb2luZmwoTnVtYmVyX29mX3Bob3RvcyB+IFNwb3J0c19mYWNpbGl0aWVzK1NoYXBlX2luZGV4K2NvdW50X2JsZF9kZW5zIHwgbnVtX2J1c3N0b3ArYmFydF9uZWFyZXN0X2Rpc3QsIGRhdGEgPSBkZiwgZGlzdCA9ICJuZWdiaW4iLCBFTSA9IFRSVUUpDQpgYGANCmBgYHtyfQ0Kc3VtbWFyeShtbCkNCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkobTIpDQpgYGANCg0KYGBge3J9DQpBSUNfYzE9LTIqKC0zOTQuNSkrMio1KzIqNSooNSsxKS8oMjUxLTUtMSkNCkFJQ19jMQ0KYGBgDQoNCmBgYHtyfQ0KQUlDX2MyPS0yKigtMzg2LjgpKzIqNSsyKjUqKDUrMSkvKDI1MS01LTEpDQpBSUNfYzINCmBgYA0KDQoNCg0KDQpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ3RybCtBbHQrSSouDQoNCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ3RybCtTaGlmdCtLKiB0byBwcmV2aWV3IHRoZSBIVE1MIGZpbGUpLg0K