# Start from an empty workspace, then run the garbage collector.
# This must be written as a call. The assignment form `rm = (list = ls())`
# removes nothing: it only creates two variables, `rm` and `list`, that hold
# the names of the objects currently in the workspace.
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 541390 29.0 1203637 64.3 686382 36.7
## Vcells 985193 7.6 8388608 64.0 1876044 14.4
library(clarify)
library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(pscl)
## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
# Read the air-quality CSV from its fixed location on disk into `data`, then
# print the leading rows as a quick check of the parsed columns.
data <- read.csv(file = "C:/DATA 712/Cleaned_Air_Quality_Count_Data.csv")
head(x = data, n = 6L)
## Unique.ID Indicator.ID Name Measure
## 1 179772 640 Boiler Emissions- Total SO2 Emissions Number per km2
## 2 179785 640 Boiler Emissions- Total SO2 Emissions Number per km2
## 3 130413 640 Boiler Emissions- Total SO2 Emissions Number per km2
## 4 130412 640 Boiler Emissions- Total SO2 Emissions Number per km2
## 5 130434 640 Boiler Emissions- Total SO2 Emissions Number per km2
## 6 130419 640 Boiler Emissions- Total SO2 Emissions Number per km2
## Measure.Info Geo.Type.Name Geo.Place.Name Time.Period
## 1 number UHF42 Southeast Queens 2015
## 2 number UHF42 Bensonhurst - Bay Ridge 2015
## 3 number UHF42 Coney Island - Sheepshead Bay 2013
## 4 number UHF42 Bensonhurst - Bay Ridge 2013
## 5 number UHF42 Rockaways 2013
## 6 number UHF42 Upper East Side 2013
## Start_Date Data.Value
## 1 01/01/2015 0.3
## 2 01/01/2015 1.2
## 3 01/01/2013 0.9
## 4 01/01/2013 1.7
## 5 01/01/2013 0.0
## 6 01/01/2013 95.0
# Recode the two geography columns as factors in a single pass, then print a
# per-column summary of the whole data frame.
data[c("Geo.Type.Name", "Geo.Place.Name")] <-
  lapply(data[c("Geo.Type.Name", "Geo.Place.Name")], as.factor)
summary(object = data)
## Unique.ID Indicator.ID Name Measure
## Min. :130397 Min. :640 Length:288 Length:288
## 1st Qu.:130469 1st Qu.:640 Class :character Class :character
## Median :155129 Median :641 Mode :character Mode :character
## Mean :155129 Mean :641
## 3rd Qu.:179789 3rd Qu.:642
## Max. :179861 Max. :642
##
## Measure.Info Geo.Type.Name Geo.Place.Name
## Length:288 Borough : 30 Bayside - Little Neck : 6
## Class :character Citywide: 6 Bedford Stuyvesant - Crown Heights: 6
## Mode :character UHF42 :252 Bensonhurst - Bay Ridge : 6
## Borough Park : 6
## Bronx : 6
## Brooklyn : 6
## (Other) :252
## Time.Period Start_Date Data.Value
## Min. :2013 Length:288 Min. : 0.00
## 1st Qu.:2013 Class :character 1st Qu.: 0.30
## Median :2014 Mode :character Median : 3.00
## Mean :2014 Mean : 22.05
## 3rd Qu.:2015 3rd Qu.: 23.60
## Max. :2015 Max. :284.70
##
# Count how many rows each place name contributes.
table(data[["Geo.Place.Name"]])
##
## Bayside - Little Neck Bedford Stuyvesant - Crown Heights
## 6 6
## Bensonhurst - Bay Ridge Borough Park
## 6 6
## Bronx Brooklyn
## 6 6
## Canarsie - Flatlands Central Harlem - Morningside Heights
## 6 6
## Chelsea - Clinton Coney Island - Sheepshead Bay
## 6 6
## Crotona -Tremont Downtown - Heights - Slope
## 6 6
## East Flatbush - Flatbush East Harlem
## 6 6
## East New York Flushing - Clearview
## 6 6
## Fordham - Bronx Pk Fresh Meadows
## 6 6
## Gramercy Park - Murray Hill Greenpoint
## 6 6
## Greenwich Village - SoHo High Bridge - Morrisania
## 6 6
## Hunts Point - Mott Haven Jamaica
## 6 6
## Kingsbridge - Riverdale Long Island City - Astoria
## 6 6
## Lower Manhattan Manhattan
## 6 6
## New York City Northeast Bronx
## 6 6
## Pelham - Throgs Neck Port Richmond
## 6 6
## Queens Ridgewood - Forest Hills
## 6 6
## Rockaways South Beach - Tottenville
## 6 6
## Southeast Queens Southwest Queens
## 6 6
## Stapleton - St. George Staten Island
## 6 6
## Sunset Park Union Square - Lower East Side
## 6 6
## Upper East Side Upper West Side
## 6 6
## Washington Heights West Queens
## 6 6
## Williamsburg - Bushwick Willowbrook
## 6 6
# Histogram of the recorded values, to show the shape of the response.
hist(
  x = data$Data.Value,
  breaks = 30,
  main = "Distribution of Emissions",
  xlab = "Emissions per km²"
)
In this analysis, we will determine whether emissions vary by geographic
location or time period.
# Switch off warning and message output in the knitr chunk options.
# NOTE(review): the rendered output directly after this fit still lists
# warnings, so this setting presumably only takes effect for later chunks;
# consider moving it to a setup chunk.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

# Poisson regression (log link) of the recorded value on place and period.
# NOTE(review): Data.Value contains non-integer values (e.g. 0.3, 1.2), which
# is why this fit emits "non-integer x" warnings and reports AIC: Inf.
poisson_model <- glm(
  formula = Data.Value ~ Geo.Place.Name + Time.Period,
  family = poisson(link = "log"),
  data = data
)
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 24.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 36.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 39.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 32.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 225.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 78.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 210.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 117.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 114.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 99.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 256.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 121.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 39.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 72.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 142.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 35.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 100.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 77.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 181.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 32.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 35.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 24.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 67.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 126.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 118.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 55.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 269.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 42.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 115.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 82.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 247.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 161.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 24.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 24.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 62.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 41.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 204.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 36.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 42.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 284.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 132.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.400000
# Print the coefficient table and deviance figures for the Poisson fit.
summary(object = poisson_model)
##
## Call:
## glm(formula = Data.Value ~ Geo.Place.Name + Time.Period, family = poisson(link = "log"),
## data = data)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 185.55797 25.37722 7.312
## Geo.Place.NameBedford Stuyvesant - Crown Heights 1.04679 0.24184 4.328
## Geo.Place.NameBensonhurst - Bay Ridge 0.88373 0.24734 3.573
## Geo.Place.NameBorough Park 1.13961 0.23904 4.767
## Geo.Place.NameBronx 1.47430 0.23065 6.392
## Geo.Place.NameBrooklyn 0.71668 0.25383 2.823
## Geo.Place.NameCanarsie - Flatlands -0.55207 0.34421 -1.604
## Geo.Place.NameCentral Harlem - Morningside Heights 2.10667 0.22035 9.560
## Geo.Place.NameChelsea - Clinton 3.08016 0.21279 14.475
## Geo.Place.NameConey Island - Sheepshead Bay 0.75199 0.25239 2.980
## Geo.Place.NameCrotona -Tremont 1.85400 0.22376 8.286
## Geo.Place.NameDowntown - Heights - Slope 1.06187 0.24137 4.399
## Geo.Place.NameEast Flatbush - Flatbush 1.12566 0.23945 4.701
## Geo.Place.NameEast Harlem 1.68287 0.22657 7.427
## Geo.Place.NameEast New York 0.24794 0.27762 0.893
## Geo.Place.NameFlushing - Clearview 0.66238 0.25615 2.586
## Geo.Place.NameFordham - Bronx Pk 2.06637 0.22084 9.357
## Geo.Place.NameFresh Meadows 0.49512 0.26396 1.876
## Geo.Place.NameGramercy Park - Murray Hill 3.37592 0.21159 15.955
## Geo.Place.NameGreenpoint 0.51083 0.26318 1.941
## Geo.Place.NameGreenwich Village - SoHo 2.60060 0.21565 12.060
## Geo.Place.NameHigh Bridge - Morrisania 2.09181 0.22053 9.485
## Geo.Place.NameHunts Point - Mott Haven 1.21688 0.23688 5.137
## Geo.Place.NameJamaica 0.17071 0.28246 0.604
## Geo.Place.NameKingsbridge - Riverdale 1.54507 0.22918 6.742
## Geo.Place.NameLong Island City - Astoria 1.15200 0.23868 4.826
## Geo.Place.NameLower Manhattan 2.41506 0.21716 11.121
## Geo.Place.NameManhattan 2.82862 0.21412 13.210
## Geo.Place.NameNew York City 1.07378 0.24100 4.455
## Geo.Place.NameNortheast Bronx 1.12986 0.23932 4.721
## Geo.Place.NamePelham - Throgs Neck 0.89618 0.24690 3.630
## Geo.Place.NamePort Richmond -1.41707 0.47102 -3.008
## Geo.Place.NameQueens 0.43812 0.26688 1.642
## Geo.Place.NameRidgewood - Forest Hills 0.78412 0.25110 3.123
## Geo.Place.NameRockaways -0.63840 0.35392 -1.804
## Geo.Place.NameSouth Beach - Tottenville -1.75354 0.54156 -3.238
## Geo.Place.NameSoutheast Queens -0.31845 0.32065 -0.993
## Geo.Place.NameSouthwest Queens 0.27132 0.27621 0.982
## Geo.Place.NameStapleton - St. George -0.84730 0.37987 -2.231
## Geo.Place.NameStaten Island -1.41707 0.47102 -3.008
## Geo.Place.NameSunset Park 0.19237 0.28108 0.684
## Geo.Place.NameUnion Square - Lower East Side 2.52174 0.21626 11.661
## Geo.Place.NameUpper East Side 3.33190 0.21175 15.735
## Geo.Place.NameUpper West Side 3.30175 0.21186 15.585
## Geo.Place.NameWashington Heights 2.59318 0.21570 12.022
## Geo.Place.NameWest Queens 0.81525 0.24989 3.262
## Geo.Place.NameWilliamsburg - Bushwick 0.89264 0.24702 3.614
## Geo.Place.NameWillowbrook -1.70475 0.53046 -3.214
## Time.Period -0.09147 0.01260 -7.259
## Pr(>|z|)
## (Intercept) 2.63e-13 ***
## Geo.Place.NameBedford Stuyvesant - Crown Heights 1.50e-05 ***
## Geo.Place.NameBensonhurst - Bay Ridge 0.000353 ***
## Geo.Place.NameBorough Park 1.87e-06 ***
## Geo.Place.NameBronx 1.64e-10 ***
## Geo.Place.NameBrooklyn 0.004752 **
## Geo.Place.NameCanarsie - Flatlands 0.108739
## Geo.Place.NameCentral Harlem - Morningside Heights < 2e-16 ***
## Geo.Place.NameChelsea - Clinton < 2e-16 ***
## Geo.Place.NameConey Island - Sheepshead Bay 0.002887 **
## Geo.Place.NameCrotona -Tremont < 2e-16 ***
## Geo.Place.NameDowntown - Heights - Slope 1.09e-05 ***
## Geo.Place.NameEast Flatbush - Flatbush 2.59e-06 ***
## Geo.Place.NameEast Harlem 1.11e-13 ***
## Geo.Place.NameEast New York 0.371808
## Geo.Place.NameFlushing - Clearview 0.009712 **
## Geo.Place.NameFordham - Bronx Pk < 2e-16 ***
## Geo.Place.NameFresh Meadows 0.060693 .
## Geo.Place.NameGramercy Park - Murray Hill < 2e-16 ***
## Geo.Place.NameGreenpoint 0.052262 .
## Geo.Place.NameGreenwich Village - SoHo < 2e-16 ***
## Geo.Place.NameHigh Bridge - Morrisania < 2e-16 ***
## Geo.Place.NameHunts Point - Mott Haven 2.79e-07 ***
## Geo.Place.NameJamaica 0.545604
## Geo.Place.NameKingsbridge - Riverdale 1.57e-11 ***
## Geo.Place.NameLong Island City - Astoria 1.39e-06 ***
## Geo.Place.NameLower Manhattan < 2e-16 ***
## Geo.Place.NameManhattan < 2e-16 ***
## Geo.Place.NameNew York City 8.37e-06 ***
## Geo.Place.NameNortheast Bronx 2.35e-06 ***
## Geo.Place.NamePelham - Throgs Neck 0.000284 ***
## Geo.Place.NamePort Richmond 0.002625 **
## Geo.Place.NameQueens 0.100664
## Geo.Place.NameRidgewood - Forest Hills 0.001792 **
## Geo.Place.NameRockaways 0.071262 .
## Geo.Place.NameSouth Beach - Tottenville 0.001204 **
## Geo.Place.NameSoutheast Queens 0.320631
## Geo.Place.NameSouthwest Queens 0.325967
## Geo.Place.NameStapleton - St. George 0.025714 *
## Geo.Place.NameStaten Island 0.002625 **
## Geo.Place.NameSunset Park 0.493716
## Geo.Place.NameUnion Square - Lower East Side < 2e-16 ***
## Geo.Place.NameUpper East Side < 2e-16 ***
## Geo.Place.NameUpper West Side < 2e-16 ***
## Geo.Place.NameWashington Heights < 2e-16 ***
## Geo.Place.NameWest Queens 0.001105 **
## Geo.Place.NameWilliamsburg - Bushwick 0.000302 ***
## Geo.Place.NameWillowbrook 0.001310 **
## Time.Period 3.90e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 15400.9 on 287 degrees of freedom
## Residual deviance: 7364.2 on 239 degrees of freedom
## AIC: Inf
##
## Number of Fisher Scoring iterations: 6
# Keep warnings and messages out of the rendered output.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# Run the overdispersion test on the Poisson fit, keep the result, and show it.
dispersion_test <- dispersiontest(object = poisson_model)
print(x = dispersion_test)
##
## Overdispersion test
##
## data: poisson_model
## z = 11.093, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 24.45236
# Negative binomial alternative to the Poisson fit, for use when the
# dispersion test indicates overdispersion. Same formula and data as before.
nb_model <- glm.nb(
  formula = Data.Value ~ Geo.Place.Name + Time.Period,
  data = data
)
summary(object = nb_model)
##
## Call:
## glm.nb(formula = Data.Value ~ Geo.Place.Name + Time.Period, data = data,
## init.theta = 0.5252745668, link = log)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 99.76025 171.35753 0.582
## Geo.Place.NameBedford Stuyvesant - Crown Heights 1.04967 0.83257 1.261
## Geo.Place.NameBensonhurst - Bay Ridge 0.88557 0.83420 1.062
## Geo.Place.NameBorough Park 1.14170 0.83177 1.373
## Geo.Place.NameBronx 1.47341 0.82942 1.776
## Geo.Place.NameBrooklyn 0.71914 0.83614 0.860
## Geo.Place.NameCanarsie - Flatlands -0.55011 0.86787 -0.634
## Geo.Place.NameCentral Harlem - Morningside Heights 2.10688 0.82660 2.549
## Geo.Place.NameChelsea - Clinton 3.07733 0.82461 3.732
## Geo.Place.NameConey Island - Sheepshead Bay 0.75418 0.83570 0.902
## Geo.Place.NameCrotona -Tremont 1.85168 0.82752 2.238
## Geo.Place.NameDowntown - Heights - Slope 1.06517 0.83243 1.280
## Geo.Place.NameEast Flatbush - Flatbush 1.12761 0.83189 1.355
## Geo.Place.NameEast Harlem 1.68048 0.82829 2.029
## Geo.Place.NameEast New York 0.25042 0.84366 0.297
## Geo.Place.NameFlushing - Clearview 0.66085 0.83690 0.790
## Geo.Place.NameFordham - Bronx Pk 2.06519 0.82673 2.498
## Geo.Place.NameFresh Meadows 0.49490 0.83931 0.590
## Geo.Place.NameGramercy Park - Murray Hill 3.37362 0.82430 4.093
## Geo.Place.NameGreenpoint 0.51300 0.83903 0.611
## Geo.Place.NameGreenwich Village - SoHo 2.59896 0.82536 3.149
## Geo.Place.NameHigh Bridge - Morrisania 2.09111 0.82665 2.530
## Geo.Place.NameHunts Point - Mott Haven 1.21768 0.83116 1.465
## Geo.Place.NameJamaica 0.17285 0.84528 0.204
## Geo.Place.NameKingsbridge - Riverdale 1.54072 0.82902 1.858
## Geo.Place.NameLong Island City - Astoria 1.15262 0.83168 1.386
## Geo.Place.NameLower Manhattan 2.41593 0.82575 2.926
## Geo.Place.NameManhattan 2.82580 0.82496 3.425
## Geo.Place.NameNew York City 1.07306 0.83236 1.289
## Geo.Place.NameNortheast Bronx 1.13175 0.83185 1.361
## Geo.Place.NamePelham - Throgs Neck 0.89630 0.83408 1.075
## Geo.Place.NamePort Richmond -1.41522 0.92550 -1.529
## Geo.Place.NameQueens 0.43887 0.84022 0.522
## Geo.Place.NameRidgewood - Forest Hills 0.78432 0.83534 0.939
## Geo.Place.NameRockaways -0.63618 0.87175 -0.730
## Geo.Place.NameSouth Beach - Tottenville -1.75188 0.96332 -1.819
## Geo.Place.NameSoutheast Queens -0.31612 0.85878 -0.368
## Geo.Place.NameSouthwest Queens 0.27307 0.84321 0.324
## Geo.Place.NameStapleton - St. George -0.84629 0.88267 -0.959
## Geo.Place.NameStaten Island -1.41522 0.92550 -1.529
## Geo.Place.NameSunset Park 0.19641 0.84477 0.232
## Geo.Place.NameUnion Square - Lower East Side 2.52099 0.82551 3.054
## Geo.Place.NameUpper East Side 3.32670 0.82434 4.036
## Geo.Place.NameUpper West Side 3.29741 0.82437 4.000
## Geo.Place.NameWashington Heights 2.59033 0.82537 3.138
## Geo.Place.NameWest Queens 0.81588 0.83497 0.977
## Geo.Place.NameWilliamsburg - Bushwick 0.89522 0.83409 1.073
## Geo.Place.NameWillowbrook -1.70306 0.95712 -1.779
## Time.Period -0.04886 0.08508 -0.574
## Pr(>|z|)
## (Intercept) 0.560448
## Geo.Place.NameBedford Stuyvesant - Crown Heights 0.207398
## Geo.Place.NameBensonhurst - Bay Ridge 0.288427
## Geo.Place.NameBorough Park 0.169873
## Geo.Place.NameBronx 0.075660 .
## Geo.Place.NameBrooklyn 0.389749
## Geo.Place.NameCanarsie - Flatlands 0.526166
## Geo.Place.NameCentral Harlem - Morningside Heights 0.010808 *
## Geo.Place.NameChelsea - Clinton 0.000190 ***
## Geo.Place.NameConey Island - Sheepshead Bay 0.366820
## Geo.Place.NameCrotona -Tremont 0.025246 *
## Geo.Place.NameDowntown - Heights - Slope 0.200691
## Geo.Place.NameEast Flatbush - Flatbush 0.175264
## Geo.Place.NameEast Harlem 0.042474 *
## Geo.Place.NameEast New York 0.766601
## Geo.Place.NameFlushing - Clearview 0.429740
## Geo.Place.NameFordham - Bronx Pk 0.012489 *
## Geo.Place.NameFresh Meadows 0.555422
## Geo.Place.NameGramercy Park - Murray Hill 4.26e-05 ***
## Geo.Place.NameGreenpoint 0.540919
## Geo.Place.NameGreenwich Village - SoHo 0.001639 **
## Geo.Place.NameHigh Bridge - Morrisania 0.011418 *
## Geo.Place.NameHunts Point - Mott Haven 0.142910
## Geo.Place.NameJamaica 0.837969
## Geo.Place.NameKingsbridge - Riverdale 0.063101 .
## Geo.Place.NameLong Island City - Astoria 0.165780
## Geo.Place.NameLower Manhattan 0.003436 **
## Geo.Place.NameManhattan 0.000614 ***
## Geo.Place.NameNew York City 0.197338
## Geo.Place.NameNortheast Bronx 0.173666
## Geo.Place.NamePelham - Throgs Neck 0.282555
## Geo.Place.NamePort Richmond 0.126231
## Geo.Place.NameQueens 0.601439
## Geo.Place.NameRidgewood - Forest Hills 0.347769
## Geo.Place.NameRockaways 0.465530
## Geo.Place.NameSouth Beach - Tottenville 0.068974 .
## Geo.Place.NameSoutheast Queens 0.712794
## Geo.Place.NameSouthwest Queens 0.746058
## Geo.Place.NameStapleton - St. George 0.337668
## Geo.Place.NameStaten Island 0.126231
## Geo.Place.NameSunset Park 0.816154
## Geo.Place.NameUnion Square - Lower East Side 0.002259 **
## Geo.Place.NameUpper East Side 5.45e-05 ***
## Geo.Place.NameUpper West Side 6.34e-05 ***
## Geo.Place.NameWashington Heights 0.001699 **
## Geo.Place.NameWest Queens 0.328503
## Geo.Place.NameWilliamsburg - Bushwick 0.283145
## Geo.Place.NameWillowbrook 0.075181 .
## Time.Period 0.565751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.5253) family taken to be 1)
##
## Null deviance: 568.62 on 287 degrees of freedom
## Residual deviance: 354.30 on 239 degrees of freedom
## AIC: 2021.5
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.5253
## Std. Err.: 0.0460
##
## 2 x log-likelihood: -1921.5400
# Fix the RNG seed so the draws are reproducible, then draw 1000 simulated
# coefficient sets from the Poisson fit.
set.seed(seed = 123)
# The result is stored under the name `sim`, the same name as the function
# that produced it; the function is therefore called with its namespace.
sim <- clarify::sim(poisson_model, n = 1000)
summary(object = sim)
## Length Class Mode
## sim.coefs 49000 -none- numeric
## coefs 49 -none- numeric
## vcov 2401 -none- numeric
## fit 30 glm list
# Fitted value on the response scale for a single scenario:
# Southeast Queens in 2015.
predicted <- predict(
  object = poisson_model,
  newdata = data.frame(
    Geo.Place.Name = "Southeast Queens",
    Time.Period = 2015
  ),
  type = "response"
)
print(x = predicted)
## 1
## 2.544605
# Re-draw 1,000 simulated coefficient vectors from the fitted model, using
# the same seed as the earlier simulation so the draws are reproducible.
set.seed(123)
sim_results <- sim(poisson_model, n = 1000)
# Column-wise mean of the simulated coefficient draws: one average per model
# term. NOTE: these are averaged coefficients, not predictions. The result is
# still stored in `predicted` so any later code that reads that name keeps
# working, but be aware that it overwrites the point prediction computed
# above.
predicted <- colMeans(sim_results$sim.coefs)
print(predicted)
## (Intercept)
## 185.04906995
## Geo.Place.NameBedford Stuyvesant - Crown Heights
## 1.06262704
## Geo.Place.NameBensonhurst - Bay Ridge
## 0.89255109
## Geo.Place.NameBorough Park
## 1.15244241
## Geo.Place.NameBronx
## 1.48482625
## Geo.Place.NameBrooklyn
## 0.72663682
## Geo.Place.NameCanarsie - Flatlands
## -0.53149051
## Geo.Place.NameCentral Harlem - Morningside Heights
## 2.11784450
## Geo.Place.NameChelsea - Clinton
## 3.09068258
## Geo.Place.NameConey Island - Sheepshead Bay
## 0.76328935
## Geo.Place.NameCrotona -Tremont
## 1.86602736
## Geo.Place.NameDowntown - Heights - Slope
## 1.07089197
## Geo.Place.NameEast Flatbush - Flatbush
## 1.13716754
## Geo.Place.NameEast Harlem
## 1.69835767
## Geo.Place.NameEast New York
## 0.25376464
## Geo.Place.NameFlushing - Clearview
## 0.68095587
## Geo.Place.NameFordham - Bronx Pk
## 2.07771871
## Geo.Place.NameFresh Meadows
## 0.50576384
## Geo.Place.NameGramercy Park - Murray Hill
## 3.38546986
## Geo.Place.NameGreenpoint
## 0.51765315
## Geo.Place.NameGreenwich Village - SoHo
## 2.61195880
## Geo.Place.NameHigh Bridge - Morrisania
## 2.10475454
## Geo.Place.NameHunts Point - Mott Haven
## 1.22811915
## Geo.Place.NameJamaica
## 0.18083171
## Geo.Place.NameKingsbridge - Riverdale
## 1.54998106
## Geo.Place.NameLong Island City - Astoria
## 1.16192730
## Geo.Place.NameLower Manhattan
## 2.42719261
## Geo.Place.NameManhattan
## 2.84127331
## Geo.Place.NameNew York City
## 1.08406312
## Geo.Place.NameNortheast Bronx
## 1.13907547
## Geo.Place.NamePelham - Throgs Neck
## 0.90074526
## Geo.Place.NamePort Richmond
## -1.42150526
## Geo.Place.NameQueens
## 0.44588883
## Geo.Place.NameRidgewood - Forest Hills
## 0.79928834
## Geo.Place.NameRockaways
## -0.62652069
## Geo.Place.NameSouth Beach - Tottenville
## -1.72024870
## Geo.Place.NameSoutheast Queens
## -0.29265405
## Geo.Place.NameSouthwest Queens
## 0.27924316
## Geo.Place.NameStapleton - St. George
## -0.84727019
## Geo.Place.NameStaten Island
## -1.39385804
## Geo.Place.NameSunset Park
## 0.21181768
## Geo.Place.NameUnion Square - Lower East Side
## 2.53107536
## Geo.Place.NameUpper East Side
## 3.34378575
## Geo.Place.NameUpper West Side
## 3.31046018
## Geo.Place.NameWashington Heights
## 2.60485197
## Geo.Place.NameWest Queens
## 0.82925574
## Geo.Place.NameWilliamsburg - Bushwick
## 0.89826758
## Geo.Place.NameWillowbrook
## -1.69975272
## Time.Period
## -0.09121935
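This Poisson regression model predicts emission counts based on geographic location (Geo.Place.Name) and Time.Period. The values interpreted below are the means of the 1,000 simulated coefficient draws printed above, which approximate the model's fitted coefficients.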
(Intercept) = 185.049
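This is the baseline log count of emissions for the reference location when Time.Period = 0. Because the data are from recent years, this is an extrapolation far outside the observed range, which is why the value is so large; it should not be interpreted on its own.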
Geo.Place.Name[Location] (e.g., Bedford Stuyvesant - Crown Heights = 1.0626)
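Each location’s coefficient is the estimated difference in the log of expected emissions between that location and the reference location, holding Time.Period constant. Exponentiating gives a rate ratio: for example, exp(1.0626) ≈ 2.89, so Bedford Stuyvesant - Crown Heights is expected to have about 2.9 times the emissions of the reference location.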
A positive coefficient means higher emissions than the reference location.
A negative coefficient means lower emissions than the reference location.
Time.Period = -0.0912
This means that for each one-unit increase in Time.Period (e.g., each year), the log of expected emissions decreases by 0.0912.
In percentage terms:
$e^{-0.0912} \approx 0.9128$
That is, expected emissions are multiplied by about 0.913 each year, a decrease of ~8.7% per year.
Locations with positive coefficients (e.g., Upper East Side = 3.34) have higher expected emissions than the reference location.
Locations with negative coefficients (e.g., Willowbrook = -1.70) have lower expected emissions than the reference location.
The Time.Period coefficient suggests that emissions are decreasing over time.