From agency.R code in ptaxsim Gitlab “The levy of each jurisdiction is reported by the Cook County Clerk’s Office. URL here: https://www.cookcountyclerkil.gov/service/tax-extension-and-rates
PTELL = Property Tax Extension Law Limit.
Assessed value as equalized (AVE) = (assessed value * equalization factor)
People can appeal their assessed values (assessed value is 10% of market value for residential)
Taxable Value of property = Equalized Assessed Value (EAV) = “assessed value as equalized”-exemptions
Note:
Exemptions lower tax bills by reducing the equalized assessed value of your property.
Taxing District tax levy / taxable value in the district = District Tax Rate
Number of taxing districts includes any district which has levied at some point in the past and has not been terminated by its governing jurisdiction plus TIFs
Chicago general composite rate does not include special districts such as mosquito abatement, special service areas, or home equity assurance district
Random notes:
Goal: Estimate the revenue elasticity of the property tax base.
Theory: Revenue elasticity of the tax base might be equal to zero at least in expected value (all else being equal).
(%change/%change=elasticity)
Need:
Model notes: you must set robust to either “HC1”, “HC2”, or “HC3” in order to have clustered standard errors
# has EAV values, extensions by agency_num
agency_dt <- DBI::dbGetQuery(
ptaxsim_db_conn,
"SELECT *
FROM agency
"
) %>%
mutate(first6 = str_sub(agency_num,1,6),
first5 = str_sub(agency_num,1,5)
)
# grabs all unique muni names. Would be needed if creating a loop for calculating all munis
# municipality names and their agency number
muni_agency_names <- DBI::dbGetQuery(
ptaxsim_db_conn,
"SELECT DISTINCT agency_num, agency_name, minor_type
FROM agency_info
WHERE minor_type = 'MUNI'
OR agency_num = '020060000'
"
) %>%
mutate(first6 = str_sub(agency_num,1,6),
first5 = str_sub(agency_num,1,5)
) %>%
select(-minor_type)
triads <- read_csv("muni_agency_triads.csv")
# took from one of Josh files. Made pivot table of his agencies and their triad
# then saved the info as a csv on my computer named muni_agency_triads
# all_taxing_agencies %>% filter(minor_type == "TOWNSHIP")
# North <- c("020010000", "020070000", "020080000", "020090000", "020110000","020130000", "020140000", "020150000", "020160000","020170000", "020200000", "020260000", "020290000" )
#
# South <- c("020020000", "020030000","020040000","020050000", "020060000", "020100000", "020120000", "020180000", "020190000", "020210000", "020220000", "020230000","020240000", "020250000", "020270000", "020280000", "020300000")
#
# # Chicago townships include Hyde Park, Jefferson, Lake, Lakeview, North Chicago, Rogers Park, South Chicago, West Chicago.
# Chicago <- c("030210000")
#
# muni_agency_names <- muni_agency_names %>%
# mutate(agency_num = as.character(agency_num) )%>%
# mutate(
# triad = ifelse(agency_num %in% North, "North", "")) %>%
# mutate(
# triad = ifelse(agency_num %in% South, "South", triad),
# triad = ifelse(agency_num %in% Chicago, "Chicago", triad))
muni_agency_names <- muni_agency_names %>% left_join(triads)
table(muni_agency_names$triad)
##
## City North South
## 1 42 83
# all agency names, numbers, and types
# includes TIF and non-TIF agencies
all_taxing_agencies <- DBI::dbGetQuery(
ptaxsim_db_conn,
"SELECT agency_num, agency_name, major_type, minor_type
FROM agency_info
"
)
all_taxing_agencies <- all_taxing_agencies %>%
mutate(first6 = str_sub(agency_num,1,6),
first5 = str_sub(agency_num,1,5)
)
all_taxing_agencies <- all_taxing_agencies %>%
left_join(muni_agency_names, by = c("first6", "first5")) %>%
rename(muni_name = agency_name.y,
muni_num = agency_num.y,
agency_name = agency_name.x,
agency_num = agency_num.x)
agency_dt <- left_join(agency_dt, all_taxing_agencies, by = c("agency_num", "first5", "first6"))
grouped_munis <- agency_dt %>%
mutate(cty_cook_eav = as.numeric(cty_cook_eav), # EAV after exemptions
total_final_levy = as.numeric(total_final_levy), # value that matches Clerk Office PDF for Levy
cty_cook_eav = ifelse(cty_cook_eav < 1, NA, cty_cook_eav), # Code as missing so Log() doesn't make -Inf
total_final_levy = ifelse(total_final_levy <1, NA, total_final_levy)) %>%
group_by(muni_name, year) %>%
summarize(levy_sum = sum(total_final_levy, na.rm = TRUE),
eav = first(cty_cook_eav),
log_levy = log(levy_sum),
log_eav = log(eav),
year=first(year)
)
grouped_munis
table(grouped_munis$year)
##
## 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021
## 134 135 135 135 135 135 135 135 135 135 135 135 135 135 135 135
grouped_munis_rectangle <- grouped_munis %>% filter(!is.na(muni_name) & !is.na(year))
grouped_munis_rectangle
table(grouped_munis_rectangle$year)
##
## 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021
## 133 134 134 134 134 134 134 134 134 134 134 134 134 134 134 134
# All Agencies-Agency & Year FE"
grouped_model<- plm(log_levy ~ log_eav,
index = c("muni_name", "year"),
model = "within",
effect= "twoways",
data = grouped_munis_rectangle)
model_names <- c(grouped_model = "Grouped Munis with Summed Levies")
export_summs(grouped_model, robust = "HC3", model.names = model_names, statistics = c(N = "nobs", R2 = "r.squared"))
plot(grouped_model)
# add taxing agency names and agency type to data table that has eav and extension values
agency_data <- agency_dt %>%
#right_join(agency_dt, all_taxing_agencies, by = c("first6", "first5", "agency_num") ) %>%
mutate(first2 = str_sub(agency_num, 1,2),
last2 = str_sub(agency_num,8,9),
in_SSA = ifelse(minor_type == "SSA", 1,0),
in_chicago = ifelse(str_detect(agency_name, "CHICAGO"),1,0)) %>%
select(-c(cty_dupage_eav:cty_livingston_eav))
agency_data <- agency_data %>%
mutate(cty_cook_eav = as.numeric(cty_cook_eav), # EAV after exemptions
total_final_levy = as.numeric(total_final_levy), # value that matches Clerk Office PDF for Levy
cty_cook_eav = ifelse(cty_cook_eav < 1, NA, cty_cook_eav), # Code as missing so Log() doesn't make -Inf
total_final_levy = ifelse(total_final_levy <1, NA, total_final_levy)) %>%
mutate(log_eav = log(cty_cook_eav),
log_levy = log(total_final_levy))
Simple model was log(levy) ~ log(base) where each observation is an agency-year and the base is the sum of that jurisdiction’s taxable eav. (The taxable eav = equalized assessed value - TIF increments - exemptions)
The coefficients would be interpreted as “a x% change in eav leads to a y% change in levy”, which is also the elasticity.
plm() uses the entity-demeaned OLS algorithm and does not report dummy coefficients.
robust has multiple options: HC0 to HC5. Default is set to “HC3 BUT Stata default is to use”HC1”
Command and package author site for summ() and table formatting
model = “within” is the same as creating dummies for all agencies
# tax base = log(levy)
# estimate the fixed effects regression with plm()
panel_data <- agency_data %>%
filter(minor_type!="TIF") %>%
select(year, agency_name, agency_num, major_type, minor_type,
cty_cook_eav, total_final_levy, log_eav, log_levy, total_final_rate,
in_chicago, in_SSA, triad
)
#write_csv(panel_data, "panel_data_June29AWM.csv")
all_agencies<-pdata.frame(panel_data, index = c("agency_name", "year"))
# All Agencies-Agency & Year FE"
all_m1<- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = all_agencies)
model_names <- c(all_m1 = "All Agencies-Agency & Year FE")
export_summs(all_m1, robust = "HC3", model.names = model_names, statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))
| All Agencies-Agency & Year FE | |
|---|---|
| log_eav | 0.30 *** |
| (0.01) | |
| N | 13516 |
| R2 | 0.03 |
| adjR2 | -0.05 |
| Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05. | |
Major vs minor type reminder!!!
If you only want the 134 municipalities within Cook County, then use minor_type == MUNI. We probably want this.
Major_type == Muni includes things paid for by the municipality but separately such as library, health, muni, township, and general assistance taxing agencies.
minortype_munis <- panel_data %>%
filter(agency_num %in% muni_agency_names$agency_num
) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
minortype_munis %>% ggplot()+
geom_point(aes(x=log(cty_cook_eav), y=log(total_final_levy), col=agency_name, alpha = .1)) +
theme(legend.position = "none")+
ggtitle("Minor Type: Logged EAV vs Logged Levy \n Homerule & non-Homerule Together")
minortype_munis %>%
ggplot(aes(x=year,
y= log(total_final_levy)/log(cty_cook_eav),
group = agency_name
) )+
geom_line(alpha = .1)+
ggtitle("134 Municipalities based on Minor Type: \nLogged EAVLogg/ed Levy Over Time \n Homerule & non-Homerule Together")
homerule_minortype_munis <- agency_data %>%
filter(agency_num %in% muni_agency_names$agency_num &
home_rule_ind==1 # Homerule only
) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
hr_muni_minor <- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = homerule_minortype_munis)
plot(hr_muni_minor)
# homerule_minortype_munis %>%
# ggplot()+
# geom_point(aes(x=log(cty_cook_eav), y=log(total_final_levy), col=agency_name), alpha=.5) +
# theme(legend.position="none")+
# ggtitle("Homerule Muni: Logged EAV vs Logged Levy")
#
#
homerule_minortype_munis %>%
ggplot(aes(x=year,
y= log(total_final_levy)/log(cty_cook_eav),
group = agency_name, color= minor_type
) )+
geom_line(alpha = .3)+
ggtitle("Homerule Muni: Logged EAV/Logged Levy over Time")
non_homerule_munis <- agency_data %>%
filter(agency_num %in% muni_agency_names$agency_num &
home_rule_ind==0 ) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
nonhr_muni_minor <- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = non_homerule_munis)
summary(nonhr_muni_minor)
## Twoways effects Within Model
##
## Call:
## plm(formula = log_levy ~ log_eav, data = non_homerule_munis,
## effect = "twoways", model = "within", index = c("agency_name",
## "year"))
##
## Unbalanced Panel: n = 51, T = 4-16, N = 759
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.13835036 -0.04130222 0.00051839 0.04977790 1.32303696
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## log_eav 0.059375 0.095113 0.6243 0.5327
##
## Total Sum of Squares: 22.963
## Residual Sum of Squares: 22.95
## R-Squared: 0.00056283
## Adj. R-Squared: -0.094759
## F-statistic: 0.389696 on 1 and 692 DF, p-value: 0.53266
non_homerule_munis %>%
ggplot(aes(x=year,
y= log(total_final_levy)/log(cty_cook_eav),
group = agency_name, color= minor_type
) )+
geom_line(alpha = .5)+theme(legend.position = "none")+
ggtitle("Non Homerule Municipalities: Logged EAV/Logged Levy over Time")
major_type == “MUNICIPALITY/TOWNSHIP” includes items provided by the municipality but a different taxing agency associated with the “Village of..” or “City of..”.
Need to decide if this matters: Stick with the true minor_type=MUNI or combine major_type=MUNICIPALITY taxing agencies using the first 5 or 6 digits of the taxing agency? Probably the first option but just checking.
homerule_majortype_munis <- agency_data %>%
filter(major_type == "MUNICIPALITY/TOWNSHIP" &
home_rule_ind==1 # Homerule only
) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
hr_muni1 <- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = homerule_majortype_munis)
nonhr_muni_major <- agency_data %>%
filter(major_type == "MUNICIPALITY/TOWNSHIP" &
home_rule_ind==0 # Homerule only
) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
nonhr_muni_maj <- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = nonhr_muni_major)
majortype: Uses all taxing agencies that have a major_type == “MUNI/TOWNSHIP”. Includes many more taxing agencies than just the 134 muni taxing agencies. Split into homerule and nonhomerule.
Municipality models (minorType) use the 134 municipality taxing agencies from 2006-2021. Split into home rule and non homerule.
model_names <- c("All Agencies", "Homerule Muni (MinorType)", "Non-homerule Munis (MinorType)", "Homerule Muni (MajorType)", "Non-homerule Muni (Major Type)"
)
# coefs <- c("","") # to make names nicer
export_summs(all_m1, hr_muni_minor, nonhr_muni_minor, hr_muni1, nonhr_muni_maj, model.names = model_names, statistics = c(N = "nobs", R2 = "r.squared"))
| All Agencies | Homerule Muni (MinorType) | Non-homerule Munis (MinorType) | Homerule Muni (MajorType) | Non-homerule Muni (Major Type) | |
|---|---|---|---|---|---|
| log_eav | 0.30 *** | 0.03 | 0.06 | 0.35 *** | -0.04 |
| (0.01) | (0.04) | (0.10) | (0.02) | (0.07) | |
| N | 13516 | 1319 | 759 | 4820 | 2383 |
| R2 | 0.03 | 0.00 | 0.00 | 0.06 | 0.00 |
| *** p < 0.001; ** p < 0.01; * p < 0.05. | |||||
#export_summs(m1, m2, m3, model.names = model_names, scale=TRUE, robust = "HC1")
Does not separate home rule and non-homerule taxing agencies in the majortype_munis model.
majortype_munis_filtered excludes townships and SSAs so far.
# All Major Munis
majortype_munis <- agency_data %>%
filter(major_type == "MUNICIPALITY/TOWNSHIP" ) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
majortype_munis <- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = majortype_munis)
# All Major Munis: removing SSAs and TOWNSHIPS
majortype_munis_filtered <- agency_data %>%
filter(major_type == "MUNICIPALITY/TOWNSHIP" &
!(minor_type %in% c("TOWNSHIP", "SSA"))
) %>%
select(year, agency_name, agency_num,
major_type, minor_type, cty_cook_eav,
total_final_levy, total_final_rate,
log_eav, log_levy
)
majortype_munis_filtered <- plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = majortype_munis_filtered)
The major type municipal Muni models above still include SSAs and
# Models for Major Muni types. Includes homerule and nonhomerule together!!
# one model removes SSAs and Townships (which is the better model. Included other for comparison.)
model_names <- c("All Taxing Agencies" ,"All Munis (MajorType)", "All Munis (MajorType) - Filtered"
)
# coefs <- c("","") # to make names nicer
export_summs(all_m1, majortype_munis, majortype_munis_filtered, model.names = model_names, statistics = c(N = "nobs", R2 = "r.squared"))
| All Taxing Agencies | All Munis (MajorType) | All Munis (MajorType) - Filtered | |
|---|---|---|---|
| log_eav | 0.30 *** | 0.32 *** | -0.01 |
| (0.01) | (0.02) | (0.04) | |
| N | 13516 | 7203 | 3844 |
| R2 | 0.03 | 0.04 | 0.00 |
| *** p < 0.001; ** p < 0.01; * p < 0.05. | |||
Uses the 5 or 6 digit agency code to combine additional taxing agencies within a municipality together (like libraries, taxing agencies, etc. that are included within the major_type category.)
agency_dt %>%
filter(major_type == "MUNICIPALITY/TOWNSHIP" & year == 2021) %>%
group_by(muni_name) %>% select(muni_name, agency_name, cty_cook_eav, total_final_levy)
# agency_dt %>%
# filter(major_type == "MUNICIPALITY/TOWNSHIP") %>%
# group_by(muni_name) %>%
# summarize(EAV = max(cty_cook_eav),
# Levy = sum(total_final_levy))
Manually coding grouping Muni’s that have shared EAV and share a municipality.
Example: Stickney is a town and a village. The Road and Bridge Stickney taxing agency is for the Township (I think) - it has an eav of 1,226,209,940
#handcoded_munis<-readxl::read_excel("grouped_munis_manual_entry.xlsx")
schools <- agency_data %>%
filter(major_type=="SCHOOL") %>%
select(agency_name, year, agency_num, log_levy, log_eav, minor_type)
# group schools by minor type later!
# model wouldn't work until I selected only a few variables. in line above
table(schools$minor_type)
##
## COMM COLL ELEMENTARY MISC SECONDARY UNIFIED
## 176 1888 29 464 96
schools %>%
ggplot(aes(x =year,
y = log_levy/log_eav,
col = minor_type, label = agency_name))+
geom_point(alpha=.5)# +theme(legend.position = "none")
# based on major_type == Schools. Groups all schools together.
school_mod_major <- plm(log_levy ~ log_eav, index = c("agency_name", "year"),
model = "within", effect= "twoways", data = schools)
school_mod_secondary <- plm(log_levy ~ log_eav,
subset = minor_type == "SECONDARY",
index = c("agency_name", "year"), model = "within",
effect= "twoways", data = schools)
school_mod_unified <- plm(log_levy ~ log_eav,
subset = minor_type == "UNIFIED",
index = c("agency_name", "year"), model = "within",
effect= "twoways", data = schools)
school_mod_elementary <- plm(log_levy ~ log_eav, subset = minor_type == "ELEMENTARY",
index = c("agency_name", "year"), model = "within",
effect= "twoways", data = schools)
school_mod_comcol <- plm(log_levy ~ log_eav, index = c("agency_name", "year"),
subset = minor_type == "COMM COLL", model = "within",
effect= "twoways", data = schools)
model_names = c("All Schools", "Secondary", "Elementary", "Com. Colleges", "Unified" )
export_summs(school_mod_major, school_mod_secondary, school_mod_elementary, school_mod_comcol, school_mod_unified, model.names = model_names, robust=TRUE, statistics = c(N = "nobs", adjR2 = "adj.r.squared"))
| All Schools | Secondary | Elementary | Com. Colleges | Unified | |
|---|---|---|---|---|---|
| log_eav | 0.30 *** | 0.15 *** | 0.36 *** | -0.03 | 0.08 ** |
| (0.02) | (0.04) | (0.02) | (0.08) | (0.03) | |
| N | 2648 | 464 | 1888 | 176 | 96 |
| adjR2 | 0.07 | -0.06 | 0.13 | -0.17 | -0.17 |
| Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05. | |||||
plot(school_mod_elementary)
reassessment years
in reassessment_triad_years.csv
lagged variables
Levy variables exploration:
Taxing District Tax Levy / Taxable Value in a Taxing District = District Tax Rate
Therefore, Levy/TaxRate = TaxableValue
TaxableValue = original EAV - exemptions - TIF increment
EAV exploration:
Summed EAV for each Agency - Michael’s default way of thinking
cty_total_eav = all county EAVs summed together (LaSalle, Will,
Cook, etc.)
cty_cook_eav = eav after exemptions in cook county for a taxing district
Why are the other counties included in there? - Some taxing districts extend outside of cook county. cty_lasalle_eav would have eav that is in LaSalle County.
agency_data %>% ggplot()+
geom_point(aes(x=as.numeric(cty_total_eav), y=as.numeric(cty_cook_eav), col=major_type))+
ggtitle("Total County EAV and Cook County EAV")
agency_data %>% ggplot()+
geom_point(aes(x=log(cty_cook_eav), y=log(total_final_levy), col=major_type, alpha=.1)) +
ggtitle("Logged EAV vs Logged Levy for ALL agencies and all years")
agency_data %>%
filter(year == 2021) %>% #2021 only
# not in Chicago and municipalitys + Cicero
filter(!(agency_num %in% c("030210000", "030210001", "030210002" ) )
& (minor_type == "MUNI" | agency_num =="020060000")) %>%
ggplot(aes(x=cty_cook_eav, y=total_final_levy, col=minor_type, label = agency_name))+
geom_point(aes(alpha=0.4)) +
labs(title = "2021 only: Municipality EAV & Levy")
agency_data %>%
filter(!(agency_num %in% c("030210000", "030210001", "030210002" ) )
& (minor_type == "MUNI" | agency_num =="020060000")) %>%
ggplot(aes(x=cty_cook_eav, y=total_final_levy, color=agency_name, group = agency_name))+
geom_point(alpha=.54) +
theme(legend.position = "none")+
labs(title = "All Years: Municipality EAV & Levy")
agency_data %>%
filter(!(agency_num %in% c("030210000", "030210001", "030210002" ) )
& (minor_type == "MUNI" | agency_num =="020060000")) %>%
ggplot(aes(x=log(cty_cook_eav), y=log(total_final_levy), color=agency_name, group = agency_name))+
geom_point(alpha=.5) +
theme(legend.position = "none")+
labs(title = "Logged Values All Years: Municipality EAV & Levy")
agency_data %>%
filter(year == 2021) %>%
filter(!(agency_num %in% c("030210000", "030210001", "030210002" ) )
& (minor_type == "MUNI" | agency_num =="020060000")) %>%
ggplot(aes(x=cty_cook_eav, y=total_final_levy, col=minor_type, label = agency_name))+
geom_point()+
geom_text(size = 2, vjust=-1.5)
muni_panel <- agency_data %>%
filter(!(agency_num %in% c("030210000", "030210001", "030210002" ) )
& (minor_type == "MUNI" | agency_num =="020060000")) %>%
select(year, agency_name, agency_num, major_type, minor_type, cty_cook_eav, total_final_levy, log_eav, log_levy, total_final_rate
)
plm(log_levy ~ log_eav,
index = c("agency_name", "year"),
model = "within", effect= "twoways",
data = muni_panel) %>%
summary()
## Twoways effects Within Model
##
## Call:
## plm(formula = log_levy ~ log_eav, data = muni_panel, effect = "twoways",
## model = "within", index = c("agency_name", "year"))
##
## Unbalanced Panel: n = 130, T = 5-16, N = 2062
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.79047688 -0.06183549 -0.00078695 0.06597277 0.84162228
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## log_eav 0.000687 0.045502 0.0151 0.988
##
## Total Sum of Squares: 53.154
## Residual Sum of Squares: 53.154
## R-Squared: 1.1898e-07
## Adj. R-Squared: -0.075678
## F-statistic: 0.000227956 on 1 and 1916 DF, p-value: 0.98796
Total Final Levy and Cook County EAV values:
agency_data %>% ggplot()+
geom_point(aes(x=total_final_levy, y=cty_cook_eav, col=major_type))+
coord_flip()+
ggtitle("Total Final Levy and Cook County EAV")
agency_data %>%
filter(year == 2021) %>%
filter(minor_type == "MUNI" &
in_chicago == "0") %>% # Chicago was massive outlier in the uppper right corner
ggplot(aes(x=total_final_levy, y=cty_cook_eav, col=major_type, label = agency_name))+
geom_point() +#
coord_flip()+
ggtitle("Municipalities EAV & Levy")
agency_data %>% ggplot()+
geom_point(aes(y=total_final_levy, x=cty_cook_eav, col=minor_type))
agency_data %>%
filter(year== 2021) %>%
ggplot()+
geom_point(aes(y=total_final_levy, x=cty_cook_eav, col=major_type))+
ggtitle("Major Types: Agency EAV vs Final Levy for 2021")
agency_data %>%
filter(year== 2021) %>%
ggplot()+
geom_point(aes(y=log(total_final_levy), x=log(cty_cook_eav), col=minor_type))+
ggtitle("Minor Types: Agency EAV vs Final Levy for 2021")
https://www.econometrics-with-r.org/10-5-tferaaseffer.html
“Similar as for heteroskedasticity, autocorrelation invalidates the usual standard error formulas as well as heteroskedasticity-robust standard errors since these are derived under the assumption that there is no autocorrelation. When there is both heteroskedasticity and autocorrelation so-called heteroskedasticity and autocorrelation-consistent (HAC) standard errors need to be used. Clustered standard errors belong to these type of standard errors. They allow for heteroskedasticity and autocorrelated errors within an entity but not correlation across entities.
As shown in the examples throughout this chapter, it is fairly easy to specify usage of clustered standard errors in regression summaries produced by function like coeftest() in conjunction with vcovHC() from the package sandwich. Conveniently, vcovHC() recognizes panel model objects (objects of class plm) and computes clustered standard errors by default.”
The null hypothesis for the Breusch-Pagan test is homoskedasticity.
#bptest(log_levy~log_eav + factor(agency_num) + factor(year), data = balanced_panel)
Controlling for heteroskedasticity: Robust covariance matrix estimation (Sandwich estimator)
The vcovHC function estimates three
heteroskedasticity-consistent covariance estimators:
• “white1” - for general heteroskedasticity but no serial correlation.
Recommended for random effects.
• “white2” - is “white1” restricted to a common variance within groups.
Recommended for random effects.
• “arellano” - both heteroskedasticity and serial correlation.
Recommended for fixed effects.
#coeftest(m1)
#coeftest(m1, vcovHC(fixed, method = "arellano"))