# Session setup: working directory, packages, and the election-fraud data set.
setwd("~/Documents/Winter 2018/Statistics 2")
library(tidyverse)
library(tidyr)
library(haven)
info <- read_dta("~/Documents/Winter 2018/Statistics 2/afghan_fraud.DTA")

# Install margins only when it is missing. The unconditional
# install.packages("margins") call previously recorded here stopped with
# "Updating loaded packages", and margins() was later reported as not found.
if (!requireNamespace("margins", quietly = TRUE)) {
  install.packages("margins")
}
library("margins")
# Linear probability model: OLS of fraud on election_viol, num_closed_polls,
# dist2kabul and elevation.
fraud_lpm <- lm(
  fraud ~ election_viol + num_closed_polls + dist2kabul + elevation,
  data = info
)

# Fitted values of the LPM; range() reports their minimum and maximum.
vals <- fraud_lpm[["fitted.values"]]
range(vals)
[1] 0.05417908 1.40588290
# Coefficient table and fit statistics for the linear probability model.
summary(fraud_lpm)
Call:
lm(formula = fraud ~ election_viol + num_closed_polls + dist2kabul +
elevation, data = info)
Residuals:
Min 1Q Median 3Q Max
-0.80772 -0.17030 -0.12534 -0.07245 0.94156
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.061e-02 6.688e-02 0.607 0.5441
election_viol 4.583e-01 2.364e-01 1.938 0.0533 .
num_closed_polls 5.717e-03 9.669e-04 5.912 7.49e-09 ***
dist2kabul 1.151e-05 1.159e-04 0.099 0.9209
elevation 3.386e-05 2.258e-05 1.499 0.1346
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3672 on 382 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.1115, Adjusted R-squared: 0.1022
F-statistic: 11.99 on 4 and 382 DF, p-value: 3.472e-09
# 2a: A one-unit increase in the measure of violence in the district in 2009 is associated with a 4.583e-01 (about 0.46) increase in the probability of election fraud, holding the other covariates fixed.
# 2b: See model created. The fitted values range from 0.05 to 1.41. This is problematic because a probability cannot be greater than 1.
# Probit model of fraud on the same covariates as the LPM.
# glm() defaults to family = gaussian (identity link), which simply reproduces
# the lm() fit; a probit needs the binomial family with a probit link.
# NOTE: the summary output recorded below this call came from the earlier
# version without `family` (it reports a gaussian dispersion parameter);
# re-run to refresh it.
fraud_probit <- glm(
  fraud ~ election_viol + num_closed_polls + dist2kabul + elevation,
  family = binomial(link = "probit"),
  data = info
)
summary(fraud_probit)
Call:
glm(formula = fraud ~ election_viol + num_closed_polls + dist2kabul +
elevation, data = info)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.80772 -0.17030 -0.12534 -0.07245 0.94156
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.061e-02 6.688e-02 0.607 0.5441
election_viol 4.583e-01 2.364e-01 1.938 0.0533 .
num_closed_polls 5.717e-03 9.669e-04 5.912 7.49e-09 ***
dist2kabul 1.151e-05 1.159e-04 0.099 0.9209
elevation 3.386e-05 2.258e-05 1.499 0.1346
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 0.134841)
Null deviance: 57.974 on 386 degrees of freedom
Residual deviance: 51.509 on 382 degrees of freedom
(4 observations deleted due to missingness)
AIC: 329.81
Number of Fisher Scoring iterations: 2
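# 2c: Fitted probabilities from a probit model lie between 0 and 1.
# The sign of the coefficient of the number of polling stations (num_closed_polls) in the LPM is positive.
# It is still positive in the glm() output recorded above, and the two estimates are identical (5.717e-03).
# Agreement in sign is expected because both models measure the effect of a variable on a binary outcome.
# NOTE: the estimates match exactly only because that glm() call had no `family` argument, so it fit a
# Gaussian model (see "Dispersion parameter for gaussian family" in the output), i.e. the LPM again.
# A true probit fit reports coefficients on the latent-index scale, so their magnitudes generally differ from the LPM's.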
# 2d:
# e
# Mean of the violence measure over the rows where it is observed.
avg_violence <- mean(info$election_viol, na.rm = TRUE)

# Partial effect of election_viol evaluated at the covariate means:
#   dP(fraud = 1) / d election_viol = g'(xbar' beta) * beta_election_viol,
# where g' is the derivative of the model's inverse link (dnorm for a probit,
# 1 for an identity link). The earlier version multiplied a hand-copied
# coefficient (4.583e-01) by avg_violence, which is not a partial effect.
probit_coefs <- coef(fraud_probit)
# Column means of the design matrix, i.e. over the rows used in estimation.
covariate_means <- colMeans(model.matrix(fraud_probit))
index_at_means <- sum(covariate_means * probit_coefs)
partial_effect <- unname(
  family(fraud_probit)$mu.eta(index_at_means) * probit_coefs["election_viol"]
)

# print() rather than View(): View() opens a data viewer meant for interactive
# use, and the earlier call displayed the mean instead of the partial effect.
print(avg_violence)
print(partial_effect)
# NOTE(review): the earlier conclusion that the partial effect is "very small"
# was based on the old calculation; re-check it against the value printed above.
# f
# Average marginal effects for the fitted model.
# Called through the namespace so it works even when library(margins) did not
# attach the package (the recorded run failed with
# 'could not find function "margins"').
marg <- margins::margins(fraud_probit)
summary(marg)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKICB3b3JkX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKCmBgYHtyfQpzZXR3ZCgifi9Eb2N1bWVudHMvV2ludGVyIDIwMTgvU3RhdGlzdGljcyAyIikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoaGF2ZW4pCmluZm8gPC0gcmVhZF9kdGEoIn4vRG9jdW1lbnRzL1dpbnRlciAyMDE4L1N0YXRpc3RpY3MgMi9hZmdoYW5fZnJhdWQuRFRBIikKaW5zdGFsbC5wYWNrYWdlcygibWFyZ2lucyIpCmxpYnJhcnkoIm1hcmdpbnMiKQoKYGBgCgpgYGB7cn0KZnJhdWRfbHBtID0gbG0oZnJhdWQgfiBlbGVjdGlvbl92aW9sKyBudW1fY2xvc2VkX3BvbGxzKyBkaXN0MmthYnVsKyBlbGV2YXRpb24sZGF0YSA9IGluZm8pCnZhbHMgPC0gZnJhdWRfbHBtJGZpdHRlZC52YWx1ZXMKcmFuZ2UodmFscykKc3VtbWFyeShmcmF1ZF9scG0pCgojIDJhOiBBbiBpbmNyZWFzZSBpbiB0aGUgdW5pdCBvZiB0aGUgbWVhc3VyZSBvZiB2aW9sZW5jZSBpbiB0aGUgZGlzdHJpY3QgaW4gMjAwOSBpcyBhc3NvY2lhdGVkIHdpdGggYSA0LjU4M2UwMSBpbmNyZWFzZSBpbiB0aGUgcHJvYmFiaWxpdHkgb2YgZWxlY3Rpb24gZnJhdWQuIAojIDJiOiBTZWUgbW9kZWwgY3JlYXRlZC4gVGhlIHJhbmdlIG9mIHRoaXMgdmFyaWFiaWxpdHkgaXMgZnJvbSAwLjA1IHRvIDEuNDEuIFRoaXMgaXMgcHJvYmxlbWF0aWMgYmVjYXVzZSBwcm9iYWJpbGl0eSBjYW5ub3QgYmUgZ3JlYXRlciB0aGFuIDEuICAKCmBgYApgYGB7cn0KZnJhdWRfcHJvYml0IDwtIGdsbShmcmF1ZCB+IGVsZWN0aW9uX3Zpb2wrIG51bV9jbG9zZWRfcG9sbHMrIGRpc3Qya2FidWwrIGVsZXZhdGlvbixkYXRhID0gaW5mbykKc3VtbWFyeShmcmF1ZF9wcm9iaXQpCgoKCiMgMmM6IFRoZSByYW5nZSBpcyBmcm9tIDAgdG8gMS4uIAoKIyB0aGUgc2lnbiBvZiB0aGUgY29lZmZpY2llbnQgb2YgdGhlIG51bWJlciBvZiBwb2xsaW5nIHN0YXRpb25zIGluIGxwbSBpcyBwb3NpdGl2ZS4gCiMgaXQgaXMgc3RpbGwgcG9zaXRpdmUgaW4gdGhlIHByb2JpdCBtb2RlbC4gVGhlIHR3byB2YWx1ZXMgYXJlIHRoZSBzYW1lLiBUaGlzIGlzIGV4cGVjdGVkIAojIGJlY2F1c2UgdGhlIGxpbmVhciBtb2RlbCBpcyBhbHNvIG1lYXN1cmluZyB0aGUgZWZmZWN0IG9mIGEgdmFyaWFibGUgb24gYSBiaW5hcnkgdmFyaWFibGUuIAoKIyAyZDogCmBgYApgYGB7cn0KIyBlCgphdmdfdmlvbGVuY2UgPC0gbWVhbihpbmZvJGVsZWN0aW9uX3Zpb2wsIG5hLnJtPVRSVUUpCnBhcnRpYWxfZWZmZWN0IDwtIDQuNTgzZS0wMSAqIGF2Z192aW9sZW5jZQpWaWV3KGF2Z192aW9sZW5jZSkKICMgdGhlcmVmb3IgdGhlIHBhcnRpYWwgYWZmZWN0IGlzIHZlcnkgc21hbGwgCmBgYAoKYGBge3J9CiMgZgptYXJnIDwtIG1hcmdpbnMoZnJhdWRfcHJvYml0KQpzdW1tYXJ5KG1hcmcpCiMgMC40NTgzLCAK
CiNnCiMgbG1wIGFuZCBmIGdpdmUgdXMgdGhlIHNhbWUgbnVtYmVyLCB3aGljaCBpcyBkaWZmZXJlbnQgZnJvbSBlLiBUaGlzIHJlc3VsdCBpcyBleHBlY3RlZCBiZWNhdXNlIHRoZSBsbXAgYW5kIHByb2JpdCBtb2RlbCBib3RoIGdpdmUgdXMgdGhlIGVzdGltYXRlcyBvZiB0aGUgY29lZmZpY2llbnRzIHRoYXQgdGVsbHMgdXMgaG93IHRoZSB2YXJpYWJsZSBpcyBhc3NvY2lhdGVkIHdpdGggdGhlIHByb2JhYmlsaXR5IG9mIHRoZSBiaW5hcnkgZGVwZW5kZW50IHZhcmlhYmxlIGJlaW5nIGVxdWFsIHRvIDEuIGUsIG9uIHRoZSBvdGhlciBoYW5kLCBnaXZlcyB1cyBhbiBlc3RpbWF0ZSBvZiB0aGUgY29lZmZpY2llbnQgYnkgYXZlcmFnaW5nIHRoZSB2YWx1ZXMgb2YgdGhlIHZhcmlhYmxlcyBmaXJzdCwgd2hpY2ggaXMgYW4gb3Bwb3NpdGUgYXBwcm9hY2guIAoKYGBg