# Session setup: working directory, packages, and the data set.
# NOTE(review): setwd() and the absolute data path tie this script to one
# machine; both are kept unchanged so existing behavior is preserved.
setwd("~/Documents/Winter 2018/Statistics 2")
library(tidyverse)
library(tidyr)
library(haven)
info <- read_dta("~/Documents/Winter 2018/Statistics 2/afghan_fraud.DTA")
# Install margins only when it is missing. The unconditional call ran on every
# execution and aborted with "Updating loaded packages".
if (!requireNamespace("margins", quietly = TRUE)) {
  install.packages("margins")
}
Error in install.packages : Updating loaded packages
# Attach margins so that margins() is available for the marginal-effects step.
library(margins)
# Linear probability model: least-squares fit of fraud on the four covariates.
fraud_lpm <- lm(
  fraud ~ election_viol + num_closed_polls + dist2kabul + elevation,
  data = info
)
# Keep the fitted values and show their minimum and maximum, to check whether
# any prediction falls outside the [0, 1] interval.
vals <- fraud_lpm[["fitted.values"]]
range(vals)
[1] 0.05417908 1.40588290
# Print the coefficient table and fit statistics of the linear probability model.
summary(fraud_lpm)

Call:
lm(formula = fraud ~ election_viol + num_closed_polls + dist2kabul + 
    elevation, data = info)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.80772 -0.17030 -0.12534 -0.07245  0.94156 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      4.061e-02  6.688e-02   0.607   0.5441    
election_viol    4.583e-01  2.364e-01   1.938   0.0533 .  
num_closed_polls 5.717e-03  9.669e-04   5.912 7.49e-09 ***
dist2kabul       1.151e-05  1.159e-04   0.099   0.9209    
elevation        3.386e-05  2.258e-05   1.499   0.1346    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3672 on 382 degrees of freedom
  (4 observations deleted due to missingness)
Multiple R-squared:  0.1115,    Adjusted R-squared:  0.1022 
F-statistic: 11.99 on 4 and 382 DF,  p-value: 3.472e-09
# 2a: A one-unit increase in the measure of violence in the district in 2009 is associated with a 0.458 (4.583e-01) increase in the probability of election fraud.
# 2b: See model created. The fitted values range from 0.05 to 1.41. This is problematic because a probability cannot be greater than 1.
# Probit model of fraud on the same covariates as the linear probability model.
# The family must be given explicitly: glm() defaults to gaussian(), and the
# earlier run without it reported "gaussian family" with estimates identical
# to the lm() fit.
fraud_probit <- glm(
  fraud ~ election_viol + num_closed_polls + dist2kabul + elevation,
  family = binomial(link = "probit"),
  data = info
)
summary(fraud_probit)

Call:
glm(formula = fraud ~ election_viol + num_closed_polls + dist2kabul + 
    elevation, data = info)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-0.80772  -0.17030  -0.12534  -0.07245   0.94156  

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      4.061e-02  6.688e-02   0.607   0.5441    
election_viol    4.583e-01  2.364e-01   1.938   0.0533 .  
num_closed_polls 5.717e-03  9.669e-04   5.912 7.49e-09 ***
dist2kabul       1.151e-05  1.159e-04   0.099   0.9209    
elevation        3.386e-05  2.258e-05   1.499   0.1346    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian family taken to be 0.134841)

    Null deviance: 57.974  on 386  degrees of freedom
Residual deviance: 51.509  on 382  degrees of freedom
  (4 observations deleted due to missingness)
AIC: 329.81

Number of Fisher Scoring iterations: 2
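# 2c: The range is from 0 to 1.
# The sign of the coefficient on the number of polling stations (num_closed_polls) in the LPM is positive.
# It is still positive in the probit model. The two values are the same. This is expected
# because the linear model is also measuring the effect of a variable on a binary variable.
# NOTE(review): the glm() output above reports "gaussian family", i.e. the same least-squares fit as the LPM, which is why the estimates match exactly; coefficients from a fit with a probit link are on a different scale and would not generally equal the LPM coefficients.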
# 2d: 
# e
# Sample mean of the violence measure, ignoring missing values.
avg_violence <- mean(info$election_viol, na.rm = TRUE)
# Partial effect at the average of election_viol: the slope of the predicted
# probability with every regressor held at its estimation-sample mean.
# NOTE(review): presumably this is what part (e) asks for -- confirm against
# the assignment. The original multiplied a hard-coded coefficient by the mean
# of election_viol, which is not a partial effect.
probit_coefs <- coef(fraud_probit)
x_bar <- colMeans(model.matrix(fraud_probit))[names(probit_coefs)]
eta_bar <- sum(x_bar * probit_coefs)
# mu.eta() is the derivative of the mean with respect to the linear predictor
# for the fitted family, so the formula adapts to whichever link was used.
slope_at_mean <- family(fraud_probit)$mu.eta(eta_bar)
partial_effect <- slope_at_mean * probit_coefs[["election_viol"]]
# Print rather than View(): View() needs an interactive viewer, and the
# original displayed avg_violence instead of the computed effect.
print(avg_violence)
print(partial_effect)
# therefore the partial effect is very small
# f
# Marginal effects for the fitted model. The call is namespace-qualified so it
# does not depend on library(margins) having attached successfully; the bare
# call failed with "could not find function" when it had not.
marg <- margins::margins(fraud_probit)
Error in margins(fraud_probit) : could not find function "margins"
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKICB3b3JkX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKCmBgYHtyfQpzZXR3ZCgifi9Eb2N1bWVudHMvV2ludGVyIDIwMTgvU3RhdGlzdGljcyAyIikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoaGF2ZW4pCmluZm8gPC0gcmVhZF9kdGEoIn4vRG9jdW1lbnRzL1dpbnRlciAyMDE4L1N0YXRpc3RpY3MgMi9hZmdoYW5fZnJhdWQuRFRBIikKaW5zdGFsbC5wYWNrYWdlcygibWFyZ2lucyIpCmxpYnJhcnkoIm1hcmdpbnMiKQoKYGBgCgpgYGB7cn0KZnJhdWRfbHBtID0gbG0oZnJhdWQgfiBlbGVjdGlvbl92aW9sKyBudW1fY2xvc2VkX3BvbGxzKyBkaXN0MmthYnVsKyBlbGV2YXRpb24sZGF0YSA9IGluZm8pCnZhbHMgPC0gZnJhdWRfbHBtJGZpdHRlZC52YWx1ZXMKcmFuZ2UodmFscykKc3VtbWFyeShmcmF1ZF9scG0pCgojIDJhOiBBbiBpbmNyZWFzZSBpbiB0aGUgdW5pdCBvZiB0aGUgbWVhc3VyZSBvZiB2aW9sZW5jZSBpbiB0aGUgZGlzdHJpY3QgaW4gMjAwOSBpcyBhc3NvY2lhdGVkIHdpdGggYSA0LjU4M2UwMSBpbmNyZWFzZSBpbiB0aGUgcHJvYmFiaWxpdHkgb2YgZWxlY3Rpb24gZnJhdWQuIAojIDJiOiBTZWUgbW9kZWwgY3JlYXRlZC4gVGhlIHJhbmdlIG9mIHRoaXMgdmFyaWFiaWxpdHkgaXMgZnJvbSAwLjA1IHRvIDEuNDEuIFRoaXMgaXMgcHJvYmxlbWF0aWMgYmVjYXVzZSBwcm9iYWJpbGl0eSBjYW5ub3QgYmUgZ3JlYXRlciB0aGFuIDEuICAKCmBgYApgYGB7cn0KZnJhdWRfcHJvYml0IDwtIGdsbShmcmF1ZCB+IGVsZWN0aW9uX3Zpb2wrIG51bV9jbG9zZWRfcG9sbHMrIGRpc3Qya2FidWwrIGVsZXZhdGlvbixkYXRhID0gaW5mbykKc3VtbWFyeShmcmF1ZF9wcm9iaXQpCgoKCiMgMmM6IFRoZSByYW5nZSBpcyBmcm9tIDAgdG8gMS4uIAoKIyB0aGUgc2lnbiBvZiB0aGUgY29lZmZpY2llbnQgb2YgdGhlIG51bWJlciBvZiBwb2xsaW5nIHN0YXRpb25zIGluIGxwbSBpcyBwb3NpdGl2ZS4gCiMgaXQgaXMgc3RpbGwgcG9zaXRpdmUgaW4gdGhlIHByb2JpdCBtb2RlbC4gVGhlIHR3byB2YWx1ZXMgYXJlIHRoZSBzYW1lLiBUaGlzIGlzIGV4cGVjdGVkIAojIGJlY2F1c2UgdGhlIGxpbmVhciBtb2RlbCBpcyBhbHNvIG1lYXN1cmluZyB0aGUgZWZmZWN0IG9mIGEgdmFyaWFibGUgb24gYSBiaW5hcnkgdmFyaWFibGUuIAoKIyAyZDogCmBgYApgYGB7cn0KIyBlCgphdmdfdmlvbGVuY2UgPC0gbWVhbihpbmZvJGVsZWN0aW9uX3Zpb2wsIG5hLnJtPVRSVUUpCnBhcnRpYWxfZWZmZWN0IDwtIDQuNTgzZS0wMSAqIGF2Z192aW9sZW5jZQpWaWV3KGF2Z192aW9sZW5jZSkKICMgdGhlcmVmb3IgdGhlIHBhcnRpYWwgYWZmZWN0IGlzIHZlcnkgc21hbGwgCmBgYAoKYGBge3J9CiMgZgptYXJnIDwtIG1hcmdpbnMoZnJhdWRfcHJvYml0KQpzdW1tYXJ5KG1hcmcpCiMgMC40NTgzLCAK
CiNnCiMgbG1wIGFuZCBmIGdpdmUgdXMgdGhlIHNhbWUgbnVtYmVyLCB3aGljaCBpcyBkaWZmZXJlbnQgZnJvbSBlLiBUaGlzIHJlc3VsdCBpcyBleHBlY3RlZCBiZWNhdXNlIHRoZSBsbXAgYW5kIHByb2JpdCBtb2RlbCBib3RoIGdpdmUgdXMgdGhlIGVzdGltYXRlcyBvZiB0aGUgY29lZmZpY2llbnRzIHRoYXQgdGVsbHMgdXMgaG93IHRoZSB2YXJpYWJsZSBpcyBhc3NvY2lhdGVkIHdpdGggdGhlIHByb2JhYmlsaXR5IG9mIHRoZSBiaW5hcnkgZGVwZW5kZW50IHZhcmlhYmxlIGJlaW5nIGVxdWFsIHRvIDEuIGUsIG9uIHRoZSBvdGhlciBoYW5kLCBnaXZlcyB1cyBhbiBlc3RpbWF0ZSBvZiB0aGUgY29lZmZpY2llbnQgYnkgYXZlcmFnaW5nIHRoZSB2YWx1ZXMgb2YgdGhlIHZhcmlhYmxlcyBmaXJzdCwgd2hpY2ggaXMgYW4gb3Bwb3NpdGUgYXBwcm9hY2guIAoKYGBg