Where to find datasets?: The “datasets” library

library(datasets)

seatbelts <- datasets::Seatbelts

df <- as.data.frame(seatbelts)
# df$Date <- rownames(seatbelts)

df

Load time series modeling library (XTS and Zoo) and using them to decode the data column in the “datasets::Seatbelts” dataset

library(zoo)
library(xts)
# df$Date <- as.yearmon(time(seatbelts))
df$Date <- as.Date(time(seatbelts))

df$law <- as.factor(df$law)
sapply(df,class)
DriversKilled       drivers         front          rear           kms 
    "numeric"     "numeric"     "numeric"     "numeric"     "numeric" 
  PetrolPrice     VanKilled           law          Date 
    "numeric"     "numeric"      "factor"        "Date" 

Exploratory analysis of the difference between classes of law = 1 and law = 0 w.r.t. DriversKilled and VanKilled

boxplot(DriversKilled ~ law, data = df)

t.test(DriversKilled ~ law, data = df) #Establish the statistical significance of the difference of means between law = 1 and law = 0 w.r.t. DriversKilled

    Welch Two Sample t-test

data:  DriversKilled by law
t = 5.1253, df = 29.609, p-value = 1.693e-05
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 15.39892 35.81899
sample estimates:
mean in group 0 mean in group 1 
       125.8698        100.2609 
boxplot(VanKilled ~ law, data = df)

t.test(VanKilled ~ law, data = df) #Establish the statistical significance of the difference of means between law = 1 and law = 0 w.r.t. VanKilled

    Welch Two Sample t-test

data:  VanKilled by law
t = 9.4624, df = 47.965, p-value = 1.508e-12
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 3.474406 5.349366
sample estimates:
mean in group 0 mean in group 1 
       9.585799        5.173913 

Logistic Regression. Use “binomial” to set it as categorical data

model <- glm (formula = law ~ DriversKilled + VanKilled, family = "binomial", data = df)
summary(model)

exp(model$coefficients) # 

Create predictions of the fit logistic model against our original data - each row being an independent evalutation of the model

predictionsOfLawInEffect <- predict(model, newdata = df)
# Categorize the predicted 
predictionsOfLawInEffect_CATEGORICAL <- predictionsOfLawInEffect > -2

print(table(df$law, predictionsOfLawInEffect_CATEGORICAL)) #Confusion matrix
   predictionsOfLawInEffect_CATEGORICAL
    FALSE TRUE
  0   133   36
  1     4   19

Visualize a probability density plot of the predicted odds of the response of law = 1 in the predicted set of evaluated records

ODDS = p / (1 - p); where ODDS = exp(predictionsOfLawInEffect)

=> (1 - p)*ODDS = p

=> p = ODDS/(1+ODDS)

plot(density(exp(predictionsOfLawInEffect))) 

plot(density(exp(predictionsOfLawInEffect)/(1+exp(predictionsOfLawInEffect))))  # plot of the "ODDS" of each row being classified for the law being in effect

print(summary(exp(predictionsOfLawInEffect)/(1+exp(predictionsOfLawInEffect))))
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000343 0.0030743 0.0338102 0.1197917 0.1595388 0.8213848 

Boxplot to find the optimal probability cutoff for dichotimization of the law = 1 status

boxplot(exp(predictionsOfLawInEffect)/(1+exp(predictionsOfLawInEffect)) ~ df$law)

Perform Linear Regression as an example use-case to predicted drivers that died as a function of the law status being in effect

# Linear Regression for REGRESSION of the number of people that died ON the Xs
#  /regressors law-being-in-effect and total number of drivers
# Y = number of people that died
# X = [law, drivers, PetrolPrice]

model2 <- lm (DriversKilled ~ law, data = df)
summary (model2)

Call:
lm(formula = DriversKilled ~ law, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-46.870 -17.870  -5.565  14.130  72.130 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  125.870      1.849  68.082  < 2e-16 ***
law1         -25.609      5.342  -4.794 3.29e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 24.03 on 190 degrees of freedom
Multiple R-squared:  0.1079,    Adjusted R-squared:  0.1032 
F-statistic: 22.98 on 1 and 190 DF,  p-value: 3.288e-06

Export df to a file:

write.csv(df, row.names = F, file = "britishSeatBeltStudy.csv")
LS0tCnRpdGxlOiAiQ2xhc3NpZmljYXRpb24gYW5kIFJlZ3Jlc3Npb24gQW5hbHlzaXMgdXNpbmcgQnJpdGlzaCBTZWF0YmVsdCBEYXRhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyBXaGVyZSB0byBmaW5kIGRhdGFzZXRzPzogVGhlICJkYXRhc2V0cyIgbGlicmFyeQpgYGB7cn0KbGlicmFyeShkYXRhc2V0cykKCnNlYXRiZWx0cyA8LSBkYXRhc2V0czo6U2VhdGJlbHRzCgpkZiA8LSBhcy5kYXRhLmZyYW1lKHNlYXRiZWx0cykKIyBkZiREYXRlIDwtIHJvd25hbWVzKHNlYXRiZWx0cykKCnByaW50KGhlYWQoZGYpKQpgYGAKCgoKIyBMb2FkIHRpbWUgc2VyaWVzIG1vZGVsaW5nIGxpYnJhcnkgKFhUUyBhbmQgWm9vKSBhbmQgdXNpbmcgdGhlbSB0byBkZWNvZGUgdGhlIGRhdGEgY29sdW1uIGluIHRoZSAiZGF0YXNldHM6OlNlYXRiZWx0cyIgZGF0YXNldApgYGB7cn0KbGlicmFyeSh6b28pCmxpYnJhcnkoeHRzKQojIGRmJERhdGUgPC0gYXMueWVhcm1vbih0aW1lKHNlYXRiZWx0cykpCmRmJERhdGUgPC0gYXMuRGF0ZSh0aW1lKHNlYXRiZWx0cykpCgpkZiRsYXcgPC0gYXMuZmFjdG9yKGRmJGxhdykKc2FwcGx5KGRmLGNsYXNzKQpgYGAKCiMjIEV4cGxvcmF0b3J5IGFuYWx5c2lzIG9mIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gY2xhc3NlcyBvZiBsYXcgPSAxIGFuZCBsYXcgPSAwIHcuci50LiBEcml2ZXJzS2lsbGVkIGFuZCBWYW5LaWxsZWQKYGBge3J9CmJveHBsb3QoRHJpdmVyc0tpbGxlZCB+IGxhdywgZGF0YSA9IGRmKQp0LnRlc3QoRHJpdmVyc0tpbGxlZCB+IGxhdywgZGF0YSA9IGRmKSAjRXN0YWJsaXNoIHRoZSBzdGF0aXN0aWNhbCBzaWduaWZpY2FuY2Ugb2YgdGhlIGRpZmZlcmVuY2Ugb2YgbWVhbnMgYmV0d2VlbiBsYXcgPSAxIGFuZCBsYXcgPSAwIHcuci50LiBEcml2ZXJzS2lsbGVkCgpib3hwbG90KFZhbktpbGxlZCB+IGxhdywgZGF0YSA9IGRmKQp0LnRlc3QoVmFuS2lsbGVkIH4gbGF3LCBkYXRhID0gZGYpICNFc3RhYmxpc2ggdGhlIHN0YXRpc3RpY2FsIHNpZ25pZmljYW5jZSBvZiB0aGUgZGlmZmVyZW5jZSBvZiBtZWFucyBiZXR3ZWVuIGxhdyA9IDEgYW5kIGxhdyA9IDAgdy5yLnQuIFZhbktpbGxlZApgYGAKCiMjICBMb2dpc3RpYyBSZWdyZXNzaW9uLiBVc2UgImJpbm9taWFsIiB0byBzZXQgaXQgYXMgY2F0ZWdvcmljYWwgZGF0YQpgYGB7cn0KbW9kZWwgPC0gZ2xtIChmb3JtdWxhID0gbGF3IH4gRHJpdmVyc0tpbGxlZCArIFZhbktpbGxlZCwgZmFtaWx5ID0gImJpbm9taWFsIiwgZGF0YSA9IGRmKQpzdW1tYXJ5KG1vZGVsKQoKZXhwKG1vZGVsJGNvZWZmaWNpZW50cykgIyAKYGBgCgojIyBDcmVhdGUgcHJlZGljdGlvbnMgb2YgdGhlIGZpdCBsb2dpc3RpYyBtb2RlbCBhZ2FpbnN0IG91ciBvcmlnaW5hbCBkYXRhIC0gZWFjaCByb3cgYmVpbmcgYW4gaW5kZXBlbmRlbnQgZXZhbHV0YXRpb24gb2YgdGhlIG1vZGVsCmBgYHtyfQpwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QgPC0gcHJlZGljdChtb2RlbCwgbmV3ZGF0YSA9IGRmKQojIENhdGVnb3JpemUgdGhlIHByZWRpY3RlZCAKcHJlZGljdGlvbnNPZkxhd0luRWZmZWN0X0NBVEVHT1JJQ0FMIDwtIHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCA+IC0yCmBgYAoKCmBgYHtyfQoKcHJpbnQodGFibGUoZGYkbGF3LCBwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3RfQ0FURUdPUklDQUwpKSAjQ29uZnVzaW9uIG1hdHJpeApgYGAKCiMjIFZpc3VhbGl6ZSBhIHByb2JhYmlsaXR5IGRlbnNpdHkgcGxvdCBvZiB0aGUgcHJlZGljdGVkIG9kZHMgb2YgdGhlIHJlc3BvbnNlIG9mIGxhdyA9IDEgaW4gdGhlIHByZWRpY3RlZCBzZXQgb2YgZXZhbHVhdGVkIHJlY29yZHMgCgojIE9ERFMgPSBwIC8gKDEgLSBwKTsgd2hlcmUgT0REUyA9IGV4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpCiMgPT4gKDEgLSBwKSpPRERTID0gcAojID0+IHAgPSBPRERTLygxK09ERFMpCmBgYHtyfQpwbG90KGRlbnNpdHkoZXhwKHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCkpKSAKcGxvdChkZW5zaXR5KGV4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpLygxK2V4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpKSkpICAjIHBsb3Qgb2YgdGhlICJPRERTIiBvZiBlYWNoIHJvdyBiZWluZyBjbGFzc2lmaWVkIGZvciB0aGUgbGF3IGJlaW5nIGluIGVmZmVjdApwcmludChzdW1tYXJ5KGV4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpLygxK2V4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpKSkpCmBgYAoKCiMjIEJveHBsb3QgdG8gZmluZCB0aGUgb3B0aW1hbCBwcm9iYWJpbGl0eSBjdXRvZmYgZm9yIGRpY2hvdGltaXphdGlvbiBvZiB0aGUgbGF3ID0gMSBzdGF0dXMKYGBge3J9CmJveHBsb3QoZXhwKHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCkvKDErZXhwKHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCkpIH4gZGYkbGF3KQpgYGAKCiMjIFBlcmZvcm0gTGluZWFyIFJlZ3Jlc3Npb24gYXMgYW4gZXhhbXBsZSB1c2UtY2FzZSB0byBwcmVkaWN0ZWQgZHJpdmVycyB0aGF0IGRpZWQgYXMgYSBmdW5jdGlvbiBvZiB0aGUgbGF3IHN0YXR1cyBiZWluZyBpbiBlZmZlY3QKYGBge3J9CiMgTGluZWFyIFJlZ3Jlc3Npb24gZm9yIFJFR1JFU1NJT04gb2YgdGhlIG51bWJlciBvZiBwZW9wbGUgdGhhdCBkaWVkIE9OIHRoZSBYcwojICAvcmVncmVzc29ycyBsYXctYmVpbmctaW4tZWZmZWN0IGFuZCB0b3RhbCBudW1iZXIgb2YgZHJpdmVycwojIFkgPSBudW1iZXIgb2YgcGVvcGxlIHRoYXQgZGllZAojIFggPSBbbGF3LCBkcml2ZXJzLCBQZXRyb2xQcmljZV0KCm1vZGVsMiA8LSBsbSAoRHJpdmVyc0tpbGxlZCB+IGxhdywgZGF0YSA9IGRmKQpzdW1tYXJ5IChtb2RlbDIpCgpgYGAKCiMjIEV4cG9ydCBkZiB0byBhIGZpbGU6CmBgYHtyfQp3cml0ZS5jc3YoZGYsIHJvdy5uYW1lcyA9IEYsIGZpbGUgPSAiYnJpdGlzaFNlYXRCZWx0U3R1ZHkuY3N2IikKYGBg