Where to find datasets?: The “datasets” library
library(datasets)
seatbelts <- datasets::Seatbelts
df <- as.data.frame(seatbelts)
# df$Date <- rownames(seatbelts)
df
Load time series modeling library (XTS and Zoo) and using them to decode the data column in the “datasets::Seatbelts” dataset
library(zoo)
library(xts)
# df$Date <- as.yearmon(time(seatbelts))
df$Date <- as.Date(time(seatbelts))
df$law <- as.factor(df$law)
sapply(df,class)
DriversKilled drivers front rear kms
"numeric" "numeric" "numeric" "numeric" "numeric"
PetrolPrice VanKilled law Date
"numeric" "numeric" "factor" "Date"
Exploratory analysis of the difference between classes of law = 1 and law = 0 w.r.t. DriversKilled and VanKilled
boxplot(DriversKilled ~ law, data = df)

t.test(DriversKilled ~ law, data = df) #Establish the statistical significance of the difference of means between law = 1 and law = 0 w.r.t. DriversKilled
Welch Two Sample t-test
data: DriversKilled by law
t = 5.1253, df = 29.609, p-value = 1.693e-05
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
15.39892 35.81899
sample estimates:
mean in group 0 mean in group 1
125.8698 100.2609
boxplot(VanKilled ~ law, data = df)

t.test(VanKilled ~ law, data = df) #Establish the statistical significance of the difference of means between law = 1 and law = 0 w.r.t. VanKilled
Welch Two Sample t-test
data: VanKilled by law
t = 9.4624, df = 47.965, p-value = 1.508e-12
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
3.474406 5.349366
sample estimates:
mean in group 0 mean in group 1
9.585799 5.173913
Logistic Regression. Use “binomial” to set it as categorical data
model <- glm (formula = law ~ DriversKilled + VanKilled, family = "binomial", data = df)
summary(model)
exp(model$coefficients) #
Create predictions of the fit logistic model against our original data - each row being an independent evalutation of the model
predictionsOfLawInEffect <- predict(model, newdata = df)
# Categorize the predicted
predictionsOfLawInEffect_CATEGORICAL <- predictionsOfLawInEffect > -2
print(table(df$law, predictionsOfLawInEffect_CATEGORICAL)) #Confusion matrix
predictionsOfLawInEffect_CATEGORICAL
FALSE TRUE
0 133 36
1 4 19
Visualize a probability density plot of the predicted odds of the response of law = 1 in the predicted set of evaluated records
ODDS = p / (1 - p); where ODDS = exp(predictionsOfLawInEffect)
=> (1 - p)*ODDS = p
=> p = ODDS/(1+ODDS)
plot(density(exp(predictionsOfLawInEffect)))

plot(density(exp(predictionsOfLawInEffect)/(1+exp(predictionsOfLawInEffect)))) # plot of the "ODDS" of each row being classified for the law being in effect

print(summary(exp(predictionsOfLawInEffect)/(1+exp(predictionsOfLawInEffect))))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000343 0.0030743 0.0338102 0.1197917 0.1595388 0.8213848
Boxplot to find the optimal probability cutoff for dichotimization of the law = 1 status
boxplot(exp(predictionsOfLawInEffect)/(1+exp(predictionsOfLawInEffect)) ~ df$law)

Export df to a file:
write.csv(df, row.names = F, file = "britishSeatBeltStudy.csv")
LS0tCnRpdGxlOiAiQ2xhc3NpZmljYXRpb24gYW5kIFJlZ3Jlc3Npb24gQW5hbHlzaXMgdXNpbmcgQnJpdGlzaCBTZWF0YmVsdCBEYXRhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyBXaGVyZSB0byBmaW5kIGRhdGFzZXRzPzogVGhlICJkYXRhc2V0cyIgbGlicmFyeQpgYGB7cn0KbGlicmFyeShkYXRhc2V0cykKCnNlYXRiZWx0cyA8LSBkYXRhc2V0czo6U2VhdGJlbHRzCgpkZiA8LSBhcy5kYXRhLmZyYW1lKHNlYXRiZWx0cykKIyBkZiREYXRlIDwtIHJvd25hbWVzKHNlYXRiZWx0cykKCnByaW50KGhlYWQoZGYpKQpgYGAKCgoKIyBMb2FkIHRpbWUgc2VyaWVzIG1vZGVsaW5nIGxpYnJhcnkgKFhUUyBhbmQgWm9vKSBhbmQgdXNpbmcgdGhlbSB0byBkZWNvZGUgdGhlIGRhdGEgY29sdW1uIGluIHRoZSAiZGF0YXNldHM6OlNlYXRiZWx0cyIgZGF0YXNldApgYGB7cn0KbGlicmFyeSh6b28pCmxpYnJhcnkoeHRzKQojIGRmJERhdGUgPC0gYXMueWVhcm1vbih0aW1lKHNlYXRiZWx0cykpCmRmJERhdGUgPC0gYXMuRGF0ZSh0aW1lKHNlYXRiZWx0cykpCgpkZiRsYXcgPC0gYXMuZmFjdG9yKGRmJGxhdykKc2FwcGx5KGRmLGNsYXNzKQpgYGAKCiMjIEV4cGxvcmF0b3J5IGFuYWx5c2lzIG9mIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gY2xhc3NlcyBvZiBsYXcgPSAxIGFuZCBsYXcgPSAwIHcuci50LiBEcml2ZXJzS2lsbGVkIGFuZCBWYW5LaWxsZWQKYGBge3J9CmJveHBsb3QoRHJpdmVyc0tpbGxlZCB+IGxhdywgZGF0YSA9IGRmKQp0LnRlc3QoRHJpdmVyc0tpbGxlZCB+IGxhdywgZGF0YSA9IGRmKSAjRXN0YWJsaXNoIHRoZSBzdGF0aXN0aWNhbCBzaWduaWZpY2FuY2Ugb2YgdGhlIGRpZmZlcmVuY2Ugb2YgbWVhbnMgYmV0d2VlbiBsYXcgPSAxIGFuZCBsYXcgPSAwIHcuci50LiBEcml2ZXJzS2lsbGVkCgpib3hwbG90KFZhbktpbGxlZCB+IGxhdywgZGF0YSA9IGRmKQp0LnRlc3QoVmFuS2lsbGVkIH4gbGF3LCBkYXRhID0gZGYpICNFc3RhYmxpc2ggdGhlIHN0YXRpc3RpY2FsIHNpZ25pZmljYW5jZSBvZiB0aGUgZGlmZmVyZW5jZSBvZiBtZWFucyBiZXR3ZWVuIGxhdyA9IDEgYW5kIGxhdyA9IDAgdy5yLnQuIFZhbktpbGxlZApgYGAKCiMjICBMb2dpc3RpYyBSZWdyZXNzaW9uLiBVc2UgImJpbm9taWFsIiB0byBzZXQgaXQgYXMgY2F0ZWdvcmljYWwgZGF0YQpgYGB7cn0KbW9kZWwgPC0gZ2xtIChmb3JtdWxhID0gbGF3IH4gRHJpdmVyc0tpbGxlZCArIFZhbktpbGxlZCwgZmFtaWx5ID0gImJpbm9taWFsIiwgZGF0YSA9IGRmKQpzdW1tYXJ5KG1vZGVsKQoKZXhwKG1vZGVsJGNvZWZmaWNpZW50cykgIyAKYGBgCgojIyBDcmVhdGUgcHJlZGljdGlvbnMgb2YgdGhlIGZpdCBsb2dpc3RpYyBtb2RlbCBhZ2FpbnN0IG91ciBvcmlnaW5hbCBkYXRhIC0gZWFjaCByb3cgYmVpbmcgYW4gaW5kZXBlbmRlbnQgZXZhbHV0YXRpb24gb2YgdGhlIG1vZGVsCmBgYHtyfQpwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QgPC0gcHJlZGljdChtb2RlbCwgbmV3ZGF0YSA9IGRmKQojIENhdGVnb3JpemUgdGhlIHByZWRpY3RlZCAKcHJlZGljdGlvbnNPZkxhd0luRWZmZWN0X0NBVEVHT1JJQ0FMIDwtIHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCA+IC0yCmBgYAoKCmBgYHtyfQoKcHJpbnQodGFibGUoZGYkbGF3LCBwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3RfQ0FURUdPUklDQUwpKSAjQ29uZnVzaW9uIG1hdHJpeApgYGAKCiMjIFZpc3VhbGl6ZSBhIHByb2JhYmlsaXR5IGRlbnNpdHkgcGxvdCBvZiB0aGUgcHJlZGljdGVkIG9kZHMgb2YgdGhlIHJlc3BvbnNlIG9mIGxhdyA9IDEgaW4gdGhlIHByZWRpY3RlZCBzZXQgb2YgZXZhbHVhdGVkIHJlY29yZHMgCgojIE9ERFMgPSBwIC8gKDEgLSBwKTsgd2hlcmUgT0REUyA9IGV4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpCiMgPT4gKDEgLSBwKSpPRERTID0gcAojID0+IHAgPSBPRERTLygxK09ERFMpCmBgYHtyfQpwbG90KGRlbnNpdHkoZXhwKHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCkpKSAKcGxvdChkZW5zaXR5KGV4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpLygxK2V4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpKSkpICAjIHBsb3Qgb2YgdGhlICJPRERTIiBvZiBlYWNoIHJvdyBiZWluZyBjbGFzc2lmaWVkIGZvciB0aGUgbGF3IGJlaW5nIGluIGVmZmVjdApwcmludChzdW1tYXJ5KGV4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpLygxK2V4cChwcmVkaWN0aW9uc09mTGF3SW5FZmZlY3QpKSkpCmBgYAoKCiMjIEJveHBsb3QgdG8gZmluZCB0aGUgb3B0aW1hbCBwcm9iYWJpbGl0eSBjdXRvZmYgZm9yIGRpY2hvdGltaXphdGlvbiBvZiB0aGUgbGF3ID0gMSBzdGF0dXMKYGBge3J9CmJveHBsb3QoZXhwKHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCkvKDErZXhwKHByZWRpY3Rpb25zT2ZMYXdJbkVmZmVjdCkpIH4gZGYkbGF3KQpgYGAKCiMjIFBlcmZvcm0gTGluZWFyIFJlZ3Jlc3Npb24gYXMgYW4gZXhhbXBsZSB1c2UtY2FzZSB0byBwcmVkaWN0ZWQgZHJpdmVycyB0aGF0IGRpZWQgYXMgYSBmdW5jdGlvbiBvZiB0aGUgbGF3IHN0YXR1cyBiZWluZyBpbiBlZmZlY3QKYGBge3J9CiMgTGluZWFyIFJlZ3Jlc3Npb24gZm9yIFJFR1JFU1NJT04gb2YgdGhlIG51bWJlciBvZiBwZW9wbGUgdGhhdCBkaWVkIE9OIHRoZSBYcwojICAvcmVncmVzc29ycyBsYXctYmVpbmctaW4tZWZmZWN0IGFuZCB0b3RhbCBudW1iZXIgb2YgZHJpdmVycwojIFkgPSBudW1iZXIgb2YgcGVvcGxlIHRoYXQgZGllZAojIFggPSBbbGF3LCBkcml2ZXJzLCBQZXRyb2xQcmljZV0KCm1vZGVsMiA8LSBsbSAoRHJpdmVyc0tpbGxlZCB+IGxhdywgZGF0YSA9IGRmKQpzdW1tYXJ5IChtb2RlbDIpCgpgYGAKCiMjIEV4cG9ydCBkZiB0byBhIGZpbGU6CmBgYHtyfQp3cml0ZS5jc3YoZGYsIHJvdy5uYW1lcyA9IEYsIGZpbGUgPSAiYnJpdGlzaFNlYXRCZWx0U3R1ZHkuY3N2IikKYGBg