library(cvms) #plot confusion matrix
library(broom) # tidy()
library(tibble) # tibble()
library(caret) #confusionMatrix
I will read the csv file from the input dataste.
df<- read.csv("Kid.csv")
table(is.na(df))
##
## FALSE
## 11441
names(df)
## [1] "Buy" "Income" "Is.Female" "Is.Married"
## [5] "Has.College" "Is.Professional" "Is.Retired" "Unemployed"
## [9] "Residence.Length" "Dual.Income" "Minors" "Own"
## [13] "House" "Is.USBorn" "English" "Prev.Child.Mag"
## [17] "Prev.Parent.Mag"
df$Buy<- factor(df$Buy, c("0", "1"))
set.seed(100)
train_idx<- sample(dim(df)[1], 0.8*dim(df)[1] )
train<- df[train_idx, ]
test<- df[-train_idx, ]
dim(train)
## [1] 538 17
dim(test)
## [1] 135 17
logreg<- glm(Buy ~ ., data= train, family= "binomial" )
summary (logreg)
##
## Call:
## glm(formula = Buy ~ ., family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.30667 -0.08190 -0.00856 -0.00135 2.70709
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.829e+01 2.600e+00 -7.035 1.99e-12 ***
## Income 2.005e-04 2.723e-05 7.364 1.78e-13 ***
## Is.Female 1.522e+00 5.262e-01 2.892 0.003823 **
## Is.Married -1.599e-01 6.571e-01 -0.243 0.807757
## Has.College -9.530e-02 4.969e-01 -0.192 0.847914
## Is.Professional 2.755e-01 5.322e-01 0.518 0.604727
## Is.Retired -1.189e+00 9.665e-01 -1.230 0.218663
## Unemployed 1.019e+00 4.594e+00 0.222 0.824521
## Residence.Length 3.626e-02 1.642e-02 2.208 0.027268 *
## Dual.Income 1.221e+00 6.267e-01 1.949 0.051326 .
## Minors 1.330e+00 5.397e-01 2.464 0.013732 *
## Own 1.224e+00 6.278e-01 1.950 0.051210 .
## House -8.633e-01 7.041e-01 -1.226 0.220164
## Is.USBorn 2.160e+00 6.482e-01 3.332 0.000862 ***
## English 1.024e+00 1.020e+00 1.004 0.315417
## Prev.Child.Mag 2.057e+00 9.406e-01 2.187 0.028733 *
## Prev.Parent.Mag 7.776e-01 7.416e-01 1.049 0.294407
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 498.50 on 537 degrees of freedom
## Residual deviance: 143.15 on 521 degrees of freedom
## AIC: 177.15
##
## Number of Fisher Scoring iterations: 9
df_reduced<- df[, c(1, 2,3,10, 11, 14, 16)]
train_idx<- sample(dim(df_reduced)[1], 0.8*dim(df_reduced)[1] )
train<- df_reduced[train_idx, ]
test<- df_reduced[-train_idx, ]
dim(train)
## [1] 538 7
dim(test)
## [1] 135 7
logreg<- glm(Buy ~ ., data= train, family= "binomial" )
summary (logreg)
##
## Call:
## glm(formula = Buy ~ ., family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.35908 -0.14094 -0.02498 -0.00647 2.64045
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.347e+01 1.578e+00 -8.532 < 2e-16 ***
## Income 1.664e-04 1.895e-05 8.781 < 2e-16 ***
## Is.Female 1.335e+00 4.207e-01 3.173 0.00151 **
## Dual.Income 1.185e+00 4.061e-01 2.918 0.00352 **
## Minors 8.735e-01 4.160e-01 2.100 0.03574 *
## Is.USBorn 1.689e+00 5.531e-01 3.053 0.00227 **
## Prev.Child.Mag 1.499e+00 7.035e-01 2.131 0.03311 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 522.54 on 537 degrees of freedom
## Residual deviance: 174.26 on 531 degrees of freedom
## AIC: 188.26
##
## Number of Fisher Scoring iterations: 8
pr<- predict(logreg, test, type= "response")
pr.classes<- ifelse(pr > 0.5, "1", "0")
pr.classes<- factor(pr.classes, c("0", "1"))
cm<- confusionMatrix(pr.classes, test$Buy)
d_binomial <- tibble("target" = test$Buy,"prediction" = pr.classes)
basic_table <- table(d_binomial)
cfm <- tidy(basic_table)
plot_confusion_matrix(cfm, target_col = "target", prediction_col = "prediction", counts_col = "n")
Logistic regression: StatQuest - Youtube https://www.youtube.com/watch?v=C4N3_XJJ-jU&t=36s
Linear models https://www.statmethods.net/advstats/glm.html
Logistic regression http://www.sthda.com/english/articles/36-classification-methods-essentials/151-logistic-regression-essentials-in-r/ Plotting confusion matrix https://cran.r-project.org/web/packages/cvms/vignettes/Creating_a_confusion_matrix.html