Dataset me

library(mlbench)     # for PimaIndians dataset
library(dplyr)       # for data manipulation (dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(broom)       # for making model summary tidy
library(margins)     # to calculate Average Marginal Effects
library(rcompanion)  # to calculate pseudo R2
library(ROCR)        # to compute and plot Reciever Opering Curve

## 
## Attaching package: 'ROCR'

## The following object is masked from 'package:margins':
## 
##     prediction

dataset<-read.csv("D:/Rencana Tugas Akhir/DLI.csv",sep=";")
dataset

##     X.Y. X.X1. X.X2. X.X3. X.X4. X.X5. X.X6. X.X7.
## 1      1     0     0     2     1     1     1     0
## 2      0     1     0     2     1     0     1     0
## 3      0     1     0     2     0     1     0     0
## 4      0     1     0     2     0     0     1     0
## 5      1     0     0     2     1     1     1     0
## 6      0     1     1     1     1     1     1     2
## 7      0     0     1     0     0     0     1     0
## 8      1     0     0     1     0     1     0     0
## 9      0     1     0     2     1     0     1     0
## 10     0     1     0     2     0     0     1     2
## 11     1     0     1     2     1     1     1     1
## 12     1     0     0     2     1     1     0     1
## 13     0     1     1     1     1     0     1     1
## 14     0     1     0     2     0     0     1     0
## 15     0     0     0     2     0     0     1     0
## 16     0     0     0     0     0     1     1     2
## 17     0     0     1     2     0     0     0     0
## 18     0     1     0     2     0     0     1     0
## 19     1     1     1     2     1     0     1     0
## 20     1     1     0     0     1     0     1     0
## 21     0     1     1     0     0     0     1     0
## 22     0     1     0     0     0     0     0     1
## 23     0     1     1     2     0     1     1     0
## 24     0     0     1     2     1     0     1     0
## 25     0     1     1     2     1     0     1     0
## 26     0     1     0     2     1     1     1     1
## 27     1     1     1     2     1     1     1     1
## 28     1     0     1     1     1     1     1     2
## 29     0     1     0     0     0     1     1     2
## 30     0     0     1     2     1     1     0     0
## 31     0     1     0     2     0     1     1     0
## 32     0     0     1     2     0     0     1     1
## 33     1     0     0     2     1     1     1     2
## 34     1     1     0     2     1     1     1     2
## 35     0     1     1     0     0     1     1     0
## 36     0     0     1     2     1     0     1     1
## 37     0     1     0     2     0     0     1     1
## 38     1     1     1     2     0     0     1     1
## 39     0     0     1     2     0     0     1     0
## 40     0     1     1     2     0     0     1     1
## 41     0     1     0     2     0     0     1     0
## 42     0     1     1     2     0     0     1     1
## 43     0     0     0     2     0     0     1     0
## 44     0     1     0     2     1     1     1     0
## 45     1     0     1     1     0     0     1     2
## 46     0     1     0     2     0     0     1     0
## 47     0     0     0     2     1     1     1     0
## 48     0     1     1     2     1     0     1     2
## 49     0     0     0     0     0     0     0     0
## 50     0     0     0     2     0     0     1     0
## 51     0     0     1     2     0     0     0     1
## 52     1     1     1     2     0     0     1     0
## 53     0     0     0     0     0     0     1     0
## 54     0     1     0     2     1     1     1     0
## 55     0     0     1     2     0     0     1     0
## 56     0     0     0     2     0     1     1     0
## 57     0     1     1     0     0     0     1     1
## 58     0     1     0     2     0     1     1     0
## 59     0     1     0     2     0     0     1     0
## 60     0     0     1     0     0     0     1     0
## 61     0     0     1     2     0     0     1     2
## 62     0     1     1     2     1     0     1     0
## 63     0     0     1     2     0     0     1     2
## 64     1     0     1     0     0     0     0     2
## 65     0     1     1     0     0     0     0     1
## 66     0     0     0     2     1     1     1     0
## 67     0     1     0     0     0     1     0     0
## 68     0     0     0     2     0     0     1     2
## 69     0     0     1     2     0     0     1     0
## 70     0     1     0     1     0     0     0     0
## 71     1     0     0     1     0     0     1     2
## 72     0     0     1     2     0     1     1     0
## 73     0     0     1     2     0     0     1     0
## 74     0     1     1     2     0     0     1     2
## 75     0     0     1     1     0     1     1     0
## 76     0     1     0     2     0     1     1     0
## 77     0     1     1     2     0     0     1     0
## 78     0     0     1     2     0     0     1     1
## 79     0     0     1     0     0     0     0     1
## 80     0     0     0     2     1     0     1     2
## 81     0     1     0     2     1     0     1     0
## 82     0     1     1     0     0     0     1     0
## 83     0     1     1     2     0     0     1     0
## 84     1     0     1     1     0     0     0     2
## 85     0     1     0     0     0     0     0     0
## 86     0     0     0     2     1     0     1     2
## 87     0     1     1     0     1     0     0     0
## 88     0     1     0     2     1     1     1     1
## 89     0     0     1     2     1     0     1     0
## 90     1     1     1     0     0     1     1     2
## 91     0     1     1     2     0     0     1     2
## 92     0     0     1     2     0     1     1     1
## 93     0     1     0     2     0     0     1     2
## 94     0     1     1     2     1     0     1     0
## 95     0     1     1     2     0     0     1     0
## 96     0     0     0     2     0     0     1     0
## 97     0     0     0     2     1     0     0     2
## 98     0     0     0     1     0     0     1     0
## 99     0     0     1     1     0     1     0     0
## 100    0     0     0     2     0     0     1     0

glimpse(dataset)

## Rows: 100
## Columns: 8
## $ X.Y.  <int> 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0~
## $ X.X1. <int> 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1~
## $ X.X2. <int> 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0~
## $ X.X3. <int> 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 0, 0, 0~
## $ X.X4. <int> 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0~
## $ X.X5. <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0~
## $ X.X6. <int> 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0~
## $ X.X7. <int> 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1~

# Total number of rows in the Diabetes data frame
n <- nrow(dataset)
n

## [1] 100

# Number of rows for the training set (80% of the dataset)
n_train <- round(0.80 * n)
# Create a vector of indices which is an 80% random sample
set.seed(123)
train_indices <- sample(1:n, n_train)
# Subset the Diabetes data frame to training indices only
train <- dataset[train_indices, ]
# Exclude the training indices to create the test set
test <- dataset[-train_indices, ]
test

##    X.Y. X.X1. X.X2. X.X3. X.X4. X.X5. X.X6. X.X7.
## 1     1     0     0     2     1     1     1     0
## 2     0     1     0     2     1     0     1     0
## 10    0     1     0     2     0     0     1     2
## 11    1     0     1     2     1     1     1     1
## 24    0     0     1     2     1     0     1     0
## 28    1     0     1     1     1     1     1     2
## 35    0     1     1     0     0     1     1     0
## 37    0     1     0     2     0     0     1     1
## 44    0     1     0     2     1     1     1     0
## 45    1     0     1     1     0     0     1     2
## 48    0     1     1     2     1     0     1     2
## 52    1     1     1     2     0     0     1     0
## 55    0     0     1     2     0     0     1     0
## 56    0     0     0     2     0     1     1     0
## 59    0     1     0     2     0     0     1     0
## 68    0     0     0     2     0     0     1     2
## 74    0     1     1     2     0     0     1     2
## 81    0     1     0     2     1     0     1     0
## 88    0     1     0     2     1     1     1     1
## 98    0     0     0     1     0     0     1     0

paste("train sample size: ", dim(train)[1])

## [1] "train sample size:  80"

paste("test sample size: ", dim(test)[1])

## [1] "test sample size:  20"

model_biner<-glm(X.Y.~ X.X1.+ X.X2.+X.X3.+X.X4.+X.X5.+X.X6.+X.X7.,family=binomial,train,)
summary(model_biner)

## 
## Call:
## glm(formula = X.Y. ~ X.X1. + X.X2. + X.X3. + X.X4. + X.X5. + 
##     X.X6. + X.X7., family = binomial, data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5443  -0.5604  -0.3421  -0.2116   2.4201  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -2.29898    1.05315  -2.183   0.0290 *
## X.X1.       -0.15206    0.73701  -0.206   0.8365  
## X.X2.        0.07293    0.71531   0.102   0.9188  
## X.X3.       -0.39269    0.47888  -0.820   0.4122  
## X.X4.        1.34505    0.78850   1.706   0.0880 .
## X.X5.        1.05194    0.71721   1.467   0.1425  
## X.X6.       -0.62434    0.81836  -0.763   0.4455  
## X.X7.        0.91447    0.40346   2.267   0.0234 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 71.007  on 79  degrees of freedom
## Residual deviance: 56.804  on 72  degrees of freedom
## AIC: 72.804
## 
## Number of Fisher Scoring iterations: 5

nagelkerke(model_biner)

## $Models
##                                                                                            
## Model: "glm, X.Y. ~ X.X1. + X.X2. + X.X3. + X.X4. + X.X5. + X.X6. + X.X7., binomial, train"
## Null:  "glm, X.Y. ~ 1, binomial, train"                                                    
## 
## $Pseudo.R.squared.for.model.vs.null
##                              Pseudo.R.squared
## McFadden                             0.200023
## Cox and Snell (ML)                   0.162671
## Nagelkerke (Cragg and Uhler)         0.276486
## 
## $Likelihood.ratio.test
##  Df.diff LogLik.diff  Chisq  p.value
##       -7     -7.1015 14.203 0.047686
## 
## $Number.of.observations
##          
## Model: 80
## Null:  80
## 
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
## 
## $Warnings
## [1] "None"

#plot(model_biner)
return

## .Primitive("return")

#predict the test dataset
pred <- predict(model_biner, test, type="response")
predicted <- round(pred) # round of the value; >0.5 will convert to 
# 1 else 0
# Creating a contigency table
tab <- table(predicted=predicted, Reference = test$X.Y.)
tab

##          Reference
## predicted  0  1
##         0 15  4
##         1  0  1

accuracy(tab)

## $Models
##   Call           
## 1 "Not supported"
## 2 "Not supported"
## 3 "Not supported"
## 4 "Not supported"
## 
## $Fit.criteria
##   Min.max.accuracy MAE MAPE MSE RMSE NRMSE.mean NRMSE.median
## 1               NA  NA   NA  NA   NA         NA           NA
## 2               NA  NA   NA  NA   NA         NA           NA
## 3               NA  NA   NA  NA   NA         NA           NA
## 4               NA  NA   NA  NA   NA         NA           NA
##   NRMSE.mean.accuracy NRMSE.median.accuracy Efron.r.squared CV.prcnt
## 1                  NA                    NA              NA       NA
## 2                  NA                    NA              NA       NA
## 3                  NA                    NA              NA       NA
## 4                  NA                    NA              NA       NA

# Creating a dataframe of observed and predicted data
act_pred <- data.frame(observed = test$X.Y., predicted = 
                         factor(predicted))
# Calculating Accuracy
accuracy_est <- accuracy(act_pred, observed, predicted)
print(accuracy_est)

## $Models
##   Call           
## 1 "Not supported"
## 2 "Not supported"
## 
## $Fit.criteria
##   Min.max.accuracy MAE MAPE MSE RMSE NRMSE.mean NRMSE.median
## 1               NA  NA   NA  NA   NA         NA           NA
## 2               NA  NA   NA  NA   NA         NA           NA
##   NRMSE.mean.accuracy NRMSE.median.accuracy Efron.r.squared CV.prcnt
## 1                  NA                    NA              NA       NA
## 2                  NA                    NA              NA       NA

# Note: the accuracy( ) function gives rounded estimate i.e. 0.885
pred.rocr <- prediction(pred, test$X.Y.)
eval <- performance(pred.rocr, "acc")
plot(eval)

# Identifying the best cutoff value that maximizes accuracy
max <- which.max(slot(eval, "y.values")[[1]])
acc <- slot(eval, "y.values")[[1]][max] #y.values are accuracy 
#measures
cut <- slot(eval, "x.values")[[1]][max] #x.values are cutoff 
#measures
print(c(Accuracy = acc, Cutoff = cut))

##  Accuracy Cutoff.11 
## 0.8500000 0.4196218

perf_rocr <- performance(pred.rocr, measure = "auc",
                         x.measure = "cutoff")
perf_rocr@y.values[[1]] <- round(perf_rocr@y.values[[1]], digits = 
                                   4)
perf.tpr.fpr.rocr <- performance(pred.rocr, "tpr", "fpr")
plot(perf.tpr.fpr.rocr, colorize=T, 
     main = paste("AUC:", (perf_rocr@y.values)))
abline(a = 0, b = 1)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Dataset me

f1A1

8/25/2022

R Markdown

Including Plots