Introduction

The dataset contains information collected from an online food ordering platform over a period of time.

It shows Demographic Information, Location Information and Order details.

Attributes:

Demographic Information:

Location Information:

Order Details:

Load the Dataset

library(readr)
onlinefood <- read_csv("~/Downloads/CS 583/R_Project/onlinefood.csv")
## Rows: 388 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Gender, Marital Status, Occupation, Monthly Income, Educational Qua...
## dbl (5): Age, Family size, latitude, longitude, Pin code
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
onlinefood
## # A tibble: 388 × 12
##      Age Gender `Marital Status` Occupation `Monthly Income`
##    <dbl> <chr>  <chr>            <chr>      <chr>           
##  1    20 Female Single           Student    No Income       
##  2    24 Female Single           Student    Below Rs.10000  
##  3    22 Male   Single           Student    Below Rs.10000  
##  4    22 Female Single           Student    No Income       
##  5    22 Male   Single           Student    Below Rs.10000  
##  6    27 Female Married          Employee   More than 50000 
##  7    22 Male   Single           Student    No Income       
##  8    24 Female Single           Student    No Income       
##  9    23 Female Single           Student    No Income       
## 10    23 Female Single           Student    No Income       
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <chr>, `Family size` <dbl>,
## #   latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <chr>,
## #   Feedback <chr>

Analysis

dim(onlinefood)
## [1] 388  12
summary(onlinefood)
##       Age           Gender          Marital Status      Occupation       
##  Min.   :18.00   Length:388         Length:388         Length:388        
##  1st Qu.:23.00   Class :character   Class :character   Class :character  
##  Median :24.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :24.63                                                           
##  3rd Qu.:26.00                                                           
##  Max.   :33.00                                                           
##  Monthly Income     Educational Qualifications  Family size       latitude    
##  Length:388         Length:388                 Min.   :1.000   Min.   :12.87  
##  Class :character   Class :character           1st Qu.:2.000   1st Qu.:12.94  
##  Mode  :character   Mode  :character           Median :3.000   Median :12.98  
##                                                Mean   :3.281   Mean   :12.97  
##                                                3rd Qu.:4.000   3rd Qu.:13.00  
##                                                Max.   :6.000   Max.   :13.10  
##    longitude        Pin code         Output            Feedback        
##  Min.   :77.48   Min.   :560001   Length:388         Length:388        
##  1st Qu.:77.57   1st Qu.:560011   Class :character   Class :character  
##  Median :77.59   Median :560034   Mode  :character   Mode  :character  
##  Mean   :77.60   Mean   :560040                                        
##  3rd Qu.:77.63   3rd Qu.:560068                                        
##  Max.   :77.76   Max.   :560109
Check for missing values (NAs) and data types.
any(is.na(onlinefood)) 
## [1] FALSE
sapply(onlinefood, class)
##                        Age                     Gender 
##                  "numeric"                "character" 
##             Marital Status                 Occupation 
##                "character"                "character" 
##             Monthly Income Educational Qualifications 
##                "character"                "character" 
##                Family size                   latitude 
##                  "numeric"                  "numeric" 
##                  longitude                   Pin code 
##                  "numeric"                  "numeric" 
##                     Output                   Feedback 
##                "character"                "character"
Checking Proportions
#checking proportions
table(onlinefood$Occupation)
## 
##       Employee     House wife Self Employeed        Student 
##            118              9             54            207
prop.table(table(onlinefood$Occupation))
## 
##       Employee     House wife Self Employeed        Student 
##     0.30412371     0.02319588     0.13917526     0.53350515
table(onlinefood$`Monthly Income`)
## 
##  10001 to 25000  25001 to 50000  Below Rs.10000 More than 50000       No Income 
##              45              69              25              62             187
prop.table(table(onlinefood$`Monthly Income`))
## 
##  10001 to 25000  25001 to 50000  Below Rs.10000 More than 50000       No Income 
##      0.11597938      0.17783505      0.06443299      0.15979381      0.48195876
table(onlinefood$`Marital Status`)
## 
##           Married Prefer not to say            Single 
##               108                12               268
prop.table(table(onlinefood$`Marital Status`))
## 
##           Married Prefer not to say            Single 
##        0.27835052        0.03092784        0.69072165
table(onlinefood$`Educational Qualifications`)
## 
##      Graduate          Ph.D Post Graduate        School    Uneducated 
##           177            23           174            12             2
prop.table(table(onlinefood$`Educational Qualifications`))
## 
##      Graduate          Ph.D Post Graduate        School    Uneducated 
##   0.456185567   0.059278351   0.448453608   0.030927835   0.005154639
Standard Deviation
#standard deviation
# Apply sd() to every column. Only the numeric columns yield a value; the
# character columns come back as NA in the output below.
sapply(onlinefood, sd)
##                        Age                     Gender 
##                 2.97559266                         NA 
##             Marital Status                 Occupation 
##                         NA                         NA 
##             Monthly Income Educational Qualifications 
##                         NA                         NA 
##                Family size                   latitude 
##                 1.35102494                 0.04448925 
##                  longitude                   Pin code 
##                 0.05135392                31.39960871 
##                     Output                   Feedback 
##                         NA                         NA
Correlation
cor(onlinefood[c('Age', 'Family size', 'latitude', 'longitude', 'Pin code')])
##                     Age  Family size     latitude   longitude     Pin code
## Age         1.000000000  0.169981512  0.006594549  0.04749956  0.137294125
## Family size 0.169981512  1.000000000 -0.053536733  0.07012644 -0.009402396
## latitude    0.006594549 -0.053536733  1.000000000 -0.14423361 -0.201813497
## longitude   0.047499559  0.070126439 -0.144233613  1.00000000  0.156119479
## Pin code    0.137294125 -0.009402396 -0.201813497  0.15611948  1.000000000
pairs(onlinefood[c('Age', 'Family size', 'latitude', 'longitude', 'Pin code')])

library(psych)
pairs.panels(onlinefood[c('Age', 'Family size', 'latitude', 'longitude', 'Pin code')])

Educational Qualifications Count
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Bar chart of the number of respondents per educational qualification, with
# each bar's count printed just above it.
ggplot(
  data = onlinefood,
  aes(x = `Educational Qualifications`, fill = `Educational Qualifications`)
) +
  geom_bar(stat = "count", color = "black") +
  # after_stat() replaces the deprecated stat() helper for referring to a
  # variable computed by the stat layer.
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(x = "Educational Qualifications", y = "Count", title = "Educational Qualifications Count") +
  # The fill colour repeats the x axis, so the legend is hidden; "none"
  # replaces the deprecated guides(fill = FALSE).
  guides(fill = "none")

Monthly Income
# Bar chart of the number of respondents per monthly income bracket, with
# each bar's count printed just above it.
ggplot(
  data = onlinefood,
  aes(x = `Monthly Income`, fill = `Monthly Income`)
) +
  geom_bar(stat = "count", color = "black") +
  # after_stat() replaces the deprecated stat() helper for referring to a
  # variable computed by the stat layer.
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(x = "Monthly Income", y = "Count", title = "Monthly Income Count") +
  # The fill colour repeats the x axis, so the legend is hidden; "none"
  # replaces the deprecated guides(fill = FALSE).
  guides(fill = "none")

Age Distribution
# Histogram of respondent ages using one-year bins, with the number of
# respondents at each age printed above the bar.
ggplot(data = onlinefood, aes(x = Age)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  # after_stat() replaces the deprecated stat() helper for referring to a
  # variable computed by the stat layer.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    color = "black"
  ) +
  labs(x = "Age", y = "Frequency", title = "Age Distribution")

Feedback based on output
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(explore)
## 
## Attaching package: 'explore'
## The following object is masked from 'package:psych':
## 
##     describe
onlinefood %>% explore(Feedback, target=Output)

  • Positive yes: 94% of the people who said yes gave positive feedback, possibly due to timely arrival of their food.

  • Positive no: 39.1% of the people who said no gave positive feedback, possibly due to satisfactory resolution of delivery issues or order cancellation.

  • Negative yes: 6% of the people who said yes had a negative feedback possibly due to complications with food delivery or dislike for the food.

  • Negative no: 60.9% of the people who said no had a negative feedback possibly due to order cancellation or the order never arriving.

Feedback based on gender
onlinefood %>% explore(Feedback, target=Gender)

  • The females had slightly more positive feedback (83.7%) than the males (80.2%).

  • The males had more negative feedback (19.8%) than the females (16.3%).

  • However, there are more males (222) than females (166) in this dataset, so even though the females have a higher percentage of positive feedback, the number of males giving positive feedback is still higher than the number of females.

Family Size Distribution
# Histogram of family sizes using unit-width bins, with the number of
# respondents for each family size printed above the bar.
ggplot(data = onlinefood, aes(x = `Family size`)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  # after_stat() replaces the deprecated stat() helper for referring to a
  # variable computed by the stat layer.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    color = "black"
  ) +
  labs(x = "Family Size", y = "Frequency", title = "Family Size Distribution")

  • The majority of families that order food have between 2 and 4 members, which possibly indicates a couple, a single parent with one child, or two parents with one or two children, etc.

Data Preparation

online_food <- onlinefood
online_food
## # A tibble: 388 × 12
##      Age Gender `Marital Status` Occupation `Monthly Income`
##    <dbl> <chr>  <chr>            <chr>      <chr>           
##  1    20 Female Single           Student    No Income       
##  2    24 Female Single           Student    Below Rs.10000  
##  3    22 Male   Single           Student    Below Rs.10000  
##  4    22 Female Single           Student    No Income       
##  5    22 Male   Single           Student    Below Rs.10000  
##  6    27 Female Married          Employee   More than 50000 
##  7    22 Male   Single           Student    No Income       
##  8    24 Female Single           Student    No Income       
##  9    23 Female Single           Student    No Income       
## 10    23 Female Single           Student    No Income       
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <chr>, `Family size` <dbl>,
## #   latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <chr>,
## #   Feedback <chr>
Factors
# Convert every categorical character column of online_food to a factor.
# Each column is inspected with str() before and after the conversion; the
# levels are spelled out explicitly and are used as the labels.

# Gender
str(online_food$Gender)
##  chr [1:388] "Female" "Female" "Male" "Female" "Male" "Female" "Male" ...
online_food$Gender <- factor(
  online_food$Gender,
  levels = c("Female", "Male")
)
str(online_food$Gender)
##  Factor w/ 2 levels "Female","Male": 1 1 2 1 2 1 2 1 1 1 ...

# Marital Status
str(online_food$`Marital Status`)
##  chr [1:388] "Single" "Single" "Single" "Single" "Single" "Married" ...
online_food$`Marital Status` <- factor(
  online_food$`Marital Status`,
  levels = c("Married", "Prefer not to say", "Single")
)
str(online_food$`Marital Status`)
##  Factor w/ 3 levels "Married","Prefer not to say",..: 3 3 3 3 3 1 3 3 3 3 ...

# Occupation
str(online_food$Occupation)
##  chr [1:388] "Student" "Student" "Student" "Student" "Student" "Employee" ...
online_food$Occupation <- factor(
  online_food$Occupation,
  levels = c("Employee", "House wife", "Self Employeed", "Student")
)
str(online_food$Occupation)
##  Factor w/ 4 levels "Employee","House wife",..: 4 4 4 4 4 1 4 4 4 4 ...

# Monthly Income
str(online_food$`Monthly Income`)
##  chr [1:388] "No Income" "Below Rs.10000" "Below Rs.10000" "No Income" ...
online_food$`Monthly Income` <- factor(
  online_food$`Monthly Income`,
  levels = c(
    "10001 to 25000", "25001 to 50000", "Below Rs.10000",
    "More than 50000", "No Income"
  )
)
str(online_food$`Monthly Income`)
##  Factor w/ 5 levels "10001 to 25000",..: 5 3 3 5 3 4 5 5 5 5 ...

# Educational Qualifications
str(online_food$`Educational Qualifications`)
##  chr [1:388] "Post Graduate" "Graduate" "Post Graduate" "Graduate" ...
online_food$`Educational Qualifications` <- factor(
  online_food$`Educational Qualifications`,
  levels = c("Graduate", "Ph.D", "Post Graduate", "School", "Uneducated")
)
str(online_food$`Educational Qualifications`)
##  Factor w/ 5 levels "Graduate","Ph.D",..: 3 1 3 1 3 3 1 3 3 3 ...

# Output
str(online_food$Output)
##  chr [1:388] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" ...
online_food$Output <- factor(
  online_food$Output,
  levels = c("No", "Yes")
)
str(online_food$Output)
##  Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...

# Feedback
str(online_food$Feedback)
##  chr [1:388] "Positive" "Positive" "Negative" "Positive" "Positive" ...
online_food$Feedback <- factor(
  online_food$Feedback,
  levels = c("Negative", "Positive")
)
str(online_food$Feedback)
##  Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
  • Check to see if everything matches
str(online_food)
## spc_tbl_ [388 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Age                       : num [1:388] 20 24 22 22 22 27 22 24 23 23 ...
##  $ Gender                    : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 1 2 1 1 1 ...
##  $ Marital Status            : Factor w/ 3 levels "Married","Prefer not to say",..: 3 3 3 3 3 1 3 3 3 3 ...
##  $ Occupation                : Factor w/ 4 levels "Employee","House wife",..: 4 4 4 4 4 1 4 4 4 4 ...
##  $ Monthly Income            : Factor w/ 5 levels "10001 to 25000",..: 5 3 3 5 3 4 5 5 5 5 ...
##  $ Educational Qualifications: Factor w/ 5 levels "Graduate","Ph.D",..: 3 1 3 1 3 3 1 3 3 3 ...
##  $ Family size               : num [1:388] 4 3 3 6 4 2 3 3 2 4 ...
##  $ latitude                  : num [1:388] 13 13 13 12.9 13 ...
##  $ longitude                 : num [1:388] 77.6 77.6 77.7 77.6 77.6 ...
##  $ Pin code                  : num [1:388] 560001 560009 560017 560019 560010 ...
##  $ Output                    : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Feedback                  : Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Age = col_double(),
##   ..   Gender = col_character(),
##   ..   `Marital Status` = col_character(),
##   ..   Occupation = col_character(),
##   ..   `Monthly Income` = col_character(),
##   ..   `Educational Qualifications` = col_character(),
##   ..   `Family size` = col_double(),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   `Pin code` = col_double(),
##   ..   Output = col_character(),
##   ..   Feedback = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
Dependent Variable
online_food$Feedback
##   [1] Positive Positive Negative Positive Positive Positive Positive Positive
##   [9] Positive Positive Positive Negative Positive Positive Positive Positive
##  [17] Positive Negative Negative Positive Positive Positive Positive Positive
##  [25] Positive Positive Positive Positive Positive Positive Positive Positive
##  [33] Positive Positive Positive Positive Positive Negative Positive Positive
##  [41] Negative Positive Positive Positive Positive Positive Positive Positive
##  [49] Negative Positive Positive Positive Positive Positive Positive Positive
##  [57] Positive Positive Positive Positive Positive Positive Positive Positive
##  [65] Positive Positive Positive Positive Positive Positive Positive Positive
##  [73] Positive Positive Positive Positive Positive Positive Positive Positive
##  [81] Positive Positive Positive Positive Positive Positive Positive Positive
##  [89] Positive Negative Positive Positive Positive Positive Positive Positive
##  [97] Positive Positive Positive Positive Positive Positive Positive Positive
## [105] Negative Positive Positive Positive Positive Positive Positive Positive
## [113] Positive Positive Positive Negative Positive Positive Positive Positive
## [121] Positive Positive Positive Negative Negative Positive Positive Positive
## [129] Positive Positive Positive Positive Positive Negative Positive Positive
## [137] Positive Negative Positive Positive Positive Negative Positive Positive
## [145] Negative Positive Positive Positive Positive Positive Positive Positive
## [153] Positive Positive Positive Positive Positive Positive Negative Positive
## [161] Positive Negative Positive Positive Positive Positive Negative Positive
## [169] Positive Negative Positive Positive Positive Positive Positive Positive
## [177] Positive Negative Negative Positive Positive Negative Positive Positive
## [185] Positive Positive Positive Positive Negative Positive Negative Positive
## [193] Positive Positive Positive Positive Positive Positive Positive Negative
## [201] Positive Positive Positive Positive Positive Positive Negative Positive
## [209] Positive Negative Negative Positive Negative Positive Positive Negative
## [217] Positive Negative Positive Positive Positive Positive Positive Positive
## [225] Positive Positive Positive Positive Negative Positive Positive Positive
## [233] Negative Positive Positive Negative Positive Positive Positive Positive
## [241] Positive Positive Positive Positive Negative Positive Negative Positive
## [249] Negative Positive Positive Positive Positive Positive Positive Positive
## [257] Negative Negative Positive Negative Positive Negative Negative Negative
## [265] Positive Positive Negative Positive Positive Positive Positive Negative
## [273] Positive Positive Positive Positive Positive Positive Negative Positive
## [281] Positive Positive Positive Positive Positive Positive Positive Negative
## [289] Positive Positive Positive Positive Negative Negative Negative Positive
## [297] Positive Positive Negative Negative Negative Positive Positive Positive
## [305] Positive Positive Positive Positive Positive Negative Positive Positive
## [313] Positive Positive Positive Negative Positive Positive Positive Negative
## [321] Positive Positive Negative Positive Positive Positive Positive Positive
## [329] Positive Positive Positive Positive Positive Negative Positive Positive
## [337] Negative Positive Positive Negative Positive Positive Negative Positive
## [345] Negative Positive Positive Positive Positive Negative Positive Positive
## [353] Positive Positive Positive Positive Positive Negative Positive Positive
## [361] Positive Negative Positive Positive Negative Positive Positive Positive
## [369] Positive Positive Positive Positive Positive Negative Negative Negative
## [377] Positive Negative Positive Positive Positive Positive Positive Positive
## [385] Positive Positive Positive Positive
## Levels: Negative Positive
table(online_food$Feedback)
## 
## Negative Positive 
##       71      317
prop.table(table(online_food$Feedback))
## 
##  Negative  Positive 
## 0.1829897 0.8170103

Support Vector Machine

library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:psych':
## 
##     alpha
onlinefood_svm<-online_food
Normalization
# Min-max rescale a numeric vector onto the [0, 1] interval.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as x. Missing values are not
# removed, so any NA in x makes the whole result NA (as min()/max() do).
normalize <- function(x) {
  value_range <- range(x)
  span <- value_range[2] - value_range[1]
  # A constant vector has zero span; dividing by it would turn every element
  # into NaN, so return the (all-zero) offsets instead.
  if (!is.na(span) && span == 0) {
    return(x - value_range[1])
  }
  (x - value_range[1]) / span
}
# Column positions 1, 7, 8, 9 and 10 are the numeric columns Age, Family size,
# latitude, longitude and Pin code.
normalized_column<-c(1, 7, 8, 9, 10)
# Rescale each selected column independently with normalize().
# NOTE(review): this runs on the full dataset, before the train/test split
# further down, so test rows contribute to each column's min and max.
onlinefood_svm[normalized_column]<-(lapply(onlinefood_svm[normalized_column], normalize))
summary(onlinefood_svm)
##       Age            Gender              Marital Status          Occupation 
##  Min.   :0.0000   Female:166   Married          :108    Employee      :118  
##  1st Qu.:0.3333   Male  :222   Prefer not to say: 12    House wife    :  9  
##  Median :0.4000                Single           :268    Self Employeed: 54  
##  Mean   :0.4419                                         Student       :207  
##  3rd Qu.:0.5333                                                             
##  Max.   :1.0000                                                             
##          Monthly Income Educational Qualifications  Family size    
##  10001 to 25000 : 45    Graduate     :177          Min.   :0.0000  
##  25001 to 50000 : 69    Ph.D         : 23          1st Qu.:0.2000  
##  Below Rs.10000 : 25    Post Graduate:174          Median :0.4000  
##  More than 50000: 62    School       : 12          Mean   :0.4562  
##  No Income      :187    Uneducated   :  2          3rd Qu.:0.6000  
##                                                    Max.   :1.0000  
##     latitude        longitude         Pin code       Output        Feedback  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   No : 87   Negative: 71  
##  1st Qu.:0.3028   1st Qu.:0.2959   1st Qu.:0.09028   Yes:301   Positive:317  
##  Median :0.4721   Median :0.3938   Median :0.30093                           
##  Mean   :0.4513   Mean   :0.4232   Mean   :0.36216                           
##  3rd Qu.:0.5567   3rd Qu.:0.5354   3rd Qu.:0.62037                           
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000
Create Training and Test Dataset
# Reproducible 80/20 train/test split of the normalized data: draw 80% of the
# row indices at random for training and keep the remaining rows for testing.
set.seed(42)
# seq_len() is used instead of 1:nrow(), which would yield c(1, 0) for a
# zero-row table.
onlinefood_index_svm <- sample(
  seq_len(nrow(onlinefood_svm)),
  size = floor(0.80 * nrow(onlinefood_svm))
)
onlinefood_train_svm <- onlinefood_svm[onlinefood_index_svm, ]
onlinefood_test_svm <- onlinefood_svm[-onlinefood_index_svm, ]
Create Classifier
onlinefood_classifier<-ksvm(Feedback~.,data=onlinefood_train_svm, kernel="vanilladot")
##  Setting default kernel parameters
onlinefood_classifier
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Linear (vanilla) kernel function. 
## 
## Number of Support Vectors : 137 
## 
## Objective Function Value : -82 
## Training error : 0.129032
Evaluate Onlinefood Prediction
onlinefood_predictions<-predict(onlinefood_classifier, onlinefood_test_svm)
onlinefood_predictions
##  [1] Positive Positive Positive Positive Positive Positive Positive Positive
##  [9] Positive Positive Positive Positive Positive Positive Positive Positive
## [17] Positive Positive Positive Positive Negative Negative Positive Positive
## [25] Positive Positive Positive Negative Positive Negative Negative Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Positive
## [41] Negative Positive Negative Positive Negative Negative Negative Positive
## [49] Positive Positive Negative Positive Positive Positive Positive Positive
## [57] Negative Negative Negative Positive Positive Negative Negative Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Negative
## [73] Positive Positive Positive Negative Negative Positive
## Levels: Negative Positive
Understanding Classification Results
table(onlinefood_predictions, onlinefood_test_svm$Feedback)
##                       
## onlinefood_predictions Negative Positive
##               Negative       13        8
##               Positive        4       53
o_food<-onlinefood_predictions==onlinefood_test_svm$Feedback
o_food
##  [1]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
## [49]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [61]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [73]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
table(o_food)
## o_food
## FALSE  TRUE 
##    12    66
  • There were 12 incorrectly classified and 66 correctly classified feedbacks.
prop.table(table(o_food))
## o_food
##     FALSE      TRUE 
## 0.1538462 0.8461538
#about 15% incorrectly classified and 85% correctly classified
  • About 15% of the feedbacks were incorrectly classified and about 85% were correctly classified.
Improving model performance
  • The previous SVM model used the simple linear kernel function.
  • Using a more complex kernel function, we can map the data into a higher dimensional space and potentially obtain a better model fit.
  • Using Gaussian RBF kernel which has shown to perform well for many types of data.
onlinefood_classifier_rbf<-ksvm(Feedback~.,data=onlinefood_train_svm, kernel="rbfdot")
onlinefood_classifier_rbf
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.0589947205070788 
## 
## Number of Support Vectors : 138 
## 
## Objective Function Value : -88.8184 
## Training error : 0.122581
Prediction
onlinefood_predictions_rbf<-predict(onlinefood_classifier_rbf,onlinefood_test_svm)
onlinefood_predictions_rbf
##  [1] Positive Positive Positive Positive Positive Positive Positive Positive
##  [9] Positive Positive Positive Positive Positive Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Positive
## [25] Positive Positive Positive Positive Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Positive
## [41] Positive Positive Positive Positive Positive Negative Positive Positive
## [49] Positive Positive Positive Positive Positive Positive Positive Positive
## [57] Negative Positive Positive Positive Positive Positive Positive Positive
## [65] Positive Positive Positive Positive Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
Classification Results
table(onlinefood_predictions_rbf, onlinefood_test_svm$Feedback)
##                           
## onlinefood_predictions_rbf Negative Positive
##                   Negative        3        0
##                   Positive       14       61
o_food_rbf<-onlinefood_predictions_rbf==onlinefood_test_svm$Feedback
o_food_rbf
##  [1]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [61]  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
table(o_food_rbf)
## o_food_rbf
## FALSE  TRUE 
##    14    64
  • There were 14 incorrectly classified feedbacks and 64 correctly classified feedbacks.

  • The linear kernel function performed better on this test set (66 vs. 64 correctly classified out of 78).

prop.table(table(o_food_rbf))
## o_food_rbf
##     FALSE      TRUE 
## 0.1794872 0.8205128
  • About 18% of the feedbacks were incorrectly classified and about 82% were correctly classified.
Confusion Matrix
library(gmodels)
CrossTable(x = onlinefood_test_svm$Feedback, y=onlinefood_predictions, prop.chisq=FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  78 
## 
##  
##                              | onlinefood_predictions 
## onlinefood_test_svm$Feedback |  Negative |  Positive | Row Total | 
## -----------------------------|-----------|-----------|-----------|
##                     Negative |        13 |         4 |        17 | 
##                              |     0.765 |     0.235 |     0.218 | 
##                              |     0.619 |     0.070 |           | 
##                              |     0.167 |     0.051 |           | 
## -----------------------------|-----------|-----------|-----------|
##                     Positive |         8 |        53 |        61 | 
##                              |     0.131 |     0.869 |     0.782 | 
##                              |     0.381 |     0.930 |           | 
##                              |     0.103 |     0.679 |           | 
## -----------------------------|-----------|-----------|-----------|
##                 Column Total |        21 |        57 |        78 | 
##                              |     0.269 |     0.731 |           | 
## -----------------------------|-----------|-----------|-----------|
## 
## 

KNN

onlinefood_knn<-onlinefood
onlinefood_knn
## # A tibble: 388 × 12
##      Age Gender `Marital Status` Occupation `Monthly Income`
##    <dbl> <chr>  <chr>            <chr>      <chr>           
##  1    20 Female Single           Student    No Income       
##  2    24 Female Single           Student    Below Rs.10000  
##  3    22 Male   Single           Student    Below Rs.10000  
##  4    22 Female Single           Student    No Income       
##  5    22 Male   Single           Student    Below Rs.10000  
##  6    27 Female Married          Employee   More than 50000 
##  7    22 Male   Single           Student    No Income       
##  8    24 Female Single           Student    No Income       
##  9    23 Female Single           Student    No Income       
## 10    23 Female Single           Student    No Income       
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <chr>, `Family size` <dbl>,
## #   latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <chr>,
## #   Feedback <chr>
Data Preparation
#tweak the factors for the factor columns
#Gender
str(onlinefood_knn$Gender)
##  chr [1:388] "Female" "Female" "Male" "Female" "Male" "Female" "Male" ...
onlinefood_knn$Gender<-factor(onlinefood_knn$Gender, levels=c("Female","Male"), labels=c("1", "2"))
#1 for female, 2 for male
str(onlinefood_knn$Gender)
##  Factor w/ 2 levels "1","2": 1 1 2 1 2 1 2 1 1 1 ...
#change from factor to numeric
onlinefood_knn$Gender<-as.numeric(onlinefood_knn$Gender)
#confirm
str(onlinefood_knn$Gender)
##  num [1:388] 1 1 2 1 2 1 2 1 1 1 ...
#Marital Status
str(onlinefood_knn$`Marital Status`)
##  chr [1:388] "Single" "Single" "Single" "Single" "Single" "Married" ...
onlinefood_knn$`Marital Status`<-factor(onlinefood_knn$`Marital Status`, levels=c("Married", "Prefer not to say","Single"), labels=c("1", "2","3") )
str(onlinefood_knn$`Marital Status`)
##  Factor w/ 3 levels "1","2","3": 3 3 3 3 3 1 3 3 3 3 ...
#factor to numeric
onlinefood_knn$`Marital Status`<-as.numeric(onlinefood_knn$`Marital Status`)
#confirm
str(onlinefood_knn$`Marital Status`)
##  num [1:388] 3 3 3 3 3 1 3 3 3 3 ...
#Occupation
str(onlinefood_knn$Occupation)
##  chr [1:388] "Student" "Student" "Student" "Student" "Student" "Employee" ...
onlinefood_knn$Occupation<-factor(onlinefood_knn$Occupation, levels=c("Employee", "House wife", "Self Employeed", "Student"), labels=c("1", "2", "3", "4") )
str(onlinefood_knn$Occupation)
##  Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 1 4 4 4 4 ...
#factor to numeric
onlinefood_knn$Occupation<-as.numeric(onlinefood_knn$Occupation)
#confirm
str(onlinefood_knn$Occupation)
##  num [1:388] 4 4 4 4 4 1 4 4 4 4 ...
# Monthly Income: recode the five income bands as integers 1-5.
str(onlinefood_knn$`Monthly Income`)
##  chr [1:388] "No Income" "Below Rs.10000" "Below Rs.10000" "No Income" ...
# NOTE(review): the levels are listed alphabetically, so the codes do not follow
# income order ("10001 to 25000" = 1, "Below Rs.10000" = 3, "No Income" = 5).
# Consider listing the levels from lowest to highest income so that numeric
# distance reflects the income gap.
onlinefood_knn$`Monthly Income`<-factor(onlinefood_knn$`Monthly Income`, levels=c("10001 to 25000", "25001 to 50000", "Below Rs.10000", "More than 50000", "No Income"), labels=c("1", "2", "3", "4", "5") )
str(onlinefood_knn$`Monthly Income`)
##  Factor w/ 5 levels "1","2","3","4",..: 5 3 3 5 3 4 5 5 5 5 ...
# Convert the factor to its integer level codes.
onlinefood_knn$`Monthly Income`<-as.numeric(onlinefood_knn$`Monthly Income`)
# Confirm the column is now numeric.
str(onlinefood_knn$`Monthly Income`)
##  num [1:388] 5 3 3 5 3 4 5 5 5 5 ...
# Educational Qualifications: recode the five categories as integers 1-5.
str(onlinefood_knn$`Educational Qualifications`)
##  chr [1:388] "Post Graduate" "Graduate" "Post Graduate" "Graduate" ...
# NOTE(review): the levels are listed alphabetically (Graduate = 1, Ph.D = 2,
# Post Graduate = 3, School = 4, Uneducated = 5), so the codes do not follow
# the level of education. Consider ordering the levels if the codes are meant
# to be ordinal.
onlinefood_knn$`Educational Qualifications`<-factor(onlinefood_knn$`Educational Qualifications`, levels=c("Graduate", "Ph.D", "Post Graduate", "School", "Uneducated"), labels=c("1", "2", "3", "4", "5"))
str(onlinefood_knn$`Educational Qualifications`)
##  Factor w/ 5 levels "1","2","3","4",..: 3 1 3 1 3 3 1 3 3 3 ...
# Convert the factor to its integer level codes.
onlinefood_knn$`Educational Qualifications`<-as.numeric(onlinefood_knn$`Educational Qualifications`)
# Confirm the column is now numeric.
str(onlinefood_knn$`Educational Qualifications`)
##  num [1:388] 3 1 3 1 3 3 1 3 3 3 ...
# Output: 1 = No, 2 = Yes.
str(onlinefood_knn$Output)
##  chr [1:388] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" ...
onlinefood_knn$Output<-factor(onlinefood_knn$Output, levels=c("No", "Yes"), labels=c("1", "2"))
str(onlinefood_knn$Output)
##  Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
# Convert the factor to its integer level codes.
onlinefood_knn$Output<-as.numeric(onlinefood_knn$Output)
# Confirm the column is now numeric.
str(onlinefood_knn$Output)
##  num [1:388] 2 2 2 2 2 2 2 2 2 2 ...
# Feedback: converted to a factor that keeps the original level names
# ("Negative", "Positive"); unlike the columns above it is not made numeric.
str(onlinefood_knn$Feedback)
##  chr [1:388] "Positive" "Positive" "Negative" "Positive" "Positive" ...
onlinefood_knn$Feedback<-factor(onlinefood_knn$Feedback, levels=c("Negative", "Positive"), labels=c("Negative", "Positive"))
str(onlinefood_knn$Feedback)
##  Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...

Check structure of dataset

str(onlinefood_knn)
## spc_tbl_ [388 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Age                       : num [1:388] 20 24 22 22 22 27 22 24 23 23 ...
##  $ Gender                    : num [1:388] 1 1 2 1 2 1 2 1 1 1 ...
##  $ Marital Status            : num [1:388] 3 3 3 3 3 1 3 3 3 3 ...
##  $ Occupation                : num [1:388] 4 4 4 4 4 1 4 4 4 4 ...
##  $ Monthly Income            : num [1:388] 5 3 3 5 3 4 5 5 5 5 ...
##  $ Educational Qualifications: num [1:388] 3 1 3 1 3 3 1 3 3 3 ...
##  $ Family size               : num [1:388] 4 3 3 6 4 2 3 3 2 4 ...
##  $ latitude                  : num [1:388] 13 13 13 12.9 13 ...
##  $ longitude                 : num [1:388] 77.6 77.6 77.7 77.6 77.6 ...
##  $ Pin code                  : num [1:388] 560001 560009 560017 560019 560010 ...
##  $ Output                    : num [1:388] 2 2 2 2 2 2 2 2 2 2 ...
##  $ Feedback                  : Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Age = col_double(),
##   ..   Gender = col_character(),
##   ..   `Marital Status` = col_character(),
##   ..   Occupation = col_character(),
##   ..   `Monthly Income` = col_character(),
##   ..   `Educational Qualifications` = col_character(),
##   ..   `Family size` = col_double(),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   `Pin code` = col_double(),
##   ..   Output = col_character(),
##   ..   Feedback = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
Normalization
# Min-max rescale a numeric vector to the [0, 1] range.
#
# x: numeric vector.
# Returns a vector of the same length as x where min(x) maps to 0 and max(x)
# maps to 1. If every value is identical (zero range) the result is all zeros
# instead of NaN from 0/0. If x contains NA the result is all NA, because the
# range is computed without na.rm.
normalize <- function(x) {
  bounds <- range(x)
  span <- bounds[2] - bounds[1]
  # A constant column has span 0; divide by 1 so the result is 0 rather than
  # NaN, which downstream distance calculations cannot handle.
  if (!is.na(span) && span == 0) {
    span <- 1
  }
  (x - bounds[1]) / span
}
Training and Test Dataset
#before that, create a new dataset without the labels and one with labels
summary(onlinefood_knn)
##       Age            Gender      Marital Status    Occupation   
##  Min.   :18.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:23.00   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :24.00   Median :2.000   Median :3.000   Median :4.000  
##  Mean   :24.63   Mean   :1.572   Mean   :2.412   Mean   :2.902  
##  3rd Qu.:26.00   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:4.000  
##  Max.   :33.00   Max.   :2.000   Max.   :3.000   Max.   :4.000  
##  Monthly Income  Educational Qualifications  Family size       latitude    
##  Min.   :1.000   Min.   :1.00               Min.   :1.000   Min.   :12.87  
##  1st Qu.:2.000   1st Qu.:1.00               1st Qu.:2.000   1st Qu.:12.94  
##  Median :4.000   Median :2.00               Median :3.000   Median :12.98  
##  Mean   :3.714   Mean   :2.07               Mean   :3.281   Mean   :12.97  
##  3rd Qu.:5.000   3rd Qu.:3.00               3rd Qu.:4.000   3rd Qu.:13.00  
##  Max.   :5.000   Max.   :5.00               Max.   :6.000   Max.   :13.10  
##    longitude        Pin code          Output          Feedback  
##  Min.   :77.48   Min.   :560001   Min.   :1.000   Negative: 71  
##  1st Qu.:77.57   1st Qu.:560011   1st Qu.:2.000   Positive:317  
##  Median :77.59   Median :560034   Median :2.000                 
##  Mean   :77.60   Mean   :560040   Mean   :1.776                 
##  3rd Qu.:77.63   3rd Qu.:560068   3rd Qu.:2.000                 
##  Max.   :77.76   Max.   :560109   Max.   :2.000
# Drop column 12 (Feedback) so the remaining 11 columns can be used as
# features; note that Output stays in the feature set.
online_food1<-onlinefood_knn[-12]
online_food1#dataset without dependent variable
## # A tibble: 388 × 11
##      Age Gender `Marital Status` Occupation `Monthly Income`
##    <dbl>  <dbl>            <dbl>      <dbl>            <dbl>
##  1    20      1                3          4                5
##  2    24      1                3          4                3
##  3    22      2                3          4                3
##  4    22      1                3          4                5
##  5    22      2                3          4                3
##  6    27      1                1          1                4
##  7    22      2                3          4                5
##  8    24      1                3          4                5
##  9    23      1                3          4                5
## 10    23      1                3          4                5
## # ℹ 378 more rows
## # ℹ 6 more variables: `Educational Qualifications` <dbl>, `Family size` <dbl>,
## #   latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <dbl>
summary(online_food1)
##       Age            Gender      Marital Status    Occupation   
##  Min.   :18.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:23.00   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :24.00   Median :2.000   Median :3.000   Median :4.000  
##  Mean   :24.63   Mean   :1.572   Mean   :2.412   Mean   :2.902  
##  3rd Qu.:26.00   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:4.000  
##  Max.   :33.00   Max.   :2.000   Max.   :3.000   Max.   :4.000  
##  Monthly Income  Educational Qualifications  Family size       latitude    
##  Min.   :1.000   Min.   :1.00               Min.   :1.000   Min.   :12.87  
##  1st Qu.:2.000   1st Qu.:1.00               1st Qu.:2.000   1st Qu.:12.94  
##  Median :4.000   Median :2.00               Median :3.000   Median :12.98  
##  Mean   :3.714   Mean   :2.07               Mean   :3.281   Mean   :12.97  
##  3rd Qu.:5.000   3rd Qu.:3.00               3rd Qu.:4.000   3rd Qu.:13.00  
##  Max.   :5.000   Max.   :5.00               Max.   :6.000   Max.   :13.10  
##    longitude        Pin code          Output     
##  Min.   :77.48   Min.   :560001   Min.   :1.000  
##  1st Qu.:77.57   1st Qu.:560011   1st Qu.:2.000  
##  Median :77.59   Median :560034   Median :2.000  
##  Mean   :77.60   Mean   :560040   Mean   :1.776  
##  3rd Qu.:77.63   3rd Qu.:560068   3rd Qu.:2.000  
##  Max.   :77.76   Max.   :560109   Max.   :2.000

Standardize the dataset without labels

# Apply normalize() to every column. as.data.frame() also rewrites the column
# names into syntactic form (e.g. "Marital Status" becomes "Marital.Status").
online_food1<-as.data.frame(lapply(online_food1, normalize))
summary(online_food1)
##       Age             Gender       Marital.Status     Occupation   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.3333   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.4000   Median :1.0000   Median :1.0000   Median :1.000  
##  Mean   :0.4419   Mean   :0.5722   Mean   :0.7062   Mean   :0.634  
##  3rd Qu.:0.5333   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##  Monthly.Income   Educational.Qualifications  Family.size        latitude     
##  Min.   :0.0000   Min.   :0.0000             Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.2500   1st Qu.:0.0000             1st Qu.:0.2000   1st Qu.:0.3028  
##  Median :0.7500   Median :0.2500             Median :0.4000   Median :0.4721  
##  Mean   :0.6785   Mean   :0.2674             Mean   :0.4562   Mean   :0.4513  
##  3rd Qu.:1.0000   3rd Qu.:0.5000             3rd Qu.:0.6000   3rd Qu.:0.5567  
##  Max.   :1.0000   Max.   :1.0000             Max.   :1.0000   Max.   :1.0000  
##    longitude         Pin.code           Output      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.2959   1st Qu.:0.09028   1st Qu.:1.0000  
##  Median :0.3938   Median :0.30093   Median :1.0000  
##  Mean   :0.4232   Mean   :0.36216   Mean   :0.7758  
##  3rd Qu.:0.5354   3rd Qu.:0.62037   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000

Labelled Dataset

onlinefood_labels<-online_food
onlinefood_labels
## # A tibble: 388 × 12
##      Age Gender `Marital Status` Occupation `Monthly Income`
##    <dbl> <fct>  <fct>            <fct>      <fct>           
##  1    20 Female Single           Student    No Income       
##  2    24 Female Single           Student    Below Rs.10000  
##  3    22 Male   Single           Student    Below Rs.10000  
##  4    22 Female Single           Student    No Income       
##  5    22 Male   Single           Student    Below Rs.10000  
##  6    27 Female Married          Employee   More than 50000 
##  7    22 Male   Single           Student    No Income       
##  8    24 Female Single           Student    No Income       
##  9    23 Female Single           Student    No Income       
## 10    23 Female Single           Student    No Income       
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <fct>, `Family size` <dbl>,
## #   latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <fct>,
## #   Feedback <fct>
Training & Test (No label)
# Feature split by row position: rows 1-310 train, rows 311-388 test.
# NOTE(review): this split follows file order rather than a random sample;
# it assumes the rows are not sorted in a way that relates to Feedback.
onlinefood_train <- online_food1[seq_len(310), ]
onlinefood_test <- online_food1[seq(311, 388), ]
Training & Test (Labelled)
# Matching label split: the Feedback column (column 12) for the same row
# ranges as the feature split, kept as one-column tibbles.
onlinefood_train_labels <- onlinefood_labels[seq_len(310), "Feedback"]
onlinefood_test_labels <- onlinefood_labels[seq(311, 388), "Feedback"]
Choosing best k-value
library(class)
# Run k-NN once for every k from 2 to 18 and store each prediction vector in
# `predictions` under the name "k=<k>" (e.g. predictions$`k=9`).
# knn() breaks ties at random, so the seed is fixed to make the predictions
# (and the accuracy comparison built on them) reproducible between runs.
set.seed(42)
predictions <- list()

for (k in 2:18) {
  predictions[[paste0("k=", k)]] <- knn(
    train = onlinefood_train,
    test = onlinefood_test,
    cl = onlinefood_train_labels$Feedback,
    k = k
  )
}
predictions
## $`k=2`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Positive Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Positive
## [25] Positive Positive Negative Negative Positive Positive Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Positive
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Positive Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
## 
## $`k=3`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
## 
## $`k=4`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Positive
## [41] Positive Positive Negative Negative Positive Positive Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Negative Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=5`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=6`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
## 
## $`k=7`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
## 
## $`k=8`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=9`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=10`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=11`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=12`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=13`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=14`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=15`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=16`
##  [1] Positive Positive Positive Positive Positive Positive Positive Positive
##  [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=17`
##  [1] Positive Positive Positive Positive Positive Negative Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Positive Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
## 
## $`k=18`
##  [1] Positive Positive Positive Positive Positive Positive Positive Positive
##  [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Positive Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
library(dplyr)
# Test-set accuracy for each k: the share of predictions equal to the true
# Feedback label. vapply() guarantees one numeric value per element.
accuracies <- vapply(
  predictions,
  function(pred) mean(pred == onlinefood_test_labels$Feedback),
  numeric(1)
)

# Recover k from the list names ("k=2", "k=3", ...) so this table cannot drift
# out of sync with the range of k used to build `predictions`.
accuracy_data <- data.frame(
  k = as.integer(sub("k=", "", names(accuracies), fixed = TRUE)),
  Accuracy = accuracies
)
Visualizing k-values
library(ggplot2)
ggplot(accuracy_data, aes(x = k, y = Accuracy)) +
  geom_line() + # Line plot to show the trend
  geom_point() + # Points to mark each k-value's accuracy
  labs(title = "Accuracy vs. k in k-NN", x = "k (Number of Neighbors)", y = "Accuracy") +
  theme_minimal()

  • k = 9 is selected as the best value because it gives the highest test-set accuracy among the values tried (k = 2 to 18).
Evaluate Model Performance
library(gmodels)
CrossTable(x = onlinefood_test_labels$Feedback, y=predictions$`k=9`, prop.chisq=FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  78 
## 
##  
##                                 | predictions$`k=9` 
## onlinefood_test_labels$Feedback |  Negative |  Positive | Row Total | 
## --------------------------------|-----------|-----------|-----------|
##                        Negative |        14 |         2 |        16 | 
##                                 |     0.875 |     0.125 |     0.205 | 
##                                 |     0.778 |     0.033 |           | 
##                                 |     0.179 |     0.026 |           | 
## --------------------------------|-----------|-----------|-----------|
##                        Positive |         4 |        58 |        62 | 
##                                 |     0.065 |     0.935 |     0.795 | 
##                                 |     0.222 |     0.967 |           | 
##                                 |     0.051 |     0.744 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Column Total |        18 |        60 |        78 | 
##                                 |     0.231 |     0.769 |           | 
## --------------------------------|-----------|-----------|-----------|
## 
## 
table(predictions$`k=9`, onlinefood_test_labels$Feedback)
##           
##            Negative Positive
##   Negative       14        4
##   Positive        2       58
o_food_knn<-predictions$`k=9`==onlinefood_test_labels$Feedback
o_food_knn
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [61]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
table(o_food_knn)
## o_food_knn
## FALSE  TRUE 
##     6    72

Decision Tree

suppressPackageStartupMessages(library(rattle))
suppressPackageStartupMessages(library(rpart.plot))
library(RColorBrewer)
onlinefood_DT<-online_food
Training & Test Dataset
# Seeded random 80/20 split for the decision tree.
set.seed(42)
# sample.int() draws row numbers directly and is safe for any row count;
# floor() makes the rounding of the 80% training size explicit.
training_indices_DT <- sample.int(
  nrow(onlinefood_DT),
  size = floor(0.8 * nrow(onlinefood_DT))
)
traindata_DT <- onlinefood_DT[training_indices_DT, ]
# Every row not drawn for training becomes test data.
testdata_DT <- onlinefood_DT[-training_indices_DT, ]
Training the model
tree_model_train <- rpart(Feedback ~ ., data = traindata_DT, method = "class")
tree_model_train
## n= 310 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 310 54 Positive (0.17419355 0.82580645)  
##    2) Output=No 66 26 Negative (0.60606061 0.39393939)  
##      4) Pin code>=560066.5 16  2 Negative (0.87500000 0.12500000) *
##      5) Pin code< 560066.5 50 24 Negative (0.52000000 0.48000000)  
##       10) Pin code< 560059.5 43 17 Negative (0.60465116 0.39534884)  
##         20) Occupation=Employee,Student 33 10 Negative (0.69696970 0.30303030) *
##         21) Occupation=House wife,Self Employeed 10  3 Positive (0.30000000 0.70000000) *
##       11) Pin code>=560059.5 7  0 Positive (0.00000000 1.00000000) *
##    3) Output=Yes 244 14 Positive (0.05737705 0.94262295) *
fancyRpartPlot(tree_model_train)

Prediction
predictions_DT <- predict(tree_model_train, testdata_DT, type = "class")
predictions_DT
##        1        2        3        4        5        6        7        8 
## Positive Positive Positive Positive Positive Positive Positive Positive 
##        9       10       11       12       13       14       15       16 
## Positive Positive Positive Positive Positive Positive Positive Positive 
##       17       18       19       20       21       22       23       24 
## Positive Positive Positive Positive Negative Negative Positive Positive 
##       25       26       27       28       29       30       31       32 
## Positive Positive Positive Positive Positive Negative Negative Positive 
##       33       34       35       36       37       38       39       40 
## Positive Positive Positive Positive Positive Positive Positive Positive 
##       41       42       43       44       45       46       47       48 
## Negative Positive Negative Positive Negative Negative Positive Positive 
##       49       50       51       52       53       54       55       56 
## Positive Positive Positive Positive Positive Positive Positive Positive 
##       57       58       59       60       61       62       63       64 
## Negative Positive Negative Positive Positive Negative Negative Negative 
##       65       66       67       68       69       70       71       72 
## Positive Positive Positive Negative Positive Positive Positive Negative 
##       73       74       75       76       77       78 
## Positive Positive Positive Negative Negative Positive 
## Levels: Negative Positive
table(predictions_DT)
## predictions_DT
## Negative Positive 
##       17       61
Evaluating Model Performance
library(gmodels)
CrossTable(x=testdata_DT$Feedback, y=predictions_DT, prop.chisq = FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  78 
## 
##  
##                      | predictions_DT 
## testdata_DT$Feedback |  Negative |  Positive | Row Total | 
## ---------------------|-----------|-----------|-----------|
##             Negative |        11 |         6 |        17 | 
##                      |     0.647 |     0.353 |     0.218 | 
##                      |     0.647 |     0.098 |           | 
##                      |     0.141 |     0.077 |           | 
## ---------------------|-----------|-----------|-----------|
##             Positive |         6 |        55 |        61 | 
##                      |     0.098 |     0.902 |     0.782 | 
##                      |     0.353 |     0.902 |           | 
##                      |     0.077 |     0.705 |           | 
## ---------------------|-----------|-----------|-----------|
##         Column Total |        17 |        61 |        78 | 
##                      |     0.218 |     0.782 |           | 
## ---------------------|-----------|-----------|-----------|
## 
## 
onlinefood_decisontree<-predictions_DT==testdata_DT$Feedback
onlinefood_decisontree
##  [1]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [61]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [73]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
table(onlinefood_decisontree)
## onlinefood_decisontree
## FALSE  TRUE 
##    12    66
prop.table(table(onlinefood_decisontree))
## onlinefood_decisontree
##     FALSE      TRUE 
## 0.1538462 0.8461538
  • There were 12 incorrectly classified feedbacks and 66 correctly classified feedbacks.

Comparing Machine Learning Models

svm_accuracy <- mean(o_food)
knn_accuracy <- mean(o_food_knn)
dt_accuracy <- mean(onlinefood_decisontree)
Accuracy
comparison <- data.frame(
  Model = c("SVM", "KNN", "Decision Tree"),
  Accuracy = c(svm_accuracy, knn_accuracy, dt_accuracy)
)
comparison
##           Model  Accuracy
## 1           SVM 0.8461538
## 2           KNN 0.9230769
## 3 Decision Tree 0.8461538
Accuracy Plot
library(ggplot2)
ggplot(comparison, aes(x = Model, y = Accuracy, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Comparison of Model Accuracies", x = "Model", y = "Accuracy") +
  theme_minimal()

Conclusion