The dataset contains information collected from an online food ordering platform over a period of time.
It shows Demographic Information, Location Information and Order details.
Attributes:
Demographic Information:
Age: Age of the customer.
Gender: Gender of the customer.
Marital Status: Marital status of the customer.
Occupation: Occupation of the customer.
Monthly Income: Monthly income of the customer.
Educational Qualifications: Educational qualifications of the customer.
Family Size: Number of individuals in the customer’s family.
Location Information:
Latitude: Latitude of the customer’s location.
Longitude: Longitude of the customer’s location.
Pin Code: Pin code of the customer’s location.
Order Details:
Output: Current status of the order (e.g., pending, confirmed, delivered); in this file the column is recorded as Yes/No.
Feedback: Feedback provided by the customer after receiving the order.
# Load readr for read_csv() and read the food-ordering data into a tibble.
# NOTE(review): the path points into one user's Downloads folder, so this step
# only runs on a machine where the file exists at exactly this location.
library(readr)
onlinefood <- read_csv("~/Downloads/CS 583/R_Project/onlinefood.csv")
## Rows: 388 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Gender, Marital Status, Occupation, Monthly Income, Educational Qua...
## dbl (5): Age, Family size, latitude, longitude, Pin code
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
onlinefood
## # A tibble: 388 × 12
## Age Gender `Marital Status` Occupation `Monthly Income`
## <dbl> <chr> <chr> <chr> <chr>
## 1 20 Female Single Student No Income
## 2 24 Female Single Student Below Rs.10000
## 3 22 Male Single Student Below Rs.10000
## 4 22 Female Single Student No Income
## 5 22 Male Single Student Below Rs.10000
## 6 27 Female Married Employee More than 50000
## 7 22 Male Single Student No Income
## 8 24 Female Single Student No Income
## 9 23 Female Single Student No Income
## 10 23 Female Single Student No Income
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <chr>, `Family size` <dbl>,
## # latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <chr>,
## # Feedback <chr>
dim(onlinefood)
## [1] 388 12
summary(onlinefood)
## Age Gender Marital Status Occupation
## Min. :18.00 Length:388 Length:388 Length:388
## 1st Qu.:23.00 Class :character Class :character Class :character
## Median :24.00 Mode :character Mode :character Mode :character
## Mean :24.63
## 3rd Qu.:26.00
## Max. :33.00
## Monthly Income Educational Qualifications Family size latitude
## Length:388 Length:388 Min. :1.000 Min. :12.87
## Class :character Class :character 1st Qu.:2.000 1st Qu.:12.94
## Mode :character Mode :character Median :3.000 Median :12.98
## Mean :3.281 Mean :12.97
## 3rd Qu.:4.000 3rd Qu.:13.00
## Max. :6.000 Max. :13.10
## longitude Pin code Output Feedback
## Min. :77.48 Min. :560001 Length:388 Length:388
## 1st Qu.:77.57 1st Qu.:560011 Class :character Class :character
## Median :77.59 Median :560034 Mode :character Mode :character
## Mean :77.60 Mean :560040
## 3rd Qu.:77.63 3rd Qu.:560068
## Max. :77.76 Max. :560109
any(is.na(onlinefood))
## [1] FALSE
sapply(onlinefood, class)
## Age Gender
## "numeric" "character"
## Marital Status Occupation
## "character" "character"
## Monthly Income Educational Qualifications
## "character" "character"
## Family size latitude
## "numeric" "numeric"
## longitude Pin code
## "numeric" "numeric"
## Output Feedback
## "character" "character"
#checking proportions
table(onlinefood$Occupation)
##
## Employee House wife Self Employeed Student
## 118 9 54 207
prop.table(table(onlinefood$Occupation))
##
## Employee House wife Self Employeed Student
## 0.30412371 0.02319588 0.13917526 0.53350515
table(onlinefood$`Monthly Income`)
##
## 10001 to 25000 25001 to 50000 Below Rs.10000 More than 50000 No Income
## 45 69 25 62 187
prop.table(table(onlinefood$`Monthly Income`))
##
## 10001 to 25000 25001 to 50000 Below Rs.10000 More than 50000 No Income
## 0.11597938 0.17783505 0.06443299 0.15979381 0.48195876
table(onlinefood$`Marital Status`)
##
## Married Prefer not to say Single
## 108 12 268
prop.table(table(onlinefood$`Marital Status`))
##
## Married Prefer not to say Single
## 0.27835052 0.03092784 0.69072165
table(onlinefood$`Educational Qualifications`)
##
## Graduate Ph.D Post Graduate School Uneducated
## 177 23 174 12 2
prop.table(table(onlinefood$`Educational Qualifications`))
##
## Graduate Ph.D Post Graduate School Uneducated
## 0.456185567 0.059278351 0.448453608 0.030927835 0.005154639
#standard deviation
# sd() is applied to every column of the tibble. The character columns have no
# numeric standard deviation and come back as NA; only the numeric columns
# return a value.
sapply(onlinefood, sd)
## Age Gender
## 2.97559266 NA
## Marital Status Occupation
## NA NA
## Monthly Income Educational Qualifications
## NA NA
## Family size latitude
## 1.35102494 0.04448925
## longitude Pin code
## 0.05135392 31.39960871
## Output Feedback
## NA NA
cor(onlinefood[c('Age', 'Family size', 'latitude', 'longitude', 'Pin code')])
## Age Family size latitude longitude Pin code
## Age 1.000000000 0.169981512 0.006594549 0.04749956 0.137294125
## Family size 0.169981512 1.000000000 -0.053536733 0.07012644 -0.009402396
## latitude 0.006594549 -0.053536733 1.000000000 -0.14423361 -0.201813497
## longitude 0.047499559 0.070126439 -0.144233613 1.00000000 0.156119479
## Pin code 0.137294125 -0.009402396 -0.201813497 0.15611948 1.000000000
pairs(onlinefood[c('Age', 'Family size', 'latitude', 'longitude', 'Pin code')])
library(psych)
pairs.panels(onlinefood[c('Age', 'Family size', 'latitude', 'longitude', 'Pin code')])
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Bar chart of the number of customers per educational qualification, with the
# count printed above each bar. The fill legend is hidden because it would only
# repeat the x-axis labels.
ggplot(
  data = onlinefood,
  aes(x = `Educational Qualifications`, fill = `Educational Qualifications`)
) +
  geom_bar(stat = "count", color = "black") +
  # after_stat() replaces the deprecated stat() helper.
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(x = "Educational Qualifications", y = "Count", title = "Educational Qualifications Count") +
  # guides(fill = FALSE) is deprecated; "none" is the supported way to drop a legend.
  guides(fill = "none")
# Bar chart of the number of customers per monthly income band, with the count
# printed above each bar. The fill legend is hidden because it would only
# repeat the x-axis labels.
ggplot(
  data = onlinefood,
  aes(x = `Monthly Income`, fill = `Monthly Income`)
) +
  geom_bar(stat = "count", color = "black") +
  # after_stat() replaces the deprecated stat() helper.
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(x = "Monthly Income", y = "Count", title = "Monthly Income Count") +
  # guides(fill = FALSE) is deprecated; "none" is the supported way to drop a legend.
  guides(fill = "none")
# Histogram of customer age using one-unit-wide bins, with the number of
# customers at each distinct age printed above the bars.
ggplot(data = onlinefood, aes(x = Age)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  # after_stat() replaces the deprecated stat() helper.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    color = "black"
  ) +
  labs(x = "Age", y = "Frequency", title = "Age Distribution")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(explore)
##
## Attaching package: 'explore'
## The following object is masked from 'package:psych':
##
## describe
onlinefood %>% explore(Feedback, target=Output)
Positive yes: 94% of the people who said yes gave positive feedback, possibly due to the timely arrival of their food.
Positive no: 39.1% of the people who said no gave positive feedback, possibly due to a satisfactory resolution of delivery issues or an order cancellation.
Negative yes: 6% of the people who said yes gave negative feedback, possibly due to complications with food delivery or dislike of the food.
Negative no: 60.9% of the people who said no gave negative feedback, possibly due to order cancellation or the order never arriving.
onlinefood %>% explore(Feedback, target=Gender)
The females had slightly more positive feedback (83.7%) than the males (80.2%).
The males had more negative feedback (19.8%) than the females (16.3%).
However, there are more males than females in this dataset, so even though females had a higher percentage of positive feedback, the number of males giving positive feedback was still higher than the number of females.
# Histogram of family size using one-unit-wide bins, with the number of
# customers at each distinct family size printed above the bars.
ggplot(data = onlinefood, aes(x = `Family size`)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  # after_stat() replaces the deprecated stat() helper.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    color = "black"
  ) +
  labs(x = "Family Size", y = "Frequency", title = "Family Size Distribution")
online_food <- onlinefood
online_food
## # A tibble: 388 × 12
## Age Gender `Marital Status` Occupation `Monthly Income`
## <dbl> <chr> <chr> <chr> <chr>
## 1 20 Female Single Student No Income
## 2 24 Female Single Student Below Rs.10000
## 3 22 Male Single Student Below Rs.10000
## 4 22 Female Single Student No Income
## 5 22 Male Single Student Below Rs.10000
## 6 27 Female Married Employee More than 50000
## 7 22 Male Single Student No Income
## 8 24 Female Single Student No Income
## 9 23 Female Single Student No Income
## 10 23 Female Single Student No Income
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <chr>, `Family size` <dbl>,
## # latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <chr>,
## # Feedback <chr>
# Convert every character column of online_food to a factor with an explicit
# level order. Each column is inspected with str() before and after the
# conversion. The level names double as the labels, so no labels= is passed.

# Gender
str(online_food$Gender)
## chr [1:388] "Female" "Female" "Male" "Female" "Male" "Female" "Male" ...
online_food$Gender <- factor(
  online_food$Gender,
  levels = c("Female", "Male")
)
str(online_food$Gender)
## Factor w/ 2 levels "Female","Male": 1 1 2 1 2 1 2 1 1 1 ...

# Marital status
str(online_food$`Marital Status`)
## chr [1:388] "Single" "Single" "Single" "Single" "Single" "Married" ...
online_food$`Marital Status` <- factor(
  online_food$`Marital Status`,
  levels = c("Married", "Prefer not to say", "Single")
)
str(online_food$`Marital Status`)
## Factor w/ 3 levels "Married","Prefer not to say",..: 3 3 3 3 3 1 3 3 3 3 ...

# Occupation
str(online_food$Occupation)
## chr [1:388] "Student" "Student" "Student" "Student" "Student" "Employee" ...
online_food$Occupation <- factor(
  online_food$Occupation,
  levels = c("Employee", "House wife", "Self Employeed", "Student")
)
str(online_food$Occupation)
## Factor w/ 4 levels "Employee","House wife",..: 4 4 4 4 4 1 4 4 4 4 ...

# Monthly income
str(online_food$`Monthly Income`)
## chr [1:388] "No Income" "Below Rs.10000" "Below Rs.10000" "No Income" ...
online_food$`Monthly Income` <- factor(
  online_food$`Monthly Income`,
  levels = c(
    "10001 to 25000", "25001 to 50000", "Below Rs.10000",
    "More than 50000", "No Income"
  )
)
str(online_food$`Monthly Income`)
## Factor w/ 5 levels "10001 to 25000",..: 5 3 3 5 3 4 5 5 5 5 ...

# Educational qualifications
str(online_food$`Educational Qualifications`)
## chr [1:388] "Post Graduate" "Graduate" "Post Graduate" "Graduate" ...
online_food$`Educational Qualifications` <- factor(
  online_food$`Educational Qualifications`,
  levels = c("Graduate", "Ph.D", "Post Graduate", "School", "Uneducated")
)
str(online_food$`Educational Qualifications`)
## Factor w/ 5 levels "Graduate","Ph.D",..: 3 1 3 1 3 3 1 3 3 3 ...

# Output
str(online_food$Output)
## chr [1:388] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" ...
online_food$Output <- factor(
  online_food$Output,
  levels = c("No", "Yes")
)
str(online_food$Output)
## Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...

# Feedback (the classification target)
str(online_food$Feedback)
## chr [1:388] "Positive" "Positive" "Negative" "Positive" "Positive" ...
online_food$Feedback <- factor(
  online_food$Feedback,
  levels = c("Negative", "Positive")
)
str(online_food$Feedback)
## Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
str(online_food)
## spc_tbl_ [388 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:388] 20 24 22 22 22 27 22 24 23 23 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 1 2 1 1 1 ...
## $ Marital Status : Factor w/ 3 levels "Married","Prefer not to say",..: 3 3 3 3 3 1 3 3 3 3 ...
## $ Occupation : Factor w/ 4 levels "Employee","House wife",..: 4 4 4 4 4 1 4 4 4 4 ...
## $ Monthly Income : Factor w/ 5 levels "10001 to 25000",..: 5 3 3 5 3 4 5 5 5 5 ...
## $ Educational Qualifications: Factor w/ 5 levels "Graduate","Ph.D",..: 3 1 3 1 3 3 1 3 3 3 ...
## $ Family size : num [1:388] 4 3 3 6 4 2 3 3 2 4 ...
## $ latitude : num [1:388] 13 13 13 12.9 13 ...
## $ longitude : num [1:388] 77.6 77.6 77.7 77.6 77.6 ...
## $ Pin code : num [1:388] 560001 560009 560017 560019 560010 ...
## $ Output : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Feedback : Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Gender = col_character(),
## .. `Marital Status` = col_character(),
## .. Occupation = col_character(),
## .. `Monthly Income` = col_character(),
## .. `Educational Qualifications` = col_character(),
## .. `Family size` = col_double(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. `Pin code` = col_double(),
## .. Output = col_character(),
## .. Feedback = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
online_food$Feedback
## [1] Positive Positive Negative Positive Positive Positive Positive Positive
## [9] Positive Positive Positive Negative Positive Positive Positive Positive
## [17] Positive Negative Negative Positive Positive Positive Positive Positive
## [25] Positive Positive Positive Positive Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Negative Positive Positive
## [41] Negative Positive Positive Positive Positive Positive Positive Positive
## [49] Negative Positive Positive Positive Positive Positive Positive Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Positive
## [65] Positive Positive Positive Positive Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive Positive Positive
## [81] Positive Positive Positive Positive Positive Positive Positive Positive
## [89] Positive Negative Positive Positive Positive Positive Positive Positive
## [97] Positive Positive Positive Positive Positive Positive Positive Positive
## [105] Negative Positive Positive Positive Positive Positive Positive Positive
## [113] Positive Positive Positive Negative Positive Positive Positive Positive
## [121] Positive Positive Positive Negative Negative Positive Positive Positive
## [129] Positive Positive Positive Positive Positive Negative Positive Positive
## [137] Positive Negative Positive Positive Positive Negative Positive Positive
## [145] Negative Positive Positive Positive Positive Positive Positive Positive
## [153] Positive Positive Positive Positive Positive Positive Negative Positive
## [161] Positive Negative Positive Positive Positive Positive Negative Positive
## [169] Positive Negative Positive Positive Positive Positive Positive Positive
## [177] Positive Negative Negative Positive Positive Negative Positive Positive
## [185] Positive Positive Positive Positive Negative Positive Negative Positive
## [193] Positive Positive Positive Positive Positive Positive Positive Negative
## [201] Positive Positive Positive Positive Positive Positive Negative Positive
## [209] Positive Negative Negative Positive Negative Positive Positive Negative
## [217] Positive Negative Positive Positive Positive Positive Positive Positive
## [225] Positive Positive Positive Positive Negative Positive Positive Positive
## [233] Negative Positive Positive Negative Positive Positive Positive Positive
## [241] Positive Positive Positive Positive Negative Positive Negative Positive
## [249] Negative Positive Positive Positive Positive Positive Positive Positive
## [257] Negative Negative Positive Negative Positive Negative Negative Negative
## [265] Positive Positive Negative Positive Positive Positive Positive Negative
## [273] Positive Positive Positive Positive Positive Positive Negative Positive
## [281] Positive Positive Positive Positive Positive Positive Positive Negative
## [289] Positive Positive Positive Positive Negative Negative Negative Positive
## [297] Positive Positive Negative Negative Negative Positive Positive Positive
## [305] Positive Positive Positive Positive Positive Negative Positive Positive
## [313] Positive Positive Positive Negative Positive Positive Positive Negative
## [321] Positive Positive Negative Positive Positive Positive Positive Positive
## [329] Positive Positive Positive Positive Positive Negative Positive Positive
## [337] Negative Positive Positive Negative Positive Positive Negative Positive
## [345] Negative Positive Positive Positive Positive Negative Positive Positive
## [353] Positive Positive Positive Positive Positive Negative Positive Positive
## [361] Positive Negative Positive Positive Negative Positive Positive Positive
## [369] Positive Positive Positive Positive Positive Negative Negative Negative
## [377] Positive Negative Positive Positive Positive Positive Positive Positive
## [385] Positive Positive Positive Positive
## Levels: Negative Positive
table(online_food$Feedback)
##
## Negative Positive
## 71 317
prop.table(table(online_food$Feedback))
##
## Negative Positive
## 0.1829897 0.8170103
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:psych':
##
## alpha
onlinefood_svm<-online_food
# Min-max rescale a numeric vector to the [0, 1] interval.
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  If TRUE (default), missing values are ignored when the minimum and
#          maximum are computed and stay NA in the result. If FALSE, any NA in
#          x makes the whole result NA.
#
# Returns a numeric vector of the same length as x. A vector whose non-missing
# values are all equal is mapped to 0 instead of NaN (0 / 0).
normalize <- function(x, na.rm = TRUE) {
  # Nothing to scale: avoid min()/max() warnings on empty or all-NA input.
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # Constant column: every value sits at the minimum, so report 0.
  if (!is.na(span) && span == 0) {
    return(as.numeric(x - lo))
  }
  (x - lo) / span
}
# Positions of the numeric columns to rescale, looked up by name so the step
# keeps working if the column order of the data changes.
# NOTE(review): the min/max used for scaling come from the full data set, i.e.
# before the train/test split below.
normalized_column <- match(
  c("Age", "Family size", "latitude", "longitude", "Pin code"),
  names(onlinefood_svm)
)
# Fail loudly if any expected column is missing instead of indexing with NA.
stopifnot(!anyNA(normalized_column))
onlinefood_svm[normalized_column] <- lapply(onlinefood_svm[normalized_column], normalize)
summary(onlinefood_svm)
## Age Gender Marital Status Occupation
## Min. :0.0000 Female:166 Married :108 Employee :118
## 1st Qu.:0.3333 Male :222 Prefer not to say: 12 House wife : 9
## Median :0.4000 Single :268 Self Employeed: 54
## Mean :0.4419 Student :207
## 3rd Qu.:0.5333
## Max. :1.0000
## Monthly Income Educational Qualifications Family size
## 10001 to 25000 : 45 Graduate :177 Min. :0.0000
## 25001 to 50000 : 69 Ph.D : 23 1st Qu.:0.2000
## Below Rs.10000 : 25 Post Graduate:174 Median :0.4000
## More than 50000: 62 School : 12 Mean :0.4562
## No Income :187 Uneducated : 2 3rd Qu.:0.6000
## Max. :1.0000
## latitude longitude Pin code Output Feedback
## Min. :0.0000 Min. :0.0000 Min. :0.00000 No : 87 Negative: 71
## 1st Qu.:0.3028 1st Qu.:0.2959 1st Qu.:0.09028 Yes:301 Positive:317
## Median :0.4721 Median :0.3938 Median :0.30093
## Mean :0.4513 Mean :0.4232 Mean :0.36216
## 3rd Qu.:0.5567 3rd Qu.:0.5354 3rd Qu.:0.62037
## Max. :1.0000 Max. :1.0000 Max. :1.00000
# Reproducible 80/20 train/test split of the scaled data, followed by a
# linear-kernel SVM fit that predicts Feedback from all other columns.
set.seed(42)
# seq_len() is used instead of 1:nrow() so an empty data frame gives an empty
# index set rather than c(1, 0).
onlinefood_index_svm <- sample(
  seq_len(nrow(onlinefood_svm)),
  size = floor(0.80 * nrow(onlinefood_svm))
)
onlinefood_train_svm <- onlinefood_svm[onlinefood_index_svm, ]
onlinefood_test_svm <- onlinefood_svm[-onlinefood_index_svm, ]
# "vanilladot" selects kernlab's linear kernel.
onlinefood_classifier <- ksvm(Feedback ~ ., data = onlinefood_train_svm, kernel = "vanilladot")
## Setting default kernel parameters
onlinefood_classifier
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Linear (vanilla) kernel function.
##
## Number of Support Vectors : 137
##
## Objective Function Value : -82
## Training error : 0.129032
onlinefood_predictions<-predict(onlinefood_classifier, onlinefood_test_svm)
onlinefood_predictions
## [1] Positive Positive Positive Positive Positive Positive Positive Positive
## [9] Positive Positive Positive Positive Positive Positive Positive Positive
## [17] Positive Positive Positive Positive Negative Negative Positive Positive
## [25] Positive Positive Positive Negative Positive Negative Negative Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Positive
## [41] Negative Positive Negative Positive Negative Negative Negative Positive
## [49] Positive Positive Negative Positive Positive Positive Positive Positive
## [57] Negative Negative Negative Positive Positive Negative Negative Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Negative
## [73] Positive Positive Positive Negative Negative Positive
## Levels: Negative Positive
table(onlinefood_predictions, onlinefood_test_svm$Feedback)
##
## onlinefood_predictions Negative Positive
## Negative 13 8
## Positive 4 53
o_food<-onlinefood_predictions==onlinefood_test_svm$Feedback
o_food
## [1] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
## [49] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE FALSE TRUE TRUE
table(o_food)
## o_food
## FALSE TRUE
## 12 66
prop.table(table(o_food))
## o_food
## FALSE TRUE
## 0.1538462 0.8461538
#about 15% incorrectly classified and about 85% correctly classified
onlinefood_classifier_rbf<-ksvm(Feedback~.,data=onlinefood_train_svm, kernel="rbfdot")
onlinefood_classifier_rbf
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 1
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.0589947205070788
##
## Number of Support Vectors : 138
##
## Objective Function Value : -88.8184
## Training error : 0.122581
onlinefood_predictions_rbf<-predict(onlinefood_classifier_rbf,onlinefood_test_svm)
onlinefood_predictions_rbf
## [1] Positive Positive Positive Positive Positive Positive Positive Positive
## [9] Positive Positive Positive Positive Positive Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Positive
## [25] Positive Positive Positive Positive Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Positive
## [41] Positive Positive Positive Positive Positive Negative Positive Positive
## [49] Positive Positive Positive Positive Positive Positive Positive Positive
## [57] Negative Positive Positive Positive Positive Positive Positive Positive
## [65] Positive Positive Positive Positive Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
table(onlinefood_predictions_rbf, onlinefood_test_svm$Feedback)
##
## onlinefood_predictions_rbf Negative Positive
## Negative 3 0
## Positive 14 61
o_food_rbf<-onlinefood_predictions_rbf==onlinefood_test_svm$Feedback
o_food_rbf
## [1] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## [61] TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE
table(o_food_rbf)
## o_food_rbf
## FALSE TRUE
## 14 64
There were 14 incorrectly classified feedbacks and 64 correctly classified feedbacks.
The linear kernel function performed better.
prop.table(table(o_food_rbf))
## o_food_rbf
## FALSE TRUE
## 0.1794872 0.8205128
library(gmodels)
CrossTable(x = onlinefood_test_svm$Feedback, y=onlinefood_predictions, prop.chisq=FALSE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 78
##
##
## | onlinefood_predictions
## onlinefood_test_svm$Feedback | Negative | Positive | Row Total |
## -----------------------------|-----------|-----------|-----------|
## Negative | 13 | 4 | 17 |
## | 0.765 | 0.235 | 0.218 |
## | 0.619 | 0.070 | |
## | 0.167 | 0.051 | |
## -----------------------------|-----------|-----------|-----------|
## Positive | 8 | 53 | 61 |
## | 0.131 | 0.869 | 0.782 |
## | 0.381 | 0.930 | |
## | 0.103 | 0.679 | |
## -----------------------------|-----------|-----------|-----------|
## Column Total | 21 | 57 | 78 |
## | 0.269 | 0.731 | |
## -----------------------------|-----------|-----------|-----------|
##
##
onlinefood_knn<-onlinefood
onlinefood_knn
## # A tibble: 388 × 12
## Age Gender `Marital Status` Occupation `Monthly Income`
## <dbl> <chr> <chr> <chr> <chr>
## 1 20 Female Single Student No Income
## 2 24 Female Single Student Below Rs.10000
## 3 22 Male Single Student Below Rs.10000
## 4 22 Female Single Student No Income
## 5 22 Male Single Student Below Rs.10000
## 6 27 Female Married Employee More than 50000
## 7 22 Male Single Student No Income
## 8 24 Female Single Student No Income
## 9 23 Female Single Student No Income
## 10 23 Female Single Student No Income
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <chr>, `Family size` <dbl>,
## # latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <chr>,
## # Feedback <chr>
#tweak the factors for the factor columns
#Gender
str(onlinefood_knn$Gender)
## chr [1:388] "Female" "Female" "Male" "Female" "Male" "Female" "Male" ...
onlinefood_knn$Gender<-factor(onlinefood_knn$Gender, levels=c("Female","Male"), labels=c("1", "2"))
#1 for female, 2 for male
str(onlinefood_knn$Gender)
## Factor w/ 2 levels "1","2": 1 1 2 1 2 1 2 1 1 1 ...
#change from factor to numeric
onlinefood_knn$Gender<-as.numeric(onlinefood_knn$Gender)
#confirm
str(onlinefood_knn$Gender)
## num [1:388] 1 1 2 1 2 1 2 1 1 1 ...
#Marital Status
str(onlinefood_knn$`Marital Status`)
## chr [1:388] "Single" "Single" "Single" "Single" "Single" "Married" ...
onlinefood_knn$`Marital Status`<-factor(onlinefood_knn$`Marital Status`, levels=c("Married", "Prefer not to say","Single"), labels=c("1", "2","3") )
str(onlinefood_knn$`Marital Status`)
## Factor w/ 3 levels "1","2","3": 3 3 3 3 3 1 3 3 3 3 ...
#factor to numeric
onlinefood_knn$`Marital Status`<-as.numeric(onlinefood_knn$`Marital Status`)
#confirm
str(onlinefood_knn$`Marital Status`)
## num [1:388] 3 3 3 3 3 1 3 3 3 3 ...
#Occupation
str(onlinefood_knn$Occupation)
## chr [1:388] "Student" "Student" "Student" "Student" "Student" "Employee" ...
onlinefood_knn$Occupation<-factor(onlinefood_knn$Occupation, levels=c("Employee", "House wife", "Self Employeed", "Student"), labels=c("1", "2", "3", "4") )
str(onlinefood_knn$Occupation)
## Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 1 4 4 4 4 ...
#factor to numeric
onlinefood_knn$Occupation<-as.numeric(onlinefood_knn$Occupation)
#confirm
str(onlinefood_knn$Occupation)
## num [1:388] 4 4 4 4 4 1 4 4 4 4 ...
#Monthly Income
# Recode the income band as a number for KNN: character -> factor with labels
# "1".."5" -> numeric code.
# NOTE(review): the levels are listed alphabetically, so the codes do not follow
# income size ("Below Rs.10000" becomes 3 and "No Income" becomes 5). Distances
# computed on this column will not reflect the real ordering of the bands.
str(onlinefood_knn$`Monthly Income`)
## chr [1:388] "No Income" "Below Rs.10000" "Below Rs.10000" "No Income" ...
onlinefood_knn$`Monthly Income`<-factor(onlinefood_knn$`Monthly Income`, levels=c("10001 to 25000", "25001 to 50000", "Below Rs.10000", "More than 50000", "No Income"), labels=c("1", "2", "3", "4", "5") )
str(onlinefood_knn$`Monthly Income`)
## Factor w/ 5 levels "1","2","3","4",..: 5 3 3 5 3 4 5 5 5 5 ...
#factor to numeric
# as.numeric() on a factor returns the internal level codes, which here match
# the "1".."5" labels.
onlinefood_knn$`Monthly Income`<-as.numeric(onlinefood_knn$`Monthly Income`)
#confirm
str(onlinefood_knn$`Monthly Income`)
## num [1:388] 5 3 3 5 3 4 5 5 5 5 ...
#Educational Qualifications
# Recode the qualification as a number for KNN: character -> factor with labels
# "1".."5" -> numeric code.
# NOTE(review): the levels are listed alphabetically, so the codes do not follow
# level of education ("Ph.D" becomes 2, "School" becomes 4). Distances computed
# on this column will not reflect any real ordering of the qualifications.
str(onlinefood_knn$`Educational Qualifications`)
## chr [1:388] "Post Graduate" "Graduate" "Post Graduate" "Graduate" ...
onlinefood_knn$`Educational Qualifications`<-factor(onlinefood_knn$`Educational Qualifications`, levels=c("Graduate", "Ph.D", "Post Graduate", "School", "Uneducated"), labels=c("1", "2", "3", "4", "5"))
str(onlinefood_knn$`Educational Qualifications`)
## Factor w/ 5 levels "1","2","3","4",..: 3 1 3 1 3 3 1 3 3 3 ...
#factor to numeric
# as.numeric() on a factor returns the internal level codes, which here match
# the "1".."5" labels.
onlinefood_knn$`Educational Qualifications`<-as.numeric(onlinefood_knn$`Educational Qualifications`)
#confirm
str(onlinefood_knn$`Educational Qualifications`)
## num [1:388] 3 1 3 1 3 3 1 3 3 3 ...
#Output
str(onlinefood_knn$Output)
## chr [1:388] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" ...
onlinefood_knn$Output<-factor(onlinefood_knn$Output, levels=c("No", "Yes"), labels=c("1", "2"))
str(onlinefood_knn$Output)
## Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
#factor to numeric
onlinefood_knn$Output<-as.numeric(onlinefood_knn$Output)
#confirm
str(onlinefood_knn$Output)
## num [1:388] 2 2 2 2 2 2 2 2 2 2 ...
#Feedback
str(onlinefood_knn$Feedback)
## chr [1:388] "Positive" "Positive" "Negative" "Positive" "Positive" ...
onlinefood_knn$Feedback<-factor(onlinefood_knn$Feedback, levels=c("Negative", "Positive"), labels=c("Negative", "Positive"))
str(onlinefood_knn$Feedback)
## Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
Check structure of dataset
str(onlinefood_knn)
## spc_tbl_ [388 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:388] 20 24 22 22 22 27 22 24 23 23 ...
## $ Gender : num [1:388] 1 1 2 1 2 1 2 1 1 1 ...
## $ Marital Status : num [1:388] 3 3 3 3 3 1 3 3 3 3 ...
## $ Occupation : num [1:388] 4 4 4 4 4 1 4 4 4 4 ...
## $ Monthly Income : num [1:388] 5 3 3 5 3 4 5 5 5 5 ...
## $ Educational Qualifications: num [1:388] 3 1 3 1 3 3 1 3 3 3 ...
## $ Family size : num [1:388] 4 3 3 6 4 2 3 3 2 4 ...
## $ latitude : num [1:388] 13 13 13 12.9 13 ...
## $ longitude : num [1:388] 77.6 77.6 77.7 77.6 77.6 ...
## $ Pin code : num [1:388] 560001 560009 560017 560019 560010 ...
## $ Output : num [1:388] 2 2 2 2 2 2 2 2 2 2 ...
## $ Feedback : Factor w/ 2 levels "Negative","Positive": 2 2 1 2 2 2 2 2 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Gender = col_character(),
## .. `Marital Status` = col_character(),
## .. Occupation = col_character(),
## .. `Monthly Income` = col_character(),
## .. `Educational Qualifications` = col_character(),
## .. `Family size` = col_double(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. `Pin code` = col_double(),
## .. Output = col_character(),
## .. Feedback = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Min-max rescale a numeric vector to the [0, 1] interval.
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  If TRUE (default), missing values are ignored when the minimum and
#          maximum are computed and stay NA in the result. If FALSE, any NA in
#          x makes the whole result NA.
#
# Returns a numeric vector of the same length as x. A vector whose non-missing
# values are all equal is mapped to 0 instead of NaN (0 / 0).
normalize <- function(x, na.rm = TRUE) {
  # Nothing to scale: avoid min()/max() warnings on empty or all-NA input.
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # Constant column: every value sits at the minimum, so report 0.
  if (!is.na(span) && span == 0) {
    return(as.numeric(x - lo))
  }
  (x - lo) / span
}
# Before normalizing, create one dataset without the labels and one with the
# labels. First inspect the value ranges of the encoded columns.
summary(onlinefood_knn)
## Age Gender Marital Status Occupation
## Min. :18.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:23.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :24.00 Median :2.000 Median :3.000 Median :4.000
## Mean :24.63 Mean :1.572 Mean :2.412 Mean :2.902
## 3rd Qu.:26.00 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :33.00 Max. :2.000 Max. :3.000 Max. :4.000
## Monthly Income Educational Qualifications Family size latitude
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :12.87
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:2.000 1st Qu.:12.94
## Median :4.000 Median :2.00 Median :3.000 Median :12.98
## Mean :3.714 Mean :2.07 Mean :3.281 Mean :12.97
## 3rd Qu.:5.000 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:13.00
## Max. :5.000 Max. :5.00 Max. :6.000 Max. :13.10
## longitude Pin code Output Feedback
## Min. :77.48 Min. :560001 Min. :1.000 Negative: 71
## 1st Qu.:77.57 1st Qu.:560011 1st Qu.:2.000 Positive:317
## Median :77.59 Median :560034 Median :2.000
## Mean :77.60 Mean :560040 Mean :1.776
## 3rd Qu.:77.63 3rd Qu.:560068 3rd Qu.:2.000
## Max. :77.76 Max. :560109 Max. :2.000
# Build the k-NN feature set by dropping the class label (Feedback). The
# column is removed by name rather than by position, so a change in column
# order cannot silently drop the wrong column.
online_food1 <- onlinefood_knn[setdiff(names(onlinefood_knn), "Feedback")]
online_food1 # dataset without the dependent variable
## # A tibble: 388 × 11
## Age Gender `Marital Status` Occupation `Monthly Income`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20 1 3 4 5
## 2 24 1 3 4 3
## 3 22 2 3 4 3
## 4 22 1 3 4 5
## 5 22 2 3 4 3
## 6 27 1 1 1 4
## 7 22 2 3 4 5
## 8 24 1 3 4 5
## 9 23 1 3 4 5
## 10 23 1 3 4 5
## # ℹ 378 more rows
## # ℹ 6 more variables: `Educational Qualifications` <dbl>, `Family size` <dbl>,
## # latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <dbl>
# Summarise the feature columns before they are rescaled.
summary(online_food1)
## Age Gender Marital Status Occupation
## Min. :18.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:23.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :24.00 Median :2.000 Median :3.000 Median :4.000
## Mean :24.63 Mean :1.572 Mean :2.412 Mean :2.902
## 3rd Qu.:26.00 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :33.00 Max. :2.000 Max. :3.000 Max. :4.000
## Monthly Income Educational Qualifications Family size latitude
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :12.87
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:2.000 1st Qu.:12.94
## Median :4.000 Median :2.00 Median :3.000 Median :12.98
## Mean :3.714 Mean :2.07 Mean :3.281 Mean :12.97
## 3rd Qu.:5.000 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:13.00
## Max. :5.000 Max. :5.00 Max. :6.000 Max. :13.10
## longitude Pin code Output
## Min. :77.48 Min. :560001 Min. :1.000
## 1st Qu.:77.57 1st Qu.:560011 1st Qu.:2.000
## Median :77.59 Median :560034 Median :2.000
## Mean :77.60 Mean :560040 Mean :1.776
## 3rd Qu.:77.63 3rd Qu.:560068 3rd Qu.:2.000
## Max. :77.76 Max. :560109 Max. :2.000
Normalize (min-max scale to [0, 1]) the dataset without labels
# Apply normalize() to every feature column and collect the results in a plain
# data.frame (column names become syntactic, e.g. Marital.Status).
online_food1 <- as.data.frame(
  lapply(X = online_food1, FUN = normalize)
)
summary(online_food1)
## Age Gender Marital.Status Occupation
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.3333 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.4000 Median :1.0000 Median :1.0000 Median :1.000
## Mean :0.4419 Mean :0.5722 Mean :0.7062 Mean :0.634
## 3rd Qu.:0.5333 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## Monthly.Income Educational.Qualifications Family.size latitude
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2500 1st Qu.:0.0000 1st Qu.:0.2000 1st Qu.:0.3028
## Median :0.7500 Median :0.2500 Median :0.4000 Median :0.4721
## Mean :0.6785 Mean :0.2674 Mean :0.4562 Mean :0.4513
## 3rd Qu.:1.0000 3rd Qu.:0.5000 3rd Qu.:0.6000 3rd Qu.:0.5567
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## longitude Pin.code Output
## Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.2959 1st Qu.:0.09028 1st Qu.:1.0000
## Median :0.3938 Median :0.30093 Median :1.0000
## Mean :0.4232 Mean :0.36216 Mean :0.7758
## 3rd Qu.:0.5354 3rd Qu.:0.62037 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000
Labelled Dataset
# Copy of online_food (defined earlier in the file) from which the Feedback
# class labels are taken.
onlinefood_labels <- online_food
onlinefood_labels
## # A tibble: 388 × 12
## Age Gender `Marital Status` Occupation `Monthly Income`
## <dbl> <fct> <fct> <fct> <fct>
## 1 20 Female Single Student No Income
## 2 24 Female Single Student Below Rs.10000
## 3 22 Male Single Student Below Rs.10000
## 4 22 Female Single Student No Income
## 5 22 Male Single Student Below Rs.10000
## 6 27 Female Married Employee More than 50000
## 7 22 Male Single Student No Income
## 8 24 Female Single Student No Income
## 9 23 Female Single Student No Income
## 10 23 Female Single Student No Income
## # ℹ 378 more rows
## # ℹ 7 more variables: `Educational Qualifications` <fct>, `Family size` <dbl>,
## # latitude <dbl>, longitude <dbl>, `Pin code` <dbl>, Output <fct>,
## # Feedback <fct>
# Sequential hold-out split (rows are not shuffled): the first 310 rows are
# used for training and rows 311-388 for testing.
onlinefood_train <- online_food1[seq_len(310), ]
onlinefood_test <- online_food1[seq(from = 311, to = 388), ]
# Matching Feedback labels, kept as one-column tibbles.
onlinefood_train_labels <- onlinefood_labels[seq_len(310), "Feedback"]
onlinefood_test_labels <- onlinefood_labels[seq(from = 311, to = 388), "Feedback"]
library(class)
# Fit a k-NN classifier for every k from 2 to 18 and keep the predicted
# Feedback class of each test row in a list keyed "k=<k>".
# knn() breaks ties at random, so the seed is fixed to make the predictions
# (and the accuracies derived from them) reproducible between runs.
set.seed(42)
predictions <- list()
for (k in 2:18) {
  predictions[[paste0("k=", k)]] <- knn(
    train = onlinefood_train,
    test = onlinefood_test,
    cl = onlinefood_train_labels$Feedback,
    k = k
  )
}
predictions
## $`k=2`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Positive Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Positive
## [25] Positive Positive Negative Negative Positive Positive Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Positive
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Positive Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
##
## $`k=3`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
##
## $`k=4`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Positive
## [41] Positive Positive Negative Negative Positive Positive Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Negative Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=5`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=6`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
##
## $`k=7`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Negative Positive
## Levels: Negative Positive
##
## $`k=8`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=9`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=10`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=11`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=12`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=13`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=14`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=15`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Negative Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=16`
## [1] Positive Positive Positive Positive Positive Positive Positive Positive
## [9] Positive Negative Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Negative Positive Positive
## [33] Positive Positive Negative Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Negative Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=17`
## [1] Positive Positive Positive Positive Positive Negative Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Positive Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Negative Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
##
## $`k=18`
## [1] Positive Positive Positive Positive Positive Positive Positive Positive
## [9] Positive Positive Positive Positive Negative Positive Positive Positive
## [17] Positive Positive Positive Positive Positive Positive Positive Negative
## [25] Positive Positive Negative Negative Positive Positive Positive Positive
## [33] Positive Positive Positive Positive Positive Positive Positive Negative
## [41] Positive Positive Negative Negative Positive Positive Positive Negative
## [49] Positive Positive Positive Positive Positive Positive Negative Positive
## [57] Positive Positive Positive Positive Positive Positive Positive Negative
## [65] Positive Positive Positive Negative Positive Positive Positive Positive
## [73] Positive Positive Positive Positive Positive Positive
## Levels: Negative Positive
library(dplyr)
# Test-set accuracy for each k: the share of predictions equal to the true
# Feedback label. vapply() guarantees exactly one numeric value per k, whereas
# sapply() can silently change its return shape.
accuracies <- vapply(
  predictions,
  function(pred, true_labels) mean(pred == true_labels),
  FUN.VALUE = numeric(1),
  true_labels = onlinefood_test_labels$Feedback
)
# Derive k from the list names ("k=2", "k=3", ...) so that the k column always
# lines up with the accuracies, whatever range of k was fitted.
accuracy_data <- data.frame(
  k = as.integer(sub("k=", "", names(accuracies), fixed = TRUE)),
  Accuracy = accuracies
)
library(ggplot2)
# Line-and-point chart of test accuracy against the number of neighbours.
ggplot(data = accuracy_data, mapping = aes(x = k, y = Accuracy)) +
  geom_line() +  # trend across k
  geom_point() +  # one marker per value of k
  theme_minimal() +
  labs(
    title = "Accuracy vs. k in k-NN",
    x = "k (Number of Neighbors)",
    y = "Accuracy"
  )
library(gmodels)
# Cross-tabulate the true test labels (rows) against the k = 9 predictions
# (columns).
CrossTable(x = onlinefood_test_labels$Feedback, y=predictions$`k=9`, prop.chisq=FALSE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 78
##
##
## | predictions$`k=9`
## onlinefood_test_labels$Feedback | Negative | Positive | Row Total |
## --------------------------------|-----------|-----------|-----------|
## Negative | 14 | 2 | 16 |
## | 0.875 | 0.125 | 0.205 |
## | 0.778 | 0.033 | |
## | 0.179 | 0.026 | |
## --------------------------------|-----------|-----------|-----------|
## Positive | 4 | 58 | 62 |
## | 0.065 | 0.935 | 0.795 |
## | 0.222 | 0.967 | |
## | 0.051 | 0.744 | |
## --------------------------------|-----------|-----------|-----------|
## Column Total | 18 | 60 | 78 |
## | 0.231 | 0.769 | |
## --------------------------------|-----------|-----------|-----------|
##
##
# Confusion matrix for k = 9: predictions in rows, true labels in columns.
table(predictions$`k=9`, onlinefood_test_labels$Feedback)
##
## Negative Positive
## Negative 14 4
## Positive 2 58
# Logical vector: TRUE where the k = 9 prediction equals the true test label.
o_food_knn <- predictions[["k=9"]] == onlinefood_test_labels[["Feedback"]]
o_food_knn
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE
# Count incorrect (FALSE) and correct (TRUE) k = 9 predictions.
table(o_food_knn)
## o_food_knn
## FALSE TRUE
## 6 72
suppressPackageStartupMessages(library(rattle))
suppressPackageStartupMessages(library(rpart.plot))
library(RColorBrewer)
# Decision tree: work on a copy of online_food (defined earlier in the file).
onlinefood_DT <- online_food
# Random 80/20 train/test split; the seed makes the sample reproducible.
# seq_len() is safe for any row count and floor() makes the truncation of the
# fractional sample size explicit.
set.seed(42)
training_indices_DT <- sample(
  seq_len(nrow(onlinefood_DT)),
  floor(0.8 * nrow(onlinefood_DT))
)
traindata_DT <- onlinefood_DT[training_indices_DT, ]
testdata_DT <- onlinefood_DT[-training_indices_DT, ]
# Classification tree predicting Feedback from all other columns.
tree_model_train <- rpart(Feedback ~ ., data = traindata_DT, method = "class")
tree_model_train
## n= 310
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 310 54 Positive (0.17419355 0.82580645)
## 2) Output=No 66 26 Negative (0.60606061 0.39393939)
## 4) Pin code>=560066.5 16 2 Negative (0.87500000 0.12500000) *
## 5) Pin code< 560066.5 50 24 Negative (0.52000000 0.48000000)
## 10) Pin code< 560059.5 43 17 Negative (0.60465116 0.39534884)
## 20) Occupation=Employee,Student 33 10 Negative (0.69696970 0.30303030) *
## 21) Occupation=House wife,Self Employeed 10 3 Positive (0.30000000 0.70000000) *
## 11) Pin code>=560059.5 7 0 Positive (0.00000000 1.00000000) *
## 3) Output=Yes 244 14 Positive (0.05737705 0.94262295) *
# Draw the fitted tree.
fancyRpartPlot(tree_model_train)
# Predict the Feedback class for every held-out row.
predictions_DT <- predict(
  object = tree_model_train,
  newdata = testdata_DT,
  type = "class"
)
predictions_DT
## 1 2 3 4 5 6 7 8
## Positive Positive Positive Positive Positive Positive Positive Positive
## 9 10 11 12 13 14 15 16
## Positive Positive Positive Positive Positive Positive Positive Positive
## 17 18 19 20 21 22 23 24
## Positive Positive Positive Positive Negative Negative Positive Positive
## 25 26 27 28 29 30 31 32
## Positive Positive Positive Positive Positive Negative Negative Positive
## 33 34 35 36 37 38 39 40
## Positive Positive Positive Positive Positive Positive Positive Positive
## 41 42 43 44 45 46 47 48
## Negative Positive Negative Positive Negative Negative Positive Positive
## 49 50 51 52 53 54 55 56
## Positive Positive Positive Positive Positive Positive Positive Positive
## 57 58 59 60 61 62 63 64
## Negative Positive Negative Positive Positive Negative Negative Negative
## 65 66 67 68 69 70 71 72
## Positive Positive Positive Negative Positive Positive Positive Negative
## 73 74 75 76 77 78
## Positive Positive Positive Negative Negative Positive
## Levels: Negative Positive
# Count how many test rows the tree assigns to each Feedback class.
table(predictions_DT)
## predictions_DT
## Negative Positive
## 17 61
library(gmodels)
# Cross-tabulate the true test labels (rows) against the tree's predictions
# (columns).
CrossTable(x=testdata_DT$Feedback, y=predictions_DT, prop.chisq = FALSE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 78
##
##
## | predictions_DT
## testdata_DT$Feedback | Negative | Positive | Row Total |
## ---------------------|-----------|-----------|-----------|
## Negative | 11 | 6 | 17 |
## | 0.647 | 0.353 | 0.218 |
## | 0.647 | 0.098 | |
## | 0.141 | 0.077 | |
## ---------------------|-----------|-----------|-----------|
## Positive | 6 | 55 | 61 |
## | 0.098 | 0.902 | 0.782 |
## | 0.353 | 0.902 | |
## | 0.077 | 0.705 | |
## ---------------------|-----------|-----------|-----------|
## Column Total | 17 | 61 | 78 |
## | 0.218 | 0.782 | |
## ---------------------|-----------|-----------|-----------|
##
##
# Logical vector: TRUE where the tree's prediction equals the true test label.
onlinefood_decisontree <- predictions_DT == testdata_DT[["Feedback"]]
onlinefood_decisontree
## [1] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [25] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE FALSE TRUE TRUE
# Count incorrect (FALSE) and correct (TRUE) decision-tree predictions.
table(onlinefood_decisontree)
## onlinefood_decisontree
## FALSE TRUE
## 12 66
# Same counts as proportions; the TRUE share is the decision-tree accuracy.
prop.table(table(onlinefood_decisontree))
## onlinefood_decisontree
## FALSE TRUE
## 0.1538462 0.8461538
# Accuracy of each model: the mean of its logical correctness vector, i.e. the
# share of TRUE entries. o_food is defined earlier in the file; presumably it
# is the SVM correctness vector -- confirm against that section.
dt_accuracy <- mean(onlinefood_decisontree)
knn_accuracy <- mean(o_food_knn)
svm_accuracy <- mean(o_food)
# One row per model for a side-by-side comparison.
comparison <- data.frame(
  Model = c("SVM", "KNN", "Decision Tree"),
  Accuracy = c(svm_accuracy, knn_accuracy, dt_accuracy)
)
comparison
## Model Accuracy
## 1 SVM 0.8461538
## 2 KNN 0.9230769
## 3 Decision Tree 0.8461538
library(ggplot2)
# Bar chart with one bar per model; bar height is the accuracy value itself.
ggplot(
  data = comparison,
  mapping = aes(x = Model, y = Accuracy, fill = Model)
) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_minimal() +
  labs(
    title = "Comparison of Model Accuracies",
    x = "Model",
    y = "Accuracy"
  )