This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(reshape2)
library(class)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
library(naivebayes)
## naivebayes 0.9.6 loaded
##
## Attaching package: 'naivebayes'
## The following object is masked from 'package:data.table':
##
## tables
getwd()
## [1] "C:/Users/prasnaya/Desktop/Personal/python/Data_Science/DS With R-Saharan/Projects"
data<-read.csv("locations_data.csv")
str(data)
## 'data.frame': 2184 obs. of 7 variables:
## $ month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int 4 4 4 4 4 4 4 4 4 4 ...
## $ weekday : Factor w/ 7 levels "friday","monday",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ daytype : Factor w/ 2 levels "weekday","weekend": 1 1 1 1 1 1 1 1 1 1 ...
## $ hour : int 0 1 2 3 4 5 6 7 8 9 ...
## $ hourtype: Factor w/ 4 levels "afternoon","evening",..: 4 4 4 4 4 4 3 3 3 3 ...
## $ location: Factor w/ 7 levels "appointment",..: 3 3 3 3 3 3 3 3 3 4 ...
set.seed(1000)
rand_run<- runif(nrow(data))
data<-data[order(rand_run),]
head(data)
scatter <- ggplot(data=subset(data,daytype=="weekend"), aes(x = weekday, y = hourtype))
scatter + geom_point(aes(color=location, shape=daytype)) +
xlab("days") + ylab("hourtype") +
ggtitle("location")
scatter <- ggplot(data=subset(data,daytype=="weekday"), aes(x = weekday, y = hourtype))
scatter + geom_point(aes(color=location, shape=daytype)) +
xlab("days") + ylab("hourtype") +
ggtitle("location")
data_barchat <- data %>%
count(daytype,hourtype,location) %>%
rename(avg_count = n)
bar1 <- ggplot(data=data_barchat, aes(x=location, y=avg_count, fill=hourtype))
bar1 + geom_bar(stat="identity", position="dodge") +
scale_fill_manual(values=c("orange", "blue", "darkgreen", "purple"),
name="Measurements"
)
bar1 <- ggplot(data=data_barchat, aes(x=location, y=avg_count, fill=daytype))
bar1 + geom_bar(stat="identity", position="dodge") +
scale_fill_manual(values=c("orange", "blue", "darkgreen", "purple"),
name="Measurements"
)
ggtitle("location")
## $title
## [1] "location"
##
## attr(,"class")
## [1] "labels"
nrow(data)
## [1] 2184
training_count <-nrow(data)*.75
training_count
## [1] 1638
training_data<-data[1:training_count,]
str(training_data)
## 'data.frame': 1638 obs. of 7 variables:
## $ month : int 2 1 3 3 2 3 1 3 3 3 ...
## $ day : int 22 24 24 11 15 22 30 24 6 24 ...
## $ weekday : Factor w/ 7 levels "friday","monday",..: 7 6 1 3 7 7 2 1 2 1 ...
## $ daytype : Factor w/ 2 levels "weekday","weekend": 1 1 1 2 1 1 1 1 1 1 ...
## $ hour : int 14 15 6 12 21 4 0 8 11 20 ...
## $ hourtype: Factor w/ 4 levels "afternoon","evening",..: 1 1 3 1 2 4 4 3 3 2 ...
## $ location: Factor w/ 7 levels "appointment",..: 4 4 3 3 3 3 3 2 4 3 ...
testing_data_label<-data[(training_count+1):2184,6]
testing_data_label1<-data[2174:2184,6]
testing_data<-data[(training_count+1):2184,1:4]
testing_data1<-data[2174:2184,1:4]
str(testing_data)
## 'data.frame': 546 obs. of 4 variables:
## $ month : int 2 3 2 2 2 3 1 2 3 1 ...
## $ day : int 25 8 11 4 9 4 28 22 1 26 ...
## $ weekday: Factor w/ 7 levels "friday","monday",..: 3 7 3 3 5 3 3 7 7 5 ...
## $ daytype: Factor w/ 2 levels "weekday","weekend": 2 1 2 2 1 2 2 1 1 1 ...
location_modell <- naive_bayes(location~daytype, data = training_data)
## Warning: naive_bayes(): Feature daytype - zero probabilities are present.
## Consider Laplace smoothing.
location_modell
##
## ================================ Naive Bayes =================================
##
## Call:
## naive_bayes.formula(formula = location ~ daytype, data = training_data)
##
## ------------------------------------------------------------------------------
##
## Laplace smoothing: 0
##
## ------------------------------------------------------------------------------
##
## A priori probabilities:
##
## appointment campus home office restaurant store
## 0.003052503 0.031135531 0.727716728 0.176434676 0.039682540 0.018925519
## theater
## 0.003052503
##
## ------------------------------------------------------------------------------
##
## Tables:
##
## ------------------------------------------------------------------------------
## ::: daytype (Bernoulli)
## ------------------------------------------------------------------------------
##
## daytype appointment campus home office restaurant store
## weekday 0.2000000 1.0000000 0.6224832 1.0000000 0.8000000 0.7096774
## weekend 0.8000000 0.0000000 0.3775168 0.0000000 0.2000000 0.2903226
##
## daytype theater
## weekday 0.0000000
## weekend 1.0000000
##
## ------------------------------------------------------------------------------
location_model2 <- naive_bayes(location~daytype + hourtype, data = training_data)
## Warning: naive_bayes(): Feature daytype - zero probabilities are present.
## Consider Laplace smoothing.
## Warning: naive_bayes(): Feature hourtype - zero probabilities are present.
## Consider Laplace smoothing.
location_model2
##
## ================================ Naive Bayes =================================
##
## Call:
## naive_bayes.formula(formula = location ~ daytype + hourtype,
## data = training_data)
##
## ------------------------------------------------------------------------------
##
## Laplace smoothing: 0
##
## ------------------------------------------------------------------------------
##
## A priori probabilities:
##
## appointment campus home office restaurant store
## 0.003052503 0.031135531 0.727716728 0.176434676 0.039682540 0.018925519
## theater
## 0.003052503
##
## ------------------------------------------------------------------------------
##
## Tables:
##
## ------------------------------------------------------------------------------
## ::: daytype (Bernoulli)
## ------------------------------------------------------------------------------
##
## daytype appointment campus home office restaurant store
## weekday 0.2000000 1.0000000 0.6224832 1.0000000 0.8000000 0.7096774
## weekend 0.8000000 0.0000000 0.3775168 0.0000000 0.2000000 0.2903226
##
## daytype theater
## weekday 0.0000000
## weekend 1.0000000
##
## ------------------------------------------------------------------------------
## ::: hourtype (Categorical)
## ------------------------------------------------------------------------------
##
## hourtype appointment campus home office restaurant
## afternoon 0.80000000 0.52941176 0.11409396 0.67474048 0.52307692
## evening 0.20000000 0.00000000 0.19463087 0.03806228 0.15384615
## morning 0.00000000 0.47058824 0.22651007 0.28719723 0.30769231
## night 0.00000000 0.00000000 0.46476510 0.00000000 0.01538462
##
## hourtype store theater
## afternoon 0.06451613 0.00000000
## evening 0.90322581 1.00000000
## morning 0.00000000 0.00000000
## night 0.03225806 0.00000000
##
## ------------------------------------------------------------------------------
location_model3 <- naive_bayes(location~daytype + hourtype, data = training_data,laplace = 1)
location_model3
##
## ================================ Naive Bayes =================================
##
## Call:
## naive_bayes.formula(formula = location ~ daytype + hourtype,
## data = training_data, laplace = 1)
##
## ------------------------------------------------------------------------------
##
## Laplace smoothing: 1
##
## ------------------------------------------------------------------------------
##
## A priori probabilities:
##
## appointment campus home office restaurant store
## 0.003052503 0.031135531 0.727716728 0.176434676 0.039682540 0.018925519
## theater
## 0.003052503
##
## ------------------------------------------------------------------------------
##
## Tables:
##
## ------------------------------------------------------------------------------
## ::: daytype (Bernoulli)
## ------------------------------------------------------------------------------
##
## daytype appointment campus home office restaurant
## weekday 0.285714286 0.981132075 0.622278057 0.996563574 0.791044776
## weekend 0.714285714 0.018867925 0.377721943 0.003436426 0.208955224
##
## daytype store theater
## weekday 0.696969697 0.142857143
## weekend 0.303030303 0.857142857
##
## ------------------------------------------------------------------------------
## ::: hourtype (Categorical)
## ------------------------------------------------------------------------------
##
## hourtype appointment campus home office restaurant
## afternoon 0.555555556 0.509090909 0.114548495 0.668941980 0.507246377
## evening 0.222222222 0.018181818 0.194816054 0.040955631 0.159420290
## morning 0.111111111 0.454545455 0.226588629 0.286689420 0.304347826
## night 0.111111111 0.018181818 0.464046823 0.003412969 0.028985507
##
## hourtype store theater
## afternoon 0.085714286 0.111111111
## evening 0.828571429 0.666666667
## morning 0.028571429 0.111111111
## night 0.057142857 0.111111111
##
## ------------------------------------------------------------------------------
location_model3
##
## ================================ Naive Bayes =================================
##
## Call:
## naive_bayes.formula(formula = location ~ daytype + hourtype,
## data = training_data, laplace = 1)
##
## ------------------------------------------------------------------------------
##
## Laplace smoothing: 1
##
## ------------------------------------------------------------------------------
##
## A priori probabilities:
##
## appointment campus home office restaurant store
## 0.003052503 0.031135531 0.727716728 0.176434676 0.039682540 0.018925519
## theater
## 0.003052503
##
## ------------------------------------------------------------------------------
##
## Tables:
##
## ------------------------------------------------------------------------------
## ::: daytype (Bernoulli)
## ------------------------------------------------------------------------------
##
## daytype appointment campus home office restaurant
## weekday 0.285714286 0.981132075 0.622278057 0.996563574 0.791044776
## weekend 0.714285714 0.018867925 0.377721943 0.003436426 0.208955224
##
## daytype store theater
## weekday 0.696969697 0.142857143
## weekend 0.303030303 0.857142857
##
## ------------------------------------------------------------------------------
## ::: hourtype (Categorical)
## ------------------------------------------------------------------------------
##
## hourtype appointment campus home office restaurant
## afternoon 0.555555556 0.509090909 0.114548495 0.668941980 0.507246377
## evening 0.222222222 0.018181818 0.194816054 0.040955631 0.159420290
## morning 0.111111111 0.454545455 0.226588629 0.286689420 0.304347826
## night 0.111111111 0.018181818 0.464046823 0.003412969 0.028985507
##
## hourtype store theater
## afternoon 0.085714286 0.111111111
## evening 0.828571429 0.666666667
## morning 0.028571429 0.111111111
## night 0.057142857 0.111111111
##
## ------------------------------------------------------------------------------
test_result1<-predict(location_model3,testing_data)
## Warning: predict.naive_bayes(): Only 1 feature(s) out of 2 defined in the naive_bayes object "location_model3" are used for prediction.
## Warning: predict.naive_bayes(): More features in the newdata are provided
## as there are probability tables in the object. Calculation is performed
## based on features to be found in the tables.
test_result2<-predict(location_model2,testing_data)
## Warning: predict.naive_bayes(): Only 1 feature(s) out of 2 defined in the naive_bayes object "location_model2" are used for prediction.
## Warning: predict.naive_bayes(): More features in the newdata are provided
## as there are probability tables in the object. Calculation is performed
## based on features to be found in the tables.
testing_data_label1
## [1] afternoon night night evening morning night evening
## [8] evening morning afternoon evening
## Levels: afternoon evening morning night
test_result3<-predict(location_modell,testing_data1)
## Warning: predict.naive_bayes(): More features in the newdata are provided
## as there are probability tables in the object. Calculation is performed
## based on features to be found in the tables.
test_result3
## [1] home home home home home home home home home home home
## Levels: appointment campus home office restaurant store theater
testing_data_label1
## [1] afternoon night night evening morning night evening
## [8] evening morning afternoon evening
## Levels: afternoon evening morning night
library(gmodels)
CrossTable(test_result3, testing_data_label1,
prop.chisq = FALSE, prop.t = FALSE, prop.r = FALSE,
dnn = c('predicted', 'actual'))
##
##
## Cell Contents
## |-------------------------|
## | N |
## |-------------------------|
##
##
## Total Observations in Table: 11
##
##
## | testing_data_label1
## test_result3 | afternoon | evening | morning | night | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## home | 2 | 4 | 2 | 3 | 11 |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 2 | 4 | 2 | 3 | 11 |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##