library(ggplot2) # Data visualization
## Warning: package 'ggplot2' was built under R version 3.5.1
library(readr) # CSV file I/O, e.g. the read_csv function
library("e1071") # SVM library 
## Warning: package 'e1071' was built under R version 3.5.1
# Importing training and testing data from fixed absolute paths
traindata <- read.csv("D:\\Projects\\Level2\\HumanResource\\UCI HAR Dataset\\NewData\\train.csv", header = TRUE)

testdata <- read.csv("D:\\Projects\\Level2\\HumanResource\\UCI HAR Dataset\\NewData\\test.csv", header = TRUE)
# Remember how many rows came from the training file so the split further
# down does not depend on a hard-coded row count.
n_train <- nrow(traindata)
# Combining the training and testing dataset


data <- rbind(traindata, testdata)
# Applying naming transformation to entire dataset, so both halves end up
# with identical, syntactically valid, unique column names
nameVec <- make.names(names(data), unique = TRUE)
names(data) <- nameVec
# Splitting data back into training and testing dataset:
# the first n_train rows are the training file, the remainder the test file
is_train <- seq_len(nrow(data)) <= n_train
traindata <- data[is_train, ]
testdata <- data[!is_train, ]
# Generating dimensions of entire dataset
dim(data)
## [1] 10299   563
# Applying Principal Component Analysis to reduce the number of dimensions.
# The PCA is fitted on the training rows only.
# The label column is dropped by name instead of by position (563) so the
# call does not silently pick the wrong column if the layout changes.
# NOTE(review): every remaining column is fed to the PCA; if the data carries
# an identifier column (e.g. a subject id) it is included too -- confirm that
# is intended.
feature_cols <- setdiff(names(traindata), "Activity")
# `scale.` is spelled out in full; `scale = TRUE` only worked through partial
# argument matching.
pc <- prcomp(traindata[, feature_cols, drop = FALSE], center = TRUE, scale. = TRUE)
# Variance of each component and its share of the total variance
pc.var <- pc$sdev^2
pc.pvar <- pc.var / sum(pc.var)
# Plotting cumulative proportions of principal components to decide the
# number of components to be taken into consideration
plot(
  cumsum(pc.pvar),
  xlab = "Principal component",
  ylab = "Cumulative Proportion of variance explained",
  type = "b",
  main = "Principal Components proportions",
  col = "red"
)
# Reference lines: 95% cumulative variance and the 100th component
abline(h = 0.95)
abline(v = 100)

# Per the cumulative-variance plot, the first 100 principal components explain
# about 95% of the variance in the dataset.
# Selecting first 100 principal components.
# The scores are sliced BEFORE the label is bound on: slicing the combined
# frame with [, 1:100] kept the label plus only 99 components, one fewer than
# the 100 used for the test data.
# NOTE: any recorded output further down this file was produced with the
# 99-component model and may differ slightly after this fix.
n_components <- 100
train.data <- data.frame(
  # svm() only performs classification when the response is a factor; since
  # R 4.0 read.csv() returns character columns, so coerce explicitly.
  activity = as.factor(traindata$Activity),
  pc$x[, seq_len(n_components), drop = FALSE]
)
# Training our model with Support Vector Machine Algorithm

svm_model <- svm(activity ~ ., data = train.data)
# Project the test rows onto the principal components fitted on the training
# data, then keep the scores of the first 100 components as a data frame.
projected <- predict(pc, newdata = testdata)
test.data <- as.data.frame(projected[, 1:100, drop = FALSE])
# Predict the activity of every test row with the trained SVM model
result <- predict(svm_model, test.data, type = "class")
# Confusion matrix: actual activity (rows) against predicted activity (columns)
# The true labels are attached to the projected test frame first.
test.data[["Activity"]] <- testdata[["Activity"]]
references <- test.data[["Activity"]]
# NOTE: `t` shadows base::t(); the name is kept because the accuracy
# computation below reads it.
t <- table(references, result)
t
##                     result
## references           LAYING SITTING STANDING WALKING WALKING_DOWNSTAIRS
##   LAYING                534       0        0       0                  3
##   SITTING                 4     419       63       0                  4
##   STANDING                0      29      500       1                  2
##   WALKING                 0       0        0     477                 19
##   WALKING_DOWNSTAIRS      0       0        0       3                409
##   WALKING_UPSTAIRS        0       0        0      21                 23
##                     result
## references           WALKING_UPSTAIRS
##   LAYING                            0
##   SITTING                           1
##   STANDING                          0
##   WALKING                           0
##   WALKING_DOWNSTAIRS                8
##   WALKING_UPSTAIRS                427
# Calculating accuracy from the confusion matrix `t`
# Correct predictions sit on the diagonal, so their sum divided by the total
# count is the accuracy. diag() avoids hard-coding the number of classes.
# Assumes rows and columns of `t` list the classes in the same order.
Accuracy <- sum(diag(t)) / sum(t)
# Expressed as a percentage
AccuracyRate <- Accuracy * 100
# Printed as a labelled pair; c() coerces the number to a character string
c("Accuracy", AccuracyRate)
## [1] "Accuracy"         "93.8581608415338"