BB Week 10
covid<- read.csv("covid-19-survey-responses-sample.csv", header = TRUE)
covid<-subset(covid,select=(names(covid[12:ncol(covid)-2])))
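A note on the column selection above: the colon operator binds more tightly than subtraction, so 12:ncol(covid)-2 evaluates as (12:ncol(covid)) - 2, shifting the whole index range down by two rather than dropping the last two columns. A minimal sketch of the difference, assuming a 22-column file purely for illustration:
# Precedence check (the 22-column width is an assumption for illustration)
12:22 - 2    # evaluates as (12:22) - 2, giving 10 11 ... 20, the indices kept above
12:(22 - 2)  # gives 12 13 ... 20, an explicit "columns 12 through ncol - 2" selection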
intrain<- createDataPartition(y = covid$q03_symptoms, p=.7, list=FALSE)
training <- covid[intrain,]
test<- covid[-intrain, ]
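createDataPartition draws a random stratified split, so the rows landing in training and test change every time the document is knitted unless a seed is fixed first. A minimal sketch, assuming reproducibility is wanted (the seed value 123 is arbitrary):
set.seed(123)  # arbitrary constant so the same split is drawn on every knit
intrain  <- createDataPartition(y = covid$q03_symptoms, p = .7, list = FALSE)
training <- covid[intrain, ]
test     <- covid[-intrain, ]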
dim(training)
## [1] 15 11
dim(test)
## [1] 5 11
anyNA(covid)
## [1] FALSE
glimpse(covid)
## Observations: 20
## Variables: 11
## $ q01_exposed <fct> f, f, f, f, f, f, f, f, f, f, f, f, f, f,...
## $ q02_quarantine <fct> t, t, t, t, t, f, t, f, t, f, t, f, t, t,...
## $ q03_symptoms <fct> f, f, f, f, f, f, f, f, f, f, f, f, f, t,...
## $ q04_tested <fct> no_i_never_needed_it, no_i_never_needed_i...
## $ q05_denied_testing <fct> , , , , , , , , , , , , , , , i_had_no_kn...
## $ q06_family_positive <fct> no, no, no, no, no, no, no, no, no, no, n...
## $ q07_keep_med_appt <fct> no_appointments_were_cancelled, not_appli...
## $ q08_public_transportation <fct> no_never_used_it, no_never_used_it, no_ne...
## $ q09_perfer_med_appts <fct> at_home_phone_call_video_chat, at_home_ph...
## $ q10_prefer_med_treatments <fct> at_home, at_a_hospital, at_home, at_a_hos...
## $ q11_prefer_labs <fct> at_home, at_a_hospital, at_home, at_a_hos...
trctrl<- trainControl(method="repeatedcv", number=10,repeats=3)
svm_lin<- caret::train(q03_symptoms~., data=training, method="svmLinear",
trControl=trctrl, preProcess=c("center","scale"),
tuneLength =10)
svm_lin
## Support Vector Machines with Linear Kernel
##
## 15 samples
## 10 predictors
## 2 classes: 'f', 't'
##
## Pre-processing: centered (26), scaled (26)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 13, 14, 14, 12, 14, 14, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8666667 0.5
##
## Tuning parameter 'C' was held constant at a value of 1
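The resampling summary above (sample sizes of 12 to 14) shows that with only 15 training rows each 10-fold split holds out just one or two observations, so the accuracy and kappa estimates are very noisy. One possible alternative, sketched here with a hypothetical svm_lin_loo object, is leave-one-out cross-validation:
# Hypothetical alternative resampling scheme for a 15-row training set
trctrl_loo  <- trainControl(method = "LOOCV")
svm_lin_loo <- caret::train(q03_symptoms ~ ., data = training, method = "svmLinear",
                            trControl = trctrl_loo, preProcess = c("center", "scale"))
svm_lin_loo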
test_pred<- predict(svm_lin, newdata=test)
test_pred
## [1] f f f f f
## Levels: f t
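The fitted model predicts the majority class 'f' for every test row, which mirrors the class imbalance in the survey (very few 't' responses to q03_symptoms). One way to probe this, sketched with e1071, a hypothetical svm_wt object, and an arbitrary weight of 4, is to up-weight the rare class:
# Hypothetical: penalise errors on the rare 't' class more heavily (weight 4 is arbitrary)
svm_wt <- e1071::svm(q03_symptoms ~ ., data = training, kernel = "linear",
                     class.weights = c(f = 1, t = 4))
table(predict(svm_wt, newdata = test), test$q03_symptoms)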
confusionMatrix(table(test_pred,test$q03_symptoms))
## Confusion Matrix and Statistics
##
##
## test_pred f t
##         f 4 1
##         t 0 0
##
## Accuracy : 0.8
## 95% CI : (0.2836, 0.9949)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.7373
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 1.0
## Specificity : 0.0
## Pos Pred Value : 0.8
## Neg Pred Value : NaN
## Prevalence : 0.8
## Detection Rate : 0.8
## Detection Prevalence : 1.0
## Balanced Accuracy : 0.5
##
## 'Positive' Class : f
##
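confusionMatrix takes the first factor level, 'f', as the positive class, which is why sensitivity is 1.0 and specificity 0.0 even though the model never predicts a 't'. If having symptoms is the outcome of interest, the positive class can be set explicitly; a minimal sketch:
# Report sensitivity/specificity with 't' (symptomatic) as the positive class
confusionMatrix(table(test_pred, test$q03_symptoms), positive = "t")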
set.seed(1)
tune.out<-tune(svm,q03_symptoms~.,data=covid, kernel="linear",
ranges=list(cost=c(.00001,.0001, .001, .01, 1,10),
gamma=c(1,2,3,4,5))) #get best model with tune
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 1
##
## - best performance: 0.15
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-05 1 0.20 0.2581989
## 2 1e-04 1 0.20 0.2581989
## 3 1e-03 1 0.20 0.2581989
## 4 1e-02 1 0.20 0.2581989
## 5 1e+00 1 0.15 0.2415229
## 6 1e+01 1 0.15 0.2415229
## 7 1e-05 2 0.20 0.2581989
## 8 1e-04 2 0.20 0.2581989
## 9 1e-03 2 0.20 0.2581989
## 10 1e-02 2 0.20 0.2581989
## 11 1e+00 2 0.15 0.2415229
## 12 1e+01 2 0.15 0.2415229
## 13 1e-05 3 0.20 0.2581989
## 14 1e-04 3 0.20 0.2581989
## 15 1e-03 3 0.20 0.2581989
## 16 1e-02 3 0.20 0.2581989
## 17 1e+00 3 0.15 0.2415229
## 18 1e+01 3 0.15 0.2415229
## 19 1e-05 4 0.20 0.2581989
## 20 1e-04 4 0.20 0.2581989
## 21 1e-03 4 0.20 0.2581989
## 22 1e-02 4 0.20 0.2581989
## 23 1e+00 4 0.15 0.2415229
## 24 1e+01 4 0.15 0.2415229
## 25 1e-05 5 0.20 0.2581989
## 26 1e-04 5 0.20 0.2581989
## 27 1e-03 5 0.20 0.2581989
## 28 1e-02 5 0.20 0.2581989
## 29 1e+00 5 0.15 0.2415229
## 30 1e+01 5 0.15 0.2415229
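The error column above is identical across every gamma value, which is expected: gamma parameterises the radial and polynomial kernels, not the linear one, so only cost matters here. A slimmer search, sketched with a hypothetical tune.lin object:
# gamma has no effect with kernel = "linear", so tune over cost alone
set.seed(1)
tune.lin <- tune(svm, q03_symptoms ~ ., data = covid, kernel = "linear",
                 ranges = list(cost = c(1e-05, 1e-04, 0.001, 0.01, 1, 10)))
summary(tune.lin)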
bestmod<-tune.out$best.model
summary(bestmod)
##
## Call:
## best.tune(method = svm, train.x = q03_symptoms ~ ., data = covid,
## ranges = list(cost = c(1e-05, 1e-04, 0.001, 0.01, 1, 10), gamma = c(1,
## 2, 3, 4, 5)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 12
##
## ( 8 4 )
##
##
## Number of Classes: 2
##
## Levels:
## f t
grid <- expand.grid(C = c(1,10,100, 1000, 10000))
# Note: tuneGrid overrides tuneLength, and cost/gamma are e1071 argument names
# rather than kernlab's, so only the C values in grid are actually tuned here.
svm_Linear_Grid <- caret::train(q03_symptoms ~ ., data = training, method = "svmLinear",
                                trControl = trctrl, preProcess = c("center", "scale"),
                                tuneGrid = grid, tuneLength = 10,
                                cost = bestmod$cost, gamma = bestmod$gamma)
svm_Linear_Grid
## Support Vector Machines with Linear Kernel
##
## 15 samples
## 10 predictors
## 2 classes: 'f', 't'
##
## Pre-processing: centered (26), scaled (26)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 14, 14, 14, 13, 13, 13, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 1 0.9166667 0.5
## 10 0.9166667 0.5
## 100 0.9166667 0.5
## 1000 0.9166667 0.5
## 10000 0.9166667 0.5
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 1.
plot(svm_Linear_Grid)
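Accuracy is identical for every value of C, so the resampling results alone do not separate the candidates, and the grid-tuned fit is never scored on the held-out data. A natural follow-up, sketched here with a hypothetical grid_pred object, is to evaluate it on the test set as was done for svm_lin:
# Hypothetical check of the grid-tuned model on the held-out rows
grid_pred <- predict(svm_Linear_Grid, newdata = test)
confusionMatrix(table(grid_pred, test$q03_symptoms), positive = "t")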
Code used in analysis
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE
)
require(knitr)
library(ggplot2)
library(tidyr)
library(MASS)
library(psych)
library(kableExtra)
library(dplyr)
library(faraway)
library(gridExtra)
library(reshape2)
library(leaps)
library(pROC)
library(caret)
library(naniar)
library(pander)
library(mlbench)
library(e1071)
library(fpp2)
library(mlr)
covid<- read.csv("covid-19-survey-responses-sample.csv", header = TRUE)
covid<-subset(covid,select=(names(covid[12:ncol(covid)-2])))
intrain<- createDataPartition(y = covid$q03_symptoms, p=.7, list=FALSE)
training <- covid[intrain,]
test<- covid[-intrain, ]
dim(training)
dim(test)
anyNA(covid)
glimpse(covid)
trctrl<- trainControl(method="repeatedcv", number=10,repeats=3)
svm_lin<- caret::train(q03_symptoms~., data=training, method="svmLinear",
trControl=trctrl, preProcess=c("center","scale"),
tuneLength =10)
svm_lin
test_pred<- predict(svm_lin, newdata=test)
test_pred
confusionMatrix(table(test_pred,test$q03_symptoms))
set.seed(1)
tune.out<-tune(svm,q03_symptoms~.,data=covid, kernel="linear",
ranges=list(cost=c(.00001,.0001, .001, .01, 1,10),
gamma=c(1,2,3,4,5))) #get best model with tune
summary(tune.out)
bestmod<-tune.out$best.model
summary(bestmod)
grid <- expand.grid(C = c(1,10,100, 1000, 10000))
# Note: tuneGrid overrides tuneLength, and cost/gamma are e1071 argument names
# rather than kernlab's, so only the C values in grid are actually tuned here.
svm_Linear_Grid <- caret::train(q03_symptoms ~ ., data = training, method = "svmLinear",
                                trControl = trctrl, preProcess = c("center", "scale"),
                                tuneGrid = grid, tuneLength = 10,
                                cost = bestmod$cost, gamma = bestmod$gamma)
svm_Linear_Grid
plot(svm_Linear_Grid)