This project takes sample data from a survey conducted by Statistics Canada. This data is then further broken down into a smaller sample correlated to the issue being addressed. This analysis looks to create models that can predict the associated outcomes listed in the survey.
“Why did you more frequently engage in the following cyber security measure or measures during the COVID-19 pandemic?
CS_Q30A: “Changed passwords or passphrases”
CS_Q30B: “Created strong passwords or passphrases”
CS_Q30C: “Updated software or operating systems”
List of libraries used in this data analysis
Creating the sample from the appropriate rows:
setwd("C:/Users/Reed/Desktop/R Project")
df <- read.csv("rawsurveydata.csv", header = T)
df <- df[, -1]
sample <- df[ ,c(46:60,138:146)]
Define functions used to replace missing values
colRecode269 <- function(startCol, endCol) { #Replace values in sample
for (i in startCol:endCol) {
sample[, i] <<- ifelse(sample[, i] == 2, 0, sample[, i])
sample[, i] <<- ifelse(sample[, i] == 6, NA, sample[, i])
sample[, i] <<- ifelse(sample[, i] == 9, NA, sample[, i])
}
}
colFreqTable <- function(startCol, endCol) { #Frequency of values in sample
for (i in startCol:endCol) {
cat("-----------------\n")
cat(colnames(sample[i]),"\n")
print(table(sample[, i]))
cat("\n-----------------")
}
}
recode269Tab <- function(stColName, endColName) { #Recreate sample with new values
x <- which(colnames(sample) == stColName)
y <- which(colnames(sample) == endColName)
colRecode269(x, y)
colFreqTable(x, y)
sample <<- sample
}
Begin by giving all variables a more distinguishable header:
#CS_30A
sample <- rename(sample, chdPsdAwarenessA = CS_30AA )
sample <- rename(sample, chdPsdDrnWkA = CS_30AB )
sample <- rename(sample, chdPsdAdvFrnFamA = CS_30AC )
sample <- rename(sample, chdPsdIncdA = CS_30AD )
sample <- rename(sample, chdPsdOtherA = CS_30AE )
#CS_30B
sample <- rename(sample, chdPsdAwarenessB = CS_30BA )
sample <- rename(sample, chdPsdDrnWkB = CS_30BB )
sample <- rename(sample, chdPsdAdvFrnFamB = CS_30BC )
sample <- rename(sample, chdPsdIncdB = CS_30BD )
sample <- rename(sample, chdPsdOtherB = CS_30BE )
#CS_30C
sample <- rename(sample, chdPsdAwarenessC = CS_30CA )
sample <- rename(sample, chdPsdDrnWkC = CS_30CB )
sample <- rename(sample, chdPsdAdvFrnFamC = CS_30CC )
sample <- rename(sample, chdPsdIncdC = CS_30CD )
sample <- rename(sample, chdPsdOtherC = CS_30CE )
recode269Tab('chdPsdAwarenessA', 'chdPsdOtherC') #record all survey data columns
## -----------------
## chdPsdAwarenessA
##
## 0 1
## 125 294
##
## ----------------------------------
## chdPsdDrnWkA
##
## 0 1
## 357 62
##
## ----------------------------------
## chdPsdAdvFrnFamA
##
## 0 1
## 364 55
##
## ----------------------------------
## chdPsdIncdA
##
## 0 1
## 338 81
##
## ----------------------------------
## chdPsdOtherA
##
## 0 1
## 382 37
##
## ----------------------------------
## chdPsdAwarenessB
##
## 0 1
## 107 421
##
## ----------------------------------
## chdPsdDrnWkB
##
## 0 1
## 457 71
##
## ----------------------------------
## chdPsdAdvFrnFamB
##
## 0 1
## 454 74
##
## ----------------------------------
## chdPsdIncdB
##
## 0 1
## 451 77
##
## ----------------------------------
## chdPsdOtherB
##
## 0 1
## 489 39
##
## ----------------------------------
## chdPsdAwarenessC
##
## 0 1
## 134 211
##
## ----------------------------------
## chdPsdDrnWkC
##
## 0 1
## 274 71
##
## ----------------------------------
## chdPsdAdvFrnFamC
##
## 0 1
## 297 48
##
## ----------------------------------
## chdPsdIncdC
##
## 0 1
## 311 34
##
## ----------------------------------
## chdPsdOtherC
##
## 0 1
## 281 64
##
## -----------------
#Rename demographic column data
sample <- rename(sample, FamilySize = HHLDSIZC )
sample <- rename(sample, AgeGroup = AGEGRP )
sample <- rename(sample, isCanadaBorn = IMMIGRNC )
sample <- rename(sample, isMale = SEX )
sample <- rename(sample, Under18 = PCHILD )
sample <- rename(sample, MaritalStatus = MARSTATC )
sample <- rename(sample, dwellingType = PDWELCDC )
sample <- rename(sample, highestEd = PEDUC_LC )
sample <- rename(sample, isRural = RURURB )
#Recode demographic column data
sample$isCanadaBorn <- ifelse(sample$isCanadaBorn == 2, 0, 1)
sample$isMale <- ifelse(sample$isMale == 2, 0, 1)
sample$isRural <- ifelse(sample$isRural == 2, 0, 1)
After the sample is created, a correlation matrix is used to determine which variables can be combined:
cormatSample <- cor(sample)
melted_cormatSample <- melt(cormatSample)
ggplot (data = melted_cormatSample, aes (x=Var1, y=Var2, fill=value)) + geom_tile()
New variables are created as a result of this correlation:
#Change due to Password Awareness
sample <- mutate(sample, Awareness = chdPsdAwarenessA + chdPsdAwarenessB + chdPsdAwarenessC)
sample$Awareness <- ifelse(sample$Awareness > 1, 1, 0) #create binary variable
#Change due to Work Order
sample <-mutate(sample, Work = chdPsdDrnWkA + chdPsdDrnWkB + chdPsdDrnWkC)
sample$Work <- ifelse(sample$Work > 1, 1, 0) #create binary variable
#Change due to Advice from Others
sample <- mutate(sample, Advice = chdPsdAdvFrnFamA + chdPsdAdvFrnFamB + chdPsdAdvFrnFamC)
sample$Advice <- ifelse(sample$Advice > 1, 1, 0) #create binary variable
#Change due to Cyber Security Incident
sample <- mutate (sample, Incident = chdPsdIncdA + chdPsdIncdB + chdPsdIncdC)
sample$Incident <- ifelse(sample$Incident > 1, 1, 0) #create binary variable
#Change due to Other Reasons
sample <- mutate (sample, Other = chdPsdOtherA + chdPsdOtherB + chdPsdOtherC)
sample$Other <- ifelse(sample$Other > 1, 1, 0) #create binary variable
Newly Created Variables:
Creation of New Demographic Variables:
sample$isAlone <- ifelse(sample$MaritalStatus %in% c(3, 4), 1, 0) #create a binary variable to determine if a user is alone or is married
sample$psEducation <- ifelse(sample$highestEd %in% c(1,2), 0, 1) #create a binary variable to determine if a user has post-secondary education
sample$House <-ifelse(sample$dwellingType %in% c(2,3,4), 0, 1) #create a variable to determine if the user lives in a house
sample$over45 <-ifelse(sample$AgeGroup %in% c(4,5,6,7), 1, 0) #create a variable to determine if a user is 45 or older
New Demographic Variables:
Remove NA values from sample:
noOmissions <- sample[rowSums(is.na(sample))==0, ]
Final correlation analysis to determine appropraite variables for analysis:
finalSample <- noOmissions[ ,c(25:29,16:20,33,32,24,30:31)]
cormatF <- cor(finalSample)
melted_cormatF <- melt(cormatF)
ggplot (data = melted_cormatF, aes (x=Var1, y=Var2, fill=value)) + geom_tile()
summary(finalSample)
## Awareness Work Advice Incident
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.8145 Mean :0.1532 Mean :0.1532 Mean :0.2177
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Other FamilySize AgeGroup isCanadaBorn
## Min. :0.00000 Min. :1.000 Min. :1.000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:1.0000
## Median :0.00000 Median :2.000 Median :5.000 Median :1.0000
## Mean :0.08065 Mean :1.847 Mean :4.427 Mean :0.7903
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:6.000 3rd Qu.:1.0000
## Max. :1.00000 Max. :4.000 Max. :7.000 Max. :1.0000
## isMale Under18 over45 House
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :0.0000 Median :1.0000 Median :1.000
## Mean :0.5565 Mean :0.2258 Mean :0.7097 Mean :0.629
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## isRural isAlone psEducation
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1532 Mean :0.4194 Mean :0.7823
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
Create contingency tables for each survey variable. This will determine which demographic variables are appropriate for the initial analysis.
## FamilySize
## Awareness 1 2 3 4
## 0 9 12 1 1
## 1 30 58 9 4
## AgeGroup
## Awareness 1 2 3 4 5 6 7
## 0 2 4 2 5 4 5 1
## 1 6 8 14 18 22 21 12
## isCanadaBorn
## Awareness 0 1
## 0 4 19
## 1 22 79
## isMale
## Awareness 0 1
## 0 9 14
## 1 46 55
## Under18
## Awareness 0 1
## 0 17 6
## 1 79 22
## over45
## Awareness 0 1
## 0 8 15
## 1 28 73
## House
## Awareness 0 1
## 0 8 15
## 1 38 63
## isRural
## Awareness 0 1
## 0 21 2
## 1 84 17
## isAlone
## Awareness 0 1
## 0 14 9
## 1 58 43
## psEducation
## Awareness 0 1
## 0 6 17
## 1 21 80
## FamilySize
## Work 1 2 3 4
## 0 35 59 8 3
## 1 4 11 2 2
## AgeGroup
## Work 1 2 3 4 5 6 7
## 0 7 7 12 20 23 23 13
## 1 1 5 4 3 3 3 0
## isCanadaBorn
## Work 0 1
## 0 21 84
## 1 5 14
## isMale
## Work 0 1
## 0 45 60
## 1 10 9
## Under18
## Work 0 1
## 0 81 24
## 1 15 4
## over45
## Work 0 1
## 0 26 79
## 1 10 9
## House
## Work 0 1
## 0 39 66
## 1 7 12
## isRural
## Work 0 1
## 0 88 17
## 1 17 2
## isAlone
## Work 0 1
## 0 58 47
## 1 14 5
## psEducation
## Work 0 1
## 0 25 80
## 1 2 17
## FamilySize
## Advice 1 2 3 4
## 0 31 62 10 2
## 1 8 8 0 3
## AgeGroup
## Advice 1 2 3 4 5 6 7
## 0 6 10 16 19 22 23 9
## 1 2 2 0 4 4 3 4
## isCanadaBorn
## Advice 0 1
## 0 20 85
## 1 6 13
## isMale
## Advice 0 1
## 0 48 57
## 1 7 12
## Under18
## Advice 0 1
## 0 81 24
## 1 15 4
## over45
## Advice 0 1
## 0 32 73
## 1 4 15
## House
## Advice 0 1
## 0 36 69
## 1 10 9
## isRural
## Advice 0 1
## 0 87 18
## 1 18 1
## isAlone
## Advice 0 1
## 0 62 43
## 1 10 9
## psEducation
## Advice 0 1
## 0 23 82
## 1 4 15
## FamilySize
## Incident 1 2 3 4
## 0 30 57 7 3
## 1 9 13 3 2
## AgeGroup
## Incident 1 2 3 4 5 6 7
## 0 6 9 13 20 19 21 9
## 1 2 3 3 3 7 5 4
## isCanadaBorn
## Incident 0 1
## 0 21 76
## 1 5 22
## isMale
## Incident 0 1
## 0 50 47
## 1 5 22
## Under18
## Incident 0 1
## 0 75 22
## 1 21 6
## over45
## Incident 0 1
## 0 28 69
## 1 8 19
## House
## Incident 0 1
## 0 36 61
## 1 10 17
## isRural
## Incident 0 1
## 0 80 17
## 1 25 2
## isAlone
## Incident 0 1
## 0 55 42
## 1 17 10
## psEducation
## Incident 0 1
## 0 20 77
## 1 7 20
## FamilySize
## Other 1 2 3 4
## 0 34 66 9 5
## 1 5 4 1 0
## AgeGroup
## Other 1 2 3 4 5 6 7
## 0 8 10 15 21 23 25 12
## 1 0 2 1 2 3 1 1
## isCanadaBorn
## Other 0 1
## 0 25 89
## 1 1 9
## isMale
## Other 0 1
## 0 54 60
## 1 1 9
## Under18
## Other 0 1
## 0 88 26
## 1 8 2
## over45
## Other 0 1
## 0 33 81
## 1 3 7
## House
## Other 0 1
## 0 42 72
## 1 4 6
## isRural
## Other 0 1
## 0 95 19
## 1 10 0
## isAlone
## Other 0 1
## 0 67 47
## 1 5 5
## psEducation
## Other 0 1
## 0 25 89
## 1 2 8
Create categorical variables from each demographic variable for the purpose of logistic regression analysis:
finalSample$FamilySize <-factor(finalSample$FamilySize)
finalSample$AgeGroup <-factor(finalSample$AgeGroup)
finalSample$isCanadaBorn <-factor(finalSample$isCanadaBorn)
finalSample$isMale <-factor(finalSample$isMale)
finalSample$Under18 <-factor(finalSample$Under18)
finalSample$over45 <-factor(finalSample$over45)
finalSample$House <-factor(finalSample$House)
finalSample$isRural <-factor(finalSample$isRural)
finalSample$isAlone <-factor(finalSample$isAlone)
finalSample$psEducation <-factor(finalSample$psEducation)
Conduct Logistic Regression for All Appropriate Variables:
#Awareness (over45 removed due to it lacking valuable data)
logitAwareness <- glm(Awareness~FamilySize+AgeGroup+isCanadaBorn+isMale+Under18+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitAwareness)
##
## Call:
## glm(formula = Awareness ~ FamilySize + AgeGroup + isCanadaBorn +
## isMale + Under18 + House + isRural + isAlone + psEducation,
## family = "binomial", data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2742 0.1967 0.5234 0.6601 1.7455
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.12995 3.13913 -1.634 0.1022
## FamilySize2 3.48974 1.55501 2.244 0.0248 *
## FamilySize3 4.82062 1.97917 2.436 0.0149 *
## FamilySize4 4.01657 1.96831 2.041 0.0413 *
## AgeGroup2 2.81995 1.98735 1.419 0.1559
## AgeGroup3 4.12071 2.04843 2.012 0.0443 *
## AgeGroup4 3.25572 2.02683 1.606 0.1082
## AgeGroup5 4.15379 1.99933 2.078 0.0377 *
## AgeGroup6 3.46574 1.99761 1.735 0.0828 .
## AgeGroup7 5.46706 2.50665 2.181 0.0292 *
## isCanadaBorn1 -0.58034 0.78052 -0.744 0.4572
## isMale1 -0.53367 0.57884 -0.922 0.3565
## Under181 -0.08536 0.79746 -0.107 0.9148
## House1 -0.64470 0.66196 -0.974 0.3301
## isRural1 0.59375 0.88915 0.668 0.5043
## isAlone1 3.32696 1.54533 2.153 0.0313 *
## psEducation1 0.68202 0.71307 0.956 0.3388
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 118.94 on 123 degrees of freedom
## Residual deviance: 101.93 on 107 degrees of freedom
## AIC: 135.93
##
## Number of Fisher Scoring iterations: 6
#Work
logitWork <- glm(Work~FamilySize+isCanadaBorn+isMale+Under18+over45+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitWork)
##
## Call:
## glm(formula = Work ~ FamilySize + isCanadaBorn + isMale + Under18 +
## over45 + House + isRural + isAlone + psEducation, family = "binomial",
## data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9268 -0.5446 -0.2979 -0.1505 2.8918
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.1658 1.9392 1.117 0.264069
## FamilySize2 -1.0593 1.0845 -0.977 0.328676
## FamilySize3 -0.5012 1.2368 -0.405 0.685293
## FamilySize4 2.3240 1.6204 1.434 0.151517
## isCanadaBorn1 -1.5125 0.7850 -1.927 0.054014 .
## isMale1 -1.0485 0.6688 -1.568 0.116944
## Under181 -2.4966 0.9044 -2.760 0.005774 **
## over451 -3.0187 0.8570 -3.522 0.000428 ***
## House1 0.3537 0.7519 0.470 0.638048
## isRural1 -0.7341 1.0157 -0.723 0.469809
## isAlone1 -2.4905 1.1851 -2.102 0.035592 *
## psEducation1 1.7388 1.0610 1.639 0.101244
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.209 on 123 degrees of freedom
## Residual deviance: 79.439 on 112 degrees of freedom
## AIC: 103.44
##
## Number of Fisher Scoring iterations: 6
#Advice
logitAdvice <- glm(Advice~isCanadaBorn+isMale+Under18+over45+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitAdvice)
##
## Call:
## glm(formula = Advice ~ isCanadaBorn + isMale + Under18 + over45 +
## House + isRural + isAlone + psEducation, family = "binomial",
## data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9485 -0.6186 -0.5235 -0.3330 2.4260
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.60034 1.17693 -1.360 0.174
## isCanadaBorn1 -0.42230 0.64373 -0.656 0.512
## isMale1 0.13090 0.55190 0.237 0.813
## Under181 -0.01261 0.75169 -0.017 0.987
## over451 0.62924 0.68236 0.922 0.356
## House1 -0.56859 0.59510 -0.955 0.339
## isRural1 -1.20110 1.09329 -1.099 0.272
## isAlone1 0.30652 0.64092 0.478 0.632
## psEducation1 -0.03192 0.63114 -0.051 0.960
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.21 on 123 degrees of freedom
## Residual deviance: 100.31 on 115 degrees of freedom
## AIC: 118.31
##
## Number of Fisher Scoring iterations: 5
#Incident (over45 was removed due to it not giving valuable data)
logitIncident <- glm(Incident~FamilySize+AgeGroup+isCanadaBorn+isMale+Under18+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitIncident)
##
## Call:
## glm(formula = Incident ~ FamilySize + AgeGroup + isCanadaBorn +
## isMale + Under18 + House + isRural + isAlone + psEducation,
## family = "binomial", data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2871 -0.7823 -0.4055 -0.1301 2.2921
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.39183 2.64357 0.526 0.59854
## FamilySize2 -2.39371 1.49081 -1.606 0.10835
## FamilySize3 -1.38894 1.59238 -0.872 0.38308
## FamilySize4 -0.90211 1.82649 -0.494 0.62137
## AgeGroup2 -1.79218 1.74001 -1.030 0.30302
## AgeGroup3 -2.61198 1.77362 -1.473 0.14084
## AgeGroup4 -3.48715 1.87797 -1.857 0.06333 .
## AgeGroup5 -1.86637 1.69069 -1.104 0.26963
## AgeGroup6 -2.28610 1.76593 -1.295 0.19547
## AgeGroup7 -1.79644 1.87563 -0.958 0.33817
## isCanadaBorn1 0.38755 0.69551 0.557 0.57738
## isMale1 1.99756 0.67431 2.962 0.00305 **
## Under181 0.91675 0.81439 1.126 0.26030
## House1 0.08244 0.63448 0.130 0.89662
## isRural1 -0.20135 0.90039 -0.224 0.82305
## isAlone1 -2.21612 1.43931 -1.540 0.12363
## psEducation1 -0.14534 0.66938 -0.217 0.82811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.96 on 123 degrees of freedom
## Residual deviance: 109.09 on 107 degrees of freedom
## AIC: 143.09
##
## Number of Fisher Scoring iterations: 6
#Other
logitOther <- glm(Other~isCanadaBorn+isMale+Under18+over45+House+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitOther)
##
## Call:
## glm(formula = Other ~ isCanadaBorn + isMale + Under18 + over45 +
## House + isAlone + psEducation, family = "binomial", data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7081 -0.4859 -0.2562 -0.1873 2.7688
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.854335 2.051778 -2.853 0.00433 **
## isCanadaBorn1 1.259686 1.213021 1.038 0.29905
## isMale1 2.430795 1.121817 2.167 0.03025 *
## Under181 0.821632 1.065063 0.771 0.44045
## over451 0.006457 0.842673 0.008 0.99389
## House1 0.046724 0.854901 0.055 0.95641
## isAlone1 0.696653 0.892292 0.781 0.43495
## psEducation1 0.033480 0.863458 0.039 0.96907
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 69.525 on 123 degrees of freedom
## Residual deviance: 60.852 on 116 degrees of freedom
## AIC: 76.852
##
## Number of Fisher Scoring iterations: 6
Confidence Intervals for Coefficients:
confint(logitAwareness) #using log-likelihood
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -12.86441913 0.05811958
## FamilySize2 0.94020763 7.31850609
## FamilySize3 1.49508547 9.49956266
## FamilySize4 0.57169783 8.56824406
## AgeGroup2 -0.71034492 7.31672635
## AgeGroup3 0.45590167 8.71508903
## AgeGroup4 -0.30847935 7.84323635
## AgeGroup5 0.63646529 8.68532810
## AgeGroup6 -0.08331657 7.97748462
## AgeGroup7 1.26096185 11.80452481
## isCanadaBorn1 -2.25613284 0.86745823
## isMale1 -1.72805937 0.57082514
## Under181 -1.66253194 1.50499938
## House1 -2.01496023 0.61408801
## isRural1 -1.01091106 2.62676489
## isAlone1 0.75676187 7.11076631
## psEducation1 -0.76201455 2.08876533
confint.default(logitAwareness) #using standard error
## 2.5 % 97.5 %
## (Intercept) -11.2825235 1.0226265
## FamilySize2 0.4419802 6.5375001
## FamilySize3 0.9415269 8.6997220
## FamilySize4 0.1587581 7.8743811
## AgeGroup2 -1.0751809 6.7150748
## AgeGroup3 0.1058654 8.1355640
## AgeGroup4 -0.7168042 7.2282427
## AgeGroup5 0.2351613 8.0724103
## AgeGroup6 -0.4494971 7.3809844
## AgeGroup7 0.5541171 10.3800054
## isCanadaBorn1 -2.1101288 0.9494502
## isMale1 -1.6681736 0.6008384
## Under181 -1.6483517 1.4776288
## House1 -1.9421107 0.6527064
## isRural1 -1.1489503 2.3364566
## isAlone1 0.2981790 6.3557470
## psEducation1 -0.7155635 2.0796061
confint(logitWork)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -1.71436807 6.01207190
## FamilySize2 -3.28985581 1.06361118
## FamilySize3 -3.08484830 1.88665432
## FamilySize4 -0.87499525 5.61141712
## isCanadaBorn1 -3.10884073 0.03175847
## isMale1 -2.43266147 0.23106523
## Under181 -4.47096441 -0.86352763
## over451 -4.88525903 -1.45911346
## House1 -1.10368782 1.87538564
## isRural1 -3.02366281 1.07821552
## isAlone1 -5.00930459 -0.27715770
## psEducation1 -0.05218201 4.29079149
confint.default(logitWork)
## 2.5 % 97.5 %
## (Intercept) -1.6350315 5.96656734
## FamilySize2 -3.1848005 1.06622530
## FamilySize3 -2.9252992 1.92286893
## FamilySize4 -0.8519645 5.49994166
## isCanadaBorn1 -3.0511489 0.02609276
## isMale1 -2.3593253 0.26232708
## Under181 -4.2692514 -0.72388252
## over451 -4.6983699 -1.33906145
## House1 -1.1199223 1.82731885
## isRural1 -2.7248489 1.25658660
## isAlone1 -4.8132903 -0.16780589
## psEducation1 -0.3406986 3.81832496
confint(logitAdvice)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -4.0360860 0.6265680
## isCanadaBorn1 -1.6567506 0.9011494
## isMale1 -0.9379167 1.2529222
## Under181 -1.5668727 1.4331417
## over451 -0.6449772 2.0808381
## House1 -1.7498283 0.6061955
## isRural1 -4.1565430 0.5740896
## isAlone1 -0.9568462 1.5844499
## psEducation1 -1.2069552 1.3258326
confint.default(logitAdvice)
## 2.5 % 97.5 %
## (Intercept) -3.9070741 0.7063885
## isCanadaBorn1 -1.6839779 0.8393876
## isMale1 -0.9507900 1.2125994
## Under181 -1.4858966 1.4606851
## over451 -0.7081729 1.9666467
## House1 -1.7349686 0.5977826
## isRural1 -3.3439055 0.9417144
## isAlone1 -0.9496561 1.5627049
## psEducation1 -1.2689306 1.2050822
confint(logitIncident)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -4.0397099 6.7111400
## FamilySize2 -5.4712095 0.3512812
## FamilySize3 -4.6150450 1.7375556
## FamilySize4 -4.5822608 2.6948249
## AgeGroup2 -5.3428230 1.6732361
## AgeGroup3 -6.2340758 0.8918226
## AgeGroup4 -7.4225442 0.0747858
## AgeGroup5 -5.3055395 1.5555054
## AgeGroup6 -5.8793791 1.2391508
## AgeGroup7 -5.6090631 1.9533143
## isCanadaBorn1 -0.9242412 1.8494310
## isMale1 0.7904360 3.4828484
## Under181 -0.6840152 2.5540336
## House1 -1.1508529 1.3667482
## isRural1 -2.2445418 1.4481393
## isAlone1 -5.1712795 0.4871479
## psEducation1 -1.4414917 1.2183723
confint.default(logitIncident)
## 2.5 % 97.5 %
## (Intercept) -3.7894641 6.5731267
## FamilySize2 -5.3156458 0.5282308
## FamilySize3 -4.5099523 1.7320682
## FamilySize4 -4.4819727 2.6777522
## AgeGroup2 -5.2025419 1.6181758
## AgeGroup3 -6.0882179 0.8642523
## AgeGroup4 -7.1679057 0.1936038
## AgeGroup5 -5.1800544 1.4473126
## AgeGroup6 -5.7472532 1.1750526
## AgeGroup7 -5.4725974 1.8797217
## isCanadaBorn1 -0.9756167 1.7507129
## isMale1 0.6759341 3.3191907
## Under181 -0.6794362 2.5129280
## House1 -1.1611206 1.3259927
## isRural1 -1.9660687 1.5633763
## isAlone1 -5.0371126 0.6048661
## psEducation1 -1.4572910 1.1666125
confint(logitOther)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -10.4549577 -2.254641
## isCanadaBorn1 -0.7589535 4.374330
## isMale1 0.6004028 5.422394
## Under181 -1.4406137 2.904664
## over451 -1.5932798 1.795187
## House1 -1.6259341 1.786500
## isAlone1 -1.0722319 2.495871
## psEducation1 -1.5272683 2.022578
confint.default(logitOther)
## 2.5 % 97.5 %
## (Intercept) -9.8757455 -1.832924
## isCanadaBorn1 -1.1177912 3.637164
## isMale1 0.2320743 4.629515
## Under181 -1.2658537 2.909118
## over451 -1.6451522 1.658067
## House1 -1.6288515 1.722299
## isAlone1 -1.0522071 2.445512
## psEducation1 -1.6588675 1.725827
To further determine which demographic variables are best suited to predict each survey variables, Chi-Squared Analysis is conducted (with p = 0.1):
#FamilySize
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 2:4) #statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 6.5, df = 3, P(> X2) = 0.091
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 2:4) #statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 6.7, df = 3, P(> X2) = 0.083
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 2:4) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 4.1, df = 3, P(> X2) = 0.25
#AgeGroup:
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 5:10) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 7.8, df = 6, P(> X2) = 0.25
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 5:10) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 5.0, df = 6, P(> X2) = 0.55
#isCanadaBorn
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 11) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.55, df = 1, P(> X2) = 0.46
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 5) #statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 3.7, df = 1, P(> X2) = 0.054
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 2) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.43, df = 1, P(> X2) = 0.51
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 11) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.31, df = 1, P(> X2) = 0.58
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 2) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 1.1, df = 1, P(> X2) = 0.3
#isMale
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 12) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.85, df = 1, P(> X2) = 0.36
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 6) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 2.5, df = 1, P(> X2) = 0.12
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 3) #not statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.056, df = 1, P(> X2) = 0.81
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 12) #statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 8.8, df = 1, P(> X2) = 0.0031
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 3) #statistically significant
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 4.7, df = 1, P(> X2) = 0.03
#Under18
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 13) #NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.011, df = 1, P(> X2) = 0.91
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 7) #SS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 7.6, df = 1, P(> X2) = 0.0058
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 4)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.00028, df = 1, P(> X2) = 0.99
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 13)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 1.3, df = 1, P(> X2) = 0.26
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 4)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.6, df = 1, P(> X2) = 0.44
#Over45
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 8)#SS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 12.4, df = 1, P(> X2) = 0.00043
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 5)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.85, df = 1, P(> X2) = 0.36
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 5)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 5.9e-05, df = 1, P(> X2) = 0.99
#House
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 14)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.95, df = 1, P(> X2) = 0.33
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 9)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.22, df = 1, P(> X2) = 0.64
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 6)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.91, df = 1, P(> X2) = 0.34
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 14)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.017, df = 1, P(> X2) = 0.9
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 6)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.003, df = 1, P(> X2) = 0.96
#isRural
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 15)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.45, df = 1, P(> X2) = 0.5
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 9)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.22, df = 1, P(> X2) = 0.64
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 7)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 1.2, df = 1, P(> X2) = 0.27
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 15)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.05, df = 1, P(> X2) = 0.82
#isAlone
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 16)#SS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 4.6, df = 1, P(> X2) = 0.031
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 10)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.52, df = 1, P(> X2) = 0.47
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 8)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.23, df = 1, P(> X2) = 0.63
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 16)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 2.4, df = 1, P(> X2) = 0.12
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 7)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.61, df = 1, P(> X2) = 0.43
#psEducation
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 17)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.91, df = 1, P(> X2) = 0.34
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 11)#SS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 4.4, df = 1, P(> X2) = 0.036
wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 9)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.0026, df = 1, P(> X2) = 0.96
wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 17)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.047, df = 1, P(> X2) = 0.83
wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 8)#NSS
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 0.0015, df = 1, P(> X2) = 0.97
Conduct logistic regression only using the demographic variables that were determined to be statistically significant
logitAwareness <- glm(Awareness~FamilySize+isAlone, data = finalSample, family = "binomial")
summary(logitAwareness)
##
## Call:
## glm(formula = Awareness ~ FamilySize + isAlone, family = "binomial",
## data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4751 0.3770 0.6503 0.6681 1.1283
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1166 0.9113 0.128 0.898
## FamilySize2 1.3295 0.9115 1.459 0.145
## FamilySize3 1.7366 1.3037 1.332 0.183
## FamilySize4 1.2697 1.4424 0.880 0.379
## isAlone1 1.1620 0.8921 1.303 0.193
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 118.94 on 123 degrees of freedom
## Residual deviance: 115.80 on 119 degrees of freedom
## AIC: 125.8
##
## Number of Fisher Scoring iterations: 5
logitWork <- glm(Work~FamilySize+isCanadaBorn+Under18+over45+psEducation, data = finalSample, family = "binomial")
summary(logitWork)
##
## Call:
## glm(formula = Work ~ FamilySize + isCanadaBorn + Under18 + over45 +
## psEducation, family = "binomial", data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7069 -0.4942 -0.3633 -0.1964 2.4739
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5963 1.2159 -1.313 0.18921
## FamilySize2 0.8905 0.7066 1.260 0.20760
## FamilySize3 0.9710 1.1087 0.876 0.38115
## FamilySize4 4.0449 1.3806 2.930 0.00339 **
## isCanadaBorn1 -1.0889 0.6851 -1.589 0.11197
## Under181 -1.4571 0.7348 -1.983 0.04738 *
## over451 -2.1436 0.6538 -3.279 0.00104 **
## psEducation1 1.8169 0.9481 1.916 0.05533 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.209 on 123 degrees of freedom
## Residual deviance: 86.932 on 116 degrees of freedom
## AIC: 102.93
##
## Number of Fisher Scoring iterations: 5
logitAdvice <- glm(Advice~House+isRural, data = finalSample, family = "binomial")
summary(logitAdvice)
##
## Call:
## glm(formula = Advice ~ House + isRural, family = "binomial",
## data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7233 -0.7233 -0.5327 -0.3014 2.4958
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2075 0.3617 -3.338 0.000843 ***
## House1 -0.6734 0.5081 -1.325 0.185025
## isRural1 -1.1882 1.0666 -1.114 0.265269
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.21 on 123 degrees of freedom
## Residual deviance: 102.29 on 121 degrees of freedom
## AIC: 108.29
##
## Number of Fisher Scoring iterations: 5
logitIncident <- glm(Incident~isMale, data = finalSample, family = "binomial")
summary(logitIncident)
##
## Call:
## glm(formula = Incident ~ isMale, family = "binomial", data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8763 -0.8763 -0.4366 -0.4366 2.1899
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3026 0.4689 -4.910 9.1e-07 ***
## isMale1 1.5435 0.5354 2.883 0.00394 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.96 on 123 degrees of freedom
## Residual deviance: 119.90 on 122 degrees of freedom
## AIC: 123.9
##
## Number of Fisher Scoring iterations: 4
logitOther <- glm(Other~isMale, data = finalSample, family = "binomial")
summary(logitOther)
##
## Call:
## glm(formula = Other ~ isMale, family = "binomial", data = finalSample)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5287 -0.5287 -0.1916 -0.1916 2.8310
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.989 1.009 -3.953 7.72e-05 ***
## isMale1 2.092 1.071 1.954 0.0507 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 69.525 on 123 degrees of freedom
## Residual deviance: 63.432 on 122 degrees of freedom
## AIC: 67.432
##
## Number of Fisher Scoring iterations: 6
Coefficients as Odds Ratio:
exp(coef(logitAwareness))
## (Intercept) FamilySize2 FamilySize3 FamilySize4 isAlone1
## 1.123664 3.779311 5.677789 3.559783 3.196310
exp(coef(logitWork))
## (Intercept) FamilySize2 FamilySize3 FamilySize4 isCanadaBorn1
## 0.2026349 2.4362293 2.6405416 57.1048659 0.3365781
## Under181 over451 psEducation1
## 0.2328992 0.1172336 6.1526488
exp(coef(logitAdvice))
## (Intercept) House1 isRural1
## 0.2989422 0.5099752 0.3047680
exp(coef(logitIncident))
## (Intercept) isMale1
## 0.100000 4.680851
exp(coef(logitOther))
## (Intercept) isMale1
## 0.01851852 8.09999977
Accuracy of Each Model:
## [1] "Awareness: "
## [1] 0.8145161
## [1] "Work: "
## [1] 0.1532258
## [1] "Advice: "
## [1] 0.1532258
## [1] "Incident: "
## [1] 0.2177419
## [1] "Other: "
## [1] 0.08064516
Based on the accuracy scores listed above, it can be determined that this method of model building created an accurate model for predicting a user’s Awareness of Cyber Threats, but was unable to created accurate models for the other categories. Further analysis would need to be completed in order to determine better models for these variables.