Project Overview:

This project takes sample data from a survey conducted by Statistics Canada. This data is then further broken down into a smaller sample correlated to the issue being addressed. This analysis looks to create models that can predict the associated outcomes listed in the survey.

Project Specifics

Question:

“Why did you more frequently engage in the following cyber security measure or measures during the COVID-19 pandemic?

CS_Q30A: “Changed passwords or passphrases”
CS_Q30B: “Created strong passwords or passphrases”
CS_Q30C: “Updated software or operating systems”

Responses:

Category 1: Greater Awareness of Security Risk (Yes/No)
Category 2: Direction from Work (Yes/No)
Category 3: Advice from friends or family (Yes/No)
Category 4: Result of Cyber Security Incident (Yes/No)
Category 5: Other (Yes/No)

Demographic Variables:

House Hold Size
Age Group
Education Level
Type of Dwelling
Marital Status
Immigration Status
Location

Data Preprocessing:

Set-Up:

List of libraries used in this data analysis

library(dplyr)
library (aod)
library (ggplot2)
library (gplots)
library(reshape2)
library(caret)

Preprocessing Structure:

Creating the sample from the appropriate rows:

setwd("C:/Users/Reed/Desktop/R Project")
df <- read.csv("rawsurveydata.csv", header = T) 
df <- df[, -1] 
sample <- df[ ,c(46:60,138:146)]

Define functions used to replace missing values

colRecode269 <- function(startCol, endCol) { #Replace values in sample
  for (i in startCol:endCol) {
    sample[, i] <<- ifelse(sample[, i] == 2, 0, sample[, i])
    sample[, i] <<- ifelse(sample[, i] == 6, NA, sample[, i])
    sample[, i] <<- ifelse(sample[, i] == 9, NA, sample[, i])
  }
}

colFreqTable <- function(startCol, endCol) { #Frequency of values in sample
  for (i in startCol:endCol) {
    cat("-----------------\n")
    cat(colnames(sample[i]),"\n")
    print(table(sample[, i]))
    cat("\n-----------------")
  }
}

recode269Tab <- function(stColName, endColName) { #Recreate sample with new values
  x <- which(colnames(sample) == stColName)
  y <- which(colnames(sample) == endColName)
  colRecode269(x, y)
  colFreqTable(x, y)
  sample <<- sample
}

Begin by giving all variables a more distinguishable header:

#CS_30A
sample <- rename(sample, chdPsdAwarenessA   = CS_30AA )
sample <- rename(sample, chdPsdDrnWkA   = CS_30AB )
sample <- rename(sample, chdPsdAdvFrnFamA  = CS_30AC )
sample <- rename(sample, chdPsdIncdA  = CS_30AD )
sample <- rename(sample, chdPsdOtherA  = CS_30AE )

#CS_30B
sample <- rename(sample, chdPsdAwarenessB   = CS_30BA )
sample <- rename(sample, chdPsdDrnWkB   = CS_30BB )
sample <- rename(sample, chdPsdAdvFrnFamB  = CS_30BC )
sample <- rename(sample, chdPsdIncdB  = CS_30BD )
sample <- rename(sample, chdPsdOtherB  = CS_30BE )

#CS_30C
sample <- rename(sample, chdPsdAwarenessC   = CS_30CA )
sample <- rename(sample, chdPsdDrnWkC   = CS_30CB )
sample <- rename(sample, chdPsdAdvFrnFamC  = CS_30CC )
sample <- rename(sample, chdPsdIncdC  = CS_30CD )
sample <- rename(sample, chdPsdOtherC  = CS_30CE )

recode269Tab('chdPsdAwarenessA', 'chdPsdOtherC') #record all survey data columns

## -----------------
## chdPsdAwarenessA 
## 
##   0   1 
## 125 294 
## 
## ----------------------------------
## chdPsdDrnWkA 
## 
##   0   1 
## 357  62 
## 
## ----------------------------------
## chdPsdAdvFrnFamA 
## 
##   0   1 
## 364  55 
## 
## ----------------------------------
## chdPsdIncdA 
## 
##   0   1 
## 338  81 
## 
## ----------------------------------
## chdPsdOtherA 
## 
##   0   1 
## 382  37 
## 
## ----------------------------------
## chdPsdAwarenessB 
## 
##   0   1 
## 107 421 
## 
## ----------------------------------
## chdPsdDrnWkB 
## 
##   0   1 
## 457  71 
## 
## ----------------------------------
## chdPsdAdvFrnFamB 
## 
##   0   1 
## 454  74 
## 
## ----------------------------------
## chdPsdIncdB 
## 
##   0   1 
## 451  77 
## 
## ----------------------------------
## chdPsdOtherB 
## 
##   0   1 
## 489  39 
## 
## ----------------------------------
## chdPsdAwarenessC 
## 
##   0   1 
## 134 211 
## 
## ----------------------------------
## chdPsdDrnWkC 
## 
##   0   1 
## 274  71 
## 
## ----------------------------------
## chdPsdAdvFrnFamC 
## 
##   0   1 
## 297  48 
## 
## ----------------------------------
## chdPsdIncdC 
## 
##   0   1 
## 311  34 
## 
## ----------------------------------
## chdPsdOtherC 
## 
##   0   1 
## 281  64 
## 
## -----------------

#Rename demographic column data 
sample <- rename(sample, FamilySize = HHLDSIZC )
sample <- rename(sample, AgeGroup = AGEGRP )
sample <- rename(sample, isCanadaBorn = IMMIGRNC )
sample <- rename(sample, isMale = SEX )
sample <- rename(sample, Under18 = PCHILD )
sample <- rename(sample, MaritalStatus = MARSTATC )
sample <- rename(sample, dwellingType = PDWELCDC )
sample <- rename(sample, highestEd = PEDUC_LC )
sample <- rename(sample, isRural = RURURB )

#Recode demographic column data
sample$isCanadaBorn <- ifelse(sample$isCanadaBorn == 2, 0, 1)
sample$isMale <- ifelse(sample$isMale == 2, 0, 1)
sample$isRural <- ifelse(sample$isRural == 2, 0, 1)

After the sample is created, a correlation matrix is used to determine which variables can be combined:

cormatSample <- cor(sample)
melted_cormatSample <- melt(cormatSample)
ggplot (data = melted_cormatSample, aes (x=Var1, y=Var2, fill=value)) + geom_tile()

New variables are created as a result of this correlation:

#Change due to Password Awareness
sample <- mutate(sample, Awareness = chdPsdAwarenessA + chdPsdAwarenessB + chdPsdAwarenessC)
sample$Awareness <- ifelse(sample$Awareness > 1, 1, 0) #create binary variable
#Change due to Work Order 
sample <-mutate(sample, Work = chdPsdDrnWkA + chdPsdDrnWkB + chdPsdDrnWkC)
sample$Work <- ifelse(sample$Work > 1, 1, 0) #create binary variable
#Change due to Advice from Others
sample <- mutate(sample, Advice = chdPsdAdvFrnFamA + chdPsdAdvFrnFamB + chdPsdAdvFrnFamC)
sample$Advice <- ifelse(sample$Advice > 1, 1, 0) #create binary variable
#Change due to Cyber Security Incident
sample <- mutate (sample, Incident = chdPsdIncdA + chdPsdIncdB + chdPsdIncdC)
sample$Incident <- ifelse(sample$Incident > 1, 1, 0) #create binary variable
#Change due to Other Reasons 
sample <- mutate (sample, Other = chdPsdOtherA + chdPsdOtherB + chdPsdOtherC)
sample$Other <- ifelse(sample$Other > 1, 1, 0) #create binary variable

Newly Created Variables:

Awareness (combination of Awareness columns A, B, and C)
Work (combination of Work columns A, B, and C)
Advice (combination of Advice columns A, B, and C)
Incident (combination of Incident columns A, B, and C)
Other (combination of Other columns A, B, and C)

Creation of New Demographic Variables:

sample$isAlone <- ifelse(sample$MaritalStatus %in% c(3, 4), 1, 0) #create a binary variable to determine if a user is alone or is married
sample$psEducation <- ifelse(sample$highestEd %in% c(1,2), 0, 1) #create a binary variable to determine if a user has post-secondary education
sample$House <-ifelse(sample$dwellingType %in% c(2,3,4), 0, 1) #create a variable to determine if the user lives in a house
sample$over45 <-ifelse(sample$AgeGroup %in% c(4,5,6,7), 1, 0) #create a variable to determine if a user is 45 or older

New Demographic Variables:

isAlone(Determines if a user is single or married)
psEducation (Determines if a user has post-secondary education)
House (Determines if a user lives in a house)
over45 (Determines if a user is over the age of 45)

Remove NA values from sample:

noOmissions <- sample[rowSums(is.na(sample))==0, ]

Final correlation analysis to determine appropraite variables for analysis:

finalSample <- noOmissions[ ,c(25:29,16:20,33,32,24,30:31)]
cormatF <- cor(finalSample)
melted_cormatF <- melt(cormatF)
ggplot (data = melted_cormatF, aes (x=Var1, y=Var2, fill=value)) + geom_tile()

Final Sample for Analysis:

summary(finalSample)

##    Awareness           Work            Advice          Incident     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.8145   Mean   :0.1532   Mean   :0.1532   Mean   :0.2177  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      Other           FamilySize       AgeGroup      isCanadaBorn   
##  Min.   :0.00000   Min.   :1.000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:3.000   1st Qu.:1.0000  
##  Median :0.00000   Median :2.000   Median :5.000   Median :1.0000  
##  Mean   :0.08065   Mean   :1.847   Mean   :4.427   Mean   :0.7903  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:6.000   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :4.000   Max.   :7.000   Max.   :1.0000  
##      isMale          Under18           over45           House      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :1.0000   Median :0.0000   Median :1.0000   Median :1.000  
##  Mean   :0.5565   Mean   :0.2258   Mean   :0.7097   Mean   :0.629  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##     isRural          isAlone        psEducation    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.1532   Mean   :0.4194   Mean   :0.7823  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000

Analysis

Create contingency tables for each survey variable. This will determine which demographic variables are appropriate for the initial analysis.

##          FamilySize
## Awareness  1  2  3  4
##         0  9 12  1  1
##         1 30 58  9  4

##          AgeGroup
## Awareness  1  2  3  4  5  6  7
##         0  2  4  2  5  4  5  1
##         1  6  8 14 18 22 21 12

##          isCanadaBorn
## Awareness  0  1
##         0  4 19
##         1 22 79

##          isMale
## Awareness  0  1
##         0  9 14
##         1 46 55

##          Under18
## Awareness  0  1
##         0 17  6
##         1 79 22

##          over45
## Awareness  0  1
##         0  8 15
##         1 28 73

##          House
## Awareness  0  1
##         0  8 15
##         1 38 63

##          isRural
## Awareness  0  1
##         0 21  2
##         1 84 17

##          isAlone
## Awareness  0  1
##         0 14  9
##         1 58 43

##          psEducation
## Awareness  0  1
##         0  6 17
##         1 21 80

##     FamilySize
## Work  1  2  3  4
##    0 35 59  8  3
##    1  4 11  2  2

##     AgeGroup
## Work  1  2  3  4  5  6  7
##    0  7  7 12 20 23 23 13
##    1  1  5  4  3  3  3  0

##     isCanadaBorn
## Work  0  1
##    0 21 84
##    1  5 14

##     isMale
## Work  0  1
##    0 45 60
##    1 10  9

##     Under18
## Work  0  1
##    0 81 24
##    1 15  4

##     over45
## Work  0  1
##    0 26 79
##    1 10  9

##     House
## Work  0  1
##    0 39 66
##    1  7 12

##     isRural
## Work  0  1
##    0 88 17
##    1 17  2

##     isAlone
## Work  0  1
##    0 58 47
##    1 14  5

##     psEducation
## Work  0  1
##    0 25 80
##    1  2 17

##       FamilySize
## Advice  1  2  3  4
##      0 31 62 10  2
##      1  8  8  0  3

##       AgeGroup
## Advice  1  2  3  4  5  6  7
##      0  6 10 16 19 22 23  9
##      1  2  2  0  4  4  3  4

##       isCanadaBorn
## Advice  0  1
##      0 20 85
##      1  6 13

##       isMale
## Advice  0  1
##      0 48 57
##      1  7 12

##       Under18
## Advice  0  1
##      0 81 24
##      1 15  4

##       over45
## Advice  0  1
##      0 32 73
##      1  4 15

##       House
## Advice  0  1
##      0 36 69
##      1 10  9

##       isRural
## Advice  0  1
##      0 87 18
##      1 18  1

##       isAlone
## Advice  0  1
##      0 62 43
##      1 10  9

##       psEducation
## Advice  0  1
##      0 23 82
##      1  4 15

##         FamilySize
## Incident  1  2  3  4
##        0 30 57  7  3
##        1  9 13  3  2

##         AgeGroup
## Incident  1  2  3  4  5  6  7
##        0  6  9 13 20 19 21  9
##        1  2  3  3  3  7  5  4

##         isCanadaBorn
## Incident  0  1
##        0 21 76
##        1  5 22

##         isMale
## Incident  0  1
##        0 50 47
##        1  5 22

##         Under18
## Incident  0  1
##        0 75 22
##        1 21  6

##         over45
## Incident  0  1
##        0 28 69
##        1  8 19

##         House
## Incident  0  1
##        0 36 61
##        1 10 17

##         isRural
## Incident  0  1
##        0 80 17
##        1 25  2

##         isAlone
## Incident  0  1
##        0 55 42
##        1 17 10

##         psEducation
## Incident  0  1
##        0 20 77
##        1  7 20

##      FamilySize
## Other  1  2  3  4
##     0 34 66  9  5
##     1  5  4  1  0

##      AgeGroup
## Other  1  2  3  4  5  6  7
##     0  8 10 15 21 23 25 12
##     1  0  2  1  2  3  1  1

##      isCanadaBorn
## Other  0  1
##     0 25 89
##     1  1  9

##      isMale
## Other  0  1
##     0 54 60
##     1  1  9

##      Under18
## Other  0  1
##     0 88 26
##     1  8  2

##      over45
## Other  0  1
##     0 33 81
##     1  3  7

##      House
## Other  0  1
##     0 42 72
##     1  4  6

##      isRural
## Other  0  1
##     0 95 19
##     1 10  0

##      isAlone
## Other  0  1
##     0 67 47
##     1  5  5

##      psEducation
## Other  0  1
##     0 25 89
##     1  2  8

Create categorical variables from each demographic variable for the purpose of logistic regression analysis:

finalSample$FamilySize <-factor(finalSample$FamilySize)
finalSample$AgeGroup <-factor(finalSample$AgeGroup)
finalSample$isCanadaBorn <-factor(finalSample$isCanadaBorn)
finalSample$isMale <-factor(finalSample$isMale)
finalSample$Under18 <-factor(finalSample$Under18)
finalSample$over45 <-factor(finalSample$over45)
finalSample$House <-factor(finalSample$House)
finalSample$isRural <-factor(finalSample$isRural)
finalSample$isAlone <-factor(finalSample$isAlone)
finalSample$psEducation <-factor(finalSample$psEducation)

Logistic Regression (Part 1):

Conduct Logistic Regression for All Appropriate Variables:

#Awareness (over45 removed due to it lacking valuable data)
logitAwareness <- glm(Awareness~FamilySize+AgeGroup+isCanadaBorn+isMale+Under18+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitAwareness)

## 
## Call:
## glm(formula = Awareness ~ FamilySize + AgeGroup + isCanadaBorn + 
##     isMale + Under18 + House + isRural + isAlone + psEducation, 
##     family = "binomial", data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2742   0.1967   0.5234   0.6601   1.7455  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   -5.12995    3.13913  -1.634   0.1022  
## FamilySize2    3.48974    1.55501   2.244   0.0248 *
## FamilySize3    4.82062    1.97917   2.436   0.0149 *
## FamilySize4    4.01657    1.96831   2.041   0.0413 *
## AgeGroup2      2.81995    1.98735   1.419   0.1559  
## AgeGroup3      4.12071    2.04843   2.012   0.0443 *
## AgeGroup4      3.25572    2.02683   1.606   0.1082  
## AgeGroup5      4.15379    1.99933   2.078   0.0377 *
## AgeGroup6      3.46574    1.99761   1.735   0.0828 .
## AgeGroup7      5.46706    2.50665   2.181   0.0292 *
## isCanadaBorn1 -0.58034    0.78052  -0.744   0.4572  
## isMale1       -0.53367    0.57884  -0.922   0.3565  
## Under181      -0.08536    0.79746  -0.107   0.9148  
## House1        -0.64470    0.66196  -0.974   0.3301  
## isRural1       0.59375    0.88915   0.668   0.5043  
## isAlone1       3.32696    1.54533   2.153   0.0313 *
## psEducation1   0.68202    0.71307   0.956   0.3388  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 118.94  on 123  degrees of freedom
## Residual deviance: 101.93  on 107  degrees of freedom
## AIC: 135.93
## 
## Number of Fisher Scoring iterations: 6

#Work
logitWork <- glm(Work~FamilySize+isCanadaBorn+isMale+Under18+over45+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitWork)

## 
## Call:
## glm(formula = Work ~ FamilySize + isCanadaBorn + isMale + Under18 + 
##     over45 + House + isRural + isAlone + psEducation, family = "binomial", 
##     data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9268  -0.5446  -0.2979  -0.1505   2.8918  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.1658     1.9392   1.117 0.264069    
## FamilySize2    -1.0593     1.0845  -0.977 0.328676    
## FamilySize3    -0.5012     1.2368  -0.405 0.685293    
## FamilySize4     2.3240     1.6204   1.434 0.151517    
## isCanadaBorn1  -1.5125     0.7850  -1.927 0.054014 .  
## isMale1        -1.0485     0.6688  -1.568 0.116944    
## Under181       -2.4966     0.9044  -2.760 0.005774 ** 
## over451        -3.0187     0.8570  -3.522 0.000428 ***
## House1          0.3537     0.7519   0.470 0.638048    
## isRural1       -0.7341     1.0157  -0.723 0.469809    
## isAlone1       -2.4905     1.1851  -2.102 0.035592 *  
## psEducation1    1.7388     1.0610   1.639 0.101244    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 106.209  on 123  degrees of freedom
## Residual deviance:  79.439  on 112  degrees of freedom
## AIC: 103.44
## 
## Number of Fisher Scoring iterations: 6

#Advice
logitAdvice <- glm(Advice~isCanadaBorn+isMale+Under18+over45+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitAdvice)

## 
## Call:
## glm(formula = Advice ~ isCanadaBorn + isMale + Under18 + over45 + 
##     House + isRural + isAlone + psEducation, family = "binomial", 
##     data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9485  -0.6186  -0.5235  -0.3330   2.4260  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -1.60034    1.17693  -1.360    0.174
## isCanadaBorn1 -0.42230    0.64373  -0.656    0.512
## isMale1        0.13090    0.55190   0.237    0.813
## Under181      -0.01261    0.75169  -0.017    0.987
## over451        0.62924    0.68236   0.922    0.356
## House1        -0.56859    0.59510  -0.955    0.339
## isRural1      -1.20110    1.09329  -1.099    0.272
## isAlone1       0.30652    0.64092   0.478    0.632
## psEducation1  -0.03192    0.63114  -0.051    0.960
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 106.21  on 123  degrees of freedom
## Residual deviance: 100.31  on 115  degrees of freedom
## AIC: 118.31
## 
## Number of Fisher Scoring iterations: 5

#Incident (over45 was removed due to it not giving valuable data)
logitIncident <- glm(Incident~FamilySize+AgeGroup+isCanadaBorn+isMale+Under18+House+isRural+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitIncident)

## 
## Call:
## glm(formula = Incident ~ FamilySize + AgeGroup + isCanadaBorn + 
##     isMale + Under18 + House + isRural + isAlone + psEducation, 
##     family = "binomial", data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2871  -0.7823  -0.4055  -0.1301   2.2921  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    1.39183    2.64357   0.526  0.59854   
## FamilySize2   -2.39371    1.49081  -1.606  0.10835   
## FamilySize3   -1.38894    1.59238  -0.872  0.38308   
## FamilySize4   -0.90211    1.82649  -0.494  0.62137   
## AgeGroup2     -1.79218    1.74001  -1.030  0.30302   
## AgeGroup3     -2.61198    1.77362  -1.473  0.14084   
## AgeGroup4     -3.48715    1.87797  -1.857  0.06333 . 
## AgeGroup5     -1.86637    1.69069  -1.104  0.26963   
## AgeGroup6     -2.28610    1.76593  -1.295  0.19547   
## AgeGroup7     -1.79644    1.87563  -0.958  0.33817   
## isCanadaBorn1  0.38755    0.69551   0.557  0.57738   
## isMale1        1.99756    0.67431   2.962  0.00305 **
## Under181       0.91675    0.81439   1.126  0.26030   
## House1         0.08244    0.63448   0.130  0.89662   
## isRural1      -0.20135    0.90039  -0.224  0.82305   
## isAlone1      -2.21612    1.43931  -1.540  0.12363   
## psEducation1  -0.14534    0.66938  -0.217  0.82811   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.96  on 123  degrees of freedom
## Residual deviance: 109.09  on 107  degrees of freedom
## AIC: 143.09
## 
## Number of Fisher Scoring iterations: 6

#Other
logitOther <- glm(Other~isCanadaBorn+isMale+Under18+over45+House+isAlone+psEducation, data = finalSample, family = "binomial")
summary(logitOther)

## 
## Call:
## glm(formula = Other ~ isCanadaBorn + isMale + Under18 + over45 + 
##     House + isAlone + psEducation, family = "binomial", data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7081  -0.4859  -0.2562  -0.1873   2.7688  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -5.854335   2.051778  -2.853  0.00433 **
## isCanadaBorn1  1.259686   1.213021   1.038  0.29905   
## isMale1        2.430795   1.121817   2.167  0.03025 * 
## Under181       0.821632   1.065063   0.771  0.44045   
## over451        0.006457   0.842673   0.008  0.99389   
## House1         0.046724   0.854901   0.055  0.95641   
## isAlone1       0.696653   0.892292   0.781  0.43495   
## psEducation1   0.033480   0.863458   0.039  0.96907   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 69.525  on 123  degrees of freedom
## Residual deviance: 60.852  on 116  degrees of freedom
## AIC: 76.852
## 
## Number of Fisher Scoring iterations: 6

Confidence Intervals:

Confidence Intervals for Coefficients:

confint(logitAwareness) #using log-likelihood

## Waiting for profiling to be done...

##                      2.5 %      97.5 %
## (Intercept)   -12.86441913  0.05811958
## FamilySize2     0.94020763  7.31850609
## FamilySize3     1.49508547  9.49956266
## FamilySize4     0.57169783  8.56824406
## AgeGroup2      -0.71034492  7.31672635
## AgeGroup3       0.45590167  8.71508903
## AgeGroup4      -0.30847935  7.84323635
## AgeGroup5       0.63646529  8.68532810
## AgeGroup6      -0.08331657  7.97748462
## AgeGroup7       1.26096185 11.80452481
## isCanadaBorn1  -2.25613284  0.86745823
## isMale1        -1.72805937  0.57082514
## Under181       -1.66253194  1.50499938
## House1         -2.01496023  0.61408801
## isRural1       -1.01091106  2.62676489
## isAlone1        0.75676187  7.11076631
## psEducation1   -0.76201455  2.08876533

confint.default(logitAwareness) #using standard error

##                     2.5 %     97.5 %
## (Intercept)   -11.2825235  1.0226265
## FamilySize2     0.4419802  6.5375001
## FamilySize3     0.9415269  8.6997220
## FamilySize4     0.1587581  7.8743811
## AgeGroup2      -1.0751809  6.7150748
## AgeGroup3       0.1058654  8.1355640
## AgeGroup4      -0.7168042  7.2282427
## AgeGroup5       0.2351613  8.0724103
## AgeGroup6      -0.4494971  7.3809844
## AgeGroup7       0.5541171 10.3800054
## isCanadaBorn1  -2.1101288  0.9494502
## isMale1        -1.6681736  0.6008384
## Under181       -1.6483517  1.4776288
## House1         -1.9421107  0.6527064
## isRural1       -1.1489503  2.3364566
## isAlone1        0.2981790  6.3557470
## psEducation1   -0.7155635  2.0796061

confint(logitWork)

## Waiting for profiling to be done...

##                     2.5 %      97.5 %
## (Intercept)   -1.71436807  6.01207190
## FamilySize2   -3.28985581  1.06361118
## FamilySize3   -3.08484830  1.88665432
## FamilySize4   -0.87499525  5.61141712
## isCanadaBorn1 -3.10884073  0.03175847
## isMale1       -2.43266147  0.23106523
## Under181      -4.47096441 -0.86352763
## over451       -4.88525903 -1.45911346
## House1        -1.10368782  1.87538564
## isRural1      -3.02366281  1.07821552
## isAlone1      -5.00930459 -0.27715770
## psEducation1  -0.05218201  4.29079149

confint.default(logitWork)

##                    2.5 %      97.5 %
## (Intercept)   -1.6350315  5.96656734
## FamilySize2   -3.1848005  1.06622530
## FamilySize3   -2.9252992  1.92286893
## FamilySize4   -0.8519645  5.49994166
## isCanadaBorn1 -3.0511489  0.02609276
## isMale1       -2.3593253  0.26232708
## Under181      -4.2692514 -0.72388252
## over451       -4.6983699 -1.33906145
## House1        -1.1199223  1.82731885
## isRural1      -2.7248489  1.25658660
## isAlone1      -4.8132903 -0.16780589
## psEducation1  -0.3406986  3.81832496

confint(logitAdvice)

## Waiting for profiling to be done...

##                    2.5 %    97.5 %
## (Intercept)   -4.0360860 0.6265680
## isCanadaBorn1 -1.6567506 0.9011494
## isMale1       -0.9379167 1.2529222
## Under181      -1.5668727 1.4331417
## over451       -0.6449772 2.0808381
## House1        -1.7498283 0.6061955
## isRural1      -4.1565430 0.5740896
## isAlone1      -0.9568462 1.5844499
## psEducation1  -1.2069552 1.3258326

confint.default(logitAdvice)

##                    2.5 %    97.5 %
## (Intercept)   -3.9070741 0.7063885
## isCanadaBorn1 -1.6839779 0.8393876
## isMale1       -0.9507900 1.2125994
## Under181      -1.4858966 1.4606851
## over451       -0.7081729 1.9666467
## House1        -1.7349686 0.5977826
## isRural1      -3.3439055 0.9417144
## isAlone1      -0.9496561 1.5627049
## psEducation1  -1.2689306 1.2050822

confint(logitIncident)

## Waiting for profiling to be done...

##                    2.5 %    97.5 %
## (Intercept)   -4.0397099 6.7111400
## FamilySize2   -5.4712095 0.3512812
## FamilySize3   -4.6150450 1.7375556
## FamilySize4   -4.5822608 2.6948249
## AgeGroup2     -5.3428230 1.6732361
## AgeGroup3     -6.2340758 0.8918226
## AgeGroup4     -7.4225442 0.0747858
## AgeGroup5     -5.3055395 1.5555054
## AgeGroup6     -5.8793791 1.2391508
## AgeGroup7     -5.6090631 1.9533143
## isCanadaBorn1 -0.9242412 1.8494310
## isMale1        0.7904360 3.4828484
## Under181      -0.6840152 2.5540336
## House1        -1.1508529 1.3667482
## isRural1      -2.2445418 1.4481393
## isAlone1      -5.1712795 0.4871479
## psEducation1  -1.4414917 1.2183723

confint.default(logitIncident)

##                    2.5 %    97.5 %
## (Intercept)   -3.7894641 6.5731267
## FamilySize2   -5.3156458 0.5282308
## FamilySize3   -4.5099523 1.7320682
## FamilySize4   -4.4819727 2.6777522
## AgeGroup2     -5.2025419 1.6181758
## AgeGroup3     -6.0882179 0.8642523
## AgeGroup4     -7.1679057 0.1936038
## AgeGroup5     -5.1800544 1.4473126
## AgeGroup6     -5.7472532 1.1750526
## AgeGroup7     -5.4725974 1.8797217
## isCanadaBorn1 -0.9756167 1.7507129
## isMale1        0.6759341 3.3191907
## Under181      -0.6794362 2.5129280
## House1        -1.1611206 1.3259927
## isRural1      -1.9660687 1.5633763
## isAlone1      -5.0371126 0.6048661
## psEducation1  -1.4572910 1.1666125

confint(logitOther)

## Waiting for profiling to be done...

##                     2.5 %    97.5 %
## (Intercept)   -10.4549577 -2.254641
## isCanadaBorn1  -0.7589535  4.374330
## isMale1         0.6004028  5.422394
## Under181       -1.4406137  2.904664
## over451        -1.5932798  1.795187
## House1         -1.6259341  1.786500
## isAlone1       -1.0722319  2.495871
## psEducation1   -1.5272683  2.022578

confint.default(logitOther)

##                    2.5 %    97.5 %
## (Intercept)   -9.8757455 -1.832924
## isCanadaBorn1 -1.1177912  3.637164
## isMale1        0.2320743  4.629515
## Under181      -1.2658537  2.909118
## over451       -1.6451522  1.658067
## House1        -1.6288515  1.722299
## isAlone1      -1.0522071  2.445512
## psEducation1  -1.6588675  1.725827

Chi-Square Analysis:

To further determine which demographic variables are best suited to predict each survey variables, Chi-Squared Analysis is conducted (with p = 0.1):

#FamilySize
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 2:4) #statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 6.5, df = 3, P(> X2) = 0.091

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 2:4) #statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 6.7, df = 3, P(> X2) = 0.083

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 2:4) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 4.1, df = 3, P(> X2) = 0.25

#AgeGroup:
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 5:10) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 7.8, df = 6, P(> X2) = 0.25

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 5:10) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 5.0, df = 6, P(> X2) = 0.55

#isCanadaBorn
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 11) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.55, df = 1, P(> X2) = 0.46

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 5) #statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 3.7, df = 1, P(> X2) = 0.054

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 2) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.43, df = 1, P(> X2) = 0.51

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 11) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.31, df = 1, P(> X2) = 0.58

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 2) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 1.1, df = 1, P(> X2) = 0.3

#isMale
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 12) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.85, df = 1, P(> X2) = 0.36

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 6) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 2.5, df = 1, P(> X2) = 0.12

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 3) #not statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.056, df = 1, P(> X2) = 0.81

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 12) #statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 8.8, df = 1, P(> X2) = 0.0031

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 3) #statistically significant

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 4.7, df = 1, P(> X2) = 0.03

#Under18 
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 13) #NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.011, df = 1, P(> X2) = 0.91

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 7) #SS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 7.6, df = 1, P(> X2) = 0.0058

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 4)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.00028, df = 1, P(> X2) = 0.99

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 13)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 1.3, df = 1, P(> X2) = 0.26

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 4)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.6, df = 1, P(> X2) = 0.44

#Over45
wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 8)#SS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 12.4, df = 1, P(> X2) = 0.00043

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 5)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.85, df = 1, P(> X2) = 0.36

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 5)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 5.9e-05, df = 1, P(> X2) = 0.99

#House 
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 14)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.95, df = 1, P(> X2) = 0.33

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 9)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.22, df = 1, P(> X2) = 0.64

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 6)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.91, df = 1, P(> X2) = 0.34

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 14)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.017, df = 1, P(> X2) = 0.9

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 6)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.003, df = 1, P(> X2) = 0.96

#isRural
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 15)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.45, df = 1, P(> X2) = 0.5

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 9)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.22, df = 1, P(> X2) = 0.64

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 7)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 1.2, df = 1, P(> X2) = 0.27

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 15)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.05, df = 1, P(> X2) = 0.82

#isAlone
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 16)#SS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 4.6, df = 1, P(> X2) = 0.031

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 10)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.52, df = 1, P(> X2) = 0.47

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 8)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.23, df = 1, P(> X2) = 0.63

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 16)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 2.4, df = 1, P(> X2) = 0.12

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 7)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.61, df = 1, P(> X2) = 0.43

#psEducation 
wald.test(b = coef(logitAwareness), Sigma = vcov(logitAwareness), Terms = 17)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.91, df = 1, P(> X2) = 0.34

wald.test(b = coef(logitWork), Sigma = vcov(logitWork), Terms = 11)#SS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 4.4, df = 1, P(> X2) = 0.036

wald.test(b = coef(logitAdvice), Sigma = vcov(logitAdvice), Terms = 9)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.0026, df = 1, P(> X2) = 0.96

wald.test(b = coef(logitIncident), Sigma = vcov(logitIncident), Terms = 17)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.047, df = 1, P(> X2) = 0.83

wald.test(b = coef(logitOther), Sigma = vcov(logitOther), Terms = 8)#NSS

## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 0.0015, df = 1, P(> X2) = 0.97

Logistic Regression (Part 2):

Conduct logistic regression only using the demographic variables that were determined to be statistically significant

logitAwareness <- glm(Awareness~FamilySize+isAlone, data = finalSample, family = "binomial")
summary(logitAwareness)

## 
## Call:
## glm(formula = Awareness ~ FamilySize + isAlone, family = "binomial", 
##     data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4751   0.3770   0.6503   0.6681   1.1283  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.1166     0.9113   0.128    0.898
## FamilySize2   1.3295     0.9115   1.459    0.145
## FamilySize3   1.7366     1.3037   1.332    0.183
## FamilySize4   1.2697     1.4424   0.880    0.379
## isAlone1      1.1620     0.8921   1.303    0.193
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 118.94  on 123  degrees of freedom
## Residual deviance: 115.80  on 119  degrees of freedom
## AIC: 125.8
## 
## Number of Fisher Scoring iterations: 5

logitWork <- glm(Work~FamilySize+isCanadaBorn+Under18+over45+psEducation, data = finalSample, family = "binomial")
summary(logitWork)

## 
## Call:
## glm(formula = Work ~ FamilySize + isCanadaBorn + Under18 + over45 + 
##     psEducation, family = "binomial", data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7069  -0.4942  -0.3633  -0.1964   2.4739  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -1.5963     1.2159  -1.313  0.18921   
## FamilySize2     0.8905     0.7066   1.260  0.20760   
## FamilySize3     0.9710     1.1087   0.876  0.38115   
## FamilySize4     4.0449     1.3806   2.930  0.00339 **
## isCanadaBorn1  -1.0889     0.6851  -1.589  0.11197   
## Under181       -1.4571     0.7348  -1.983  0.04738 * 
## over451        -2.1436     0.6538  -3.279  0.00104 **
## psEducation1    1.8169     0.9481   1.916  0.05533 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 106.209  on 123  degrees of freedom
## Residual deviance:  86.932  on 116  degrees of freedom
## AIC: 102.93
## 
## Number of Fisher Scoring iterations: 5

logitAdvice <- glm(Advice~House+isRural, data = finalSample, family = "binomial")
summary(logitAdvice)

## 
## Call:
## glm(formula = Advice ~ House + isRural, family = "binomial", 
##     data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7233  -0.7233  -0.5327  -0.3014   2.4958  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2075     0.3617  -3.338 0.000843 ***
## House1       -0.6734     0.5081  -1.325 0.185025    
## isRural1     -1.1882     1.0666  -1.114 0.265269    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 106.21  on 123  degrees of freedom
## Residual deviance: 102.29  on 121  degrees of freedom
## AIC: 108.29
## 
## Number of Fisher Scoring iterations: 5

logitIncident <- glm(Incident~isMale, data = finalSample, family = "binomial")
summary(logitIncident)

## 
## Call:
## glm(formula = Incident ~ isMale, family = "binomial", data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8763  -0.8763  -0.4366  -0.4366   2.1899  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.3026     0.4689  -4.910  9.1e-07 ***
## isMale1       1.5435     0.5354   2.883  0.00394 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.96  on 123  degrees of freedom
## Residual deviance: 119.90  on 122  degrees of freedom
## AIC: 123.9
## 
## Number of Fisher Scoring iterations: 4

logitOther <- glm(Other~isMale, data = finalSample, family = "binomial")
summary(logitOther)

## 
## Call:
## glm(formula = Other ~ isMale, family = "binomial", data = finalSample)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5287  -0.5287  -0.1916  -0.1916   2.8310  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.989      1.009  -3.953 7.72e-05 ***
## isMale1        2.092      1.071   1.954   0.0507 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 69.525  on 123  degrees of freedom
## Residual deviance: 63.432  on 122  degrees of freedom
## AIC: 67.432
## 
## Number of Fisher Scoring iterations: 6

Coefficients as Odds Ratio:

exp(coef(logitAwareness))

## (Intercept) FamilySize2 FamilySize3 FamilySize4    isAlone1 
##    1.123664    3.779311    5.677789    3.559783    3.196310

exp(coef(logitWork))

##   (Intercept)   FamilySize2   FamilySize3   FamilySize4 isCanadaBorn1 
##     0.2026349     2.4362293     2.6405416    57.1048659     0.3365781 
##      Under181       over451  psEducation1 
##     0.2328992     0.1172336     6.1526488

exp(coef(logitAdvice))

## (Intercept)      House1    isRural1 
##   0.2989422   0.5099752   0.3047680

exp(coef(logitIncident))

## (Intercept)     isMale1 
##    0.100000    4.680851

exp(coef(logitOther))

## (Intercept)     isMale1 
##  0.01851852  8.09999977

Accuracy of Each Model:

## [1] "Awareness: "

## [1] 0.8145161

## [1] "Work: "

## [1] 0.1532258

## [1] "Advice: "

## [1] 0.1532258

## [1] "Incident: "

## [1] 0.2177419

## [1] "Other: "

## [1] 0.08064516

Conclusion:

Based on the accuracy scores listed above, it can be determined that this method of model building created an accurate model for predicting a user’s Awareness of Cyber Threats, but was unable to created accurate models for the other categories. Further analysis would need to be completed in order to determine better models for these variables.

MSCI-3230 R Project

Reed Renaud

2023-04-17