library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("pROC")
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
##資料整理
load("train.rdata")
data<- train
data[,c(2:21,31:34)] <- lapply(c(2:21,31:34), function(a)factor(data[[a]]))
## summary(data)
data <- data[-1]
set.seed(0)
train_idx <- sample(1:nrow(data), size = 0.7 * nrow(data))
train_data <- data[train_idx,c(1:10,12:17,21:32)]
test_data <- data[-train_idx,c(1:10,12:17,21:32) ]
####檢查資料的共變性問題
logitMod <- glm(formula = h1n1_vaccine ~ .,
data = train_data,family = binomial(link = "logit"))
summary(logitMod)
##
## Call:
## glm(formula = h1n1_vaccine ~ ., family = binomial(link = "logit"),
## data = train_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4486 -0.5675 -0.3746 -0.2118 3.3123
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.758577 0.231532 -20.553 < 2e-16 ***
## h1n1_concern1 0.035335 0.084566 0.418 0.676063
## h1n1_concern2 0.022638 0.085305 0.265 0.790723
## h1n1_concern3 -0.131740 0.096847 -1.360 0.173740
## h1n1_knowledge1 0.034601 0.086482 0.400 0.689086
## h1n1_knowledge2 0.195523 0.091062 2.147 0.031783 *
## behavioral_antiviral_meds1 0.172020 0.096495 1.783 0.074636 .
## behavioral_avoidance1 -0.060939 0.054709 -1.114 0.265330
## behavioral_face_mask1 0.181259 0.080956 2.239 0.025158 *
## behavioral_wash_hands1 0.026438 0.068505 0.386 0.699550
## behavioral_large_gatherings1 -0.229582 0.055951 -4.103 4.07e-05 ***
## behavioral_outside_home1 -0.047156 0.056820 -0.830 0.406587
## behavioral_touch_face1 0.076523 0.052770 1.450 0.147027
## doctor_recc_h1n11 1.665091 0.046819 35.564 < 2e-16 ***
## doctor_recc_h1n12 -0.681702 0.104833 -6.503 7.89e-11 ***
## chronic_med_condition1 0.091741 0.048805 1.880 0.060144 .
## child_under_6_months1 0.263505 0.074781 3.524 0.000426 ***
## health_worker1 0.892534 0.063348 14.089 < 2e-16 ***
## opinion_h1n1_vacc_effective3 0.766257 0.148613 5.156 2.52e-07 ***
## opinion_h1n1_vacc_effective4 1.100429 0.138781 7.929 2.20e-15 ***
## opinion_h1n1_vacc_effective5 1.994522 0.140027 14.244 < 2e-16 ***
## opinion_h1n1_risk2 0.694725 0.062888 11.047 < 2e-16 ***
## opinion_h1n1_risk4 1.529548 0.069540 21.995 < 2e-16 ***
## opinion_h1n1_risk5 1.839716 0.093392 19.699 < 2e-16 ***
## opinion_h1n1_sick_from_vacc2 -0.321188 0.055132 -5.826 5.68e-09 ***
## opinion_h1n1_sick_from_vacc4 -0.125903 0.061332 -2.053 0.040089 *
## opinion_h1n1_sick_from_vacc5 -0.268651 0.088677 -3.030 0.002449 **
## age_group35 - 44 Years -0.133728 0.079882 -1.674 0.094120 .
## age_group45 - 54 Years -0.018596 0.074987 -0.248 0.804138
## age_group55 - 64 Years 0.358818 0.076400 4.697 2.65e-06 ***
## age_group65+ Years 0.450501 0.082664 5.450 5.04e-08 ***
## education12 Years 0.149386 0.092625 1.613 0.106787
## educationCollege Graduate 0.304471 0.092748 3.283 0.001028 **
## educationSome College 0.164561 0.092768 1.774 0.076081 .
## raceHispanic 0.373987 0.119752 3.123 0.001790 **
## raceOther or Multiple 0.501506 0.121619 4.124 3.73e-05 ***
## raceWhite 0.350596 0.092442 3.793 0.000149 ***
## sexMale 0.153323 0.045597 3.363 0.000772 ***
## income_poverty> $75,000 0.031315 0.053768 0.582 0.560285
## income_povertyBelow Poverty -0.074165 0.082794 -0.896 0.370370
## marital_statusNot Married -0.118885 0.060009 -1.981 0.047579 *
## rent_or_ownRent -0.055618 0.059818 -0.930 0.352475
## employment_statusNot in Labor Force 0.043153 0.055088 0.783 0.433422
## employment_statusUnemployed 0.014950 0.103302 0.145 0.884930
## census_msaMSA, Principle City 0.066470 0.051480 1.291 0.196640
## census_msaNon-MSA 0.042693 0.053026 0.805 0.420741
## household_adults1 0.050055 0.064241 0.779 0.435876
## household_adults2 0.047146 0.086095 0.548 0.583961
## household_children1 -0.003018 0.073086 -0.041 0.967062
## household_children2 -0.091083 0.079441 -1.147 0.251565
## household_children3 -0.092634 0.099643 -0.930 0.352546
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19348 on 18693 degrees of freedom
## Residual deviance: 14301 on 18643 degrees of freedom
## AIC: 14403
##
## Number of Fisher Scoring iterations: 5
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# 模型共線性檢查VIF(希望VIF值都在4以下)
#所有變數皆符合
vif(logitMod)
## GVIF Df GVIF^(1/(2*Df))
## h1n1_concern 1.689241 3 1.091311
## h1n1_knowledge 1.261396 2 1.059773
## behavioral_antiviral_meds 1.072957 1 1.035836
## behavioral_avoidance 1.242281 1 1.114576
## behavioral_face_mask 1.104093 1 1.050758
## behavioral_wash_hands 1.264975 1 1.124711
## behavioral_large_gatherings 1.603822 1 1.266421
## behavioral_outside_home 1.619148 1 1.272458
## behavioral_touch_face 1.260402 1 1.122676
## doctor_recc_h1n1 1.070809 2 1.017251
## chronic_med_condition 1.133640 1 1.064725
## child_under_6_months 1.045813 1 1.022650
## health_worker 1.140246 1 1.067823
## opinion_h1n1_vacc_effective 1.111744 3 1.017812
## opinion_h1n1_risk 1.533520 3 1.073861
## opinion_h1n1_sick_from_vacc 1.511219 3 1.071243
## age_group 2.532823 4 1.123183
## education 1.485360 3 1.068166
## race 1.301729 3 1.044929
## sex 1.107183 1 1.052228
## income_poverty 1.628309 2 1.129625
## marital_status 1.939494 1 1.392657
## rent_or_own 1.305732 1 1.142686
## employment_status 1.638630 2 1.131411
## census_msa 1.091359 2 1.022096
## household_adults 1.857066 2 1.167365
## household_children 1.645365 3 1.086535
####變數挑選
#用隨機森林挑選變數
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
rf <- randomForest(h1n1_vaccine ~ ., train_data)
rf_pred_train <- predict(rf, train_data)
rf_pred_test <- predict(rf, test_data)
importance(rf)
## MeanDecreaseGini
## h1n1_concern 282.22639
## h1n1_knowledge 190.99006
## behavioral_antiviral_meds 53.56421
## behavioral_avoidance 119.20453
## behavioral_face_mask 66.05055
## behavioral_wash_hands 81.23243
## behavioral_large_gatherings 125.57580
## behavioral_outside_home 124.87922
## behavioral_touch_face 117.89091
## doctor_recc_h1n1 807.29667
## chronic_med_condition 119.25973
## child_under_6_months 74.90161
## health_worker 151.92891
## opinion_h1n1_vacc_effective 445.81079
## opinion_h1n1_risk 542.18729
## opinion_h1n1_sick_from_vacc 300.56331
## age_group 384.07683
## education 289.36003
## race 189.71935
## sex 144.01186
## income_poverty 181.23214
## marital_status 116.94064
## rent_or_own 101.73455
## employment_status 169.70155
## census_msa 269.71657
## household_adults 195.37479
## household_children 221.42601
#用Elastic net挑選變數
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 3.0-2
# Dumy code categorical predictor variables
x <- model.matrix(h1n1_vaccine ~ ., train_data)[,-1]
# Convert the outcome (class) to a numerical variable
y <- as.numeric(train_data$h1n1_vaccine)
set.seed(123)
cv.lasso <- cv.glmnet(x, y, alpha = 1, family = "binomial")
# plot(cv.lasso)
coef(cv.lasso, cv.lasso$lambda.min)
## 51 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -4.3716538104
## h1n1_concern1 0.0134112408
## h1n1_concern2 0.0054269753
## h1n1_concern3 -0.1404825666
## h1n1_knowledge1 0.0009760144
## h1n1_knowledge2 0.1657738234
## behavioral_antiviral_meds1 0.1529441008
## behavioral_avoidance1 -0.0434784319
## behavioral_face_mask1 0.1642022452
## behavioral_wash_hands1 0.0148950602
## behavioral_large_gatherings1 -0.2218726799
## behavioral_outside_home1 -0.0441171718
## behavioral_touch_face1 0.0654334782
## doctor_recc_h1n11 1.6583764172
## doctor_recc_h1n12 -0.6598310234
## chronic_med_condition1 0.0868056360
## child_under_6_months1 0.2461760190
## health_worker1 0.8822711803
## opinion_h1n1_vacc_effective3 0.5666126613
## opinion_h1n1_vacc_effective4 0.9139226064
## opinion_h1n1_vacc_effective5 1.8084348167
## opinion_h1n1_risk2 0.6662517660
## opinion_h1n1_risk4 1.4969730975
## opinion_h1n1_risk5 1.7947906761
## opinion_h1n1_sick_from_vacc2 -0.2959925074
## opinion_h1n1_sick_from_vacc4 -0.0983251300
## opinion_h1n1_sick_from_vacc5 -0.2458217077
## age_group35 - 44 Years -0.1291051788
## age_group45 - 54 Years -0.0149339482
## age_group55 - 64 Years 0.3488251452
## age_group65+ Years 0.4368391453
## education12 Years 0.0778384508
## educationCollege Graduate 0.2343049715
## educationSome College 0.0943088347
## raceHispanic 0.3027655443
## raceOther or Multiple 0.4394613325
## raceWhite 0.3098951178
## sexMale 0.1423364479
## income_poverty> $75,000 0.0299008341
## income_povertyBelow Poverty -0.0790490204
## marital_statusNot Married -0.1216807144
## rent_or_ownRent -0.0542707313
## employment_statusNot in Labor Force 0.0307878785
## employment_statusUnemployed .
## census_msaMSA, Principle City 0.0501222639
## census_msaNon-MSA 0.0260564568
## household_adults1 0.0350767370
## household_adults2 0.0195108132
## household_children1 .
## household_children2 -0.0796501273
## household_children3 -0.0759930926
#logistic model
glm_func = function(f){
model <- glm(f, data = train_data, family = "binomial")
train_pred_prob = predict(model, newdata = train_data,type = "response")
test_pred_prob = predict(model, newdata = test_data,type = "response")
model_roc_train = roc(train_data$h1n1_vaccine, train_pred_prob)
model_roc_test = roc(test_data$h1n1_vaccine, test_pred_prob)
return(paste("Train:",model_roc_train$auc,"Test:",model_roc_test$auc,"AIC:",model$aic));
}
#隨機森林加logistic
set.seed(123)
p_glm_AUC <- Map(function(x) glm_func(x),
c(h1n1_vaccine ~ doctor_recc_h1n1,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face+marital_status,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face+marital_status+rent_or_own,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face+marital_status+rent_or_own+behavioral_wash_hands,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face+marital_status+rent_or_own+behavioral_wash_hands+child_under_6_months,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face+marital_status+rent_or_own+behavioral_wash_hands+child_under_6_months+behavioral_face_mask,
h1n1_vaccine ~ doctor_recc_h1n1 + opinion_h1n1_risk +opinion_h1n1_vacc_effective+age_group+opinion_h1n1_sick_from_vacc+education+h1n1_concern+census_msa+household_children+household_adults+h1n1_knowledge+race+income_poverty+employment_status+health_worker+sex+behavioral_large_gatherings+behavioral_outside_home+chronic_med_condition+behavioral_avoidance+behavioral_touch_face+marital_status+rent_or_own+behavioral_wash_hands+child_under_6_months+behavioral_face_mask+behavioral_antiviral_meds
))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
p_glm_AUC
## [[1]]
## [1] "Train: 0.704736068606229 Test: 0.696843662191186 AIC: 16751.6787219887"
##
## [[2]]
## [1] "Train: 0.785200732523966 Test: 0.775971638190149 AIC: 15602.7660319576"
##
## [[3]]
## [1] "Train: 0.815193939147864 Test: 0.80678297881071 AIC: 14916.1376866553"
##
## [[4]]
## [1] "Train: 0.818828320172427 Test: 0.808633742645393 AIC: 14836.2494790515"
##
## [[5]]
## [1] "Train: 0.820750951216198 Test: 0.810315102206779 AIC: 14775.3960724978"
##
## [[6]]
## [1] "Train: 0.822414579883158 Test: 0.813763525996305 AIC: 14708.0198847984"
##
## [[7]]
## [1] "Train: 0.822153313275495 Test: 0.813213393429185 AIC: 14698.3890758276"
##
## [[8]]
## [1] "Train: 0.822190574825499 Test: 0.813457544482028 AIC: 14701.4601833389"
##
## [[9]]
## [1] "Train: 0.822253247026663 Test: 0.813504966487517 AIC: 14706.61956065"
##
## [[10]]
## [1] "Train: 0.822800259568007 Test: 0.813643642047325 AIC: 14699.2871335275"
##
## [[11]]
## [1] "Train: 0.823844591191667 Test: 0.813746459669846 AIC: 14662.0748950116"
##
## [[12]]
## [1] "Train: 0.824430548503687 Test: 0.813877814428413 AIC: 14645.7265991242"
##
## [[13]]
## [1] "Train: 0.824447525962953 Test: 0.814024043935998 AIC: 14642.6654845374"
##
## [[14]]
## [1] "Train: 0.824674196832032 Test: 0.814194986976434 AIC: 14639.2186707027"
##
## [[15]]
## [1] "Train: 0.829751678982715 Test: 0.817699412563987 AIC: 14444.284345399"
##
## [[16]]
## [1] "Train: 0.830354494134258 Test: 0.81833305822042 AIC: 14436.4891361911"
##
## [[17]]
## [1] "Train: 0.831195523460097 Test: 0.818149944930788 AIC: 14416.9497581745"
##
## [[18]]
## [1] "Train: 0.831207152207989 Test: 0.818144675819067 AIC: 14418.7000394771"
##
## [[19]]
## [1] "Train: 0.831155655908545 Test: 0.818258637846024 AIC: 14417.7710685381"
##
## [[20]]
## [1] "Train: 0.831211039849643 Test: 0.818178248920298 AIC: 14419.2905065148"
##
## [[21]]
## [1] "Train: 0.831278548106139 Test: 0.818098093141109 AIC: 14418.312292187"
##
## [[22]]
## [1] "Train: 0.8313529003208 Test: 0.818392090923419 AIC: 14415.3236639422"
##
## [[23]]
## [1] "Train: 0.831382019183999 Test: 0.818405846569062 AIC: 14416.4690390177"
##
## [[24]]
## [1] "Train: 0.831391118828749 Test: 0.818471547263 AIC: 14418.2145471363"
##
## [[25]]
## [1] "Train: 0.83151571133555 Test: 0.818635868941804 AIC: 14407.8055037563"
##
## [[26]]
## [1] "Train: 0.83175186634141 Test: 0.819007504520711 AIC: 14403.9126510673"
##
## [[27]]
## [1] "Train: 0.83190128848919 Test: 0.819384315952725 AIC: 14402.7746587252"
a<-as.data.frame(p_glm_AUC)
colnames(a) <- c(1:27)
a <- t(a)
b<-data.frame(
numOfVar = c(1:27),
AUC = a
)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
c <- separate(b, AUC, c("del","trainAUC","del2", "test"), " ")
## Warning: Expected 4 pieces. Additional pieces discarded in 27 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
p_glm_AUC_df <- c[,-c(2,4)]
plot(p_glm_AUC_df$numOfVar, p_glm_AUC_df$trainAUC ,type = "l", ylim = c(0.5, 1))
lines(p_glm_AUC_df$numOfVar, p_glm_AUC_df$test, col = "blue")

# 以上圖來看最好的模型是3個變數的模型
p_glm_AUC[[3]]
## [1] "Train: 0.815193939147864 Test: 0.80678297881071 AIC: 14916.1376866553"
# "Train: 0.815193939147864 Test: 0.80678297881071"
#doctor_recc_h1n1
#opinion_h1n1_risk
#opinion_h1n1_vacc_effective
#Elastic Net + Logistic
set.seed(123)
Net_glm_AUC <- Map(function(x) glm_func(x),
c(h1n1_vaccine ~ opinion_h1n1_vacc_effective,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own+census_msa,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own+census_msa+behavioral_outside_home,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own+census_msa+behavioral_outside_home+behavioral_avoidance,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own+census_msa+behavioral_outside_home+behavioral_avoidance+household_adults,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own+census_msa+behavioral_outside_home+behavioral_avoidance+household_adults+employment_status,
h1n1_vaccine ~ opinion_h1n1_vacc_effective+opinion_h1n1_risk+doctor_recc_h1n1+health_worker+race+age_group+opinion_h1n1_sick_from_vacc+child_under_6_months+education+behavioral_large_gatherings+h1n1_knowledge+behavioral_face_mask+behavioral_antiviral_meds+sex+h1n1_concern+marital_status+chronic_med_condition+household_children+income_poverty+behavioral_touch_face+rent_or_own+census_msa+behavioral_outside_home+behavioral_avoidance+household_adults+employment_status+behavioral_wash_hands
))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Net_glm_AUC
## [[1]]
## [1] "Train: 0.684386503147161 Test: 0.683367465986252 AIC: 17765.0765917918"
##
## [[2]]
## [1] "Train: 0.758640219493343 Test: 0.749316787389943 AIC: 16522.6134313684"
##
## [[3]]
## [1] "Train: 0.815196075214707 Test: 0.80678297881071 AIC: 14916.1376866553"
##
## [[4]]
## [1] "Train: 0.821699595589572 Test: 0.811244983852271 AIC: 14677.309781569"
##
## [[5]]
## [1] "Train: 0.82290420912484 Test: 0.812767617251725 AIC: 14627.2413157842"
##
## [[6]]
## [1] "Train: 0.826427531762118 Test: 0.815119039958519 AIC: 14535.3527179829"
##
## [[7]]
## [1] "Train: 0.827816863813674 Test: 0.815763223838394 AIC: 14489.2580586568"
##
## [[8]]
## [1] "Train: 0.828002710173256 Test: 0.815862357745641 AIC: 14480.4660729955"
##
## [[9]]
## [1] "Train: 0.829060174335864 Test: 0.818134976923156 AIC: 14442.8795990277"
##
## [[10]]
## [1] "Train: 0.829987731457368 Test: 0.817911948946504 AIC: 14417.0157310061"
##
## [[11]]
## [1] "Train: 0.830382254458946 Test: 0.817769543042115 AIC: 14407.220157982"
##
## [[12]]
## [1] "Train: 0.830599876948879 Test: 0.818155913482118 AIC: 14404.5896458676"
##
## [[13]]
## [1] "Train: 0.830705073968748 Test: 0.818528341759249 AIC: 14404.0374094768"
##
## [[14]]
## [1] "Train: 0.83138078026523 Test: 0.819118575530618 AIC: 14393.0717350533"
##
## [[15]]
## [1] "Train: 0.831293910698872 Test: 0.818908323984157 AIC: 14391.6729839707"
##
## [[16]]
## [1] "Train: 0.831608937836832 Test: 0.819296652855066 AIC: 14382.9422629185"
##
## [[17]]
## [1] "Train: 0.831549879860765 Test: 0.819422971648449 AIC: 14381.6591729009"
##
## [[18]]
## [1] "Train: 0.831595386628782 Test: 0.819396579460536 AIC: 14385.6925998629"
##
## [[19]]
## [1] "Train: 0.831585996478942 Test: 0.819453373956786 AIC: 14388.2433658693"
##
## [[20]]
## [1] "Train: 0.831593660686773 Test: 0.819404786218615 AIC: 14388.6182366194"
##
## [[21]]
## [1] "Train: 0.83161869539017 Test: 0.819374710315429 AIC: 14389.8773867273"
##
## [[22]]
## [1] "Train: 0.831673643573631 Test: 0.819564305078771 AIC: 14392.1164458978"
##
## [[23]]
## [1] "Train: 0.831707094380387 Test: 0.819593961318192 AIC: 14393.3181763139"
##
## [[24]]
## [1] "Train: 0.831791238325455 Test: 0.819479020075782 AIC: 14394.2172916467"
##
## [[25]]
## [1] "Train: 0.831864172191732 Test: 0.819362679954154 AIC: 14397.5340296649"
##
## [[26]]
## [1] "Train: 0.831892804031691 Test: 0.819352468135863 AIC: 14400.9238465434"
##
## [[27]]
## [1] "Train: 0.83190128848919 Test: 0.819384315952725 AIC: 14402.7746587252"
a<-as.data.frame(Net_glm_AUC)
colnames(a) <- c(1:27)
a <- t(a)
b<-data.frame(
numOfVar = c(1:27),
AUC = a
)
library(tidyr)
c <- separate(b, AUC, c("del","trainAUC","del2", "test"), " ")
## Warning: Expected 4 pieces. Additional pieces discarded in 27 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Net_glm_AUC_df <- c[,-c(2,4)]
plot(Net_glm_AUC_df$numOfVar, Net_glm_AUC_df$trainAUC ,type = "l", ylim = c(0.5, 1))
lines(Net_glm_AUC_df$numOfVar, Net_glm_AUC_df$test, col = "blue")

# 以上圖來看最好的模型是3個變數的模型
Net_glm_AUC[[3]]
## [1] "Train: 0.815196075214707 Test: 0.80678297881071 AIC: 14916.1376866553"
#"Train: 0.815196075214707 Test: 0.80678297881071"
#opinion_h1n1_vacc_effective
#opinion_h1n1_risk
#doctor_recc_h1n1
####################
#### H1N1_CART決策樹演算法-rpart()
####################
library(rpart); library(rpart.plot);library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
library("pROC")
cart_func = function(f){
model <- rpart(f,train_data,na.action = na.omit,control = rpart.control(maxdepth = 2))
train_pred_prob = predict(model , newdata = train_data,type = "prob")[, "1"];
test_pred_prob = predict(model , newdata = test_data,type = "prob")[, "1"];
model_roc_train = roc(train_data$h1n1_vaccine, train_pred_prob)
model_roc_test = roc(test_data$h1n1_vaccine, test_pred_prob)
plot.roc(model_roc_train ,print.thres = "best", print.auc = T);
return(paste("Train:",model_roc_train$auc,"Test:",model_roc_test$auc));
}
cart_func (h1n1_vaccine ~ .)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "Train: 0.703379879767804 Test: 0.698271171803817"
####################
#### H1N1_隨機森林模型
####################
library(randomForest)
rf_func = function(f){
model <- randomForest(f,train_data,ntree=80, mtry=3, importance=TRUE)
train_pred_prob = predict(model , newdata = train_data,type = "prob")[, "1"];
test_pred_prob = predict(model , newdata = test_data,type = "prob")[, "1"];
model_roc_train = roc(train_data$h1n1_vaccine, train_pred_prob)
model_roc_test = roc(test_data$h1n1_vaccine, test_pred_prob)
plot.roc(model_roc_train ,print.thres = "best", print.auc = T);
return(paste("Train:",model_roc_train$auc,"Test:",model_roc_test$auc));
}
rf_func(h1n1_vaccine ~ .)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "Train: 0.996723051312358 Test: 0.809342787891674"