資料來源:https://www.kaggle.com/datasets/aravindpcoder/obesity-or-cvd-risk-classifyregressorcluster/data
資料筆數:2111筆
變數介紹:
1.Gender(性別);類別
(Female、Male)
2.Age (年齡);單位(歲)
3.Height(身高);單位(公尺)
4.Weight(體重);單位(公斤)
5.family_history_with_overweight(有超重家族史);類別
(yes、no)
6.FAVC(Frequent consumption of high caloric food)(經常食用高熱量食物);類別
(yes、no)
7.FCVC(Frequency of consumption of vegetables)(吃蔬菜的頻率);連續
(1~3:1是最少、3是最多)
8.NCP(Number of main meals)(主餐數量);連續
(1~4:1是最少、4是最多)
9.CAEC(Consumption of food between meals)(兩餐之間的食物消耗);類別
(no、Sometimes、Frequently、Always)
10.SMOKE(是否抽菸);類別
(no,yes)
11.CH2O(Consumption of water daily)(每天飲水);連續
(1~3:1是最少、3是最多)
12.SCC (Calories consumption monitoring) (卡路里消耗監測);類別
(no、yes)
13.FAF (Physical activity frequency) (身體活動頻率);連續
(0~3:0是最少、3是最多)
14.TUE (Time using technology devices) (使用科技設備的時間);連續
(0~2:0是最少、2是最多)
15.CALC(Consumption of alcohol)(飲酒);類別
(no、Sometimes、Frequently、Always)
16.MTRANS (Transportation used) (使用的交通工具);類別
(Public_Transportation、Automobile、Walking、Motorbike、Bike)
17.NObeyesdad(肥胖程度);類別
(Insufficient_Weight(體重不足) : < 18.5
Normal_Weight(正常) : 18.5 ~ 24.9
Overweight_Level(超重) : 25.0 ~ 29.9
Obesity_Type_I(肥胖 I) : 30.0 ~ 34.9
Obesity_Type_II(肥胖 II) : 35.0 ~ 39.9
Obesity_Type_III(肥胖 III) : ≥ 40.0)
讀入檔案
library(readr)
## Warning: 套件 'readr' 是用 R 版本 4.4.2 來建造的
library(knitr)
## Warning: 套件 'knitr' 是用 R 版本 4.4.2 來建造的
# Load the obesity data set; readr infers the column types
# (9 character and 8 double columns per the recorded column specification).
# NOTE(review): the absolute Windows path ties this script to one machine.
data <- read_csv(
  file = "D:/PU_University/1131classfitication_data/ObesityDataSet.csv"
)
## Rows: 2111 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Gender, family_history_with_overweight, FAVC, CAEC, SMOKE, SCC, CAL...
## dbl (8): Age, Height, Weight, FCVC, NCP, CH2O, FAF, TUE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the data set.
head(data, n = 6L)
## # A tibble: 6 × 17
## Gender Age Height Weight family_history_with_overw…¹ FAVC FCVC NCP CAEC
## <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr>
## 1 Female 21 1.62 64 yes no 2 3 Some…
## 2 Female 21 1.52 56 yes no 3 3 Some…
## 3 Male 23 1.8 77 yes no 2 3 Some…
## 4 Male 27 1.8 87 no no 3 3 Some…
## 5 Male 22 1.78 89.8 no no 2 1 Some…
## 6 Male 29 1.62 53 no yes 2 3 Some…
## # ℹ abbreviated name: ¹family_history_with_overweight
## # ℹ 8 more variables: SMOKE <chr>, CH2O <dbl>, SCC <chr>, FAF <dbl>, TUE <dbl>,
## # CALC <chr>, MTRANS <chr>, NObeyesdad <chr>
檢查是否有NA值
# Count the missing cells across the whole table (0 means no NA values).
length(which(is.na(data)))
## [1] 0
將NObeyesdad欄位裡的Overweight_Level_I和Overweight_Level_II的資料都改為Overweight_Level
# 加載 dplyr 套件
library(dplyr)
## Warning: 套件 'dplyr' 是用 R 版本 4.4.2 來建造的
##
## 載入套件:'dplyr'
## 下列物件被遮斷自 'package:stats':
##
## filter, lag
## 下列物件被遮斷自 'package:base':
##
## intersect, setdiff, setequal, union
# Merge the two overweight sub-levels into a single "Overweight_Level" class;
# every other label is kept unchanged.
data <- data %>%
  mutate(
    NObeyesdad = if_else(
      NObeyesdad %in% c("Overweight_Level_I", "Overweight_Level_II"),
      "Overweight_Level",
      NObeyesdad
    )
  )
1.類別變數:family_history_with_overweight(有超重家族史)(列), NObeyesdad(肥胖程度)(行)
# Cross-tabulate family history (rows) against obesity level (columns).
t1 <- table(data[["family_history_with_overweight"]], data[["NObeyesdad"]])
t1
##
## Insufficient_Weight Normal_Weight Obesity_Type_I Obesity_Type_II
## no 146 132 7 1
## yes 126 155 344 296
##
## Obesity_Type_III Overweight_Level
## no 0 99
## yes 324 481
# Row totals of t1: respondents without / with a family history of overweight.
col1 <- apply(t1, MARGIN = 1, FUN = sum)
依有無超重家族史分組,計算各肥胖程度所占的比例、比例差、相對風險:
# Proportion of each family-history group in Insufficient_Weight (t1 column 1).
p1 <- t1[, "Insufficient_Weight"] / col1
p1
## no yes
## 0.37922078 0.07300116
# Difference in proportions (no - yes). Round once, after subtracting, so the
# rounding errors of the two proportions are not carried into the difference.
diff1 <- round(p1[1] - p1[2], 3)
diff1
## no
## 0.306
# Relative risk (no / yes) from the unrounded proportions. Rounding each
# proportion to 3 decimals before dividing distorts the ratio.
rr1 <- p1[1] / p1[2]
rr1
## no
## 5.191781
Insufficient_Weight(體重不足) :
@沒有超重家族史與有超重家族史的比例
\(p_1\) = 146 / 385 = 0.379 , \(p_2\) = 126 / 1726 = 0.073
@比例差
樣本比例差 = 0.379 - 0.073 = 0.306
沒有超重家族史比有超重家族史的比例高 30.6%
@相對風險
相對風險\(\hat {r}\) = 5.192
沒有超重家族史的比例為有超重家族史比例的5.192倍
# Proportion of each family-history group in Normal_Weight (t1 column 2).
p2 <- t1[, "Normal_Weight"] / col1
p2
## no yes
## 0.34285714 0.08980301
# Difference in proportions (no - yes). Round once, after subtracting, so the
# rounding errors of the two proportions are not carried into the difference.
diff2 <- round(p2[1] - p2[2], 3)
diff2
## no
## 0.253
# Relative risk (no / yes) from the unrounded proportions. Rounding each
# proportion to 3 decimals before dividing distorts the ratio.
rr2 <- p2[1] / p2[2]
rr2
## no
## 3.811111
Normal_Weight(正常) :
@沒有超重家族史與有超重家族史的比例
\(p_1\) = 132 / 385 = 0.343 , \(p_2\) = 155 / 1726 = 0.09
@比例差
樣本比例差 = 0.343 - 0.09 = 0.253
沒有超重家族史比有超重家族史的比例高 25.3%
@相對風險
相對風險\(\hat {r}\) = 3.811
沒有超重家族史的比例為有超重家族史比例的3.811倍
# Proportion of each family-history group in Obesity_Type_I (t1 column 3;
# table() orders the levels alphabetically).
p3 <- t1[, "Obesity_Type_I"] / col1
p3
## no yes
## 0.01818182 0.19930475
# Difference in proportions (no - yes). Round once, after subtracting, so the
# rounding errors of the two proportions are not carried into the difference.
diff3 <- round(p3[1] - p3[2], 3)
diff3
## no
## -0.181
# Relative risk (no / yes) from the unrounded proportions. Rounding each
# proportion to 3 decimals before dividing distorts the ratio.
rr3 <- p3[1] / p3[2]
rr3
## no
## 0.09045226
Obesity_Type_I(肥胖 I):
@沒有超重家族史與有超重家族史的比例
\(p_1\) = 7 / 385 = 0.018 , \(p_2\) = 344 / 1726 = 0.199
@比例差
樣本比例差 = 0.018 - 0.199 = -0.181
沒有超重家族史比有超重家族史的比例低 18.1%
@相對風險
相對風險\(\hat {r}\) = 0.09
沒有超重家族史的比例為有超重家族史比例的0.09倍
# Proportion of each family-history group in Obesity_Type_II (t1 column 4;
# table() orders the levels alphabetically).
p4 <- t1[, "Obesity_Type_II"] / col1
p4
## no yes
## 0.002597403 0.171494786
# Difference in proportions (no - yes). Round once, after subtracting: rounding
# each proportion first gives -0.168 here, while the unrounded difference
# rounds to -0.169.
diff4 <- round(p4[1] - p4[2], 3)
diff4
## no
## -0.168
# Relative risk (no / yes) from the unrounded proportions. The "no" proportion
# is about 0.0026, so rounding it to 0.003 before dividing inflates the ratio
# noticeably.
rr4 <- p4[1] / p4[2]
rr4
## no
## 0.01754386
Obesity_Type_II(肥胖 II):
@沒有超重家族史與有超重家族史的比例
\(p_1\) = 1 / 385 = 0.003 , \(p_2\) = 296 / 1726 = 0.171
@比例差
樣本比例差 = 0.003 - 0.171 = -0.168
沒有超重家族史比有超重家族史的比例低 16.8%
@相對風險
相對風險\(\hat {r}\) = 0.018
沒有超重家族史的比例為有超重家族史比例的0.018倍
# Proportion of each family-history group in Obesity_Type_III (t1 column 5;
# table() orders the levels alphabetically).
p5 <- t1[, "Obesity_Type_III"] / col1
p5
## no yes
## 0.0000000 0.1877173
# Difference in proportions (no - yes). Round once, after subtracting, so the
# rounding errors of the two proportions are not carried into the difference.
diff5 <- round(p5[1] - p5[2], 3)
diff5
## no
## -0.188
# Relative risk (no / yes) from the unrounded proportions. Rounding each
# proportion to 3 decimals before dividing distorts the ratio.
rr5 <- p5[1] / p5[2]
rr5
## no
## 0
Obesity_Type_III(肥胖 III):
@沒有超重家族史與有超重家族史的比例
\(p_1\) = 0 / 385 = 0 , \(p_2\) = 324 / 1726 = 0.188
@比例差
樣本比例差 = 0 - 0.188 = -0.188
沒有超重家族史比有超重家族史的比例低 18.8%
@相對風險
相對風險\(\hat {r}\) = 0
沒有超重家族史的比例為有超重家族史比例的0倍
# Proportion of each family-history group in Overweight_Level (t1 column 6;
# table() orders the levels alphabetically).
p6 <- t1[, "Overweight_Level"] / col1
p6
## no yes
## 0.2571429 0.2786790
# Difference in proportions (no - yes). Round once, after subtracting, so the
# rounding errors of the two proportions are not carried into the difference.
diff6 <- round(p6[1] - p6[2], 3)
diff6
## no
## -0.022
# Relative risk (no / yes) from the unrounded proportions. Rounding each
# proportion to 3 decimals before dividing distorts the ratio.
rr6 <- p6[1] / p6[2]
rr6
## no
## 0.921147
Overweight_Level(超重):
@沒有超重家族史與有超重家族史的比例
\(p_1\) = 99 / 385 = 0.257 , \(p_2\) = 481 / 1726 = 0.279
@比例差
樣本比例差 = 0.257 - 0.279 = -0.022
沒有超重家族史比有超重家族史的比例低 2.2%
@相對風險
相對風險\(\hat {r}\) = 0.921
沒有超重家族史的比例為有超重家族史比例的0.921倍
2.類別變數:Gender(性別)(列), SCC(卡路里消耗監測)(行)
# Cross-tabulate gender (rows) against calorie monitoring, SCC (columns).
t2 <- table(data[["Gender"]], data[["SCC"]])
t2
##
## no yes
## Female 973 70
## Male 1042 26
# Row totals of t2: number of women and number of men.
col2 <- apply(t2, MARGIN = 1, FUN = sum)
# Proportion of each gender that does not monitor calories (SCC "no" column).
p <- t2[, "no"] / col2
p
## Female Male
## 0.9328859 0.9756554
# Difference in proportions (Female - Male). Round once, after subtracting,
# so the rounding errors of the two proportions are not carried over.
diff <- round(p[1] - p[2], 3)
diff
## Female
## -0.043
# Two-sample test of equal proportions without continuity correction, with a
# 95% confidence interval for the difference.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
a <- prop.test(t2, correct = FALSE, conf.level = 0.95)
a
##
## 2-sample test for equality of proportions without continuity correction
##
## data: t2
## X-squared = 22.236, df = 1, p-value = 2.41e-06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.06054675 -0.02499230
## sample estimates:
## prop 1 prop 2
## 0.9328859 0.9756554
# Relative risk (Female / Male) from the unrounded proportions. This value
# feeds the log-scale confidence interval computed later, so it must not be
# built from proportions already rounded to 3 decimals.
rr <- p[1] / p[2]
rr
## Female
## 0.9559426
# Estimated standard error of log(relative risk):
# sqrt((1 - p1) / (n1 * p1) + (1 - p2) / (n2 * p2)), with n = row totals of t2.
ase.logr1 <- sqrt(sum((1 - p) / (p * col2)))
ase.logr1
## [1] 0.009609358
# 95% interval for log(relative risk): estimate -/+ 1.96 standard errors.
z <- 1.96 * c(-1, 1)
ci.logr1 <- log(rr) + z * ase.logr1
ci.logr1
## [1] -0.06389173 -0.02622304
# Back-transform the log-scale limits to the relative-risk scale.
print(exp(ci.logr1))
## [1] 0.9381066 0.9741178
# Sample odds ratio: cross-product ratio of the 2x2 table t2
# (odds of "no" for Female relative to Male).
or <- (t2["Female", "no"] * t2["Male", "yes"]) /
  (t2["Female", "yes"] * t2["Male", "no"])
or
## [1] 0.346833
@女性與男性沒有做卡路里消耗監測的比例
\(p_1\) = 973 / 1043 = 0.933 , \(p_2\) = 1042 / 1068 = 0.976
@比例差
樣本比例差 = 0.933 - 0.976 = -0.043
女性沒有做卡路里消耗監測比男性沒有做卡路里消耗監測的比例低 4.3%
@相對風險
相對風險\(\hat {r}\) = 0.956
女性沒有做卡路里消耗監測的比例為男性沒有做卡路里消耗監測比例的0.956倍
\(log(\hat {r})\)的標準誤估計 = 0.01
\(log(r)\)的 95% 信賴區間為(-0.064,-0.026)
女性沒有做卡路里消耗監測的比例相對於男性沒有做卡路里消耗監測的比例,其倍數在0.938和0.974之間
@勝算比
女性沒有做卡路里消耗監測的勝算為男性沒有做卡路里消耗監測勝算的0.347倍
3.將CALC喝酒頻率和NObeyesdad肥胖程度拿來做模型適配
# Raw cross-tabulation of alcohol consumption (rows) by obesity level (columns).
table(data[["CALC"]], data[["NObeyesdad"]])
##
## Insufficient_Weight Normal_Weight Obesity_Type_I Obesity_Type_II
## Always 0 1 0 0
## Frequently 1 18 14 2
## no 117 107 165 71
## Sometimes 154 161 172 224
##
## Obesity_Type_III Overweight_Level
## Always 0 0
## Frequently 0 35
## no 1 178
## Sometimes 323 367
因為兩個分類都是有順序性的類別,因此將順序更改正確
# The same counts with both classifications in their natural order
# (rows: Always -> no; columns: Insufficient_Weight -> Obesity_Type_III).
# Values are filled column by column, one obesity level per line.
matrix(
  c(
    0, 1, 154, 117,
    1, 18, 161, 107,
    0, 35, 367, 178,
    0, 14, 172, 165,
    0, 2, 224, 71,
    0, 0, 323, 1
  ),
  nrow = 4,
  ncol = 6,
  dimnames = list(
    CALC喝酒頻率 = c("Always", "Frequently", "Sometimes", "no"),
    NObeyesdad肥胖程度 = c(
      "Insufficient_Weight", "Normal_Weight", "Overweight_Level",
      "Obesity_Type_I", "Obesity_Type_II", "Obesity_Type_III"
    )
  )
)
## NObeyesdad肥胖程度
## CALC喝酒頻率 Insufficient_Weight Normal_Weight Overweight_Level Obesity_Type_I
## Always 0 1 0 0
## Frequently 1 18 35 14
## Sometimes 154 161 367 172
## no 117 107 178 165
## NObeyesdad肥胖程度
## CALC喝酒頻率 Obesity_Type_II Obesity_Type_III
## Always 0 0
## Frequently 2 0
## Sometimes 224 323
## no 71 1
變數
1. CALC(calc):喝酒頻率(1 = Always、2 = Frequently、3 = Sometimes、4 = no)
2. NObeyesdad(nobeyesdad):肥胖程度(1 = Insufficient_Weight、2 = Normal_Weight、3 = Overweight_Level、4 = Obesity_Type_I、5 = Obesity_Type_II、6 = Obesity_Type_III)
獨立模型 (calc, nobeyesdad)
# Cell counts of the 4 x 6 table, stored column by column
# (one line per obesity level, CALC score running 1..4 within each line).
count <- c(
  0, 1, 154, 117,
  1, 18, 161, 107,
  0, 35, 367, 178,
  0, 14, 172, 165,
  0, 2, 224, 71,
  0, 0, 323, 1
)
# Integer scores matching the layout of count: calc cycles fastest.
calc <- rep(seq_len(4L), times = 6L)
nobeyesdad <- rep(seq_len(6L), each = 4L)
# Independence log-linear model: main effects only, Poisson counts.
indep <- glm(count ~ factor(calc) + factor(nobeyesdad), family = poisson)
summary(indep)
##
## Call:
## glm(formula = count ~ factor(calc) + factor(nobeyesdad), family = poisson)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.04911 1.00074 -2.048 0.0406 *
## factor(calc)2 4.24849 1.00627 4.222 2.42e-05 ***
## factor(calc)3 7.24494 0.99950 7.249 4.21e-13 ***
## factor(calc)4 6.45990 0.99993 6.460 1.04e-10 ***
## factor(nobeyesdad)2 0.05368 0.08462 0.634 0.5258
## factor(nobeyesdad)3 0.75723 0.07349 10.304 < 2e-16 ***
## factor(nobeyesdad)4 0.25498 0.08078 3.157 0.0016 **
## factor(nobeyesdad)5 0.08793 0.08393 1.048 0.2948
## factor(nobeyesdad)6 0.17494 0.08224 2.127 0.0334 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3227.24 on 23 degrees of freedom
## Residual deviance: 375.26 on 15 degrees of freedom
## AIC: 493.07
##
## Number of Fisher Scoring iterations: 5
# Goodness-of-fit p-value for the independence model: P(chi-square_df > G^2).
# The upper tail is requested directly because 1 - pchisq(...) cancels to
# exactly 0 for a deviance this large; the deviance is also left unrounded.
p_value <- pchisq(indep$deviance, df = indep$df.residual, lower.tail = FALSE)
配適度檢定 : \(G^2\)=375.258,df=15,p-value = P(\(G^2\)>375.258) ≈ 0 –>獨立模型適配不良
L×L關聯模型
# Linear-by-linear association model: the independence terms plus a single
# score-product term. `calc:nobeyesdad` adds only that product. The earlier
# `calc*nobeyesdad` also added the linear main effects, which are aliased with
# the factor terms and were reported as NA ("not defined because of
# singularities"). The fitted model is the same.
ll <- glm(
  count ~ factor(calc) + factor(nobeyesdad) + calc:nobeyesdad,
  family = poisson
)
summary(ll)
##
## Call:
## glm(formula = count ~ factor(calc) + factor(nobeyesdad) + calc *
## nobeyesdad, family = poisson)
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.52261 1.02482 -3.437 0.000588 ***
## factor(calc)2 5.30589 1.01816 5.211 1.88e-07 ***
## factor(calc)3 9.23444 1.03685 8.906 < 2e-16 ***
## factor(calc)4 9.24634 1.06508 8.681 < 2e-16 ***
## factor(nobeyesdad)2 0.85349 0.13011 6.560 5.40e-11 ***
## factor(nobeyesdad)3 2.34190 0.20757 11.282 < 2e-16 ***
## factor(nobeyesdad)4 2.60994 0.29718 8.782 < 2e-16 ***
## factor(nobeyesdad)5 3.19908 0.38391 8.333 < 2e-16 ***
## factor(nobeyesdad)6 4.02873 0.46752 8.617 < 2e-16 ***
## calc NA NA NA NA
## nobeyesdad NA NA NA NA
## calc:nobeyesdad -0.23577 0.02814 -8.377 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3227.24 on 23 degrees of freedom
## Residual deviance: 301.42 on 14 degrees of freedom
## AIC: 421.23
##
## Number of Fisher Scoring iterations: 6
# Goodness-of-fit p-value for the L x L model: P(chi-square_df > G^2).
# The upper tail is requested directly because 1 - pchisq(...) cancels to
# exactly 0 for a deviance this large; the deviance is also left unrounded.
p_value <- pchisq(ll$deviance, df = ll$df.residual, lower.tail = FALSE)
配適度檢定 : \(G^2\)=301.419,df=14,p-value = P(\(G^2\)>301.419) ≈ 0 –>L×L關聯模型適配不良
4.適配模型 先將資料處理,並將Y(NObeyesdad)重新編碼:"Normal_Weight" = 1,其他則為0
# Convert the listed categorical predictors to factors. Gender is not in the
# list and stays a character column (as the summary output shows).
# Then recode the response to a 0/1 indicator: 1 = Normal_Weight, 0 = any
# other obesity level.
data <- data %>%
  mutate(
    across(
      c(family_history_with_overweight, FAVC, CAEC, SMOKE, SCC, CALC, MTRANS),
      factor
    ),
    NObeyesdad = recode(
      NObeyesdad,
      "Insufficient_Weight" = 0,
      "Normal_Weight" = 1,
      "Overweight_Level" = 0,
      "Obesity_Type_I" = 0,
      "Obesity_Type_II" = 0,
      "Obesity_Type_III" = 0
    )
  )
# Column-by-column overview of the prepared data.
summary(data)
## Gender Age Height Weight
## Length:2111 Min. :14.00 Min. :1.450 Min. : 39.00
## Class :character 1st Qu.:19.95 1st Qu.:1.630 1st Qu.: 65.47
## Mode :character Median :22.78 Median :1.700 Median : 83.00
## Mean :24.31 Mean :1.702 Mean : 86.59
## 3rd Qu.:26.00 3rd Qu.:1.768 3rd Qu.:107.43
## Max. :61.00 Max. :1.980 Max. :173.00
## family_history_with_overweight FAVC FCVC NCP
## no : 385 no : 245 Min. :1.000 Min. :1.000
## yes:1726 yes:1866 1st Qu.:2.000 1st Qu.:2.659
## Median :2.386 Median :3.000
## Mean :2.419 Mean :2.686
## 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :3.000 Max. :4.000
## CAEC SMOKE CH2O SCC FAF
## Always : 53 no :2067 Min. :1.000 no :2015 Min. :0.0000
## Frequently: 242 yes: 44 1st Qu.:1.585 yes: 96 1st Qu.:0.1245
## no : 51 Median :2.000 Median :1.0000
## Sometimes :1765 Mean :2.008 Mean :1.0103
## 3rd Qu.:2.477 3rd Qu.:1.6667
## Max. :3.000 Max. :3.0000
## TUE CALC MTRANS
## Min. :0.0000 Always : 1 Automobile : 457
## 1st Qu.:0.0000 Frequently: 70 Bike : 7
## Median :0.6253 no : 639 Motorbike : 11
## Mean :0.6579 Sometimes :1401 Public_Transportation:1580
## 3rd Qu.:1.0000 Walking : 56
## Max. :2.0000
## NObeyesdad
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.136
## 3rd Qu.:0.000
## Max. :1.000
將全部解釋變數做適配模型
# Full logistic regression of the Normal_Weight indicator on all other columns.
# NOTE(review): CALC has a single "Always" observation and the CALC
# coefficients come out with standard errors around 535, so those estimates
# look unreliable.
m1 <- glm(formula = NObeyesdad ~ ., family = binomial, data = data)
summary(m1)
##
## Call:
## glm(formula = NObeyesdad ~ ., family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 20.538921 535.414937 0.038 0.96940
## GenderMale 1.002704 0.220792 4.541 5.59e-06 ***
## Age -0.019350 0.016363 -1.183 0.23700
## Height -0.451333 1.228463 -0.367 0.71332
## Weight -0.066031 0.006556 -10.072 < 2e-16 ***
## family_history_with_overweightyes -0.163531 0.184848 -0.885 0.37633
## FAVCyes -0.341139 0.198528 -1.718 0.08573 .
## FCVC -0.263890 0.147720 -1.786 0.07403 .
## NCP 0.027599 0.097135 0.284 0.77631
## CAECFrequently -2.232462 0.392414 -5.689 1.28e-08 ***
## CAECno -2.429481 0.542054 -4.482 7.39e-06 ***
## CAECSometimes -2.534810 0.364460 -6.955 3.53e-12 ***
## SMOKEyes 1.334380 0.455168 2.932 0.00337 **
## CH2O -0.407213 0.138588 -2.938 0.00330 **
## SCCyes 0.028177 0.286440 0.098 0.92164
## FAF 0.219788 0.093595 2.348 0.01886 *
## TUE -0.279801 0.128986 -2.169 0.03007 *
## CALCFrequently -12.188619 535.411447 -0.023 0.98184
## CALCno -13.005995 535.411364 -0.024 0.98062
## CALCSometimes -13.107526 535.411366 -0.024 0.98047
## MTRANSBike 1.534639 0.944194 1.625 0.10409
## MTRANSMotorbike 1.734646 0.729798 2.377 0.01746 *
## MTRANSPublic_Transportation 0.024841 0.231678 0.107 0.91461
## MTRANSWalking 1.881607 0.388018 4.849 1.24e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1678.5 on 2110 degrees of freedom
## Residual deviance: 1129.9 on 2087 degrees of freedom
## AIC: 1177.9
##
## Number of Fisher Scoring iterations: 12
解釋變數中,Gender、Weight、CAEC、MTRANS影響較為顯著
# Reduced logistic model with four predictors: Gender, Weight, CAEC, MTRANS.
m2 <- glm(
  formula = NObeyesdad ~ Gender + Weight + CAEC + MTRANS,
  family = binomial,
  data = data
)
summary(m2)
##
## Call:
## glm(formula = NObeyesdad ~ Gender + Weight + CAEC + MTRANS, family = binomial,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.927404 0.517630 9.519 < 2e-16 ***
## GenderMale 0.959579 0.171477 5.596 2.19e-08 ***
## Weight -0.069055 0.005221 -13.226 < 2e-16 ***
## CAECFrequently -2.131767 0.377613 -5.645 1.65e-08 ***
## CAECno -2.527030 0.506608 -4.988 6.10e-07 ***
## CAECSometimes -2.568574 0.351199 -7.314 2.60e-13 ***
## MTRANSBike 1.882429 0.921137 2.044 0.0410 *
## MTRANSMotorbike 1.777004 0.711304 2.498 0.0125 *
## MTRANSPublic_Transportation 0.045978 0.200592 0.229 0.8187
## MTRANSWalking 1.977145 0.351649 5.622 1.88e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1678.5 on 2110 degrees of freedom
## Residual deviance: 1173.2 on 2101 degrees of freedom
## AIC: 1193.2
##
## Number of Fisher Scoring iterations: 6
# AIC-based stepwise selection starting from the full model m1;
# the trace of each step is printed.
m3 <- step(object = m1)
## Start: AIC=1177.93
## NObeyesdad ~ Gender + Age + Height + Weight + family_history_with_overweight +
## FAVC + FCVC + NCP + CAEC + SMOKE + CH2O + SCC + FAF + TUE +
## CALC + MTRANS
##
## Df Deviance AIC
## - SCC 1 1129.9 1175.9
## - NCP 1 1130.0 1176.0
## - Height 1 1130.1 1176.1
## - family_history_with_overweight 1 1130.7 1176.7
## - Age 1 1131.3 1177.3
## <none> 1129.9 1177.9
## - FAVC 1 1132.8 1178.8
## - CALC 3 1136.9 1178.9
## - FCVC 1 1133.1 1179.1
## - TUE 1 1134.7 1180.7
## - FAF 1 1135.4 1181.4
## - SMOKE 1 1138.0 1184.0
## - CH2O 1 1138.7 1184.7
## - Gender 1 1151.3 1197.3
## - MTRANS 4 1166.7 1206.7
## - CAEC 3 1182.3 1224.3
## - Weight 1 1260.5 1306.5
##
## Step: AIC=1175.94
## NObeyesdad ~ Gender + Age + Height + Weight + family_history_with_overweight +
## FAVC + FCVC + NCP + CAEC + SMOKE + CH2O + FAF + TUE + CALC +
## MTRANS
##
## Df Deviance AIC
## - NCP 1 1130.0 1174.0
## - Height 1 1130.1 1174.1
## - family_history_with_overweight 1 1130.7 1174.7
## - Age 1 1131.4 1175.4
## <none> 1129.9 1175.9
## - CALC 3 1136.9 1176.9
## - FAVC 1 1132.9 1176.9
## - FCVC 1 1133.2 1177.2
## - TUE 1 1134.7 1178.7
## - FAF 1 1135.5 1179.5
## - SMOKE 1 1138.1 1182.1
## - CH2O 1 1138.7 1182.7
## - Gender 1 1151.3 1195.3
## - MTRANS 4 1166.8 1204.8
## - CAEC 3 1182.7 1222.7
## - Weight 1 1260.5 1304.5
##
## Step: AIC=1174.02
## NObeyesdad ~ Gender + Age + Height + Weight + family_history_with_overweight +
## FAVC + FCVC + CAEC + SMOKE + CH2O + FAF + TUE + CALC + MTRANS
##
## Df Deviance AIC
## - Height 1 1130.1 1172.1
## - family_history_with_overweight 1 1130.8 1172.8
## - Age 1 1131.5 1173.5
## <none> 1130.0 1174.0
## - CALC 3 1137.0 1175.0
## - FAVC 1 1133.1 1175.1
## - FCVC 1 1133.4 1175.4
## - TUE 1 1134.8 1176.8
## - FAF 1 1136.0 1178.0
## - SMOKE 1 1138.2 1180.2
## - CH2O 1 1138.8 1180.8
## - Gender 1 1151.3 1193.3
## - MTRANS 4 1166.8 1202.8
## - CAEC 3 1183.1 1221.1
## - Weight 1 1262.1 1304.1
##
## Step: AIC=1172.12
## NObeyesdad ~ Gender + Age + Weight + family_history_with_overweight +
## FAVC + FCVC + CAEC + SMOKE + CH2O + FAF + TUE + CALC + MTRANS
##
## Df Deviance AIC
## - family_history_with_overweight 1 1130.9 1170.9
## - Age 1 1131.5 1171.5
## <none> 1130.1 1172.1
## - CALC 3 1137.1 1173.1
## - FAVC 1 1133.4 1173.4
## - FCVC 1 1133.5 1173.5
## - TUE 1 1135.0 1175.0
## - FAF 1 1136.1 1176.1
## - SMOKE 1 1138.4 1178.4
## - CH2O 1 1138.9 1178.9
## - Gender 1 1157.5 1197.5
## - MTRANS 4 1166.8 1200.8
## - CAEC 3 1183.1 1219.1
## - Weight 1 1278.4 1318.4
##
## Step: AIC=1170.92
## NObeyesdad ~ Gender + Age + Weight + FAVC + FCVC + CAEC + SMOKE +
## CH2O + FAF + TUE + CALC + MTRANS
##
## Df Deviance AIC
## - Age 1 1132.4 1170.4
## <none> 1130.9 1170.9
## - CALC 3 1138.0 1172.0
## - FCVC 1 1134.3 1172.3
## - FAVC 1 1134.5 1172.5
## - TUE 1 1136.3 1174.3
## - FAF 1 1137.1 1175.1
## - SMOKE 1 1139.0 1177.0
## - CH2O 1 1140.7 1178.7
## - Gender 1 1158.4 1196.4
## - MTRANS 4 1168.0 1200.0
## - CAEC 3 1184.4 1218.4
## - Weight 1 1317.6 1355.6
##
## Step: AIC=1170.38
## NObeyesdad ~ Gender + Weight + FAVC + FCVC + CAEC + SMOKE + CH2O +
## FAF + TUE + CALC + MTRANS
##
## Df Deviance AIC
## <none> 1132.4 1170.4
## - CALC 3 1139.0 1171.0
## - FAVC 1 1135.5 1171.5
## - FCVC 1 1135.7 1171.7
## - TUE 1 1137.0 1173.0
## - FAF 1 1139.3 1175.3
## - SMOKE 1 1139.8 1175.8
## - CH2O 1 1142.4 1178.4
## - MTRANS 4 1169.8 1199.8
## - Gender 1 1164.8 1200.8
## - CAEC 3 1185.6 1217.6
## - Weight 1 1363.8 1399.8
# Coefficient table and fit statistics of the model chosen by step().
print(summary(m3))
##
## Call:
## glm(formula = NObeyesdad ~ Gender + Weight + FAVC + FCVC + CAEC +
## SMOKE + CH2O + FAF + TUE + CALC + MTRANS, family = binomial,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19.50980 535.41182 0.036 0.97093
## GenderMale 1.01518 0.18203 5.577 2.45e-08 ***
## Weight -0.07129 0.00565 -12.618 < 2e-16 ***
## FAVCyes -0.34719 0.19320 -1.797 0.07232 .
## FCVC -0.26463 0.14564 -1.817 0.06922 .
## CAECFrequently -2.25765 0.39170 -5.764 8.22e-09 ***
## CAECno -2.38237 0.53187 -4.479 7.49e-06 ***
## CAECSometimes -2.54440 0.36271 -7.015 2.30e-12 ***
## SMOKEyes 1.26590 0.45314 2.794 0.00521 **
## CH2O -0.42957 0.13660 -3.145 0.00166 **
## FAF 0.23113 0.08810 2.624 0.00870 **
## TUE -0.26776 0.12568 -2.130 0.03314 *
## CALCFrequently -12.10687 535.41141 -0.023 0.98196
## CALCno -12.92732 535.41133 -0.024 0.98074
## CALCSometimes -12.99873 535.41133 -0.024 0.98063
## MTRANSBike 1.63042 0.93473 1.744 0.08111 .
## MTRANSMotorbike 1.82632 0.72859 2.507 0.01219 *
## MTRANSPublic_Transportation 0.14678 0.20965 0.700 0.48387
## MTRANSWalking 1.97277 0.37828 5.215 1.84e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1678.5 on 2110 degrees of freedom
## Residual deviance: 1132.4 on 2092 degrees of freedom
## AIC: 1170.4
##
## Number of Fisher Scoring iterations: 12
最終模型公式為:
\(\text{logit}[P(\text{NObeyesdad}=1)]\) = \(\beta_0\) + \(\beta_1\)Gender + \(\beta_2\)Weight + \(\beta_3\)CAEC + \(\beta_4\)MTRANS