Phân tích phiếu điều tra cán bộ
# Read the staff-survey data set from the project's GitHub repository.
# `header = TRUE` treats the first CSV row as column names (spelled out
# instead of the reassignable shorthand `T`).
CB <- read.csv(
  "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/Canbo_Final.csv",
  header = TRUE
)
# Preview the first rows to confirm the import worked.
head(CB)
Xây dựng mô hình đầy đủ các biến
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.3.4 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## -- Conflicts ---------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(relaimpo)
## Warning: package 'relaimpo' was built under R version 3.4.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: boot
## Loading required package: survey
## Warning: package 'survey' was built under R version 3.4.3
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
##
## aml
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
## Loading required package: mitools
## Warning: package 'mitools' was built under R version 3.4.3
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# Fit the full linear model: Y_mean regressed on every other column of CB.
model_CB <- lm(formula = Y_mean ~ ., data = CB)
# Backward stepwise selection on the full model; the printed trace shows
# the AIC for each candidate removal (XG is the term that gets dropped).
step(object = model_CB, direction = "backward")
## Start: AIC=-140.87
## Y_mean ~ XA + XB + XC + XD + XE + XF + XG + XH
##
## Df Sum of Sq RSS AIC
## - XG 1 0.00334 2.0880 -142.79
## <none> 2.0846 -140.87
## - XE 1 0.09346 2.1781 -140.68
## - XC 1 0.13372 2.2183 -139.76
## - XA 1 0.25429 2.3389 -137.12
## - XD 1 0.30008 2.3847 -136.15
## - XH 1 0.41325 2.4979 -133.83
## - XF 1 0.42198 2.5066 -133.66
## - XB 1 0.44981 2.5344 -133.10
##
## Step: AIC=-142.79
## Y_mean ~ XA + XB + XC + XD + XE + XF + XH
##
## Df Sum of Sq RSS AIC
## <none> 2.0880 -142.79
## - XE 1 0.10762 2.1956 -142.28
## - XC 1 0.13126 2.2192 -141.74
## - XA 1 0.27368 2.3616 -138.63
## - XD 1 0.29694 2.3849 -138.14
## - XF 1 0.42015 2.5081 -135.62
## - XH 1 0.44648 2.5344 -135.10
## - XB 1 0.44682 2.5348 -135.10
##
## Call:
## lm(formula = Y_mean ~ XA + XB + XC + XD + XE + XF + XH, data = CB)
##
## Coefficients:
## (Intercept) XA XB XC XD
## 0.04913 0.22193 0.23669 0.05271 0.12875
## XE XF XH
## 0.07976 0.10924 0.09317
# Relative contribution of each regressor to the full model, using the
# "lmg" metric. `rela = TRUE` normalizes the shares so they sum to 100%
# (written as TRUE rather than the reassignable shorthand `T`).
calc.relimp(model_CB, type = "lmg", rela = TRUE)
## Response variable: Y_mean
## Total response variance: 0.2666665
## Analysis based on 50 observations
##
## 8 Regressors:
## XA XB XC XD XE XF XG XH
## Proportion of variance explained by model: 84.05%
## Metrics are normalized to sum to 100% (rela=TRUE).
##
## Relative importance metrics:
##
## lmg
## XA 0.258548893
## XB 0.267715934
## XC 0.053116613
## XD 0.150024773
## XE 0.067020854
## XF 0.106651984
## XG 0.006345585
## XH 0.090575363
##
## Average coefficients for different model sizes:
##
## 1X 2Xs 3Xs 4Xs 5Xs 6Xs
## XA 0.6663780 0.58318404 0.50673002 0.43700389 0.37368890 0.316313228
## XB 0.5557882 0.48520273 0.42528111 0.37441098 0.33131624 0.294800412
## XC 0.1699068 0.12745651 0.09885330 0.08001057 0.06770023 0.060064150
## XD 0.4300523 0.34859571 0.28228427 0.23062173 0.19199087 0.164077695
## XE 0.3096426 0.23235591 0.17706854 0.13811477 0.11131929 0.093587433
## XF 0.2524974 0.20287934 0.16959797 0.14821249 0.13442747 0.124740201
## XG 0.0430660 0.02946205 0.02190958 0.01612736 0.01038145 0.004052432
## XH 0.1925201 0.16029104 0.13764122 0.12195923 0.11102333 0.103569341
## 7Xs 8Xs
## XA 0.264396111 0.21745163
## XB 0.263867495 0.23789168
## XC 0.056002569 0.05476533
## XD 0.144352145 0.13056160
## XE 0.082647947 0.07640724
## XF 0.116865048 0.10952479
## XG -0.002916997 -0.01038689
## XH 0.098854566 0.09634436
Loại bỏ biến nhóm G
# Build a new data set without the XG variable and call it df2.
# The column is dropped by name rather than by position (previously
# `CB[, -7]`), so a change in the CSV column order cannot silently remove
# a different variable. `drop = FALSE` keeps the result a data frame.
df2 <- CB[, names(CB) != "XG", drop = FALSE]
# Refit the model on the reduced data set (all remaining columns as
# regressors of Y_mean).
model_CB1 <- lm(Y_mean ~ ., data = df2)
# relaimpo is already attached earlier in the script; the call is kept so
# this section can also be run on its own.
library(relaimpo)
# Relative importance ("lmg") of the remaining regressors, normalized to
# sum to 100%.
calc.relimp(model_CB1, type = "lmg", rela = TRUE)
## Response variable: Y_mean
## Total response variance: 0.2666665
## Analysis based on 50 observations
##
## 7 Regressors:
## XA XB XC XD XE XF XH
## Proportion of variance explained by model: 84.02%
## Metrics are normalized to sum to 100% (rela=TRUE).
##
## Relative importance metrics:
##
## lmg
## XA 0.25894834
## XB 0.26946607
## XC 0.05335115
## XD 0.15137597
## XE 0.06652159
## XF 0.10775061
## XH 0.09258627
##
## Average coefficients for different model sizes:
##
## 1X 2Xs 3Xs 4Xs 5Xs 6Xs
## XA 0.6663780 0.5673592 0.47793951 0.39885162 0.33007142 0.27121241
## XB 0.5557882 0.4730522 0.40698450 0.35346240 0.30894337 0.27058604
## XC 0.1699068 0.1205953 0.09080994 0.07264063 0.06159820 0.05539957
## XD 0.4300523 0.3344196 0.26240628 0.20978610 0.17248988 0.14672453
## XE 0.3096426 0.2118139 0.15169906 0.11511078 0.09354485 0.08263154
## XF 0.2524974 0.1946561 0.16056148 0.14084612 0.12833635 0.11849088
## XH 0.1925201 0.1527258 0.12847884 0.11327242 0.10335982 0.09695788
## 7Xs
## XA 0.22192608
## XB 0.23669221
## XC 0.05271341
## XD 0.12874987
## XE 0.07975827
## XF 0.10924128
## XH 0.09316593