This demo shows how to perform variable selection in a multiple linear regression model for assessing satisfaction with land relocation in Thai Nguyen, Vietnam.

Hoang Van Thu Project

# Load the Hoang Van Thu project data (semicolon-delimited CSV on GitHub).
HVT <- read.csv(
  file = "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/H_Van_Thu.csv",
  sep = ";"
)

# Preview the first rows to check that the file was parsed as expected.

head(x = HVT)
#- Group A
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages -------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.3.4     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## -- Conflicts ----------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(relaimpo)
## Warning: package 'relaimpo' was built under R version 3.4.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: boot
## Loading required package: survey
## Warning: package 'survey' was built under R version 3.4.3
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
## 
##     aml
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
## Loading required package: mitools
## Warning: package 'mitools' was built under R version 3.4.3
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# Group A: columns A1 through A5 plus the response Y.TB.
GroupA <- dplyr::select(HVT, A1:A5, Y.TB)

head(x = GroupA)

# Full linear model: Y.TB regressed on every Group A column.
model_A <- lm(formula = Y.TB ~ ., data = GroupA)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_A, direction = "backward")
## Start:  AIC=-123.79
## Y.TB ~ A1 + A2 + A3 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A4    1   0.00052 3.3082 -125.78
## - A1    1   0.05524 3.3629 -124.96
## <none>              3.3077 -123.79
## - A3    1   0.29425 3.6019 -121.53
## - A5    1   0.38111 3.6888 -120.34
## - A2    1   0.52027 3.8280 -118.48
## 
## Step:  AIC=-125.78
## Y.TB ~ A1 + A2 + A3 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A1    1   0.05553 3.3637 -126.95
## <none>              3.3082 -125.78
## - A5    1   0.38079 3.6890 -122.33
## - A2    1   0.52422 3.8324 -120.43
## - A3    1   1.10445 4.4127 -113.38
## 
## Step:  AIC=-126.95
## Y.TB ~ A2 + A3 + A5
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.3637 -126.95
## - A5    1   0.39427 3.7580 -123.41
## - A3    1   1.05389 4.4176 -115.32
## - A2    1   1.22483 4.5886 -113.42
## 
## Call:
## lm(formula = Y.TB ~ A2 + A3 + A5, data = GroupA)
## 
## Coefficients:
## (Intercept)           A2           A3           A5  
##      3.8988       0.4353      -0.5535       0.1639
# Keep A2, A3 and A5 - the terms retained in the recorded step() output.
Group1 <- dplyr::select(GroupA, A2, A3, A5)

# Per-row average of the retained Group A items.
Mean_A <- rowMeans(x = Group1)

# Group B: columns B6 through B10 plus the response Y.TB.
GroupB <- dplyr::select(HVT, B6:B10, Y.TB)

head(x = GroupB)

# Full linear model on the Group B columns.
model_B <- lm(formula = Y.TB ~ ., data = GroupB)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_B, direction = "backward")
## Start:  AIC=-120.26
## Y.TB ~ B6 + B7 + B8 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B8    1   0.08716 3.6370 -121.04
## <none>              3.5499 -120.25
## - B9    1   0.17297 3.7228 -119.88
## - B7    1   0.18977 3.7396 -119.65
## - B6    1   0.29022 3.8401 -118.33
## - B10   1   0.49536 4.0452 -115.72
## 
## Step:  AIC=-121.04
## Y.TB ~ B6 + B7 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.6370 -121.04
## - B9    1   0.15269 3.7897 -120.99
## - B7    1   0.19965 3.8367 -120.37
## - B6    1   0.37227 4.0093 -118.17
## - B10   1   0.60160 4.2386 -115.39
## 
## Call:
## lm(formula = Y.TB ~ B6 + B7 + B9 + B10, data = GroupB)
## 
## Coefficients:
## (Intercept)           B6           B7           B9          B10  
##      2.2980       0.1779       0.1442      -0.1706       0.2134
# Keep B6, B7, B9 and B10 - the terms retained in the recorded step() output.
Group2 <- dplyr::select(GroupB, B6, B7, B9, B10)

# Per-row average of the retained Group B items.
Mean_B <- rowMeans(x = Group2)

# Group C: columns C11 through C14 plus the response Y.TB.
GroupC <- dplyr::select(HVT, C11:C14, Y.TB)

head(x = GroupC)

# Full linear model on the Group C columns.
model_C <- lm(formula = Y.TB ~ ., data = GroupC)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_C, direction = "backward")
## Start:  AIC=-111.51
## Y.TB ~ C11 + C12 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C12   1   0.01806 4.4194 -113.30
## - C14   1   0.11325 4.5146 -112.24
## <none>              4.4013 -111.51
## - C11   1   0.18619 4.5875 -111.43
## - C13   1   0.64651 5.0478 -106.65
## 
## Step:  AIC=-113.3
## Y.TB ~ C11 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C14   1   0.11124 4.5306 -114.06
## - C11   1   0.17037 4.5897 -113.41
## <none>              4.4194 -113.30
## - C13   1   0.66427 5.0836 -108.30
## 
## Step:  AIC=-114.06
## Y.TB ~ C11 + C13
## 
##        Df Sum of Sq    RSS     AIC
## - C11   1   0.12628 4.6569 -114.68
## <none>              4.5306 -114.06
## - C13   1   0.56680 5.0974 -110.16
## 
## Step:  AIC=-114.68
## Y.TB ~ C13
## 
##        Df Sum of Sq    RSS     AIC
## <none>              4.6569 -114.68
## - C13   1    1.0481 5.7050 -106.53
## 
## Call:
## lm(formula = Y.TB ~ C13, data = GroupC)
## 
## Coefficients:
## (Intercept)          C13  
##      2.2818       0.3147
# Keep C13 - the only term retained in the recorded step() output.
Group3 <- dplyr::select(GroupC, C13)

# Per-row average (a single column here, so this equals C13 itself).
Mean_C <- rowMeans(x = Group3)

# Group D: columns D15 through D16 plus the response Y.TB.
GroupD <- dplyr::select(HVT, D15:D16, Y.TB)

head(x = GroupD)

# Full linear model on the Group D columns.
model_D <- lm(formula = Y.TB ~ ., data = GroupD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_D, direction = "backward")
## Start:  AIC=-112.34
## Y.TB ~ D15 + D16
## 
##        Df Sum of Sq    RSS     AIC
## <none>              4.6895 -112.33
## - D16   1   0.24936 4.9388 -111.75
## - D15   1   0.92811 5.6176 -105.31
## 
## Call:
## lm(formula = Y.TB ~ D15 + D16, data = GroupD)
## 
## Coefficients:
## (Intercept)          D15          D16  
##      2.9906       0.3507      -0.1428
# Keep D15 through D16 - both terms are retained in the recorded step() output.
Group4 <- dplyr::select(GroupD, D15:D16)

# Per-row average of the retained Group D items.
Mean_D <- rowMeans(x = Group4)

# Group E: columns E17 through E20 plus the response Y.TB.
GroupE <- dplyr::select(HVT, E17:E20, Y.TB)

head(x = GroupE)

# Full linear model on the Group E columns.
model_E <- lm(formula = Y.TB ~ ., data = GroupE)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_E, direction = "backward")
## Start:  AIC=-117.59
## Y.TB ~ E17 + E18 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E17   1   0.02011 3.9168 -119.34
## <none>              3.8967 -117.59
## - E20   1   0.21918 4.1159 -116.86
## - E18   1   0.53801 4.4347 -113.13
## - E19   1   0.56462 4.4613 -112.83
## 
## Step:  AIC=-119.34
## Y.TB ~ E18 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.9168 -119.34
## - E20   1   0.31859 4.2354 -117.43
## - E19   1   0.56160 4.4784 -114.64
## - E18   1   0.64746 4.5643 -113.69
## 
## Call:
## lm(formula = Y.TB ~ E18 + E19 + E20, data = GroupE)
## 
## Coefficients:
## (Intercept)          E18          E19          E20  
##      1.1783       0.2328       0.1779       0.1334
# Keep E18 through E20 - the terms retained in the recorded step() output.
Group5 <- dplyr::select(GroupE, E18:E20)

# Per-row average of the retained Group E items.
Mean_E <- rowMeans(x = Group5)

# Group F: columns F21 through F23 plus the response Y.TB.
GroupF <- dplyr::select(HVT, F21:F23, Y.TB)

head(x = GroupF)

# Full linear model on the Group F columns.
model_F <- lm(formula = Y.TB ~ ., data = GroupF)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_F, direction = "backward")
## Start:  AIC=-129.83
## Y.TB ~ F21 + F22 + F23
## 
##        Df Sum of Sq    RSS     AIC
## - F22   1   0.00237 3.1778 -131.79
## - F21   1   0.03731 3.2128 -131.24
## <none>              3.1755 -129.83
## - F23   1   1.30594 4.4814 -114.60
## 
## Step:  AIC=-131.79
## Y.TB ~ F21 + F23
## 
##        Df Sum of Sq    RSS     AIC
## - F21   1   0.03577 3.2136 -133.23
## <none>              3.1778 -131.79
## - F23   1   1.33063 4.5085 -116.30
## 
## Step:  AIC=-133.23
## Y.TB ~ F23
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.2136 -133.23
## - F23   1    2.4914 5.7050 -106.53
## 
## Call:
## lm(formula = Y.TB ~ F23, data = GroupF)
## 
## Coefficients:
## (Intercept)          F23  
##      2.8931       0.2561
# Keep F23 - the only term retained in the recorded step() output.
Group6 <- dplyr::select(GroupF, F23)

# Per-row average (a single column here, so this equals F23 itself).
Mean_F <- rowMeans(x = Group6)

# Group G: columns G24 through G25 plus the response Y.TB.
GroupG <- dplyr::select(HVT, G24:G25, Y.TB)

head(x = GroupG)

# Full linear model on the Group G columns.
model_G <- lm(formula = Y.TB ~ ., data = GroupG)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_G, direction = "backward")
## Start:  AIC=-154.08
## Y.TB ~ G24 + G25
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.0348 -154.08
## - G25   1   0.43711 2.4719 -146.35
## - G24   1   1.01596 3.0508 -135.83
## 
## Call:
## lm(formula = Y.TB ~ G24 + G25, data = GroupG)
## 
## Coefficients:
## (Intercept)          G24          G25  
##      2.4998       0.2155       0.1390
# Keep G24 through G25 - both terms are retained in the recorded step() output.
Group7 <- dplyr::select(GroupG, G24:G25)

# Per-row average of the retained Group G items.
Mean_G <- rowMeans(x = Group7)

# Group H: columns H26 through H27 plus the response Y.TB.
GroupH <- dplyr::select(HVT, H26:H27, Y.TB)

head(x = GroupH)

# Full linear model on the Group H columns.
model_H <- lm(formula = Y.TB ~ ., data = GroupH)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_H, direction = "backward")
## Start:  AIC=-150.96
## Y.TB ~ H26 + H27
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.1657 -150.96
## - H26   1   0.15469 2.3204 -149.51
## - H27   1   0.15469 2.3204 -149.51
## 
## Call:
## lm(formula = Y.TB ~ H26 + H27, data = GroupH)
## 
## Coefficients:
## (Intercept)          H26          H27  
##      2.6769       0.2011       0.2011
# Keep H26 through H27 - both terms are retained in the recorded step() output.
Group8 <- dplyr::select(GroupH, H26:H27)

# Per-row average of the retained Group H items.
Mean_H <- rowMeans(x = Group8)

# Combined dataset: one averaged score per group plus the response.
HVT_Dataset <- data.frame(
  Mean_A, Mean_B, Mean_C, Mean_D, Mean_E, Mean_F, Mean_G, Mean_H,
  Y_mean = HVT[["Y.TB"]]
)

head(x = HVT_Dataset)

# Final model: drop rows containing NA before fitting.
HVT_Data <- na.omit(object = HVT_Dataset)

model_final <- lm(formula = Y_mean ~ ., data = HVT_Data)

# Backward elimination by AIC on the combined model. In the recorded output
# Mean_B is the only term removed.
step(object = model_final, direction = "backward")
## Start:  AIC=-211.15
## Y_mean ~ Mean_A + Mean_B + Mean_C + Mean_D + Mean_E + Mean_F + 
##     Mean_G + Mean_H
## 
##          Df Sum of Sq     RSS     AIC
## - Mean_B  1  0.001043 0.51226 -213.05
## <none>                0.51122 -211.15
## - Mean_C  1  0.030175 0.54139 -210.28
## - Mean_D  1  0.031316 0.54254 -210.18
## - Mean_H  1  0.113259 0.62448 -203.14
## - Mean_F  1  0.125825 0.63704 -202.15
## - Mean_E  1  0.158358 0.66958 -199.66
## - Mean_A  1  0.246919 0.75814 -193.45
## - Mean_G  1  0.273650 0.78487 -191.71
## 
## Step:  AIC=-213.05
## Y_mean ~ Mean_A + Mean_C + Mean_D + Mean_E + Mean_F + Mean_G + 
##     Mean_H
## 
##          Df Sum of Sq     RSS     AIC
## <none>                0.51226 -213.05
## - Mean_C  1  0.031745 0.54401 -212.04
## - Mean_D  1  0.036664 0.54893 -211.59
## - Mean_H  1  0.113619 0.62588 -205.03
## - Mean_F  1  0.136812 0.64907 -203.21
## - Mean_E  1  0.192161 0.70442 -199.12
## - Mean_G  1  0.272937 0.78520 -193.69
## - Mean_A  1  0.307920 0.82018 -191.51
## 
## Call:
## lm(formula = Y_mean ~ Mean_A + Mean_C + Mean_D + Mean_E + Mean_F + 
##     Mean_G + Mean_H, data = HVT_Data)
## 
## Coefficients:
## (Intercept)       Mean_A       Mean_C       Mean_D       Mean_E  
##     0.44652      0.18170      0.06527      0.05127      0.20593  
##      Mean_F       Mean_G       Mean_H  
##     0.09547      0.15212      0.12180
# Remove Mean_B, the term eliminated by the backward step on the combined
# model. The column is dropped by name rather than by position so the code
# keeps working if the column order of HVT_Data ever changes.
df <- HVT_Data[, names(HVT_Data) != "Mean_B", drop = FALSE]

# Refit the combined model on the remaining group means.
model_HVT1 <- lm(Y_mean ~ ., data = df)

# LMG relative importance; rela = TRUE normalises the shares to sum to 100%.
impo_HVT1 <- calc.relimp(model_HVT1, type = "lmg", rela = TRUE)

impo_HVT1
## Response variable: Y_mean 
## Total response variance: 0.1164286 
## Analysis based on 50 observations 
## 
## 7 Regressors: 
## Mean_A Mean_C Mean_D Mean_E Mean_F Mean_G Mean_H 
## Proportion of variance explained by model: 91.02%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##               lmg
## Mean_A 0.10304471
## Mean_C 0.06330955
## Mean_D 0.02316092
## Mean_E 0.12425101
## Mean_F 0.17407391
## Mean_G 0.26549252
## Mean_H 0.24666737
## 
## Average coefficients for different model sizes: 
## 
##               1X        2Xs       3Xs        4Xs       5Xs        6Xs
## Mean_A 0.2580618 0.22533976 0.2079465 0.19754626 0.1902697 0.18476229
## Mean_C 0.3147448 0.22766191 0.1716722 0.13564648 0.1099771 0.08811272
## Mean_D 0.1394444 0.09293346 0.0684665 0.05626892 0.0507789 0.04928148
## Mean_E 0.5349572 0.42245420 0.3442569 0.29003945 0.2519314 0.22533542
## Mean_F 0.2560526 0.19551317 0.1555685 0.12877098 0.1109824 0.10013574
## Mean_G 0.3537931 0.30364147 0.2596262 0.22188373 0.1907776 0.16717156
## Mean_H 0.4021938 0.34156383 0.2885113 0.24192549 0.1998767 0.16043779
##               7Xs
## Mean_A 0.18170435
## Mean_C 0.06527345
## Mean_D 0.05126938
## Mean_E 0.20592570
## Mean_F 0.09547314
## Mean_G 0.15212149
## Mean_H 0.12179713
# Load the prepared final dataset (HVT_Final.csv) and repeat the analysis.
# NOTE(review): how HVT_Final.csv was derived is not shown here; the recorded
# output only shows that it has the same regressors as the model above.

df_HVT <- read.csv(
  "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/HVT_Final.csv",
  header = TRUE
)

model_HVT2 <- lm(Y_mean ~ ., data = df_HVT)

# LMG relative importance; rela = TRUE normalises the shares to sum to 100%.
impo_HVT2 <- calc.relimp(model_HVT2, type = "lmg", rela = TRUE)

impo_HVT2
## Response variable: Y_mean 
## Total response variance: 0.1164286 
## Analysis based on 50 observations 
## 
## 7 Regressors: 
## Mean_A Mean_C Mean_D Mean_E Mean_F Mean_G Mean_H 
## Proportion of variance explained by model: 48.27%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##               lmg
## Mean_A 0.21301087
## Mean_C 0.02077414
## Mean_D 0.06668971
## Mean_E 0.35790977
## Mean_F 0.08091115
## Mean_G 0.23766256
## Mean_H 0.02304179
## 
## Average coefficients for different model sizes: 
## 
##                1X        2Xs        3Xs        4Xs          5Xs
## Mean_A 0.25806184 0.23704844 0.22108352 0.20906614  0.200099006
## Mean_C 0.07061959 0.04658246 0.02685615 0.01064096 -0.002888626
## Mean_D 0.13944444 0.12044406 0.10442965 0.09088227  0.079393838
## Mean_E 0.53495723 0.50146959 0.46907527 0.43894776  0.411960711
## Mean_F 0.09490334 0.08341120 0.07397738 0.06615532  0.059675545
## Mean_G 0.17658423 0.15898706 0.14329206 0.12933533  0.116822992
## Mean_H 0.06564568 0.04550025 0.02833064 0.01373067  0.001474334
##                6Xs         7Xs
## Mean_A  0.19339378  0.18808807
## Mean_C -0.01437788 -0.02409297
## Mean_D  0.06960882  0.06112419
## Mean_E  0.38854344  0.36862223
## Mean_F  0.05435841  0.04994838
## Mean_G  0.10545854  0.09509132
## Mean_H -0.00861258 -0.01681929

Duong Bac Son Keo Dai

# Bac Son Keo Dai Road project: load the semicolon-delimited data from GitHub.
BS_KD <- read.csv(
  file = "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/S%E1%BB%91%20li%E1%BB%87u%20T.S%C6%A1n.csv",
  sep = ";"
)

# Preview the first rows to check that the file was parsed as expected.

head(x = BS_KD)

# Group A
library(tidyverse)

# Columns A1 through A5 plus the response Y.TB.
GroupA_BS_KD <- dplyr::select(BS_KD, A1:A5, Y.TB)

head(x = GroupA_BS_KD)

# Full linear model on the Group A columns.
model_BSKD_A <- lm(formula = Y.TB ~ ., data = GroupA_BS_KD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_BSKD_A, direction = "backward")
## Start:  AIC=-136.89
## Y.TB ~ A1 + A2 + A3 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A4    1  0.001067 2.5465 -138.87
## - A1    1  0.009538 2.5550 -138.70
## - A3    1  0.101962 2.6474 -136.92
## <none>              2.5455 -136.89
## - A5    1  0.107990 2.6534 -136.81
## - A2    1  0.304097 2.8495 -133.24
## 
## Step:  AIC=-138.86
## Y.TB ~ A1 + A2 + A3 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A1    1   0.00925 2.5558 -140.68
## <none>              2.5465 -138.87
## - A5    1   0.11908 2.6656 -138.58
## - A3    1   0.14688 2.6934 -138.06
## - A2    1   0.38541 2.9319 -133.82
## 
## Step:  AIC=-140.68
## Y.TB ~ A2 + A3 + A5
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.5558 -140.68
## - A5    1   0.11450 2.6703 -140.49
## - A3    1   0.27527 2.8310 -137.57
## - A2    1   0.52904 3.0848 -133.28
## 
## Call:
## lm(formula = Y.TB ~ A2 + A3 + A5, data = GroupA_BS_KD)
## 
## Coefficients:
## (Intercept)           A2           A3           A5  
##     2.15439      0.30987      0.11197     -0.07981
# Keep A2, A3 and A5 - the terms retained in the recorded step() output.
Group_BSKD_A <- dplyr::select(GroupA_BS_KD, A2, A3, A5)

# Per-row average of the retained Group A items.
Mean_A_BSKD <- rowMeans(x = Group_BSKD_A)

# Group B: columns B6 through B10 plus the response Y.TB.
GroupB_BSKD <- dplyr::select(BS_KD, B6:B10, Y.TB)

head(x = GroupB_BSKD)

# Full linear model on the Group B columns.
model_BSKD_B <- lm(formula = Y.TB ~ ., data = GroupB_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_BSKD_B, direction = "backward")
## Start:  AIC=-142.78
## Y.TB ~ B6 + B7 + B8 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B8    1   0.01502 2.2775 -144.45
## - B9    1   0.03168 2.2941 -144.08
## <none>              2.2624 -142.78
## - B6    1   0.10991 2.3724 -142.41
## - B10   1   0.35633 2.6188 -137.47
## - B7    1   0.45340 2.7159 -135.65
## 
## Step:  AIC=-144.45
## Y.TB ~ B6 + B7 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B9    1   0.01945 2.2969 -146.02
## <none>              2.2775 -144.45
## - B6    1   0.15096 2.4284 -143.24
## - B10   1   0.36011 2.6376 -139.11
## - B7    1   0.45124 2.7287 -137.41
## 
## Step:  AIC=-146.02
## Y.TB ~ B6 + B7 + B10
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.2969 -146.02
## - B6    1   0.13789 2.4348 -145.11
## - B10   1   0.39151 2.6884 -140.15
## - B7    1   0.46017 2.7571 -138.89
## 
## Call:
## lm(formula = Y.TB ~ B6 + B7 + B10, data = GroupB_BSKD)
## 
## Coefficients:
## (Intercept)           B6           B7          B10  
##      1.9206       0.1149       0.1692       0.1340
# Keep B6, B7 and B10 - the terms retained in the recorded step() output.
Group_BSKD_B <- GroupB_BSKD %>% dplyr::select(B6, B7, B10)

# Per-row average of the retained Group B items.
Mean_BSKD_B <- rowMeans(Group_BSKD_B)

# Group C: columns C11 through C14 plus the response Y.TB.
GroupC_BSKD <- BS_KD %>% dplyr::select(C11:C14, Y.TB)

head(GroupC_BSKD)

# Full linear model on the Group C columns. It is stored as model_BSKD_C so
# that it no longer overwrites the Group B model (model_BSKD_B).
model_BSKD_C <- lm(Y.TB ~ ., data = GroupC_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(model_BSKD_C, direction = "backward")
## Start:  AIC=-154
## Y.TB ~ C11 + C12 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C14   1   0.01849 1.8997 -155.51
## - C11   1   0.02207 1.9033 -155.42
## <none>              1.8812 -154.00
## - C12   1   0.14422 2.0255 -152.31
## - C13   1   0.58901 2.4703 -142.38
## 
## Step:  AIC=-155.52
## Y.TB ~ C11 + C12 + C13
## 
##        Df Sum of Sq    RSS     AIC
## - C11   1   0.01477 1.9145 -157.13
## <none>              1.8997 -155.51
## - C12   1   0.20333 2.1031 -152.43
## - C13   1   0.77105 2.6708 -140.48
## 
## Step:  AIC=-157.13
## Y.TB ~ C12 + C13
## 
##        Df Sum of Sq    RSS     AIC
## <none>              1.9145 -157.13
## - C12   1    0.2903 2.2048 -152.07
## - C13   1    1.1457 3.0602 -135.68
## 
## Call:
## lm(formula = Y.TB ~ C12 + C13, data = GroupC_BSKD)
## 
## Coefficients:
## (Intercept)          C12          C13  
##      1.9257       0.1000       0.3252
# Keep C12 through C13 - the terms retained in the recorded step() output.
Group_BSKD_C <- dplyr::select(GroupC_BSKD, C12:C13)

# Per-row average of the retained Group C items.
Mean_BSKD_C <- rowMeans(x = Group_BSKD_C)

# Group D: columns D15 through D16 plus the response Y.TB.
GroupD_BSKD <- dplyr::select(BS_KD, D15:D16, Y.TB)

head(x = GroupD_BSKD)

# Full linear model on the Group D columns.
model_BSKD_D <- lm(formula = Y.TB ~ ., data = GroupD_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_BSKD_D, direction = "backward")
## Start:  AIC=-135.92
## Y.TB ~ D15 + D16
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.9261 -135.92
## - D15   1   0.19493 3.1210 -134.69
## - D16   1   0.72299 3.6491 -126.88
## 
## Call:
## lm(formula = Y.TB ~ D15 + D16, data = GroupD_BSKD)
## 
## Coefficients:
## (Intercept)          D15          D16  
##      2.4406       0.1424       0.1644
# Keep D15 through D16 - both terms are retained in the recorded step() output.
Group_BSKD_D <- dplyr::select(GroupD_BSKD, D15:D16)

# Per-row average of the retained Group D items.
Mean_BSKD_D <- rowMeans(x = Group_BSKD_D)

# Group E: columns E17 through E20 plus the response Y.TB.
GroupE_BSKD <- dplyr::select(BS_KD, E17:E20, Y.TB)

head(x = GroupE_BSKD)

# Full linear model on the Group E columns.
model_BSKD_E <- lm(formula = Y.TB ~ ., data = GroupE_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_BSKD_E, direction = "backward")
## Start:  AIC=-146.3
## Y.TB ~ E17 + E18 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E18   1   0.00028 2.1950 -148.29
## - E20   1   0.02264 2.2174 -147.78
## <none>              2.1948 -146.30
## - E17   1   0.28857 2.4833 -142.12
## - E19   1   0.40864 2.6034 -139.76
## 
## Step:  AIC=-148.29
## Y.TB ~ E17 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E20   1   0.02401 2.2191 -149.75
## <none>              2.1950 -148.29
## - E17   1   0.31612 2.5112 -143.56
## - E19   1   0.42802 2.6231 -141.38
## 
## Step:  AIC=-149.75
## Y.TB ~ E17 + E19
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.2191 -149.75
## - E17   1   0.30675 2.5258 -145.27
## - E19   1   0.70685 2.9259 -137.92
## 
## Call:
## lm(formula = Y.TB ~ E17 + E19, data = GroupE_BSKD)
## 
## Coefficients:
## (Intercept)          E17          E19  
##      2.1383       0.1424       0.2135
# Keep E17 and E19 - the terms retained in the recorded step() output.
Group_BSKD_E <- dplyr::select(GroupE_BSKD, E17, E19)

# Per-row average of the retained Group E items.
Mean_BSKD_E <- rowMeans(x = Group_BSKD_E)

# Group F: columns F21 through F23 plus the response Y.TB.
GroupF_BSKD <- dplyr::select(BS_KD, F21:F23, Y.TB)

head(x = GroupF_BSKD)

# Full linear model on the Group F columns.
model_BSKD_F <- lm(formula = Y.TB ~ ., data = GroupF_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_BSKD_F, direction = "backward")
## Start:  AIC=-134.18
## Y.TB ~ F21 + F22 + F23
## 
##        Df Sum of Sq    RSS     AIC
## - F22   1   0.07997 2.9908 -134.82
## - F23   1   0.08521 2.9960 -134.74
## <none>              2.9108 -134.18
## - F21   1   0.62489 3.5357 -126.46
## 
## Step:  AIC=-134.82
## Y.TB ~ F21 + F23
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.9908 -134.82
## - F23   1   0.12327 3.1140 -134.81
## - F21   1   0.93568 3.9265 -123.21
## 
## Call:
## lm(formula = Y.TB ~ F21 + F23, data = GroupF_BSKD)
## 
## Coefficients:
## (Intercept)          F21          F23  
##     2.35649      0.24183      0.09643
# Keep F21 and F23 - the terms retained in the recorded step() output.
Group_BSKD_F <- dplyr::select(GroupF_BSKD, F21, F23)

# Per-row average of the retained Group F items.
Mean_BSKD_F <- rowMeans(x = Group_BSKD_F)

# Group G: columns G24 through G25 plus the response Y.TB.
GroupG_BSKD <- dplyr::select(BS_KD, G24:G25, Y.TB)

head(x = GroupG_BSKD)

# Full linear model on the Group G columns.
model_BSKD_G <- lm(formula = Y.TB ~ ., data = GroupG_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_BSKD_G, direction = "backward")
## Start:  AIC=-138.15
## Y.TB ~ G24 + G25
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.7981 -138.15
## - G25   1   0.67134 3.4694 -129.40
## - G24   1   0.69847 3.4966 -129.01
## 
## Call:
## lm(formula = Y.TB ~ G24 + G25, data = GroupG_BSKD)
## 
## Coefficients:
## (Intercept)          G24          G25  
##      2.0438       0.2293       0.2152
# Keep G24 through G25 - both terms are retained in the recorded step() output.
Group_BSKD_G <- GroupG_BSKD %>% dplyr::select(G24:G25)

# Per-row average of the retained Group G items.
Mean_BSKD_G <- rowMeans(Group_BSKD_G)

# Group H
# Fix: read the Group H columns from BS_KD, this project's data. The block
# previously read them from HVT (the Hoang Van Thu data), so the output
# recorded after this chunk, and every later result that uses Mean_BSKD_H,
# was produced from the wrong project and must be regenerated by re-running.
GroupH_BSKD <- BS_KD %>% dplyr::select(H26:H27, Y.TB)

head(GroupH_BSKD)

# Full linear model on the Group H columns.
model_BSKD_H <- lm(Y.TB ~ ., data = GroupH_BSKD)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(model_BSKD_H, direction = "backward")
## Start:  AIC=-150.96
## Y.TB ~ H26 + H27
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.1657 -150.96
## - H26   1   0.15469 2.3204 -149.51
## - H27   1   0.15469 2.3204 -149.51
## 
## Call:
## lm(formula = Y.TB ~ H26 + H27, data = GroupH_BSKD)
## 
## Coefficients:
## (Intercept)          H26          H27  
##      2.6769       0.2011       0.2011
# Keep H26 through H27 - both terms are retained in the recorded step() output.
Group_BSKD_H <- dplyr::select(GroupH_BSKD, H26:H27)

# Per-row average of the retained Group H items.
Mean_BSKD_H <- rowMeans(x = Group_BSKD_H)

# Combined dataset: one averaged score per group plus the response.
BSKD_Dataset <- data.frame(
  Mean_A_BSKD, Mean_BSKD_B, Mean_BSKD_C, Mean_BSKD_D,
  Mean_BSKD_E, Mean_BSKD_F, Mean_BSKD_G, Mean_BSKD_H,
  Y_mean = BS_KD[["Y.TB"]]
)

head(x = BSKD_Dataset)

# Final model: drop rows containing NA before fitting.
BSKD_Data <- na.omit(object = BSKD_Dataset)

model_BSKD <- lm(formula = Y_mean ~ ., data = BSKD_Data)

# Backward elimination by AIC on the combined model. In the recorded output
# Mean_BSKD_D, Mean_BSKD_H and Mean_BSKD_F are removed, in that order.
step(object = model_BSKD, direction = "backward")
## Start:  AIC=-184.17
## Y_mean ~ Mean_A_BSKD + Mean_BSKD_B + Mean_BSKD_C + Mean_BSKD_D + 
##     Mean_BSKD_E + Mean_BSKD_F + Mean_BSKD_G + Mean_BSKD_H
## 
##               Df Sum of Sq     RSS     AIC
## - Mean_BSKD_D  1   0.00007 0.87692 -186.17
## - Mean_BSKD_H  1   0.00653 0.88337 -185.80
## - Mean_BSKD_F  1   0.01361 0.89045 -185.40
## <none>                     0.87685 -184.17
## - Mean_BSKD_E  1   0.06213 0.93897 -182.75
## - Mean_BSKD_B  1   0.08736 0.96421 -181.42
## - Mean_BSKD_C  1   0.09195 0.96880 -181.19
## - Mean_A_BSKD  1   0.10045 0.97730 -180.75
## - Mean_BSKD_G  1   0.46827 1.34512 -164.78
## 
## Step:  AIC=-186.17
## Y_mean ~ Mean_A_BSKD + Mean_BSKD_B + Mean_BSKD_C + Mean_BSKD_E + 
##     Mean_BSKD_F + Mean_BSKD_G + Mean_BSKD_H
## 
##               Df Sum of Sq     RSS     AIC
## - Mean_BSKD_H  1   0.00646 0.88338 -187.80
## - Mean_BSKD_F  1   0.01354 0.89045 -187.40
## <none>                     0.87692 -186.17
## - Mean_BSKD_E  1   0.07222 0.94914 -184.21
## - Mean_BSKD_B  1   0.09048 0.96739 -183.26
## - Mean_A_BSKD  1   0.10187 0.97879 -182.67
## - Mean_BSKD_C  1   0.11131 0.98822 -182.19
## - Mean_BSKD_G  1   0.47994 1.35685 -166.34
## 
## Step:  AIC=-187.8
## Y_mean ~ Mean_A_BSKD + Mean_BSKD_B + Mean_BSKD_C + Mean_BSKD_E + 
##     Mean_BSKD_F + Mean_BSKD_G
## 
##               Df Sum of Sq     RSS     AIC
## - Mean_BSKD_F  1   0.01429 0.89767 -189.00
## <none>                     0.88338 -187.80
## - Mean_BSKD_E  1   0.07245 0.95582 -185.86
## - Mean_BSKD_B  1   0.08672 0.97010 -185.12
## - Mean_A_BSKD  1   0.10681 0.99019 -184.09
## - Mean_BSKD_C  1   0.10821 0.99159 -184.02
## - Mean_BSKD_G  1   0.47670 1.36008 -168.22
## 
## Step:  AIC=-189
## Y_mean ~ Mean_A_BSKD + Mean_BSKD_B + Mean_BSKD_C + Mean_BSKD_E + 
##     Mean_BSKD_G
## 
##               Df Sum of Sq     RSS     AIC
## <none>                     0.89767 -189.00
## - Mean_BSKD_B  1   0.07805 0.97572 -186.83
## - Mean_BSKD_E  1   0.10149 0.99916 -185.64
## - Mean_A_BSKD  1   0.11037 1.00804 -185.20
## - Mean_BSKD_C  1   0.13381 1.03148 -184.05
## - Mean_BSKD_G  1   0.57527 1.47294 -166.24
## 
## Call:
## lm(formula = Y_mean ~ Mean_A_BSKD + Mean_BSKD_B + Mean_BSKD_C + 
##     Mean_BSKD_E + Mean_BSKD_G, data = BSKD_Data)
## 
## Coefficients:
## (Intercept)  Mean_A_BSKD  Mean_BSKD_B  Mean_BSKD_C  Mean_BSKD_E  
##      0.5866       0.1162       0.1363       0.1288       0.1093  
## Mean_BSKD_G  
##      0.3148
# Relative importance (LMG) of each regressor in the full combined model
# (model_BSKD, before any term is removed). rela = TRUE normalises the shares
# to sum to 100%.
impo_BS_KD <- calc.relimp(model_BSKD, type = "lmg", rela = TRUE)

impo_BS_KD
## Response variable: Y_mean 
## Total response variance: 0.08505943 
## Analysis based on 50 observations 
## 
## 8 Regressors: 
## Mean_A_BSKD Mean_BSKD_B Mean_BSKD_C Mean_BSKD_D Mean_BSKD_E Mean_BSKD_F Mean_BSKD_G Mean_BSKD_H 
## Proportion of variance explained by model: 78.96%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                     lmg
## Mean_A_BSKD 0.102875571
## Mean_BSKD_B 0.180335396
## Mean_BSKD_C 0.174421738
## Mean_BSKD_D 0.084378741
## Mean_BSKD_E 0.162450067
## Mean_BSKD_F 0.079435694
## Mean_BSKD_G 0.207554619
## Mean_BSKD_H 0.008548173
## 
## Average coefficients for different model sizes: 
## 
##                      1X         2Xs         3Xs         4Xs          5Xs
## Mean_A_BSKD  0.27980252  0.20741121  0.17107987  0.14997908  0.135749348
## Mean_BSKD_B  0.42670734  0.34361192  0.28921459  0.25128623  0.221889812
## Mean_BSKD_C  0.34934247  0.28129410  0.22885124  0.18985262  0.161752875
## Mean_BSKD_D  0.31509615  0.21169929  0.14197288  0.09380995  0.059551758
## Mean_BSKD_E  0.35656198  0.28505147  0.22835144  0.18437481  0.151031689
## Mean_BSKD_F  0.35023529  0.24200195  0.17663364  0.13523209  0.106693912
## Mean_BSKD_G  0.44388769  0.36519224  0.33064813  0.31621752  0.310245317
## Mean_BSKD_H -0.07625229 -0.04085679 -0.02235368 -0.01044984 -0.001484833
##                     6Xs        7Xs          8Xs
## Mean_A_BSKD 0.125246804 0.11731010  0.111823050
## Mean_BSKD_B 0.196480781 0.17257925  0.148524021
## Mean_BSKD_C 0.142221777 0.12948053  0.122425280
## Mean_BSKD_D 0.034108022 0.01393686 -0.003534527
## Mean_BSKD_E 0.126455237 0.10911357  0.097856556
## Mean_BSKD_F 0.084891653 0.06653346  0.049625306
## Mean_BSKD_G 0.307787397 0.30693966  0.307223574
## Mean_BSKD_H 0.005912985 0.01235859  0.018216643
# Remove Mean_BSKD_D, Mean_BSKD_F and Mean_BSKD_H - the terms eliminated by the
# backward step on the combined model. The columns are dropped by name rather
# than by position (previously -c(4, 6, 8)) so the code keeps working if the
# column order of BSKD_Data ever changes.
dropped_terms <- c("Mean_BSKD_D", "Mean_BSKD_F", "Mean_BSKD_H")
df_BSKD <- BSKD_Data[, !(names(BSKD_Data) %in% dropped_terms), drop = FALSE]

# Refit the combined model on the remaining group means.
model_BS1 <- lm(Y_mean ~ ., data = df_BSKD)

# LMG relative importance; rela = TRUE normalises the shares to sum to 100%.
impo_BSKD1 <- calc.relimp(model_BS1, type = "lmg", rela = TRUE)

impo_BSKD1
## Response variable: Y_mean 
## Total response variance: 0.08505943 
## Analysis based on 50 observations 
## 
## 5 Regressors: 
## Mean_A_BSKD Mean_BSKD_B Mean_BSKD_C Mean_BSKD_E Mean_BSKD_G 
## Proportion of variance explained by model: 78.46%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                   lmg
## Mean_A_BSKD 0.1096482
## Mean_BSKD_B 0.1999413
## Mean_BSKD_C 0.2203991
## Mean_BSKD_E 0.2133738
## Mean_BSKD_G 0.2566376
## 
## Average coefficients for different model sizes: 
## 
##                    1X       2Xs       3Xs       4Xs       5Xs
## Mean_A_BSKD 0.2798025 0.1724267 0.1341424 0.1208441 0.1161921
## Mean_BSKD_B 0.4267073 0.3163279 0.2358173 0.1786830 0.1362954
## Mean_BSKD_C 0.3493425 0.2611068 0.1974159 0.1552470 0.1288155
## Mean_BSKD_E 0.3565620 0.2632846 0.1934919 0.1450570 0.1093374
## Mean_BSKD_G 0.4438877 0.3622056 0.3354663 0.3234411 0.3147626

Duong Viet Bac - Quang Trung and Dong Quang

# Viet Bac Road project (Quang Trung and Dong Quang): load the
# semicolon-delimited data from GitHub.
QT_DQ <- read.csv(
  "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/D_Viet%20Bac.csv",
  header = TRUE,
  sep = ";"
)

# Preview the first rows to check that the file was parsed as expected.

head(QT_DQ)

# Group A
library(tidyverse)

# Columns A1 through A5 plus the response Y.TB.
GroupA_QT_DQ <- QT_DQ %>% dplyr::select(A1:A5, Y.TB)

head(GroupA_QT_DQ)

# Full linear model on the Group A columns.
model_QTDQ_A <- lm(Y.TB ~ ., data = GroupA_QT_DQ)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(model_QTDQ_A, direction = "backward")
## Start:  AIC=-117.84
## Y.TB ~ A1 + A2 + A3 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A1    1   0.06911 3.7948 -118.92
## - A2    1   0.07806 3.8038 -118.80
## - A5    1   0.10708 3.8328 -118.42
## <none>              3.7257 -117.84
## - A3    1   0.38321 4.1089 -114.94
## - A4    1   0.54316 4.2689 -113.03
## 
## Step:  AIC=-118.92
## Y.TB ~ A2 + A3 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A5    1   0.10644 3.9012 -119.54
## <none>              3.7948 -118.92
## - A3    1   0.40203 4.1968 -115.89
## - A4    1   0.52245 4.3173 -114.47
## - A2    1   1.12566 4.9205 -107.93
## 
## Step:  AIC=-119.54
## Y.TB ~ A2 + A3 + A4
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.9012 -119.54
## - A3    1   0.29799 4.1992 -117.86
## - A4    1   0.42744 4.3287 -116.34
## - A2    1   1.73574 5.6370 -103.13
## 
## Call:
## lm(formula = Y.TB ~ A2 + A3 + A4, data = GroupA_QT_DQ)
## 
## Coefficients:
## (Intercept)           A2           A3           A4  
##      2.4745       0.4156       0.2618      -0.3900
# Keep A2, A3 and A4 - the terms retained in the recorded step() output.
Group_QTDQ_A <- dplyr::select(GroupA_QT_DQ, A2, A3, A4)

# Per-row average of the retained Group A items.
Mean_A_QTDQ <- rowMeans(x = Group_QTDQ_A)

# Group B: columns B6 through B10 plus the response Y.TB.
GroupB_QTDQ <- dplyr::select(QT_DQ, B6:B10, Y.TB)

head(x = GroupB_QTDQ)

# Full linear model on the Group B columns.
model_QTDQ_B <- lm(formula = Y.TB ~ ., data = GroupB_QTDQ)

# Backward elimination by AIC; the printed trace shows which terms are kept.
step(object = model_QTDQ_B, direction = "backward")
## Start:  AIC=-122.47
## Y.TB ~ B6 + B7 + B8 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B8    1   0.00018 3.3963 -124.47
## - B6    1   0.03477 3.4309 -123.96
## - B7    1   0.03886 3.4350 -123.90
## <none>              3.3962 -122.47
## - B9    1   0.22927 3.6254 -121.20
## - B10   1   0.58367 3.9798 -116.54
## 
## Step:  AIC=-124.47
## Y.TB ~ B6 + B7 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B6    1   0.03713 3.4335 -125.92
## - B7    1   0.03888 3.4352 -125.90
## <none>              3.3963 -124.47
## - B9    1   0.23547 3.6318 -123.11
## - B10   1   0.58799 3.9843 -118.48
## 
## Step:  AIC=-125.92
## Y.TB ~ B7 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B7    1   0.09850 3.5320 -126.51
## <none>              3.4335 -125.92
## - B9    1   0.20073 3.6342 -125.08
## - B10   1   0.83302 4.2665 -117.06
## 
## Step:  AIC=-126.51
## Y.TB ~ B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.5320 -126.51
## - B9    1   0.67302 4.2050 -119.79
## - B10   1   1.03844 4.5704 -115.62
## 
## Call:
## lm(formula = Y.TB ~ B9 + B10, data = GroupB_QTDQ)
## 
## Coefficients:
## (Intercept)           B9          B10  
##      1.7104       0.2021       0.2494
# Keep the group B items retained by the backward step: B9 and B10.
Group_QTDQ_B <- GroupB_QTDQ %>% dplyr::select(B9, B10)

# Row-wise average of the retained group B items.
Mean_QTDQ_B <- rowMeans(Group_QTDQ_B)
# Group C

GroupC_QTDQ <- QT_DQ %>% dplyr::select(C11:C14, Y.TB)

head(GroupC_QTDQ)
# Linear regression of Y.TB on every group C item.

# Stored under its own name so the group B fit held in model_QTDQ_B is
# not overwritten by the group C fit.
model_QTDQ_C <- lm(Y.TB ~ ., data = GroupC_QTDQ)

# Backward stepwise elimination to decide which items to keep.
step(model_QTDQ_C, direction = "backward")
## Start:  AIC=-122.13
## Y.TB ~ C11 + C12 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C12   1   0.04449 3.6033 -123.51
## - C11   1   0.05290 3.6117 -123.39
## - C14   1   0.11560 3.6744 -122.53
## <none>              3.5588 -122.13
## - C13   1   0.31632 3.8751 -119.87
## 
## Step:  AIC=-123.51
## Y.TB ~ C11 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C11   1   0.12526 3.7285 -123.80
## - C14   1   0.12947 3.7328 -123.74
## <none>              3.6033 -123.51
## - C13   1   0.38545 3.9887 -120.43
## 
## Step:  AIC=-123.8
## Y.TB ~ C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.7285 -123.80
## - C14   1   0.19553 3.9241 -123.25
## - C13   1   1.30114 5.0297 -110.83
## 
## Call:
## lm(formula = Y.TB ~ C13 + C14, data = GroupC_QTDQ)
## 
## Coefficients:
## (Intercept)          C13          C14  
##      2.1804       0.2486       0.1117
# Keep the group C items retained by the backward step: C13 and C14.
Group_QTDQ_C <- dplyr::select(GroupC_QTDQ, C13, C14)

# Row-wise average of the retained group C items.
Mean_QTDQ_C <- rowMeans(Group_QTDQ_C)
# Group D

GroupD_QTDQ <- dplyr::select(QT_DQ, D15:D16, Y.TB)

head(GroupD_QTDQ)
# Linear regression of Y.TB on every group D item.

model_QTDQ_D <- lm(formula = Y.TB ~ ., data = GroupD_QTDQ)

# Backward stepwise elimination to decide which items to keep.
step(model_QTDQ_D, direction = "backward")
## Start:  AIC=-120.35
## Y.TB ~ D15 + D16
## 
##        Df Sum of Sq    RSS     AIC
## - D16   1   0.05808 4.0529 -121.63
## <none>              3.9949 -120.35
## - D15   1   0.77166 4.7665 -113.52
## 
## Step:  AIC=-121.63
## Y.TB ~ D15
## 
##        Df Sum of Sq    RSS     AIC
## <none>              4.0529 -121.63
## - D15   1    2.1871 6.2400 -102.05
## 
## Call:
## lm(formula = Y.TB ~ D15, data = GroupD_QTDQ)
## 
## Coefficients:
## (Intercept)          D15  
##      2.1244       0.4255
# Keep the single group D item retained by the backward step: D15.
Group_QTDQ_D <- dplyr::select(GroupD_QTDQ, D15)

# Row-wise average over the one retained column.
Mean_QTDQ_D <- rowMeans(Group_QTDQ_D)
# Group E

GroupE_QTDQ <- dplyr::select(QT_DQ, E17:E20, Y.TB)

head(GroupE_QTDQ)
# Linear regression of Y.TB on every group E item.

model_QTDQ_E <- lm(formula = Y.TB ~ ., data = GroupE_QTDQ)

# Backward stepwise elimination to decide which items to keep.
step(model_QTDQ_E, direction = "backward")
## Start:  AIC=-108.09
## Y.TB ~ E17 + E18 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E18   1   0.01333 4.7259 -109.95
## - E17   1   0.04116 4.7537 -109.66
## <none>              4.7126 -108.09
## - E19   1   0.21163 4.9242 -107.89
## - E20   1   0.47560 5.1882 -105.28
## 
## Step:  AIC=-109.95
## Y.TB ~ E17 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E17   1   0.05826 4.7842 -111.34
## <none>              4.7259 -109.95
## - E19   1   0.19845 4.9244 -109.89
## - E20   1   0.47274 5.1987 -107.18
## 
## Step:  AIC=-111.34
## Y.TB ~ E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E19   1   0.16202 4.9462 -111.67
## <none>              4.7842 -111.34
## - E20   1   0.41746 5.2016 -109.15
## 
## Step:  AIC=-111.67
## Y.TB ~ E20
## 
##        Df Sum of Sq    RSS     AIC
## <none>              4.9462 -111.67
## - E20   1    1.2938 6.2400 -102.05
## 
## Call:
## lm(formula = Y.TB ~ E20, data = GroupE_QTDQ)
## 
## Coefficients:
## (Intercept)          E20  
##      2.5217       0.2483
# Keep the single group E item retained by the backward step: E20.
Group_QTDQ_E <- dplyr::select(GroupE_QTDQ, E20)

# Row-wise average over the one retained column.
Mean_QTDQ_E <- rowMeans(Group_QTDQ_E)
# Group F

GroupF_QTDQ <- dplyr::select(QT_DQ, F21:F23, Y.TB)

head(GroupF_QTDQ)
# Linear regression of Y.TB on every group F item.

model_QTDQ_F <- lm(formula = Y.TB ~ ., data = GroupF_QTDQ)

# Backward stepwise elimination to decide which items to keep.
step(model_QTDQ_F, direction = "backward")
## Start:  AIC=-127.3
## Y.TB ~ F21 + F22 + F23
## 
##        Df Sum of Sq    RSS     AIC
## - F21   1   0.11323 3.4533 -127.64
## <none>              3.3401 -127.30
## - F23   1   0.30638 3.6464 -124.91
## - F22   1   0.61774 3.9578 -120.82
## 
## Step:  AIC=-127.63
## Y.TB ~ F22 + F23
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.4533 -127.64
## - F23   1   0.70809 4.1614 -120.31
## - F22   1   1.64652 5.0998 -110.14
## 
## Call:
## lm(formula = Y.TB ~ F22 + F23, data = GroupF_QTDQ)
## 
## Coefficients:
## (Intercept)          F22          F23  
##      1.9577       0.2638       0.1972
# Keep the group F items retained by the backward step: F22 and F23.
Group_QTDQ_F <- dplyr::select(GroupF_QTDQ, F22, F23)

# Row-wise average of the retained group F items.
Mean_QTDQ_F <- rowMeans(Group_QTDQ_F)
# Group G

GroupG_QTDQ <- dplyr::select(QT_DQ, G24:G25, Y.TB)

head(GroupG_QTDQ)
# Linear regression of Y.TB on every group G item.

model_QTDQ_G <- lm(formula = Y.TB ~ ., data = GroupG_QTDQ)

# Backward stepwise elimination to decide which items to keep.
step(model_QTDQ_G, direction = "backward")
## Start:  AIC=-153.6
## Y.TB ~ G24 + G25
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.0546 -153.60
## - G24   1   0.65858 2.7131 -141.70
## - G25   1   0.99609 3.0507 -135.83
## 
## Call:
## lm(formula = Y.TB ~ G24 + G25, data = GroupG_QTDQ)
## 
## Coefficients:
## (Intercept)          G24          G25  
##      2.1169       0.2017       0.2413
# Both group G items were retained by the backward step: G24 and G25.
Group_QTDQ_G <- GroupG_QTDQ %>% dplyr::select(G24:G25)

# Row-wise average of the retained group G items.
Mean_QTDQ_G <- rowMeans(Group_QTDQ_G)
# Group H
# Taken from QT_DQ like every other group in this section; selecting from
# HVT would pair another project's H items with this project's response.
# NOTE(review): the step() output recorded below this block was produced
# from HVT and must be regenerated; re-check which H items are retained.
GroupH_QTDQ <- QT_DQ %>% dplyr::select(H26:H27, Y.TB)

head(GroupH_QTDQ)
# Linear regression of Y.TB on every group H item.

model_QTDQ_H <- lm(Y.TB ~ ., data = GroupH_QTDQ)

# Backward stepwise elimination to decide which items to keep.
step(model_QTDQ_H, direction = "backward")
## Start:  AIC=-150.96
## Y.TB ~ H26 + H27
## 
##        Df Sum of Sq    RSS     AIC
## <none>              2.1657 -150.96
## - H26   1   0.15469 2.3204 -149.51
## - H27   1   0.15469 2.3204 -149.51
## 
## Call:
## lm(formula = Y.TB ~ H26 + H27, data = GroupH_QTDQ)
## 
## Coefficients:
## (Intercept)          H26          H27  
##      2.6769       0.2011       0.2011
# Keep both group H items, H26 and H27.
Group_QTDQ_H <- dplyr::select(GroupH_QTDQ, H26:H27)

# Row-wise average of the group H items.
Mean_QTDQ_H <- rowMeans(Group_QTDQ_H)
# Combine the eight per-group means with the response into one data frame.

QTDQ_Dataset <- data.frame(
  Mean_A_QTDQ, Mean_QTDQ_B, Mean_QTDQ_C, Mean_QTDQ_D,
  Mean_QTDQ_E, Mean_QTDQ_F, Mean_QTDQ_G, Mean_QTDQ_H,
  Y_mean = QT_DQ$Y.TB
)

head(QTDQ_Dataset)
# Overall model on the group means

# Drop rows that contain NA before fitting.
QTDQ_Data <- na.omit(QTDQ_Dataset)

model_QTDQ <- lm(formula = Y_mean ~ ., data = QTDQ_Data)

# Backward stepwise elimination; in the output recorded below it drops
# Mean_QTDQ_C first and then Mean_QTDQ_H.
step(model_QTDQ, direction = "backward")
## Start:  AIC=-206.83
## Y_mean ~ Mean_A_QTDQ + Mean_QTDQ_B + Mean_QTDQ_C + Mean_QTDQ_D + 
##     Mean_QTDQ_E + Mean_QTDQ_F + Mean_QTDQ_G + Mean_QTDQ_H
## 
##               Df Sum of Sq     RSS     AIC
## - Mean_QTDQ_C  1   0.00373 0.56107 -208.50
## - Mean_QTDQ_H  1   0.01971 0.57705 -207.09
## <none>                     0.55734 -206.83
## - Mean_QTDQ_E  1   0.05786 0.61520 -203.89
## - Mean_QTDQ_F  1   0.07153 0.62888 -202.79
## - Mean_A_QTDQ  1   0.07403 0.63137 -202.59
## - Mean_QTDQ_B  1   0.08027 0.63761 -202.10
## - Mean_QTDQ_D  1   0.28404 0.84139 -188.24
## - Mean_QTDQ_G  1   0.81387 1.37122 -163.82
## 
## Step:  AIC=-208.5
## Y_mean ~ Mean_A_QTDQ + Mean_QTDQ_B + Mean_QTDQ_D + Mean_QTDQ_E + 
##     Mean_QTDQ_F + Mean_QTDQ_G + Mean_QTDQ_H
## 
##               Df Sum of Sq     RSS     AIC
## - Mean_QTDQ_H  1   0.01932 0.58039 -208.80
## <none>                     0.56107 -208.50
## - Mean_QTDQ_E  1   0.05964 0.62072 -205.44
## - Mean_QTDQ_F  1   0.07328 0.63435 -204.36
## - Mean_A_QTDQ  1   0.08482 0.64589 -203.46
## - Mean_QTDQ_B  1   0.12504 0.68612 -200.44
## - Mean_QTDQ_D  1   0.28702 0.84809 -189.84
## - Mean_QTDQ_G  1   0.86998 1.43106 -163.68
## 
## Step:  AIC=-208.8
## Y_mean ~ Mean_A_QTDQ + Mean_QTDQ_B + Mean_QTDQ_D + Mean_QTDQ_E + 
##     Mean_QTDQ_F + Mean_QTDQ_G
## 
##               Df Sum of Sq     RSS     AIC
## <none>                     0.58039 -208.80
## - Mean_QTDQ_E  1   0.06553 0.64592 -205.46
## - Mean_QTDQ_F  1   0.07457 0.65497 -204.76
## - Mean_A_QTDQ  1   0.07636 0.65675 -204.62
## - Mean_QTDQ_B  1   0.13066 0.71105 -200.65
## - Mean_QTDQ_D  1   0.29663 0.87702 -190.16
## - Mean_QTDQ_G  1   0.85871 1.43910 -165.40
## 
## Call:
## lm(formula = Y_mean ~ Mean_A_QTDQ + Mean_QTDQ_B + Mean_QTDQ_D + 
##     Mean_QTDQ_E + Mean_QTDQ_F + Mean_QTDQ_G, data = QTDQ_Data)
## 
## Coefficients:
## (Intercept)  Mean_A_QTDQ  Mean_QTDQ_B  Mean_QTDQ_D  Mean_QTDQ_E  
##      0.3451       0.1066       0.1294       0.2045       0.0682  
## Mean_QTDQ_F  Mean_QTDQ_G  
##      0.1159       0.2650
# Relative importance (LMG metric) of each regressor in the full
# eight-regressor model; rela = TRUE normalises the shares to sum to 100%.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
impo_QTDQ <- calc.relimp(model_QTDQ, type = "lmg", rela = TRUE)

impo_QTDQ
## Response variable: Y_mean 
## Total response variance: 0.1273469 
## Analysis based on 50 observations 
## 
## 8 Regressors: 
## Mean_A_QTDQ Mean_QTDQ_B Mean_QTDQ_C Mean_QTDQ_D Mean_QTDQ_E Mean_QTDQ_F Mean_QTDQ_G Mean_QTDQ_H 
## Proportion of variance explained by model: 91.07%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                     lmg
## Mean_A_QTDQ 0.053122739
## Mean_QTDQ_B 0.134249125
## Mean_QTDQ_C 0.105763902
## Mean_QTDQ_D 0.154453210
## Mean_QTDQ_E 0.064446935
## Mean_QTDQ_F 0.162608946
## Mean_QTDQ_G 0.320300292
## Mean_QTDQ_H 0.005054851
## 
## Average coefficients for different model sizes: 
## 
##                      1X         2Xs        3Xs         4Xs         5Xs
## Mean_A_QTDQ  0.27480916  0.20221703  0.1678415  0.15092259  0.14059282
## Mean_QTDQ_B  0.45172283  0.35705553  0.2830300  0.22625450  0.18379404
## Mean_QTDQ_C  0.37412383  0.28082392  0.2069819  0.14891749  0.10374998
## Mean_QTDQ_D  0.42549669  0.34761234  0.2983046  0.26530134  0.24168326
## Mean_QTDQ_E  0.24833174  0.17360601  0.1286089  0.10122888  0.08443230
## Mean_QTDQ_F  0.46845048  0.37747160  0.3139253  0.26534952  0.22415534
## Mean_QTDQ_G  0.44368520  0.39052085  0.3504380  0.32026558  0.29794239
## Mean_QTDQ_H -0.04958867 -0.03956291 -0.0343143 -0.03172652 -0.03062605
##                     6Xs         7Xs         8Xs
## Mean_A_QTDQ  0.13156637  0.12128510  0.10813258
## Mean_QTDQ_B  0.15286767  0.13077332  0.11512905
## Mean_QTDQ_C  0.06894059  0.04213296  0.02145706
## Mean_QTDQ_D  0.22396066  0.21046466  0.20056877
## Mean_QTDQ_E  0.07415934  0.06790855  0.06438212
## Mean_QTDQ_F  0.18624819  0.14971692  0.11362678
## Mean_QTDQ_G  0.28184125  0.27061892  0.26353678
## Mean_QTDQ_H -0.03030871 -0.03037805 -0.03049185
# Remove the two regressors eliminated by the backward step. They are
# columns 3 and 8 of QTDQ_Data; dropping them by name keeps the result
# correct even if the column order of QTDQ_Data changes.
df_QTDQ <- QTDQ_Data[
  setdiff(names(QTDQ_Data), c("Mean_QTDQ_C", "Mean_QTDQ_H"))
]

# Refit on the six remaining group means.
model_QTDQ1 <- lm(Y_mean ~ ., data = df_QTDQ)

# Relative importance (LMG), normalised to sum to 100%.
impo_QTDQ1 <- calc.relimp(model_QTDQ1, type = "lmg", rela = TRUE)

impo_QTDQ1
## Response variable: Y_mean 
## Total response variance: 0.1273469 
## Analysis based on 50 observations 
## 
## 6 Regressors: 
## Mean_A_QTDQ Mean_QTDQ_B Mean_QTDQ_D Mean_QTDQ_E Mean_QTDQ_F Mean_QTDQ_G 
## Proportion of variance explained by model: 90.7%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                    lmg
## Mean_A_QTDQ 0.06241256
## Mean_QTDQ_B 0.16567546
## Mean_QTDQ_D 0.16684549
## Mean_QTDQ_E 0.07572431
## Mean_QTDQ_F 0.17835793
## Mean_QTDQ_G 0.35098424
## 
## Average coefficients for different model sizes: 
## 
##                    1X       2Xs       3Xs        4Xs        5Xs        6Xs
## Mean_A_QTDQ 0.2748092 0.2044009 0.1751161 0.15573402 0.13415564 0.10660185
## Mean_QTDQ_B 0.4517228 0.3496473 0.2680080 0.20520074 0.16050315 0.12944075
## Mean_QTDQ_D 0.4254967 0.3370496 0.2811946 0.24379673 0.21896616 0.20447090
## Mean_QTDQ_E 0.2483317 0.1660253 0.1189328 0.09093746 0.07547558 0.06820128
## Mean_QTDQ_F 0.4684505 0.3627494 0.2876096 0.22674573 0.17073782 0.11586752
## Mean_QTDQ_G 0.4436852 0.3846847 0.3384098 0.30294466 0.27904175 0.26496110
# Refit the model on a separately stored data set.
# NOTE(review): in the output recorded below, the 1X coefficients of
# Mean_QTDQ_B, _D, _F and _G differ from those obtained with df_QTDQ
# (A and E match) and explained variance falls from 90.7% to 45.59%, so
# this file is not a plain export of df_QTDQ -- confirm its provenance.

df4 <- read.csv(
  "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/QTDQ_final.csv",
  header = TRUE
)

# Fit Y_mean on every other column of df4.
model_QTDQ2 <- lm(Y_mean ~ ., data = df4)

# Relative importance (LMG), normalised to sum to 100%.
impo_QTDQ2 <- calc.relimp(model_QTDQ2, type = "lmg", rela = TRUE)

impo_QTDQ2
## Response variable: Y_mean 
## Total response variance: 0.1273469 
## Analysis based on 50 observations 
## 
## 6 Regressors: 
## Mean_A_QTDQ Mean_QTDQ_B Mean_QTDQ_D Mean_QTDQ_E Mean_QTDQ_F Mean_QTDQ_G 
## Proportion of variance explained by model: 45.59%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                    lmg
## Mean_A_QTDQ 0.15490422
## Mean_QTDQ_B 0.15257769
## Mean_QTDQ_D 0.11459642
## Mean_QTDQ_E 0.41127868
## Mean_QTDQ_F 0.09047678
## Mean_QTDQ_G 0.07616621
## 
## Average coefficients for different model sizes: 
## 
##                     1X        2Xs        3Xs        4Xs        5Xs
## Mean_A_QTDQ 0.27480916 0.24600396 0.21713050 0.18850831 0.16049168
## Mean_QTDQ_B 0.20799731 0.19186254 0.17682661 0.16284380 0.14982971
## Mean_QTDQ_D 0.11464968 0.10987097 0.10670065 0.10474759 0.10363788
## Mean_QTDQ_E 0.24833174 0.24286656 0.23879157 0.23598393 0.23425084
## Mean_QTDQ_F 0.09365385 0.09585833 0.09815882 0.10062208 0.10326898
## Mean_QTDQ_G 0.10545906 0.09382320 0.08397959 0.07595216 0.06970356
##                    6Xs
## Mean_A_QTDQ 0.13348820
## Mean_QTDQ_B 0.13766271
## Mean_QTDQ_D 0.10306181
## Mean_QTDQ_E 0.23336925
## Mean_QTDQ_F 0.10608592
## Mean_QTDQ_G 0.06511803

Tuc Duyen Project

# Tuc Duyen project: load the semicolon-separated data for this project.
TD <- read.csv(
  file = "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/Tuc_Duyen.csv",
  sep = ";"
)

# Preview the first few rows.

head(TD)
# Group A
library(tidyverse)

# Keep the group A items (A1..A5) together with the response Y.TB.
GroupA_TD <- dplyr::select(TD, A1:A5, Y.TB)

head(GroupA_TD)
# Linear regression of Y.TB on every group A item.

model_TD_A <- lm(formula = Y.TB ~ ., data = GroupA_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_A, direction = "backward")
## Start:  AIC=-119.91
## Y.TB ~ A1 + A2 + A3 + A4 + A5
## 
## 
## Step:  AIC=-119.91
## Y.TB ~ A1 + A2 + A3 + A5
## 
## 
## Step:  AIC=-119.91
## Y.TB ~ A1 + A3 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A5    1   0.03103 3.9035 -121.51
## - A3    1   0.11448 3.9869 -120.45
## <none>              3.8724 -119.91
## - A1    1   1.68646 5.5589 -103.83
## 
## Step:  AIC=-121.51
## Y.TB ~ A1 + A3
## 
##        Df Sum of Sq    RSS      AIC
## - A3    1   0.08678 3.9902 -122.408
## <none>              3.9035 -121.508
## - A1    1   2.52922 6.4327  -98.531
## 
## Step:  AIC=-122.41
## Y.TB ~ A1
## 
##        Df Sum of Sq    RSS      AIC
## <none>              3.9902 -122.408
## - A1    1     5.226 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ A1, data = GroupA_TD)
## 
## Coefficients:
## (Intercept)           A1  
##      1.4335       0.6003
# Only A1 was retained by the backward step. Mean_A_TD is kept as a
# one-column data frame (column name A1), not a numeric vector.
Mean_A_TD <- dplyr::select(GroupA_TD, A1)
# Group B

GroupB_TD <- dplyr::select(TD, B6:B10, Y.TB)

head(GroupB_TD)
# Linear regression of Y.TB on every group B item.

model_TD_B <- lm(formula = Y.TB ~ ., data = GroupB_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_B, direction = "backward")
## Start:  AIC=-103.78
## Y.TB ~ B6 + B7 + B8 + B9 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B9    1  0.035637 4.9713 -105.42
## - B7    1  0.043742 4.9794 -105.33
## - B8    1  0.096939 5.0326 -104.80
## - B6    1  0.144114 5.0798 -104.34
## - B10   1  0.169626 5.1053 -104.09
## <none>              4.9357 -103.78
## 
## Step:  AIC=-105.42
## Y.TB ~ B6 + B7 + B8 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B7    1  0.044136 5.0155 -106.97
## - B8    1  0.061751 5.0331 -106.80
## - B10   1  0.168734 5.1401 -105.75
## <none>              4.9713 -105.42
## - B6    1  0.299166 5.2705 -104.50
## 
## Step:  AIC=-106.97
## Y.TB ~ B6 + B8 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B8    1   0.01895 5.0344 -108.79
## - B10   1   0.12473 5.1402 -107.75
## <none>              5.0155 -106.97
## - B6    1   0.80275 5.8182 -101.55
## 
## Step:  AIC=-108.79
## Y.TB ~ B6 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B10   1   0.11168 5.1461 -109.69
## <none>              5.0344 -108.79
## - B6    1   0.79964 5.8341 -103.42
## 
## Step:  AIC=-109.69
## Y.TB ~ B6
## 
##        Df Sum of Sq    RSS      AIC
## <none>              5.1461 -109.689
## - B6    1    4.0702 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ B6, data = GroupB_TD)
## 
## Coefficients:
## (Intercept)           B6  
##      1.8617       0.5031
# Only B6 was retained by the backward step; kept as a one-column data
# frame (column name B6).
Mean_TD_B <- dplyr::select(GroupB_TD, B6)
# Group C

GroupC_TD <- dplyr::select(TD, C11:C14, Y.TB)

head(GroupC_TD)
# Linear regression of Y.TB on every group C item.

model_TD_C <- lm(formula = Y.TB ~ ., data = GroupC_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_C, direction = "backward")
## Start:  AIC=-130.26
## Y.TB ~ C11 + C12 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C12   1  0.005585 3.0306 -132.16
## - C14   1  0.023627 3.0486 -131.87
## - C11   1  0.026010 3.0510 -131.83
## <none>              3.0250 -130.26
## - C13   1  0.309794 3.3348 -127.38
## 
## Step:  AIC=-132.16
## Y.TB ~ C11 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C11   1   0.02268 3.0532 -133.79
## - C14   1   0.03460 3.0652 -133.60
## <none>              3.0306 -132.16
## - C13   1   0.32660 3.3572 -129.05
## 
## Step:  AIC=-133.79
## Y.TB ~ C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.0532 -133.79
## - C14   1   0.13692 3.1902 -133.60
## - C13   1   0.93804 3.9913 -122.40
## 
## Call:
## lm(formula = Y.TB ~ C13 + C14, data = GroupC_TD)
## 
## Coefficients:
## (Intercept)          C13          C14  
##      1.0679       0.5160       0.1685
# Keep the group C items retained by the backward step: C13 and C14.
Group_TD_C <- dplyr::select(GroupC_TD, C13, C14)

# Row-wise average of the retained group C items.
Mean_TD_C <- rowMeans(Group_TD_C)
# Group D

GroupD_TD <- dplyr::select(TD, D15:D16, Y.TB)

head(GroupD_TD)
# Linear regression of Y.TB on every group D item.

model_TD_D <- lm(formula = Y.TB ~ ., data = GroupD_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_D, direction = "backward")
## Start:  AIC=-132.03
## Y.TB ~ D15 + D16
## 
##        Df Sum of Sq    RSS     AIC
## - D16   1   0.12063 3.2835 -132.16
## <none>              3.1629 -132.03
## - D15   1   2.23693 5.3998 -107.28
## 
## Step:  AIC=-132.16
## Y.TB ~ D15
## 
##        Df Sum of Sq    RSS      AIC
## <none>              3.2835 -132.156
## - D15   1    5.9328 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ D15, data = GroupD_TD)
## 
## Coefficients:
## (Intercept)          D15  
##       2.053        0.511
# Only D15 was retained by the backward step; kept as a one-column data
# frame (column name D15).
Mean_TD_D <- dplyr::select(GroupD_TD, D15)
# Group E

GroupE_TD <- dplyr::select(TD, E17:E20, Y.TB)

head(GroupE_TD)
# Linear regression of Y.TB on every group E item.

model_TD_E <- lm(formula = Y.TB ~ ., data = GroupE_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_E, direction = "backward")
## Start:  AIC=-114.07
## Y.TB ~ E17 + E18 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E19   1   0.00758 4.1886 -115.98
## - E18   1   0.08016 4.2612 -115.12
## - E20   1   0.10782 4.2888 -114.80
## <none>              4.1810 -114.07
## - E17   1   1.17514 5.3561 -103.69
## 
## Step:  AIC=-115.98
## Y.TB ~ E17 + E18 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E18   1   0.07718 4.2658 -117.07
## - E20   1   0.12759 4.3162 -116.48
## <none>              4.1886 -115.98
## - E17   1   1.31521 5.5038 -104.33
## 
## Step:  AIC=-117.07
## Y.TB ~ E17 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E20   1   0.06456 4.3303 -118.32
## <none>              4.2658 -117.07
## - E17   1   1.39736 5.6631 -104.90
## 
## Step:  AIC=-118.32
## Y.TB ~ E17
## 
##        Df Sum of Sq    RSS      AIC
## <none>              4.3303 -118.319
## - E17   1    4.8859 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ E17, data = GroupE_TD)
## 
## Coefficients:
## (Intercept)          E17  
##      1.4517       0.6252
# Only E17 was retained by the backward step; kept as a one-column data
# frame (column name E17).
Mean_TD_E <- dplyr::select(GroupE_TD, E17)
# Group F

GroupF_TD <- dplyr::select(TD, F21:F23, Y.TB)

head(GroupF_TD)
# Linear regression of Y.TB on every group F item.

model_TD_F <- lm(formula = Y.TB ~ ., data = GroupF_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_F, direction = "backward")
## Start:  AIC=-113.61
## Y.TB ~ F21 + F22 + F23
## 
##        Df Sum of Sq    RSS     AIC
## - F22   1   0.00005 4.3924 -115.61
## <none>              4.3924 -113.61
## - F21   1   0.31252 4.7049 -112.17
## - F23   1   1.29280 5.6852 -102.71
## 
## Step:  AIC=-115.61
## Y.TB ~ F21 + F23
## 
##        Df Sum of Sq    RSS     AIC
## <none>              4.3924 -115.61
## - F21   1   0.84959 5.2420 -108.77
## - F23   1   1.31113 5.7036 -104.55
## 
## Call:
## lm(formula = Y.TB ~ F21 + F23, data = GroupF_TD)
## 
## Coefficients:
## (Intercept)          F21          F23  
##      1.2040       0.3371       0.3862
# Keep the group F items retained by the backward step: F21 and F23.
Group_TD_F <- dplyr::select(GroupF_TD, F21, F23)

# Row-wise average of the retained group F items.
Mean_TD_F <- rowMeans(Group_TD_F)
# Group G

GroupG_TD <- dplyr::select(TD, G24:G25, Y.TB)

head(GroupG_TD)
# Linear regression of Y.TB on every group G item.

model_TD_G <- lm(formula = Y.TB ~ ., data = GroupG_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_G, direction = "backward")
## Start:  AIC=-115.46
## Y.TB ~ G24 + G25
## 
##        Df Sum of Sq    RSS     AIC
## - G25   1   0.17929 4.5851 -115.46
## <none>              4.4058 -115.45
## - G24   1   1.48406 5.8898 -102.94
## 
## Step:  AIC=-115.46
## Y.TB ~ G24
## 
##        Df Sum of Sq    RSS      AIC
## <none>              4.5851 -115.461
## - G24   1    4.6312 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ G24, data = GroupG_TD)
## 
## Coefficients:
## (Intercept)          G24  
##      2.1645       0.4922
# Only G24 was retained by the backward step; kept as a one-column data
# frame (column name G24).
Mean_TD_G <- dplyr::select(GroupG_TD, G24)
# Group H
GroupH_TD <- dplyr::select(TD, H26:H27, Y.TB)

head(GroupH_TD)
# Linear regression of Y.TB on every group H item.

model_TD_H <- lm(formula = Y.TB ~ ., data = GroupH_TD)

# Backward stepwise elimination to decide which items to keep.
step(model_TD_H, direction = "backward")
## Start:  AIC=-114.75
## Y.TB ~ H26 + H27
## 
## 
## Step:  AIC=-114.75
## Y.TB ~ H26
## 
##        Df Sum of Sq    RSS      AIC
## <none>              4.6506 -114.751
## - H26   1    4.5656 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ H26, data = GroupH_TD)
## 
## Coefficients:
## (Intercept)          H26  
##      2.6436       0.3874
# Only H26 was retained by the backward step; kept as a one-column data
# frame (column name H26).
Mean_TD_H <- dplyr::select(GroupH_TD, H26)
# Combine the per-group predictors with the response into one data frame.

TD_Dataset <- data.frame(
  Mean_A_TD, Mean_TD_B, Mean_TD_C, Mean_TD_D,
  Mean_TD_E, Mean_TD_F, Mean_TD_G, Mean_TD_H,
  Y_mean = TD$Y.TB
)

# The single-item groups arrive under their item names (A1, B6, D15, E17,
# G24, H26); rename them so every column follows the Mean_TD_<group> scheme.
TD_Dataset1 <- dplyr::select(
  TD_Dataset,
  Mean_TD_A = A1,
  Mean_TD_B = B6,
  Mean_TD_C,
  Mean_TD_D = D15,
  Mean_TD_E = E17,
  Mean_TD_F,
  Mean_TD_G = G24,
  Mean_TD_H = H26,
  Y_mean
)

head(TD_Dataset1)
# Overall model on the group predictors

# Drop rows that contain NA before fitting.
TD_Data <- na.omit(TD_Dataset1)

model_TD <- lm(formula = Y_mean ~ ., data = TD_Data)

# Backward stepwise elimination; in the output recorded below it drops
# Mean_TD_C only.
step(model_TD, direction = "backward")
## Start:  AIC=-225.95
## Y_mean ~ Mean_TD_A + Mean_TD_B + Mean_TD_C + Mean_TD_D + Mean_TD_E + 
##     Mean_TD_F + Mean_TD_G + Mean_TD_H
## 
##             Df Sum of Sq     RSS     AIC
## - Mean_TD_C  1   0.00612 0.38632 -227.16
## <none>                   0.38020 -225.95
## - Mean_TD_G  1   0.02282 0.40302 -225.04
## - Mean_TD_D  1   0.03336 0.41356 -223.75
## - Mean_TD_B  1   0.06043 0.44062 -220.58
## - Mean_TD_A  1   0.07309 0.45329 -219.16
## - Mean_TD_H  1   0.13696 0.51716 -212.57
## - Mean_TD_F  1   0.14040 0.52060 -212.24
## - Mean_TD_E  1   0.32618 0.70638 -196.98
## 
## Step:  AIC=-227.16
## Y_mean ~ Mean_TD_A + Mean_TD_B + Mean_TD_D + Mean_TD_E + Mean_TD_F + 
##     Mean_TD_G + Mean_TD_H
## 
##             Df Sum of Sq     RSS     AIC
## <none>                   0.38632 -227.16
## - Mean_TD_G  1   0.02376 0.41007 -226.17
## - Mean_TD_D  1   0.03746 0.42377 -224.53
## - Mean_TD_B  1   0.09819 0.48450 -217.83
## - Mean_TD_H  1   0.13512 0.52143 -214.16
## - Mean_TD_A  1   0.13663 0.52295 -214.01
## - Mean_TD_F  1   0.14232 0.52864 -213.47
## - Mean_TD_E  1   0.38812 0.77443 -194.38
## 
## Call:
## lm(formula = Y_mean ~ Mean_TD_A + Mean_TD_B + Mean_TD_D + Mean_TD_E + 
##     Mean_TD_F + Mean_TD_G + Mean_TD_H, data = TD_Data)
## 
## Coefficients:
## (Intercept)    Mean_TD_A    Mean_TD_B    Mean_TD_D    Mean_TD_E  
##    -0.11304      0.18352      0.13964      0.07061      0.24869  
##   Mean_TD_F    Mean_TD_G    Mean_TD_H  
##     0.20402      0.06481      0.11670
# Relative importance (LMG metric) of each regressor in the full
# eight-regressor model; rela = TRUE normalises the shares to sum to 100%.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
impo_TD <- calc.relimp(model_TD, type = "lmg", rela = TRUE)

impo_TD
## Response variable: Y_mean 
## Total response variance: 0.1880867 
## Analysis based on 50 observations 
## 
## 8 Regressors: 
## Mean_TD_A Mean_TD_B Mean_TD_C Mean_TD_D Mean_TD_E Mean_TD_F Mean_TD_G Mean_TD_H 
## Proportion of variance explained by model: 95.87%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                 lmg
## Mean_TD_A 0.1266422
## Mean_TD_B 0.0975758
## Mean_TD_C 0.1428073
## Mean_TD_D 0.1414714
## Mean_TD_E 0.1453879
## Mean_TD_F 0.1221689
## Mean_TD_G 0.1062973
## Mean_TD_H 0.1176492
## 
## Average coefficients for different model sizes: 
## 
##                  1X       2Xs       3Xs       4Xs       5Xs       6Xs
## Mean_TD_A 0.6003448 0.3996842 0.2960067 0.2394643 0.2060983 0.1847161
## Mean_TD_B 0.5031095 0.2747213 0.1964282 0.1579665 0.1369362 0.1266942
## Mean_TD_C 0.6552154 0.5059033 0.3879776 0.2946510 0.2175202 0.1516617
## Mean_TD_D 0.5110035 0.3682782 0.2734949 0.2068829 0.1574700 0.1188848
## Mean_TD_E 0.6252000 0.4219083 0.3382073 0.2961991 0.2729132 0.2588330
## Mean_TD_F 0.7258065 0.4748092 0.3568042 0.2903734 0.2519661 0.2295412
## Mean_TD_G 0.4921548 0.3114798 0.2206043 0.1664205 0.1291806 0.1017092
## Mean_TD_H 0.3874096 0.2488189 0.1910053 0.1602067 0.1424402 0.1316236
##                  7Xs        8Xs
## Mean_TD_A 0.16969810 0.15891382
## Mean_TD_B 0.12300745 0.12232637
## Mean_TD_C 0.09660745 0.05296160
## Mean_TD_D 0.08892368 0.06713274
## Mean_TD_E 0.24865487 0.23890922
## Mean_TD_F 0.21492491 0.20273078
## Mean_TD_G 0.08058631 0.06356281
## Mean_TD_H 0.12407681 0.11756205
# Refit without the regressor eliminated by the backward step. It is
# column 3 of TD_Data; dropping it by name keeps the result correct even
# if the column order of TD_Data changes.

TD_refit <- TD_Data[setdiff(names(TD_Data), "Mean_TD_C")]

model_TD1 <- lm(Y_mean ~ ., data = TD_refit)

# Relative importance (LMG), normalised to sum to 100%.
impo_TD_refit <- calc.relimp(model_TD1, type = "lmg", rela = TRUE)

impo_TD_refit
## Response variable: Y_mean 
## Total response variance: 0.1880867 
## Analysis based on 50 observations 
## 
## 7 Regressors: 
## Mean_TD_A Mean_TD_B Mean_TD_D Mean_TD_E Mean_TD_F Mean_TD_G Mean_TD_H 
## Proportion of variance explained by model: 95.81%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                 lmg
## Mean_TD_A 0.1624554
## Mean_TD_B 0.1278846
## Mean_TD_D 0.1603391
## Mean_TD_E 0.1670193
## Mean_TD_F 0.1368335
## Mean_TD_G 0.1173167
## Mean_TD_H 0.1281513
## 
## Average coefficients for different model sizes: 
## 
##                  1X       2Xs       3Xs       4Xs       5Xs        6Xs
## Mean_TD_A 0.6003448 0.4352097 0.3414507 0.2856278 0.2459157 0.21263520
## Mean_TD_B 0.5031095 0.3225128 0.2547261 0.2128091 0.1826776 0.16006619
## Mean_TD_D 0.5110035 0.3762770 0.2779644 0.2036527 0.1460169 0.10065140
## Mean_TD_E 0.6252000 0.4327898 0.3511939 0.3091512 0.2832601 0.26436449
## Mean_TD_F 0.7258065 0.4823348 0.3606533 0.2902007 0.2490740 0.22418490
## Mean_TD_G 0.4921548 0.3137445 0.2152868 0.1544833 0.1131839 0.08425795
## Mean_TD_H 0.3874096 0.2497038 0.1873404 0.1537643 0.1356577 0.12557784
##                  7Xs
## Mean_TD_A 0.18352142
## Mean_TD_B 0.13964474
## Mean_TD_D 0.07061295
## Mean_TD_E 0.24868932
## Mean_TD_F 0.20401991
## Mean_TD_G 0.06480961
## Mean_TD_H 0.11669835
# Variant data set in which two regressors are replaced by random draws.
# NOTE(review): Mean_TD_H and Mean_TD_D below are simulated values, not
# observed data, so model_TD2 / impo_TD2 do not describe the real
# responses for these two groups -- confirm this is intended before any
# of these results are reported.
# NOTE(review): ceiling(runif(n, 1, 5)) only yields 2, 3, 4 or 5 (never
# 1); if a 1..5 scale was intended this needs changing -- confirm.

set.seed(123)

# These assignments also overwrite the global Mean_TD_H / Mean_TD_D
# objects, which held one-column data frames up to this point.
Mean_TD_H <- ceiling(runif(nrow(TD_Data), 1, 5))

Mean_TD_D <- ceiling(runif(nrow(TD_Data), 1, 5))

df2 <- TD_refit

df2$Mean_TD_H <- Mean_TD_H

df2$Mean_TD_D <- Mean_TD_D

# Fit Y_mean on every other column of df2.
model_TD2 <- lm(Y_mean ~ ., data = df2)

# Relative importance (LMG), normalised to sum to 100%.
impo_TD2 <- calc.relimp(model_TD2, type = "lmg", rela = TRUE)

impo_TD2
## Response variable: Y_mean 
## Total response variance: 0.1880867 
## Analysis based on 50 observations 
## 
## 7 Regressors: 
## Mean_TD_A Mean_TD_B Mean_TD_D Mean_TD_E Mean_TD_F Mean_TD_G Mean_TD_H 
## Proportion of variance explained by model: 93.13%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                   lmg
## Mean_TD_A 0.206076470
## Mean_TD_B 0.154277892
## Mean_TD_D 0.009705237
## Mean_TD_E 0.247715283
## Mean_TD_F 0.184206323
## Mean_TD_G 0.196481550
## Mean_TD_H 0.001537246
## 
## Average coefficients for different model sizes: 
## 
##                     1X         2Xs          3Xs          4Xs          5Xs
## Mean_TD_A  0.600344828  0.49760003  0.410947750  0.340619526  0.287536117
## Mean_TD_B  0.503109453  0.37455308  0.289516526  0.232105862  0.188820087
## Mean_TD_D -0.056890299 -0.03696753 -0.025964555 -0.019180501 -0.013991506
## Mean_TD_E  0.625200000  0.51606236  0.443610821  0.397838656  0.370229246
## Mean_TD_F  0.725806452  0.57536394  0.460463453  0.369844281  0.293755582
## Mean_TD_G  0.492154812  0.39017249  0.319816500  0.271506042  0.238070254
## Mean_TD_H -0.002871779  0.00142721  0.003869779  0.005915035  0.007941248
##                    6Xs          7Xs
## Mean_TD_A  0.253345871  0.240181526
## Mean_TD_B  0.148321371  0.101563683
## Mean_TD_D -0.008793949 -0.002025106
## Mean_TD_E  0.354717689  0.348600793
## Mean_TD_F  0.224137564  0.154501031
## Mean_TD_G  0.215481489  0.203588721
## Mean_TD_H  0.009900548  0.011710299

Phan Dinh Phung Project

# Phan Dinh Phung project: load the semicolon-separated data.

PDP <- read.csv(
  file = "https://raw.githubusercontent.com/tuyenhavan/Statistics/Son-Data/P_DInh_pHung.csv",
  sep = ";"
)

# Preview the first few rows.

head(PDP)
# Group A
library(tidyverse)

library(dplyr)

# Keep the group A items (A1..A5) together with the response Y.TB.
GroupA_PDP <- dplyr::select(PDP, A1:A5, Y.TB)

head(GroupA_PDP)
# Linear regression of Y.TB on every group A item.

model_PDP_A <- lm(formula = Y.TB ~ ., data = GroupA_PDP)

# Backward stepwise elimination to decide which items to keep.
step(model_PDP_A, direction = "backward")
## Start:  AIC=-88.25
## Y.TB ~ A1 + A2 + A3 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A2    1   0.00740 6.7402 -90.196
## - A4    1   0.07933 6.8122 -89.666
## - A3    1   0.10811 6.8409 -89.455
## - A1    1   0.21363 6.9465 -88.690
## <none>              6.7328 -88.251
## - A5    1   0.94620 7.6790 -83.676
## 
## Step:  AIC=-90.2
## Y.TB ~ A1 + A3 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## - A3    1   0.10126 6.8415 -91.451
## - A4    1   0.10141 6.8416 -91.450
## <none>              6.7402 -90.196
## - A1    1   0.52733 7.2676 -88.430
## - A5    1   0.94459 7.6848 -85.639
## 
## Step:  AIC=-91.45
## Y.TB ~ A1 + A4 + A5
## 
##        Df Sum of Sq    RSS     AIC
## <none>              6.8415 -91.451
## - A1    1   0.42866 7.2702 -90.412
## - A5    1   1.26593 8.1074 -84.962
## - A4    1   1.89965 8.7411 -81.199
## 
## Call:
## lm(formula = Y.TB ~ A1 + A4 + A5, data = GroupA_PDP)
## 
## Coefficients:
## (Intercept)           A1           A4           A5  
##      3.6848       0.2222      -0.5014       0.3817
Group_PDP_A<- GroupA_PDP %>% dplyr::select(A1,A4,A5) # Only variables A2, A3 and A5 should be selected 

Mean_PDP_A<-rowMeans(Group_PDP_A)
# Group B: columns B6 through B10 together with the response Y.TB.
GroupB_PDP <- dplyr::select(PDP, B6:B10, Y.TB)

# Preview the first rows of the subset.
head(GroupB_PDP)

# Full linear model: Y.TB regressed on every Group B column.
model_PDP_B <- lm(Y.TB ~ ., data = GroupB_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_B, direction = "backward")
## Start:  AIC=-105.46
## Y.TB ~ B6 + B7 + B8 + B9 + B10
## 
##        Df Sum of Sq    RSS      AIC
## - B9    1   0.00006 4.7719 -107.464
## - B6    1   0.00107 4.7729 -107.453
## - B7    1   0.07136 4.8432 -106.722
## <none>              4.7719 -105.464
## - B8    1   0.35498 5.1268 -103.877
## - B10   1   0.90067 5.6725  -98.819
## 
## Step:  AIC=-107.46
## Y.TB ~ B6 + B7 + B8 + B10
## 
##        Df Sum of Sq    RSS     AIC
## - B6    1   0.00113 4.7730 -109.45
## - B7    1   0.07894 4.8509 -108.64
## <none>              4.7719 -107.46
## - B8    1   0.41175 5.1837 -105.33
## - B10   1   0.90407 5.6760 -100.79
## 
## Step:  AIC=-109.45
## Y.TB ~ B7 + B8 + B10
## 
##        Df Sum of Sq    RSS      AIC
## - B7    1   0.07819 4.8512 -110.639
## <none>              4.7730 -109.452
## - B8    1   0.42137 5.1944 -107.222
## - B10   1   2.61190 7.3849  -89.629
## 
## Step:  AIC=-110.64
## Y.TB ~ B8 + B10
## 
##        Df Sum of Sq    RSS     AIC
## <none>              4.8512 -110.64
## - B8    1    0.3434 5.1946 -109.22
## - B10   1    4.1416 8.9929  -81.78
## 
## Call:
## lm(formula = Y.TB ~ B8 + B10, data = GroupB_PDP)
## 
## Coefficients:
## (Intercept)           B8          B10  
##      2.5502      -0.1938       0.5441
# Keep the two variables left in the final step() model: B8 and B10.
Group_PDP_B <- dplyr::select(GroupB_PDP, B8, B10)

# Row-wise mean of the retained Group B variables.
Mean_PDP_B <- rowMeans(Group_PDP_B)
# Group C

# Columns C11 through C14 plus the response Y.TB, taken from PDP.
# Fixed: this subset was previously built from TD (a different data frame),
# so the Group C model of the Phan Dinh Phung project was fitted on the wrong
# data.
# NOTE(review): the recorded step() output that follows was produced before
# this fix; regenerate it and re-check which variables Group_PDP_C should keep.
GroupC_PDP <- PDP %>% dplyr::select(C11:C14, Y.TB)

# Preview the first rows of the subset.
head(GroupC_PDP)

# Full linear model: Y.TB regressed on every Group C column.
model_PDP_C <- lm(Y.TB ~ ., data = GroupC_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_C, direction = "backward")
## Start:  AIC=-130.26
## Y.TB ~ C11 + C12 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C12   1  0.005585 3.0306 -132.16
## - C14   1  0.023627 3.0486 -131.87
## - C11   1  0.026010 3.0510 -131.83
## <none>              3.0250 -130.26
## - C13   1  0.309794 3.3348 -127.38
## 
## Step:  AIC=-132.16
## Y.TB ~ C11 + C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## - C11   1   0.02268 3.0532 -133.79
## - C14   1   0.03460 3.0652 -133.60
## <none>              3.0306 -132.16
## - C13   1   0.32660 3.3572 -129.05
## 
## Step:  AIC=-133.79
## Y.TB ~ C13 + C14
## 
##        Df Sum of Sq    RSS     AIC
## <none>              3.0532 -133.79
## - C14   1   0.13692 3.1902 -133.60
## - C13   1   0.93804 3.9913 -122.40
## 
## Call:
## lm(formula = Y.TB ~ C13 + C14, data = GroupC_PDP)
## 
## Coefficients:
## (Intercept)          C13          C14  
##      1.0679       0.5160       0.1685
# Keep the two variables left in the final step() model: C13 and C14.
Group_PDP_C <- dplyr::select(GroupC_PDP, C13, C14)

# Row-wise mean of the retained Group C variables.
Mean_PDP_C <- rowMeans(Group_PDP_C)
# Group D

# Columns D15 and D16 plus the response Y.TB, taken from PDP.
# Fixed: this subset was previously built from TD (a different data frame),
# so the Group D model of the Phan Dinh Phung project was fitted on the wrong
# data.
# NOTE(review): the recorded step() output that follows was produced before
# this fix; regenerate it and re-check which variable Mean_PDP_D should use.
GroupD_PDP <- PDP %>% dplyr::select(D15:D16, Y.TB)

# Preview the first rows of the subset.
head(GroupD_PDP)

# Full linear model: Y.TB regressed on both Group D columns.
model_PDP_D <- lm(Y.TB ~ ., data = GroupD_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_D, direction = "backward")
## Start:  AIC=-132.03
## Y.TB ~ D15 + D16
## 
##        Df Sum of Sq    RSS     AIC
## - D16   1   0.12063 3.2835 -132.16
## <none>              3.1629 -132.03
## - D15   1   2.23693 5.3998 -107.28
## 
## Step:  AIC=-132.16
## Y.TB ~ D15
## 
##        Df Sum of Sq    RSS      AIC
## <none>              3.2835 -132.156
## - D15   1    5.9328 9.2162  -82.553
## 
## Call:
## lm(formula = Y.TB ~ D15, data = GroupD_PDP)
## 
## Coefficients:
## (Intercept)          D15  
##       2.053        0.511
# Group D keeps a single variable (D15), so there is nothing to average.
# Extract it as a plain vector rather than a one-column data frame: a
# one-column data frame passed to data.frame() keeps its own column name
# (D15), which hides the Mean_PDP_D label in the combined data set and in the
# final model. The former self-assignment was a no-op and has been removed.
Mean_PDP_D <- GroupD_PDP[["D15"]]
# Group E: columns E17 through E20 together with the response Y.TB.
GroupE_PDP <- dplyr::select(PDP, E17:E20, Y.TB)

# Preview the first rows of the subset.
head(GroupE_PDP)

# Full linear model: Y.TB regressed on every Group E column.
model_PDP_E <- lm(Y.TB ~ ., data = GroupE_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_E, direction = "backward")
## Start:  AIC=-89.5
## Y.TB ~ E17 + E18 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E18   1  0.016517 6.8507 -91.384
## - E20   1  0.027743 6.8619 -91.302
## - E19   1  0.102009 6.9362 -90.764
## <none>              6.8342 -89.504
## - E17   1  0.286524 7.1207 -89.451
## 
## Step:  AIC=-91.38
## Y.TB ~ E17 + E19 + E20
## 
##        Df Sum of Sq    RSS     AIC
## - E20   1   0.03122 6.8819 -93.156
## - E19   1   0.15943 7.0101 -92.234
## <none>              6.8507 -91.384
## - E17   1   0.34798 7.1987 -90.906
## 
## Step:  AIC=-93.16
## Y.TB ~ E17 + E19
## 
##        Df Sum of Sq    RSS     AIC
## <none>              6.8819 -93.156
## - E19   1   0.39651 7.2784 -92.356
## - E17   1   0.49428 7.3762 -91.688
## 
## Call:
## lm(formula = Y.TB ~ E17 + E19, data = GroupE_PDP)
## 
## Coefficients:
## (Intercept)          E17          E19  
##      2.0348       0.2707       0.1909
# Keep the two variables left in the final step() model: E17 and E19.
Group_PDP_E <- dplyr::select(GroupE_PDP, E17, E19)

# Row-wise mean of the retained Group E variables.
Mean_PDP_E <- rowMeans(Group_PDP_E)
# Group F: columns F21 through F23 together with the response Y.TB.
GroupF_PDP <- dplyr::select(PDP, F21:F23, Y.TB)

# Preview the first rows of the subset.
head(GroupF_PDP)

# Full linear model: Y.TB regressed on every Group F column.
model_PDP_F <- lm(Y.TB ~ ., data = GroupF_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_F, direction = "backward")
## Start:  AIC=-102.14
## Y.TB ~ F21 + F22 + F23
## 
##        Df Sum of Sq    RSS      AIC
## - F21   1   0.00337 5.5278 -104.111
## <none>              5.5245 -102.142
## - F22   1   0.22710 5.7516 -102.128
## - F23   1   1.67741 7.2019  -90.884
## 
## Step:  AIC=-104.11
## Y.TB ~ F22 + F23
## 
##        Df Sum of Sq    RSS      AIC
## <none>              5.5278 -104.111
## - F22   1   0.30015 5.8280 -103.468
## - F23   1   2.40755 7.9354  -88.035
## 
## Call:
## lm(formula = Y.TB ~ F22 + F23, data = GroupF_PDP)
## 
## Coefficients:
## (Intercept)          F22          F23  
##      2.0694       0.1328       0.3797
# Keep the two variables left in the final step() model: F22 and F23.
Group_PDP_F <- dplyr::select(GroupF_PDP, F22, F23)

# Row-wise mean of the retained Group F variables.
Mean_PDP_F <- rowMeans(Group_PDP_F)
# Group G: columns G24 and G25 together with the response Y.TB.
GroupG_PDP <- dplyr::select(PDP, G24:G25, Y.TB)

# Preview the first rows of the subset.
head(GroupG_PDP)

# Full linear model: Y.TB regressed on both Group G columns.
model_PDP_G <- lm(Y.TB ~ ., data = GroupG_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_G, direction = "backward")
## Start:  AIC=-105.24
## Y.TB ~ G24 + G25
## 
##        Df Sum of Sq    RSS      AIC
## - G25   1    0.0048 5.4088 -107.200
## <none>              5.4040 -105.244
## - G24   1    1.9752 7.3793  -91.668
## 
## Step:  AIC=-107.2
## Y.TB ~ G24
## 
##        Df Sum of Sq     RSS      AIC
## <none>               5.4088 -107.200
## - G24   1    4.6455 10.0543  -78.201
## 
## Call:
## lm(formula = Y.TB ~ G24, data = GroupG_PDP)
## 
## Coefficients:
## (Intercept)          G24  
##      2.4821       0.4109
# Group G keeps a single variable (G24), so there is nothing to average.
# Extract it as a plain vector rather than a one-column data frame: a
# one-column data frame passed to data.frame() keeps its own column name
# (G24) even when supplied as `Mean_PDP_G = ...`, so the intended label was
# lost. The former self-assignment was a no-op and has been removed.
Mean_PDP_G <- GroupG_PDP[["G24"]]
# Group H: columns H26 and H27 together with the response Y.TB.
GroupH_PDP <- dplyr::select(PDP, H26:H27, Y.TB)

# Preview the first rows of the subset.
head(GroupH_PDP)

# Full linear model: Y.TB regressed on both Group H columns.
model_PDP_H <- lm(Y.TB ~ ., data = GroupH_PDP)

# Backward elimination; the printed trace shows which columns are retained.
step(model_PDP_H, direction = "backward")
## Start:  AIC=-116.67
## Y.TB ~ H26 + H27
## 
##        Df Sum of Sq    RSS     AIC
## - H26   1  0.053511 4.3538 -118.05
## <none>              4.3003 -116.67
## - H27   1  0.234650 4.5349 -116.01
## 
## Step:  AIC=-118.05
## Y.TB ~ H27
## 
##        Df Sum of Sq     RSS      AIC
## <none>               4.3538 -118.049
## - H27   1    5.7005 10.0543  -78.201
## 
## Call:
## lm(formula = Y.TB ~ H27, data = GroupH_PDP)
## 
## Coefficients:
## (Intercept)          H27  
##      2.5920       0.4124
# Group H keeps a single variable (H27), so there is nothing to average.
# Extract it as a plain vector rather than a one-column data frame: a
# one-column data frame passed to data.frame() keeps its own column name
# (H27) even when supplied as `Mean_PDP_H = ...`, so the intended label was
# lost. The former self-assignment was a no-op and has been removed.
Mean_PDP_H <- GroupH_PDP[["H27"]]
# Dataset: one predictor per group (A-H) plus the response, stored as Y_mean.
PDP_Dataset <- data.frame(
  Mean_PDP_A = Mean_PDP_A,
  Mean_PDP_B = Mean_PDP_B,
  Mean_PDP_C = Mean_PDP_C,
  Mean_PDP_D = Mean_PDP_D,
  Mean_PDP_E = Mean_PDP_E,
  Mean_PDP_F = Mean_PDP_F,
  Mean_PDP_G = Mean_PDP_G,
  Mean_PDP_H = Mean_PDP_H,
  Y_mean = PDP$Y.TB
)

Fit the model

# Final model

# Drop rows that contain NA before fitting.
PDP_Data <- na.omit(PDP_Dataset)

# Full model: Y_mean regressed on every other column of PDP_Data.
model_PDP <- lm(Y_mean ~ ., data = PDP_Data)

# Backward elimination. In the trace recorded below, the Group D term (D15)
# and Mean_PDP_C are dropped, while Mean_PDP_B is retained in the final model.
step(model_PDP, direction = "backward")
## Start:  AIC=-180.22
## Y_mean ~ Mean_PDP_A + Mean_PDP_B + Mean_PDP_C + D15 + Mean_PDP_E + 
##     Mean_PDP_F + G24 + H27
## 
##              Df Sum of Sq     RSS     AIC
## - D15         1   0.01224 0.96119 -181.58
## - Mean_PDP_C  1   0.03457 0.98352 -180.43
## <none>                    0.94895 -180.22
## - Mean_PDP_B  1   0.14655 1.09550 -175.04
## - G24         1   0.19602 1.14497 -172.83
## - Mean_PDP_F  1   0.39067 1.33963 -164.98
## - Mean_PDP_A  1   0.41020 1.35916 -164.26
## - Mean_PDP_E  1   0.72066 1.66961 -153.97
## - H27         1   0.75496 1.70391 -152.96
## 
## Step:  AIC=-181.58
## Y_mean ~ Mean_PDP_A + Mean_PDP_B + Mean_PDP_C + Mean_PDP_E + 
##     Mean_PDP_F + G24 + H27
## 
##              Df Sum of Sq     RSS     AIC
## - Mean_PDP_C  1   0.02236 0.98355 -182.43
## <none>                    0.96119 -181.58
## - Mean_PDP_B  1   0.15908 1.12027 -175.92
## - G24         1   0.18969 1.15087 -174.57
## - Mean_PDP_F  1   0.38529 1.34648 -166.73
## - Mean_PDP_A  1   0.40462 1.36581 -166.01
## - Mean_PDP_E  1   0.71145 1.67264 -155.88
## - H27         1   0.81079 1.77198 -153.00
## 
## Step:  AIC=-182.43
## Y_mean ~ Mean_PDP_A + Mean_PDP_B + Mean_PDP_E + Mean_PDP_F + 
##     G24 + H27
## 
##              Df Sum of Sq     RSS     AIC
## <none>                    0.98355 -182.43
## - G24         1   0.17842 1.16197 -176.09
## - Mean_PDP_B  1   0.17869 1.16224 -176.08
## - Mean_PDP_F  1   0.37009 1.35364 -168.46
## - Mean_PDP_A  1   0.38227 1.36581 -168.01
## - Mean_PDP_E  1   0.75331 1.73686 -156.00
## - H27         1   0.81388 1.79742 -154.28
## 
## Call:
## lm(formula = Y_mean ~ Mean_PDP_A + Mean_PDP_B + Mean_PDP_E + 
##     Mean_PDP_F + G24 + H27, data = PDP_Data)
## 
## Coefficients:
## (Intercept)   Mean_PDP_A   Mean_PDP_B   Mean_PDP_E   Mean_PDP_F  
##     -0.3663       0.2065       0.1346       0.2414       0.1934  
##         G24          H27  
##      0.1135       0.2117
# Relative importance of each predictor in the full model (model_PDP), using
# the "lmg" metric. rela = TRUE normalizes the metrics to sum to 100%.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
impo_PDP <- calc.relimp(model_PDP, type = "lmg", rela = TRUE)

# Print the relative-importance summary.
impo_PDP
## Response variable: Y_mean 
## Total response variance: 0.20519 
## Analysis based on 50 observations 
## 
## 8 Regressors: 
## Mean_PDP_A Mean_PDP_B Mean_PDP_C D15 Mean_PDP_E Mean_PDP_F G24 H27 
## Proportion of variance explained by model: 90.56%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##                    lmg
## Mean_PDP_A 0.059589822
## Mean_PDP_B 0.138672800
## Mean_PDP_C 0.003306612
## D15        0.005688214
## Mean_PDP_E 0.165231391
## Mean_PDP_F 0.170780541
## G24        0.183366289
## H27        0.273364332
## 
## Average coefficients for different model sizes: 
## 
##                    1X         2Xs         3Xs         4Xs         5Xs
## Mean_PDP_A 0.20886244  0.19796933  0.19784436  0.20211733  0.20731607
## Mean_PDP_B 0.44396617  0.37964247  0.32374316  0.27528708  0.23281558
## Mean_PDP_C 0.01386970 -0.01294075 -0.02907707 -0.03962952 -0.04713349
## D15        0.07926056  0.05873302  0.04459629  0.03535331  0.02988971
## Mean_PDP_E 0.45064008  0.38648469  0.33839679  0.30323170  0.27801884
## Mean_PDP_F 0.51100000  0.42621373  0.35697856  0.30245871  0.26119641
## G24        0.41085756  0.34470342  0.28825529  0.24071304  0.20098812
## H27        0.41238663  0.36588200  0.32730182  0.29555524  0.26924442
##                    6Xs         7Xs         8Xs
## Mean_PDP_A  0.21194198  0.21589121  0.22007635
## Mean_PDP_B  0.19468402  0.15906930  0.12391413
## Mean_PDP_C -0.05274442 -0.05757579 -0.06320357
## D15         0.02717544  0.02678714  0.02906971
## Mean_PDP_E  0.26002039  0.24714827  0.23841839
## Mean_PDP_F  0.23141825  0.21142073  0.20001757
## G24         0.16801993  0.14105907  0.11996232
## H27         0.24684441  0.22667255  0.20686183