Q. 대학 성적에 영향을 미치는 요인들 중에서 통제변수를 몇 개 추가하고, 왜 그 변수들을 추가해야 하는지 설명한 뒤 추정 결과를 제시하시오.
종속변수 : colGPA (대학교 성적)
# Load the GPA data set from a tab-separated file whose first row holds the
# column names. TRUE is spelled out because T is an ordinary variable that can
# be reassigned, which would silently change how the header is read.
gpa <- read.csv("gpa1.csv", header = TRUE, sep = "\t")
library(tidyverse)
## -- Attaching packages ------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ purrr 0.3.4
## √ tibble 3.0.3 √ dplyr 1.0.2
## √ tidyr 1.1.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts ---------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Inspect the structure of the full data set.
str(gpa) # 28 variables remain once the dependent variable colGPA is excluded
## 'data.frame': 141 obs. of 29 variables:
## $ age : int 21 21 20 19 20 20 22 22 22 19 ...
## $ soph : int 0 0 0 1 0 0 0 0 0 1 ...
## $ junior : int 0 0 1 0 1 0 0 0 0 0 ...
## $ senior : int 1 1 0 0 0 1 0 0 0 0 ...
## $ senior5 : int 0 0 0 0 0 0 1 1 1 0 ...
## $ male : int 0 0 0 1 0 1 0 0 0 0 ...
## $ campus : int 0 0 0 1 0 1 0 0 0 0 ...
## $ business: int 1 1 1 1 1 1 1 0 0 1 ...
## $ engineer: int 0 0 0 0 0 0 0 0 0 0 ...
## $ colGPA : num 3 3.4 3 3.5 3.6 ...
## $ hsGPA : num 3 3.2 3.6 3.5 3.9 ...
## $ ACT : int 21 24 26 27 28 25 25 22 21 27 ...
## $ job19 : int 0 0 1 1 0 0 0 1 1 1 ...
## $ job20 : int 1 1 0 0 1 0 0 0 0 0 ...
## $ drive : int 1 1 0 0 0 0 0 1 1 0 ...
## $ bike : int 0 0 0 0 1 0 1 0 0 0 ...
## $ walk : int 0 0 1 1 0 1 0 0 0 1 ...
## $ voluntr : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PC : int 0 0 0 0 0 0 0 1 0 1 ...
## $ greek : int 0 0 0 0 0 0 1 0 0 0 ...
## $ car : int 1 1 1 0 1 1 1 0 1 0 ...
## $ siblings: int 1 0 1 1 1 1 1 1 1 1 ...
## $ bgfriend: int 0 1 0 0 1 0 0 1 1 0 ...
## $ clubs : int 0 1 1 0 0 0 1 0 1 1 ...
## $ skipped : num 2 0 0 0 0 0 0 3 2 0.5 ...
## $ alcohol : num 1 1 1 0 1.5 0 2 3 2.5 0.75 ...
## $ gradMI : int 1 1 1 0 1 0 1 1 1 1 ...
## $ fathcoll: int 0 1 1 0 1 1 0 1 1 0 ...
## $ mothcoll: int 0 1 1 0 0 0 1 1 1 1 ...
# Check for missing values by tabulating is.na() over every cell.
table(is.na(gpa)) # no missing values: only FALSE appears in the table
##
## FALSE
## 4089
# Baseline model: regress colGPA on every other column of gpa.
# NOTE(review): the summary reports two coefficients (senior5, walk) as "not
# defined because of singularities"; presumably they are redundant with the
# other class-year and commute dummies -- confirm before interpreting.
model <- lm(formula = colGPA ~ ., data = gpa)
summary(model)
##
## Call:
## lm(formula = colGPA ~ ., data = gpa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70062 -0.19192 0.01191 0.22293 0.56458
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3855097 0.8384647 0.460 0.64655
## age 0.0346817 0.0312473 1.110 0.26937
## soph 0.3317017 0.2513325 1.320 0.18956
## junior 0.0330885 0.1241377 0.267 0.79030
## senior -0.0005738 0.1073523 -0.005 0.99574
## senior5 NA NA NA NA
## male 0.0610685 0.0703393 0.868 0.38711
## campus -0.0929607 0.0832369 -1.117 0.26642
## business 0.0767623 0.0842040 0.912 0.36389
## engineer -0.1847074 0.1837136 -1.005 0.31683
## hsGPA 0.4520842 0.1055536 4.283 3.87e-05 ***
## ACT 0.0082167 0.0114974 0.715 0.47628
## job19 0.0025063 0.0673896 0.037 0.97040
## job20 -0.0454327 0.0875966 -0.519 0.60500
## drive 0.0135408 0.0904015 0.150 0.88120
## bike -0.0233484 0.0698659 -0.334 0.73885
## walk NA NA NA NA
## voluntr -0.0667434 0.0776006 -0.860 0.39155
## PC 0.1352781 0.0620784 2.179 0.03138 *
## greek 0.0750345 0.0652210 1.150 0.25236
## car -0.0571927 0.0732231 -0.781 0.43638
## siblings -0.0883061 0.1242033 -0.711 0.47855
## bgfriend 0.0993964 0.0599499 1.658 0.10007
## clubs 0.0932476 0.0611600 1.525 0.13012
## skipped -0.0889829 0.0323827 -2.748 0.00698 **
## alcohol 0.0181863 0.0262423 0.693 0.48971
## gradMI 0.1783460 0.0898218 1.986 0.04948 *
## fathcoll 0.0500783 0.0649191 0.771 0.44207
## mothcoll -0.0572119 0.0691025 -0.828 0.40944
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3234 on 114 degrees of freedom
## Multiple R-squared: 0.3858, Adjusted R-squared: 0.2457
## F-statistic: 2.754 on 26 and 114 DF, p-value: 0.0001204
# Backward elimination: starting from the full model, drop low-contribution
# predictors one at a time; each step is printed with its AIC.
new_model <- step(model, direction = "backward")
## Start: AIC=-294.35
## colGPA ~ age + soph + junior + senior + senior5 + male + campus +
## business + engineer + hsGPA + ACT + job19 + job20 + drive +
## bike + walk + voluntr + PC + greek + car + siblings + bgfriend +
## clubs + skipped + alcohol + gradMI + fathcoll + mothcoll
##
##
## Step: AIC=-294.35
## colGPA ~ age + soph + junior + senior + senior5 + male + campus +
## business + engineer + hsGPA + ACT + job19 + job20 + drive +
## bike + voluntr + PC + greek + car + siblings + bgfriend +
## clubs + skipped + alcohol + gradMI + fathcoll + mothcoll
##
##
## Step: AIC=-294.35
## colGPA ~ age + soph + junior + senior + male + campus + business +
## engineer + hsGPA + ACT + job19 + job20 + drive + bike + voluntr +
## PC + greek + car + siblings + bgfriend + clubs + skipped +
## alcohol + gradMI + fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - senior 1 0.00000 11.920 -296.35
## - job19 1 0.00014 11.920 -296.35
## - drive 1 0.00235 11.922 -296.32
## - junior 1 0.00743 11.927 -296.26
## - bike 1 0.01168 11.931 -296.21
## - job20 1 0.02813 11.948 -296.02
## - alcohol 1 0.05022 11.970 -295.76
## - siblings 1 0.05285 11.973 -295.73
## - ACT 1 0.05340 11.973 -295.72
## - fathcoll 1 0.06222 11.982 -295.62
## - car 1 0.06379 11.983 -295.60
## - mothcoll 1 0.07167 11.991 -295.50
## - voluntr 1 0.07735 11.997 -295.44
## - male 1 0.07881 11.998 -295.42
## - business 1 0.08689 12.007 -295.32
## - engineer 1 0.10569 12.025 -295.11
## - age 1 0.12881 12.049 -294.83
## - campus 1 0.13042 12.050 -294.81
## - greek 1 0.13839 12.058 -294.72
## <none> 11.920 -294.35
## - soph 1 0.18212 12.102 -294.21
## - clubs 1 0.24305 12.163 -293.50
## - bgfriend 1 0.28743 12.207 -292.99
## - gradMI 1 0.41222 12.332 -291.56
## - PC 1 0.49652 12.416 -290.60
## - skipped 1 0.78950 12.709 -287.31
## - hsGPA 1 1.91802 13.838 -275.31
##
## Step: AIC=-296.35
## colGPA ~ age + soph + junior + male + campus + business + engineer +
## hsGPA + ACT + job19 + job20 + drive + bike + voluntr + PC +
## greek + car + siblings + bgfriend + clubs + skipped + alcohol +
## gradMI + fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - job19 1 0.00015 11.920 -298.35
## - drive 1 0.00234 11.922 -298.32
## - bike 1 0.01173 11.931 -298.21
## - junior 1 0.02365 11.943 -298.07
## - job20 1 0.02813 11.948 -298.02
## - alcohol 1 0.05042 11.970 -297.75
## - siblings 1 0.05299 11.973 -297.72
## - ACT 1 0.05340 11.973 -297.72
## - fathcoll 1 0.06234 11.982 -297.61
## - car 1 0.06389 11.984 -297.60
## - mothcoll 1 0.07224 11.992 -297.50
## - voluntr 1 0.07833 11.998 -297.43
## - male 1 0.07983 12.000 -297.41
## - business 1 0.09121 12.011 -297.27
## - engineer 1 0.10911 12.029 -297.06
## - campus 1 0.13070 12.050 -296.81
## - age 1 0.13332 12.053 -296.78
## - greek 1 0.13842 12.058 -296.72
## <none> 11.920 -296.35
## - soph 1 0.23516 12.155 -295.60
## - clubs 1 0.24531 12.165 -295.48
## - bgfriend 1 0.28753 12.207 -294.99
## - gradMI 1 0.41358 12.333 -293.54
## - PC 1 0.49920 12.419 -292.56
## - skipped 1 0.79111 12.711 -289.29
## - hsGPA 1 1.91816 13.838 -277.31
##
## Step: AIC=-298.35
## colGPA ~ age + soph + junior + male + campus + business + engineer +
## hsGPA + ACT + job20 + drive + bike + voluntr + PC + greek +
## car + siblings + bgfriend + clubs + skipped + alcohol + gradMI +
## fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - drive 1 0.00234 11.922 -300.32
## - bike 1 0.01173 11.932 -300.21
## - junior 1 0.02351 11.943 -300.07
## - job20 1 0.03718 11.957 -299.91
## - siblings 1 0.05285 11.973 -299.72
## - alcohol 1 0.05285 11.973 -299.72
## - ACT 1 0.05481 11.975 -299.70
## - fathcoll 1 0.06228 11.982 -299.61
## - car 1 0.06455 11.984 -299.59
## - mothcoll 1 0.07250 11.992 -299.49
## - voluntr 1 0.07818 11.998 -299.43
## - male 1 0.07969 12.000 -299.41
## - business 1 0.09122 12.011 -299.27
## - engineer 1 0.10919 12.029 -299.06
## - campus 1 0.13056 12.050 -298.81
## - age 1 0.13374 12.054 -298.77
## - greek 1 0.13854 12.058 -298.72
## <none> 11.920 -298.35
## - soph 1 0.23526 12.155 -297.59
## - clubs 1 0.24574 12.166 -297.47
## - bgfriend 1 0.28810 12.208 -296.98
## - gradMI 1 0.42394 12.344 -295.42
## - PC 1 0.49910 12.419 -294.56
## - skipped 1 0.79254 12.712 -291.27
## - hsGPA 1 1.91815 13.838 -279.31
##
## Step: AIC=-300.32
## colGPA ~ age + soph + junior + male + campus + business + engineer +
## hsGPA + ACT + job20 + bike + voluntr + PC + greek + car +
## siblings + bgfriend + clubs + skipped + alcohol + gradMI +
## fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - bike 1 0.01705 11.939 -302.12
## - junior 1 0.02500 11.947 -302.02
## - job20 1 0.03837 11.961 -301.87
## - ACT 1 0.05250 11.975 -301.70
## - alcohol 1 0.05359 11.976 -301.69
## - siblings 1 0.05563 11.978 -301.66
## - car 1 0.06237 11.985 -301.58
## - fathcoll 1 0.06569 11.988 -301.55
## - mothcoll 1 0.07098 11.993 -301.48
## - male 1 0.07807 12.000 -301.40
## - voluntr 1 0.08041 12.003 -301.37
## - business 1 0.08888 12.011 -301.27
## - engineer 1 0.11415 12.036 -300.98
## - greek 1 0.14355 12.066 -300.63
## - campus 1 0.14376 12.066 -300.63
## - age 1 0.15500 12.077 -300.50
## <none> 11.922 -300.32
## - soph 1 0.23662 12.159 -299.55
## - clubs 1 0.24344 12.166 -299.47
## - bgfriend 1 0.28885 12.211 -298.94
## - gradMI 1 0.43770 12.360 -297.24
## - PC 1 0.50717 12.429 -296.45
## - skipped 1 0.82046 12.743 -292.94
## - hsGPA 1 1.92122 13.843 -281.25
##
## Step: AIC=-302.12
## colGPA ~ age + soph + junior + male + campus + business + engineer +
## hsGPA + ACT + job20 + voluntr + PC + greek + car + siblings +
## bgfriend + clubs + skipped + alcohol + gradMI + fathcoll +
## mothcoll
##
## Df Sum of Sq RSS AIC
## - junior 1 0.02555 11.965 -303.82
## - job20 1 0.03315 11.972 -303.73
## - alcohol 1 0.04504 11.984 -303.59
## - ACT 1 0.04556 11.985 -303.58
## - siblings 1 0.05435 11.994 -303.48
## - car 1 0.05932 11.999 -303.42
## - mothcoll 1 0.05983 11.999 -303.41
## - fathcoll 1 0.06727 12.007 -303.33
## - male 1 0.07099 12.010 -303.28
## - voluntr 1 0.08774 12.027 -303.09
## - business 1 0.09078 12.030 -303.05
## - engineer 1 0.13053 12.070 -302.58
## - greek 1 0.13157 12.071 -302.57
## - campus 1 0.13449 12.074 -302.54
## - age 1 0.16919 12.108 -302.13
## <none> 11.939 -302.12
## - clubs 1 0.24616 12.185 -301.24
## - soph 1 0.26029 12.200 -301.08
## - bgfriend 1 0.27929 12.219 -300.86
## - gradMI 1 0.42824 12.367 -299.15
## - PC 1 0.55252 12.492 -297.74
## - skipped 1 0.83291 12.772 -294.61
## - hsGPA 1 1.90663 13.846 -283.23
##
## Step: AIC=-303.82
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## ACT + job20 + voluntr + PC + greek + car + siblings + bgfriend +
## clubs + skipped + alcohol + gradMI + fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - job20 1 0.03553 12.000 -305.40
## - alcohol 1 0.03853 12.003 -305.36
## - ACT 1 0.04933 12.014 -305.24
## - siblings 1 0.05339 12.018 -305.19
## - car 1 0.06203 12.027 -305.09
## - mothcoll 1 0.06682 12.032 -305.03
## - fathcoll 1 0.06806 12.033 -305.02
## - male 1 0.08423 12.049 -304.83
## - voluntr 1 0.09372 12.059 -304.72
## - business 1 0.10465 12.069 -304.59
## - greek 1 0.13141 12.096 -304.28
## - engineer 1 0.13689 12.102 -304.21
## - age 1 0.14514 12.110 -304.12
## - campus 1 0.14562 12.110 -304.11
## <none> 11.965 -303.82
## - soph 1 0.23579 12.201 -303.07
## - clubs 1 0.25145 12.216 -302.88
## - bgfriend 1 0.29116 12.256 -302.43
## - gradMI 1 0.44244 12.407 -300.70
## - PC 1 0.54404 12.509 -299.55
## - skipped 1 0.82856 12.793 -296.38
## - hsGPA 1 1.95851 13.923 -284.44
##
## Step: AIC=-305.4
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## ACT + voluntr + PC + greek + car + siblings + bgfriend +
## clubs + skipped + alcohol + gradMI + fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - alcohol 1 0.04426 12.045 -306.88
## - ACT 1 0.04820 12.049 -306.83
## - siblings 1 0.05021 12.051 -306.81
## - mothcoll 1 0.06874 12.069 -306.59
## - car 1 0.07378 12.074 -306.54
## - fathcoll 1 0.07790 12.078 -306.49
## - male 1 0.07795 12.078 -306.49
## - business 1 0.10466 12.105 -306.18
## - voluntr 1 0.10699 12.107 -306.15
## - age 1 0.13481 12.135 -305.82
## - greek 1 0.13946 12.140 -305.77
## - campus 1 0.14084 12.141 -305.75
## - engineer 1 0.15460 12.155 -305.59
## <none> 12.000 -305.40
## - soph 1 0.22044 12.221 -304.83
## - clubs 1 0.25663 12.257 -304.42
## - bgfriend 1 0.26868 12.269 -304.28
## - gradMI 1 0.42739 12.428 -302.46
## - PC 1 0.61103 12.611 -300.40
## - skipped 1 0.80817 12.809 -298.21
## - hsGPA 1 1.95083 13.951 -286.16
##
## Step: AIC=-306.88
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## ACT + voluntr + PC + greek + car + siblings + bgfriend +
## clubs + skipped + gradMI + fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - siblings 1 0.05002 12.095 -308.30
## - ACT 1 0.05671 12.101 -308.22
## - car 1 0.06154 12.106 -308.16
## - mothcoll 1 0.06173 12.106 -308.16
## - fathcoll 1 0.10514 12.150 -307.65
## - male 1 0.12081 12.165 -307.47
## - voluntr 1 0.12283 12.167 -307.45
## - business 1 0.12907 12.174 -307.38
## - engineer 1 0.13980 12.184 -307.25
## - age 1 0.14354 12.188 -307.21
## - greek 1 0.16177 12.206 -307.00
## <none> 12.045 -306.88
## - campus 1 0.18270 12.227 -306.76
## - soph 1 0.20584 12.251 -306.49
## - bgfriend 1 0.25613 12.301 -305.91
## - clubs 1 0.26032 12.305 -305.87
## - gradMI 1 0.44169 12.486 -303.80
## - PC 1 0.60890 12.653 -301.93
## - skipped 1 0.76458 12.809 -300.20
## - hsGPA 1 1.98614 14.031 -287.36
##
## Step: AIC=-308.3
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## ACT + voluntr + PC + greek + car + bgfriend + clubs + skipped +
## gradMI + fathcoll + mothcoll
##
## Df Sum of Sq RSS AIC
## - mothcoll 1 0.03831 12.133 -309.85
## - car 1 0.04612 12.141 -309.76
## - ACT 1 0.05186 12.146 -309.69
## - male 1 0.09700 12.192 -309.17
## - voluntr 1 0.10239 12.197 -309.11
## - fathcoll 1 0.10293 12.198 -309.10
## - business 1 0.12397 12.219 -308.86
## - age 1 0.15434 12.249 -308.51
## - engineer 1 0.15545 12.250 -308.50
## - campus 1 0.16851 12.263 -308.35
## - greek 1 0.16857 12.263 -308.34
## <none> 12.095 -308.30
## - soph 1 0.21539 12.310 -307.81
## - bgfriend 1 0.23687 12.332 -307.56
## - clubs 1 0.24007 12.335 -307.52
## - gradMI 1 0.44238 12.537 -305.23
## - PC 1 0.61919 12.714 -303.26
## - skipped 1 0.74664 12.841 -301.85
## - hsGPA 1 1.96287 14.057 -289.09
##
## Step: AIC=-309.85
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## ACT + voluntr + PC + greek + car + bgfriend + clubs + skipped +
## gradMI + fathcoll
##
## Df Sum of Sq RSS AIC
## - car 1 0.03171 12.165 -311.48
## - ACT 1 0.04557 12.178 -311.32
## - fathcoll 1 0.07361 12.207 -311.00
## - voluntr 1 0.08496 12.218 -310.87
## - male 1 0.12429 12.257 -310.41
## - business 1 0.13112 12.264 -310.33
## - greek 1 0.14698 12.280 -310.15
## - campus 1 0.15950 12.292 -310.01
## - engineer 1 0.16849 12.301 -309.90
## <none> 12.133 -309.85
## - age 1 0.18257 12.316 -309.74
## - clubs 1 0.21481 12.348 -309.38
## - soph 1 0.23418 12.367 -309.15
## - bgfriend 1 0.23628 12.369 -309.13
## - gradMI 1 0.45266 12.586 -306.69
## - PC 1 0.59784 12.731 -305.07
## - skipped 1 0.71068 12.844 -303.82
## - hsGPA 1 2.09668 14.230 -289.37
##
## Step: AIC=-311.48
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## ACT + voluntr + PC + greek + bgfriend + clubs + skipped +
## gradMI + fathcoll
##
## Df Sum of Sq RSS AIC
## - ACT 1 0.04773 12.212 -312.93
## - fathcoll 1 0.07056 12.235 -312.67
## - voluntr 1 0.09270 12.257 -312.41
## - male 1 0.12846 12.293 -312.00
## - business 1 0.13238 12.297 -311.96
## - greek 1 0.14683 12.312 -311.79
## - campus 1 0.15809 12.323 -311.66
## <none> 12.165 -311.48
## - engineer 1 0.17393 12.339 -311.48
## - age 1 0.17568 12.340 -311.46
## - clubs 1 0.21428 12.379 -311.02
## - bgfriend 1 0.23645 12.401 -310.77
## - soph 1 0.25459 12.419 -310.56
## - gradMI 1 0.43559 12.600 -308.52
## - PC 1 0.58343 12.748 -306.88
## - skipped 1 0.71268 12.877 -305.45
## - hsGPA 1 2.29409 14.459 -289.12
##
## Step: AIC=-312.93
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## voluntr + PC + greek + bgfriend + clubs + skipped + gradMI +
## fathcoll
##
## Df Sum of Sq RSS AIC
## - fathcoll 1 0.08268 12.295 -313.98
## - voluntr 1 0.08472 12.297 -313.95
## - business 1 0.13292 12.345 -313.40
## - greek 1 0.14905 12.361 -313.22
## - campus 1 0.16914 12.382 -312.99
## <none> 12.212 -312.93
## - engineer 1 0.17940 12.392 -312.87
## - male 1 0.18231 12.395 -312.84
## - age 1 0.18416 12.396 -312.82
## - clubs 1 0.21296 12.425 -312.49
## - bgfriend 1 0.26238 12.475 -311.93
## - soph 1 0.30008 12.512 -311.51
## - gradMI 1 0.43195 12.644 -310.03
## - PC 1 0.60352 12.816 -308.13
## - skipped 1 0.67995 12.892 -307.29
## - hsGPA 1 3.01974 15.232 -283.77
##
## Step: AIC=-313.98
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## voluntr + PC + greek + bgfriend + clubs + skipped + gradMI
##
## Df Sum of Sq RSS AIC
## - voluntr 1 0.07189 12.367 -315.16
## - greek 1 0.13269 12.428 -314.46
## - business 1 0.14695 12.442 -314.30
## - age 1 0.16598 12.461 -314.09
## - campus 1 0.17439 12.469 -313.99
## <none> 12.295 -313.98
## - engineer 1 0.18186 12.477 -313.91
## - clubs 1 0.22743 12.523 -313.39
## - male 1 0.24590 12.541 -313.19
## - soph 1 0.27034 12.565 -312.91
## - bgfriend 1 0.28969 12.585 -312.69
## - gradMI 1 0.38874 12.684 -311.59
## - PC 1 0.69656 12.992 -308.21
## - skipped 1 0.70256 12.998 -308.14
## - hsGPA 1 3.05308 15.348 -284.70
##
## Step: AIC=-315.16
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## PC + greek + bgfriend + clubs + skipped + gradMI
##
## Df Sum of Sq RSS AIC
## - greek 1 0.09759 12.464 -316.05
## - business 1 0.13370 12.501 -315.64
## - age 1 0.16343 12.530 -315.31
## <none> 12.367 -315.16
## - clubs 1 0.18278 12.550 -315.09
## - campus 1 0.20987 12.577 -314.78
## - engineer 1 0.25358 12.620 -314.29
## - bgfriend 1 0.27627 12.643 -314.04
## - male 1 0.28584 12.653 -313.93
## - soph 1 0.29563 12.663 -313.82
## - gradMI 1 0.40923 12.776 -312.57
## - skipped 1 0.65556 13.023 -309.87
## - PC 1 0.70729 13.074 -309.31
## - hsGPA 1 3.09130 15.458 -285.70
##
## Step: AIC=-316.05
## colGPA ~ age + soph + male + campus + business + engineer + hsGPA +
## PC + bgfriend + clubs + skipped + gradMI
##
## Df Sum of Sq RSS AIC
## - business 1 0.1455 12.610 -316.41
## - age 1 0.1557 12.620 -316.30
## - clubs 1 0.1710 12.636 -316.13
## <none> 12.464 -316.05
## - campus 1 0.2486 12.713 -315.26
## - soph 1 0.2611 12.726 -315.12
## - engineer 1 0.2627 12.727 -315.11
## - bgfriend 1 0.2769 12.741 -314.95
## - male 1 0.3065 12.771 -314.62
## - gradMI 1 0.4136 12.878 -313.45
## - skipped 1 0.5987 13.063 -311.43
## - PC 1 0.7431 13.208 -309.88
## - hsGPA 1 3.2038 15.668 -285.79
##
## Step: AIC=-316.41
## colGPA ~ age + soph + male + campus + engineer + hsGPA + PC +
## bgfriend + clubs + skipped + gradMI
##
## Df Sum of Sq RSS AIC
## - age 1 0.07654 12.687 -317.56
## - clubs 1 0.16169 12.772 -316.62
## <none> 12.610 -316.41
## - soph 1 0.25386 12.864 -315.60
## - bgfriend 1 0.27566 12.886 -315.36
## - campus 1 0.30437 12.914 -315.05
## - male 1 0.31984 12.930 -314.88
## - gradMI 1 0.41307 13.023 -313.87
## - engineer 1 0.48970 13.100 -313.04
## - skipped 1 0.65016 13.260 -311.32
## - PC 1 0.72615 13.336 -310.52
## - hsGPA 1 3.08992 15.700 -287.51
##
## Step: AIC=-317.56
## colGPA ~ soph + male + campus + engineer + hsGPA + PC + bgfriend +
## clubs + skipped + gradMI
##
## Df Sum of Sq RSS AIC
## - clubs 1 0.17297 12.860 -317.65
## <none> 12.687 -317.56
## - soph 1 0.21626 12.903 -317.18
## - campus 1 0.32376 13.010 -316.01
## - male 1 0.33962 13.026 -315.83
## - bgfriend 1 0.35088 13.037 -315.71
## - gradMI 1 0.42888 13.116 -314.87
## - engineer 1 0.45989 13.146 -314.54
## - PC 1 0.68623 13.373 -312.13
## - skipped 1 0.72288 13.409 -311.74
## - hsGPA 1 3.03775 15.724 -289.29
##
## Step: AIC=-317.65
## colGPA ~ soph + male + campus + engineer + hsGPA + PC + bgfriend +
## skipped + gradMI
##
## Df Sum of Sq RSS AIC
## - soph 1 0.1784 13.038 -317.71
## <none> 12.860 -317.65
## - campus 1 0.2890 13.149 -316.51
## - male 1 0.3396 13.199 -315.97
## - bgfriend 1 0.3902 13.250 -315.43
## - engineer 1 0.4057 13.265 -315.27
## - gradMI 1 0.4289 13.289 -315.02
## - PC 1 0.6820 13.542 -312.36
## - skipped 1 0.8116 13.671 -311.02
## - hsGPA 1 3.2761 16.136 -287.65
##
## Step: AIC=-317.71
## colGPA ~ male + campus + engineer + hsGPA + PC + bgfriend + skipped +
## gradMI
##
## Df Sum of Sq RSS AIC
## <none> 13.038 -317.71
## - campus 1 0.2652 13.303 -316.87
## - engineer 1 0.3191 13.357 -316.30
## - bgfriend 1 0.3344 13.372 -316.14
## - male 1 0.3363 13.374 -316.12
## - gradMI 1 0.3889 13.427 -315.56
## - PC 1 0.7243 13.762 -312.08
## - skipped 1 0.7653 13.803 -311.66
## - hsGPA 1 3.4741 16.512 -286.40
# Print coefficient estimates and fit statistics for the model returned by step().
summary(new_model)
##
## Call:
## lm(formula = colGPA ~ male + campus + engineer + hsGPA + PC +
## bgfriend + skipped + gradMI, data = gpa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85577 -0.22859 -0.02129 0.22900 0.74754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.12762 0.31802 3.546 0.000542 ***
## male 0.10696 0.05797 1.845 0.067250 .
## campus -0.12079 0.07371 -1.639 0.103664
## engineer -0.26659 0.14833 -1.797 0.074571 .
## hsGPA 0.50911 0.08584 5.931 2.49e-08 ***
## PC 0.15307 0.05653 2.708 0.007666 **
## bgfriend 0.10062 0.05469 1.840 0.068020 .
## skipped -0.07248 0.02604 -2.784 0.006167 **
## gradMI 0.16086 0.08106 1.984 0.049287 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3143 on 132 degrees of freedom
## Multiple R-squared: 0.3281, Adjusted R-squared: 0.2874
## F-statistic: 8.059 on 8 and 132 DF, p-value: 8.088e-09
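\[\widehat{\mathit{colGPA}} = 1.12762 + 0.50911\,\mathit{hsGPA} + 0.15307\,\mathit{PC} - 0.07248\,\mathit{skipped} + 0.16086\,\mathit{gradMI} + 0.10696\,\mathit{male} - 0.26659\,\mathit{engineer} + 0.10062\,\mathit{bgfriend} - 0.12079\,\mathit{campus}\]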
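t검정의 p값이 0.05보다 작으면 보통 해당 계수가 통계적으로 유의하다고 판단한다. 하지만 모형의 설명력을 높이기 위하여, 후진제거법(AIC 기준)으로 선택된 변수 중 p값이 0.05를 넘는 변수(male, engineer, bgfriend, campus)도 유의수준을 약 0.1로 올려 통제변수로 유지하였다. 이 모형의 수정된 결정계수(Adjusted R-squared)는 0.2874로 colGPA 변동의 약 28%를 설명하며, 모형 전체에 대한 F검정의 p값(8.088e-09)이 0.05보다 작으므로 모형은 통계적으로 유의하다.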
# Restricted comparison model: keep only hsGPA, skipped and PC as predictors,
# then print its coefficient table and fit statistics.
ex_model <- lm(formula = colGPA ~ hsGPA + skipped + PC, data = gpa)
summary(ex_model)
##
## Call:
## lm(formula = colGPA ~ hsGPA + skipped + PC, data = gpa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82176 -0.23846 -0.03308 0.23807 0.78926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.52658 0.29992 5.090 1.16e-06 ***
## hsGPA 0.45543 0.08644 5.269 5.21e-07 ***
## skipped -0.06547 0.02595 -2.523 0.0128 *
## PC 0.12888 0.05734 2.248 0.0262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3259 on 137 degrees of freedom
## Multiple R-squared: 0.2504, Adjusted R-squared: 0.234
## F-statistic: 15.25 on 3 and 137 DF, p-value: 1.281e-08
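# hsGPA, skipped, PC만 사용한 모형은 모든 계수가 유의수준 0.05에서 유의하지만 수정된 결정계수가 0.234(약 23%)로 설명력이 더 낮다. 따라서 모형 전체의 p값이 0.05보다 작은 수준을 유지하면서 변수 선택의 유의수준을 조정하여, 위의 8개 변수 모형(수정된 결정계수 0.2874)을 최종 회귀모형으로 선택하였다.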