여자자료를 이용하여 \(p=1,2,3,...6\)일 때 총6개의 팔다리뼈 길이 중 \(p\)개의 팔다리뼈길이\((x_1, ..., x_p)\)를
이용하여 사망자의 생존 시 키\((y_i)\)를 예측하는 최적 선형모형\((H_p)\)을 각각 찾으시오.
rm(list=ls())
options(scipen=100)
options(crayon.enabled = FALSE)
setwd("D:\\0_grad\\data")
library(tidyverse)
library(readxl)
library(optimx)
library(leaps)
library(knitr)
library(kableExtra)
##### Pre
fdb <- read_xls("Locomotive-FDB-datasets.xls", sheet = 2) #female=0, male=1
fdb$Gender <- as.factor(fdb$Gender)
summary(fdb)
## Gender Stature Humerus Radius Ulna
## 0:40 Min. :143.0 Min. :280.0 Min. :201.0 Min. :215.0
## 1:47 1st Qu.:165.0 1st Qu.:309.5 1st Qu.:235.5 1st Qu.:250.5
## Median :170.0 Median :325.0 Median :249.0 Median :266.0
## Mean :171.5 Mean :327.6 Mean :248.9 Mean :266.4
## 3rd Qu.:179.5 3rd Qu.:343.5 3rd Qu.:262.5 3rd Qu.:279.5
## Max. :198.0 Max. :390.0 Max. :299.0 Max. :318.0
## Femur Tibia Fibula
## Min. :383.0 Min. :334.0 Min. :318.0
## 1st Qu.:441.5 1st Qu.:363.0 1st Qu.:356.0
## Median :463.0 Median :386.0 Median :377.0
## Mean :464.6 Mean :386.2 Mean :378.2
## 3rd Qu.:484.0 3rd Qu.:404.0 3rd Qu.:394.0
## Max. :547.0 Max. :479.0 Max. :466.0
# female data
fdb.female <- fdb[fdb$Gender==0, c(2:8)]
# male data
fdb.male <- fdb[fdb$Gender==1, c(2:8)]
reg.matrix <- as.matrix(expand.grid(c(0, 1), c(0, 1), c(0, 1), c(0, 1), c(0, 1), c(0, 1)))
names(reg.matrix) <- c("Humerus", "Radius", "Ulna", "Femur", "Tibia", "Fibula")
a.AIC <- function(matrix, data){
p <- sum(matrix)
lm <- lm(Stature ~ I(matrix[1]*Humerus) + I(matrix[2]*Radius) + I(matrix[3]*Ulna) + I(matrix[4]*Femur) +
I(matrix[5]*Tibia) + I(matrix[6]*Fibula), data = data)
SSE <- sum(lm$residuals^2)
sigma.p <- sqrt(SSE/nrow(data))
l.theta <- -(nrow(data)/2)*log(2*pi*sigma.p^2) - nrow(data)/2
AIC <- -2*l.theta + 2*(p+2)
return(c(p, matrix, AIC ))
}
AIC.table <- matrix(0, nrow=2^6-1, ncol=8)
for(i in 2:2^6){
AIC.table[i-1,] <- a.AIC(reg.matrix[i,], fdb.female)
}
AIC.table <- as.data.frame(AIC.table)
colnames(AIC.table) <- c("p", "Humerus", "Radius", "Ulna", "Femur", "Tibia", "Fibula", "AIC")
kable(AIC.table) %>% kable_styling(full_width = F, position = "left")
p | Humerus | Radius | Ulna | Femur | Tibia | Fibula | AIC |
---|---|---|---|---|---|---|---|
1 | 1 | 0 | 0 | 0 | 0 | 0 | 259.9771 |
1 | 0 | 1 | 0 | 0 | 0 | 0 | 269.1086 |
2 | 1 | 1 | 0 | 0 | 0 | 0 | 261.6635 |
1 | 0 | 0 | 1 | 0 | 0 | 0 | 271.7366 |
2 | 1 | 0 | 1 | 0 | 0 | 0 | 261.9126 |
2 | 0 | 1 | 1 | 0 | 0 | 0 | 270.4172 |
3 | 1 | 1 | 1 | 0 | 0 | 0 | 263.1569 |
1 | 0 | 0 | 0 | 1 | 0 | 0 | 237.7462 |
2 | 1 | 0 | 0 | 1 | 0 | 0 | 238.3343 |
2 | 0 | 1 | 0 | 1 | 0 | 0 | 238.8233 |
3 | 1 | 1 | 0 | 1 | 0 | 0 | 237.1838 |
2 | 0 | 0 | 1 | 1 | 0 | 0 | 239.1219 |
3 | 1 | 0 | 1 | 1 | 0 | 0 | 238.0431 |
3 | 0 | 1 | 1 | 1 | 0 | 0 | 240.7194 |
4 | 1 | 1 | 1 | 1 | 0 | 0 | 239.0882 |
1 | 0 | 0 | 0 | 0 | 1 | 0 | 256.2848 |
2 | 1 | 0 | 0 | 0 | 1 | 0 | 255.7972 |
2 | 0 | 1 | 0 | 0 | 1 | 0 | 258.2440 |
3 | 1 | 1 | 0 | 0 | 1 | 0 | 257.2224 |
2 | 0 | 0 | 1 | 0 | 1 | 0 | 258.0804 |
3 | 1 | 0 | 1 | 0 | 1 | 0 | 256.8880 |
3 | 0 | 1 | 1 | 0 | 1 | 0 | 259.7961 |
4 | 1 | 1 | 1 | 0 | 1 | 0 | 258.8199 |
2 | 0 | 0 | 0 | 1 | 1 | 0 | 239.6103 |
3 | 1 | 0 | 0 | 1 | 1 | 0 | 239.0677 |
3 | 0 | 1 | 0 | 1 | 1 | 0 | 240.8151 |
4 | 1 | 1 | 0 | 1 | 1 | 0 | 238.7761 |
3 | 0 | 0 | 1 | 1 | 1 | 0 | 241.1219 |
4 | 1 | 0 | 1 | 1 | 1 | 0 | 239.5636 |
4 | 0 | 1 | 1 | 1 | 1 | 0 | 242.7162 |
5 | 1 | 1 | 1 | 1 | 1 | 0 | 240.6299 |
1 | 0 | 0 | 0 | 0 | 0 | 1 | 254.1055 |
2 | 1 | 0 | 0 | 0 | 0 | 1 | 253.0946 |
2 | 0 | 1 | 0 | 0 | 0 | 1 | 255.8753 |
3 | 1 | 1 | 0 | 0 | 0 | 1 | 253.4944 |
2 | 0 | 0 | 1 | 0 | 0 | 1 | 255.3335 |
3 | 1 | 0 | 1 | 0 | 0 | 1 | 252.6699 |
3 | 0 | 1 | 1 | 0 | 0 | 1 | 256.7320 |
4 | 1 | 1 | 1 | 0 | 0 | 1 | 254.5758 |
2 | 0 | 0 | 0 | 1 | 0 | 1 | 239.7362 |
3 | 1 | 0 | 0 | 1 | 0 | 1 | 240.2033 |
3 | 0 | 1 | 0 | 1 | 0 | 1 | 240.4046 |
4 | 1 | 1 | 0 | 1 | 0 | 1 | 239.0754 |
3 | 0 | 0 | 1 | 1 | 0 | 1 | 240.7552 |
4 | 1 | 0 | 1 | 1 | 0 | 1 | 239.9393 |
4 | 0 | 1 | 1 | 1 | 0 | 1 | 242.3736 |
5 | 1 | 1 | 1 | 1 | 0 | 1 | 241.0186 |
2 | 0 | 0 | 0 | 0 | 1 | 1 | 256.0422 |
3 | 1 | 0 | 0 | 0 | 1 | 1 | 254.8405 |
3 | 0 | 1 | 0 | 0 | 1 | 1 | 257.7656 |
4 | 1 | 1 | 0 | 0 | 1 | 1 | 255.2343 |
3 | 0 | 0 | 1 | 0 | 1 | 1 | 257.2406 |
4 | 1 | 0 | 1 | 0 | 1 | 1 | 254.2475 |
4 | 0 | 1 | 1 | 0 | 1 | 1 | 258.7188 |
5 | 1 | 1 | 1 | 0 | 1 | 1 | 256.0629 |
3 | 0 | 0 | 0 | 1 | 1 | 1 | 240.9977 |
4 | 1 | 0 | 0 | 1 | 1 | 1 | 240.0424 |
4 | 0 | 1 | 0 | 1 | 1 | 1 | 241.8762 |
5 | 1 | 1 | 0 | 1 | 1 | 1 | 238.6931 |
4 | 0 | 0 | 1 | 1 | 1 | 1 | 242.1036 |
5 | 1 | 0 | 1 | 1 | 1 | 1 | 239.3107 |
5 | 0 | 1 | 1 | 1 | 1 | 1 | 243.8708 |
6 | 1 | 1 | 1 | 1 | 1 | 1 | 240.6926 |
AIC_Hp <- AIC.table %>% group_by(p) %>% filter(AIC==min(AIC))
kable(AIC_Hp) %>% kable_styling(full_width = F, position = "left")
p | Humerus | Radius | Ulna | Femur | Tibia | Fibula | AIC |
---|---|---|---|---|---|---|---|
1 | 0 | 0 | 0 | 1 | 0 | 0 | 237.7462 |
2 | 1 | 0 | 0 | 1 | 0 | 0 | 238.3343 |
3 | 1 | 1 | 0 | 1 | 0 | 0 | 237.1838 |
4 | 1 | 1 | 0 | 1 | 1 | 0 | 238.7761 |
5 | 1 | 1 | 0 | 1 | 1 | 1 | 238.6931 |
6 | 1 | 1 | 1 | 1 | 1 | 1 | 240.6926 |
b) \((AIC(H_p), p)\)그래프를 그리고 AIC가 최소가 되는 최적 모형을 구하시오.
ggplot(AIC_Hp, aes(x=p, y=AIC)) +
geom_point() + geom_line() +
theme_bw() + scale_x_continuous(breaks = 1:6)
H.star <- lm(Stature ~ Humerus + Radius + Femur, data=fdb.female)
#Humerus=324 / Radius = 245 / Tibia = 372 /
#최적 방정식
H_c <- lm (Stature ~ Humerus+ Radius + Tibia, data=fdb.female)
new.data <- data.frame(Stature=172.72, Humerus=324, Radius = 245, Tibia = 372)
pred <- predict(H_c, new.data, interval = "predict", levels=.95)
colnames(pred) <- c("예측값", "Lwr", "Upr")
#예측값 및 예측 오차
kable(pred) %>% kable_styling(full_width = F, position = "left")
예측값 | Lwr | Upr |
---|---|---|
166.338 | 154.5363 | 178.1397 |
d) a,b)와 동일한 방법으로 남자의 팔다리뼈를 이용하여 키를 예측하는 최적모형을 찾고 남/녀 별 최적 예측방정식의 차이를 비교 검토하시오.
sub.m <- regsubsets(Stature ~ Humerus + Radius + Ulna + Femur + Tibia + Fibula,
data = fdb.male, nbest = 1, nvmax = NULL, method = "exhaustive")
# nbest = AIC 최적이 모형 1개만 보여줌
# nvmax = NULL, p = 1 ~ 6에 대한 모든 경우에 대해
summary(sub.m)
## Subset selection object
## Call: regsubsets.formula(Stature ~ Humerus + Radius + Ulna + Femur +
## Tibia + Fibula, data = fdb.male, nbest = 1, nvmax = NULL,
## method = "exhaustive")
## 6 Variables (and intercept)
## Forced in Forced out
## Humerus FALSE FALSE
## Radius FALSE FALSE
## Ulna FALSE FALSE
## Femur FALSE FALSE
## Tibia FALSE FALSE
## Fibula FALSE FALSE
## 1 subsets of each size up to 6
## Selection Algorithm: exhaustive
## Humerus Radius Ulna Femur Tibia Fibula
## 1 ( 1 ) " " " " " " " " " " "*"
## 2 ( 1 ) " " " " " " "*" " " "*"
## 3 ( 1 ) " " " " "*" "*" " " "*"
## 4 ( 1 ) " " "*" "*" "*" " " "*"
## 5 ( 1 ) " " "*" "*" "*" "*" "*"
## 6 ( 1 ) "*" "*" "*" "*" "*" "*"
tf.matrix <- summary(sub.m)$which
lm.m <- list(NA)
AIC.m <- rep(0, 6)
for (i in 1:6) {
lm.m[[i]] <- lm(Stature ~ ., data = fdb.male[, tf.matrix[i,]])
AIC.m[i] <-AIC(lm.m[[i]])
}
AIC.data <- data.frame(p=c(1:6), AIC.m)
ggplot(AIC.data, aes(x=p, y=AIC.m)) +
geom_point() + geom_line() + theme_bw()
H.male.star <- lm(Stature ~ Femur + Fibula, data=fdb.male)
e) 남녀자료를 합쳐서 성별 및 팔다리뼈 길이 성별과 팔다리뼈의 교호작용(interactions) 을 가능한 독립변수로 사용하여
사망자의 키를 예측하는 최적 방정식을 구하시오.
data.e <- as.numeric(fdb$Gender)*fdb[,3:8]
names(data.e) <- c("G.Humerus", "G.Radius", "G.Ulna", "G.Femur", "G.Tibia", "G.Fibula")
data.final <- cbind(fdb[,2], fdb[,-2], data.e)
sub.e <- regsubsets(Stature ~ . + Gender*.,
data = fdb, nbest = 1, nvmax = NULL, method = "exhaustive")
e.matrix <- summary(sub.e)$which
lm.e <- list(NA)
AIC.e <- rep(0, 13)
for (i in 1:13) {
lm.e[[i]] <- lm(Stature ~ ., data = data.final[, e.matrix[i,]])
AIC.e[i] <-AIC(lm.e[[i]])
}
AIC.e.data <- data.frame(p=c(1:13), AIC.e)
ggplot(AIC.e.data, aes(x=p, y=AIC.e)) +
geom_point() + geom_line() + theme_bw() + scale_x_continuous(breaks = c(1:13))
# final.model <- lm.e[[which(min(AIC.e)==AIC.e)]]