Subject Sex Age Peabody Time 1 Time 2 Time 3 1 Sub1 M 10.41 113 1.67935e-05 1.66073e-05 1.64011e-05 2 Sub1 M 10.41 113 1.21074e-05 1.20908e-05 1.20587e-05 3 Sub1 M 10.41 113 5.22465e-06 5.05071e-06 4.86925e-06
'data.frame': 160 obs. of 7 variables: $ Subject: Factor w/ 20 levels "Sub1","Sub10",..: 1 1 1 1 1 1 1 1 2 2 ... $ Sex : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ... $ Age : num 10.4 10.4 10.4 10.4 10.4 ... $ Peabody: int 113 113 113 113 113 113 113 113 119 119 ... $ Time 1 : num 1.68e-05 1.21e-05 5.22e-06 3.50e-06 9.27e-06 ... $ Time 2 : num 1.66e-05 1.21e-05 5.05e-06 3.21e-06 9.44e-06 ... $ Time 3 : num 1.64e-05 1.21e-05 4.87e-06 2.92e-06 9.61e-06 ...
Peabody Channel_2_Channel_1 Channel_3_Channel_1 Channel_3_Channel_2 1 113 0.2485940 0.46288574 0.21175610 2 119 0.7715194 -0.20377880 -0.29934220 3 77 0.8711941 0.23371935 0.20358104 4 112 0.2212408 0.06037749 0.08020694 5 115 0.3854612 0.29656393 0.15374033 6 93 0.7481600 0.46951380 0.23374250
'data.frame': 20 obs. of 6 variables: $ Peabody : int 113 119 77 112 115 93 105 125 109 86 ... $ Channel_2_Channel_1: num 0.249 0.772 0.871 0.221 0.385 ... $ Channel_3_Channel_1: num 0.4629 -0.2038 0.2337 0.0604 0.2966 ... $ Channel_3_Channel_2: num 0.2118 -0.2993 0.2036 0.0802 0.1537 ... $ Ch2_mean : num 1.09e-06 -4.08e-07 2.94e-07 -6.49e-07 3.64e-08 ... $ Ch2_var : num 6.07e-11 9.48e-11 4.13e-11 1.19e-10 1.35e-11 ...
dta <- readRDS("ML_NIRS_Connectivity_MeanVar.Rdata")
Peabodydta<- dta[-12,-c(1,2,3,5:11)]
# Log Transformation Do not Improve Performance
randomnum = 10 ; kf <- c(3,15)
cv.errors.svm<- matrix (NA ,kf[2] - kf[1] +1 ,randomnum)
cv.errors.tree<- matrix (NA ,kf[2] - kf[1] +1 ,randomnum)
cv.errors.rf<- matrix (NA ,kf[2] - kf[1] +1 ,randomnum)
cv.errors.boost<- matrix (NA ,kf[2] - kf[1] +1 ,randomnum)
svm_tune <- tune(svm,Peabody~., data =Peabodydta,
kernel="radial",
ranges = list(cost = seq(3, 10, 0.1),
gamma =seq(0, 1, 0.1)),
tunecontrol = tune.control(sampling = "cross",
cross = 10))
for (i in 1:randomnum){
set.seed(i)
for (k in kf[1]:kf[2]){
folds.svm <- sample(1:k,nrow(Peabodydta),replace =T)
mat <- matrix(NA,k,1)
for (j in 1:k){
rst_svm <- svm(Peabody~., data = Peabodydta[folds.svm != j,],
type = "eps-regression",
cost = svm_tune$best.model$cost, kernel = "radial",
gamma = svm_tune$best.model$gamma)
pred <- predict(rst_svm,Peabodydta[folds.svm == j,])
mat[j,1] = mean((Peabodydta$Peabody[folds.svm == j]-pred)^2,
na.rm = T)
}
cv.errors.svm[k-2,i] <- mean(mat[j,1],na.rm = T)
}
}
mean.cv.errors.svm <- mean(cv.errors.svm,na.rm = T) mean.cv.errors.svm
[1] 151.4226
for (i in 1:randomnum){
set.seed(i+105)
for (k in kf[1]:kf[2]){
folds.tree <- sample(1:k,nrow(Peabodydta),replace =T) ; mat <- matrix(NA,k,1)
for (j in 1:k){
treemodel <- tree(Peabody~. ,data = Peabodydta[folds.tree != j,])
if (! inherits(treemodel, "singlenode")){
treemodel2 <- prune.tree(treemodel,
newdata =Peabodydta[folds.tree != j,])
treemodel3 <- prune.tree(treemodel,
newdata = Peabodydta[folds.tree != j,],
best = treemodel2$size[which.min(treemodel2$dev)])
pred <- predict(treemodel3,Peabodydta[folds.tree == j,])
mat[j,1] = mean((Peabodydta$Peabody[folds.tree == j]-pred)^2,
na.rm = T)
} else {
pred <- predict(treemodel,Peabodydta[folds.tree == j,])
mat[j,1] = mean((Peabodydta$Peabody[folds.tree == j]-pred)^2,
na.rm = T)
}
}
cv.errors.tree[k-2,i] <- mean(mat[j,1],na.rm = T)
}
}
mean.cv.errors.tree <- mean(cv.errors.tree,na.rm = T) mean.cv.errors.tree
[1] 239.9233
TRF <- tuneRF(Peabodydta[,-1],Peabodydta[,1], ntreeTry=100001,stepFactor=1.5,trace = F)
for (i in 1:randomnum){
set.seed(i+105)
for (k in kf[1]:kf[2]){
folds.rf <- sample(1:k,nrow(Peabodydta),replace =T)
mat <- matrix(NA,k,1)
for (j in 1:k){
rst_rf <- randomForest(Peabody ~.,
data = Peabodydta[folds.rf != j,],
mtry = 14,ntree = 1000,
nodesize = 1,
importance = F)
pred <- predict(rst_rf,Peabodydta[folds.rf == j,])
mat[j,1] = mean((Peabodydta$Peabody[folds.rf == j]-pred)^2,
na.rm = T)
}
cv.errors.rf[k-2,i] <- mean(mat[j,1],na.rm = T)
}
}
mean.cv.errors.rf <- mean(cv.errors.rf,na.rm = T) mean.cv.errors.rf
[1] 147.9699
randomnum = 2 ###
for (i in 1:randomnum){
set.seed(i)
for (k in (kf[1]+7):kf[2]){
folds.boost <- sample(1:k,nrow(Peabodydta),replace =T)
mat <- matrix(NA,k,1)
for (j in 1:k){
rst_gbm <- gbm(Peabody ~ . ,
data = Peabodydta[folds.boost != j,],
distribution = "gaussian",
n.trees = 1000, cv.folds = 2,
interaction.depth = 10,
n.minobsinnode = 1, shrinkage = 0.001)
pred <- predict(rst_gbm,Peabodydta[folds.boost == j,])
mat[j,1] = mean((Peabodydta$Peabody[folds.boost== j]-pred)^2,
na.rm = T)
}
cv.errors.boost[k-2,i] <- mean(mat[j,1],na.rm = T)
}
}
mean.cv.errors.boost <- mean(cv.errors.boost,na.rm = T) mean.cv.errors.boost
[1] 126.5121
# variable information # R-Squared Interval ?
k <- 10 ; nvmax= 10 ; seed <- 50
mean.cv.errorsP <- matrix(NA,seed,nvmax,dimnames =list(NULL , paste (1:10) ))
for (m in 1:seed){
set.seed(m+105)
foldsP <- sample(1:k,nrow(Peabodydta),replace =T)
cv.errorsP <- matrix (NA ,k ,nvmax)
for(j in 1:k){
best.fit <- regsubsets(Peabody~. ,data=Peabodydta[foldsP != j,],
method ="forward",nvmax= nvmax)
for(i in 1:nvmax) {
pred <- predict(best.fit ,Peabodydta[foldsP == j,] , id=i)
cv.errorsP [j,i]=mean( (Peabodydta$Peabody[foldsP == j]-pred)^2 ,
na.rm = T)
}
}
mean.cv.errorsP[m,] <- apply(cv.errorsP ,2, mean,na.rm =T)
}
sort(apply(mean.cv.errorsP,2, mean,na.rm =T))[1:10]
1 2 3 4 5 6 7
459.4132 629.5945 853.4165 1191.0588 1498.7264 1787.4391 1831.0421
8 9 10
1950.2570 2153.6866 2208.4213
reg.bestP <- regsubsets(Peabody~. ,data=Peabodydta, nvmax =1) coef(reg.bestP,1)
(Intercept) Ch4_var 1.171425e+02 -2.009040e+11
cv.errors.lm<- matrix (NA ,kf[2] - kf[1] +1 ,randomnum)
Rs.lm <- matrix (NA ,kf[2] - kf[1] +1 ,randomnum)
for (i in 1:randomnum){
set.seed(i)
for (k in kf[1]:kf[2]){
folds.lm <- sample(1:k,nrow(Peabodydta),replace =T)
mat <- matrix(NA,k,1)
mat2 <- matrix(NA,k,1)
for (j in 1:k){
best.fit <- lm(Peabody~Ch4_var ,data=Peabodydta[folds.lm != j,])
pred <- predict(best.fit ,Peabodydta[folds.lm == j,])
mat[j,1] = mean((Peabodydta$Peabody[folds.lm == j]-pred)^2,na.rm = T)
}
cv.errors.lm[k-2,i] <- mean(mat[j,1],na.rm = T)
}
}
mean.cv.errors.lm <- mean(cv.errors.lm,na.rm = T) mean.cv.errors.lm
[1] 79.22216
fit1 <- summary(lm.beta(lm(Peabody~Ch4_var,data=Peabodydta))) fit1$r.squared
[1] 0.4390768
fit1$adj.r.squared
[1] 0.4060813
# 如何以別的模型得到Fitted R-Squared ? # or 以LM 得到R-Squared Interval