library(AppliedPredictiveModeling)
data(hepatic)
# See page 8 of the text for a description of the dataset. Ignore the chem predictors, and only use the bio
# predictors.
# 1. Logistic Regression
# For this problem, do not worry about pre-processing or model tuning.
# a. Split the data into a training and test set. Decide whether to use random or stratified sampling and
# explain your decision.
# b. Use logistic regression and the one-vs-all strategy to build a classification model.
# c. Predict the class for each of the samples in the test set.
# d. Print the accuracy of your model
## Load packages
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
## a. Split the data into a training and test set using simple random sampling.
# Draw 75% of the samples (211 of the 281 in this dataset) as the training set.
# Attach the response to the bio predictors so both are subset together.
bio$injury <- injury
# Derive the sample count from the data instead of hard-coding 281, so the
# split stays correct if the dataset size ever differs.
n_samples <- nrow(bio)
# NOTE: despite its name, nfolds is the training-set size, not a fold count.
nfolds <- round(n_samples * 0.75)
nfolds
## [1] 211
# Fixed seed so the random split is reproducible.
set.seed(12334)
ind <- sample(seq_len(n_samples), nfolds)
## Training set
biotrain <- bio[ind, ]
## Test set: every row that was not drawn for training
biotest <- bio[-ind, ]
## Working copies; the one-vs-all loop below overwrites their injury column
## with a 0/1 indicator while biotrain/biotest keep the original labels.
biotrainlog <- biotrain
biotestlog <- biotest
## Distinct injury classes, in order of first appearance.
# NOTE: this name shadows base::class(); it is kept unchanged for
# compatibility with code that refers to it.
class <- unique(injury)
# Empty container for the per-class accuracy results.
output <- data.frame()
# b. Use logistic regression and the one-vs-all strategy to build a classification model.
# One binary logistic regression is fitted per injury class: the class of
# interest is coded 1 and every other class 0.
injury_levels <- unique(injury)
# Predicted probabilities: one row per test sample, one column per class.
ova_prob <- matrix(
  NA_real_,
  nrow = nrow(biotest),
  ncol = length(injury_levels),
  dimnames = list(NULL, as.character(injury_levels))
)
# Preallocated per-class accuracies (avoids growing a data frame in the loop).
acc_by_class <- numeric(length(injury_levels))
for (i in seq_along(injury_levels)) {
  # Recode the response as "this class" (1) versus "any other class" (0).
  biotrainlog$injury <- ifelse(biotrain$injury == injury_levels[i], 1, 0)
  biotestlog$injury <- ifelse(biotest$injury == injury_levels[i], 1, 0)
  logits <- glm(
    injury ~ .,
    data = biotrainlog,
    family = binomial(link = "logit")
  )
  # c. Predict the class for each of the samples in the test set.
  # type = "response" returns probabilities, not class labels.
  pred <- predict(logits, biotestlog, type = "response")
  ova_prob[, i] <- pred
  # Threshold the probabilities at 0.5 before tabulating. Tabulating the raw
  # probabilities yields one row per distinct value, so the diagonal of that
  # table is not the number of correct predictions. Fixing the factor levels
  # guarantees a 2 x 2 table even when only one label is predicted.
  accuracy <- table(
    predicted = factor(as.integer(pred > 0.5), levels = c(0, 1)),
    actual = factor(biotestlog$injury, levels = c(0, 1))
  )
  acc <- sum(diag(accuracy)) / sum(accuracy)
  acc_by_class[i] <- acc
}
# Binary (one-vs-rest) accuracy of each per-class model.
output <- data.frame(class = injury_levels, Accuracy = acc_by_class)
# One-vs-all decision: assign each test sample to the class whose model gives
# the highest probability (ties resolved in favour of the first class).
predicted_class <- as.character(injury_levels)[
  max.col(ova_prob, ties.method = "first")
]
# Multiclass accuracy of the combined one-vs-all classifier.
overall_accuracy <- mean(predicted_class == as.character(biotest$injury))
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
# d. Print the accuracy of your model
output
overall_accuracy
## NOTE(review): the figures below were recorded with the earlier calculation
## that tabulated raw probabilities; re-run the script to refresh them.
## class Accuracy
## 1 Mild 0.1571429
## 2 None 0.2000000
## 3 Severe 0.4714286