# models.R

library(AppliedPredictiveModeling)
data(hepatic)

# See page 8 of the text for a description of the dataset. Ignore the chem predictors, and only use the bio
# predictors.



# 1. Logistic Regression
# For this problem, do not worry about pre-processing or model tuning.
# a. Split the data into a training and test set. Decide whether to use random or stratified sampling and
# explain your decision.
# b. Use logistic regression and the one-vs-all strategy to build a classification model.
# c. Predict the class for each of the samples in the test set.
# d. Print the accuracy of your model

## Load packages
library(glmnet)

## Loading required package: Matrix

## Loading required package: foreach

## Loaded glmnet 2.0-5

## a. Split the data into a training and test set using simple random
##    sampling.
# Attach the response to the biological predictors so that predictors and
# labels stay aligned when rows are subset.
bio$injury <- injury
# Training-set size: 75% of the available rows (211 of 281 in the recorded
# run). The size is derived from the data rather than hard-coded.
nfolds <- round(nrow(bio) * 0.75)
nfolds

## [1] 211

# Fixed seed so the split is reproducible.
set.seed(12334)
ind <- sample(seq_len(nrow(bio)), nfolds)
## Training set
biotrain <- bio[ind, ]
## Test set: every row not drawn for training
biotest <- bio[-ind, ]

## Working copies whose `injury` column is overwritten with a 0/1 target
## inside the loop; `biotrain` / `biotest` keep the original labels.
biotrainlog <- biotrain
biotestlog <- biotest

## Distinct injury classes.
## NOTE: this variable shares its name with base::class(); calls to class()
## still resolve to the function. The name is kept for compatibility.
class <- unique(injury)

# b. Use logistic regression and the one-vs-all strategy to build a
#    classification model.
# prob[j, i] holds the probability, estimated by the i-th binary model, that
# test sample j belongs to class[i].
prob <- matrix(
  NA_real_,
  nrow = nrow(biotest),
  ncol = length(class),
  dimnames = list(NULL, as.character(class))
)
# Collect the per-class rows in a list and bind once after the loop instead
# of growing a data frame with rbind() on every iteration.
per_class <- vector("list", length(class))

for (i in seq_along(class)) {
  # Recode the response: 1 for the current class, 0 for every other class.
  biotrainlog$injury <- ifelse(biotrain$injury == class[i], 1, 0)
  biotestlog$injury <- ifelse(biotest$injury == class[i], 1, 0)
  logits <- glm(
    injury ~ .,
    data = biotrainlog,
    family = binomial(link = "logit")
  )
  # c. Predict the class for each of the samples in the test set.
  # type = "response" yields probabilities, not class labels.
  pred <- predict(logits, biotestlog, type = "response")
  prob[, i] <- pred
  # Convert probabilities to 0/1 labels before cross-tabulating; tabulating
  # the raw probabilities does not give a confusion matrix.
  pred_label <- ifelse(pred > 0.5, 1, 0)
  # Fixed levels keep the table 2 x 2 even when only one label is predicted,
  # so the diagonal always counts the correct predictions.
  accuracy <- table(
    predicted = factor(pred_label, levels = c(0, 1)),
    actual = factor(biotestlog$injury, levels = c(0, 1))
  )
  acc <- sum(diag(accuracy)) / sum(accuracy)
  per_class[[i]] <- data.frame(class = class[i], Accuracy = acc)
}
output <- do.call(rbind, per_class)

# One-vs-all decision: assign each test sample to the class whose binary
# model gives the highest probability. ties.method = "first" keeps the
# result deterministic (the default breaks ties at random).
pred_class <- class[max.col(prob, ties.method = "first")]
overall_accuracy <- mean(
  as.character(pred_class) == as.character(biotest$injury)
)

# Warnings recorded when the three models were fitted. They report
# non-convergence, fitted probabilities of 0 or 1 and a rank-deficient fit,
# so the estimated probabilities should be treated with caution.

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

# d. Print the accuracy of your model
# Per-class accuracy of each one-vs-rest model (threshold 0.5).
output

# NOTE(review): the output recorded from the earlier run
#   1   Mild 0.1571429
#   2   None 0.2000000
#   3 Severe 0.4714286
# was computed by tabulating raw probabilities against the 0/1 labels, so
# those figures are not accuracies. Re-run the script to regenerate them.

# Overall accuracy of the combined one-vs-all classifier on the test set.
overall_accuracy

# models.R

# daitu

# Sun Mar 5 12:55:39 2017