##
##       0   1
##   0 119  30
##   1   5  27
## [1] "number in class 0"
## [1] 124
## [1] 0.8066298
## [1] 0.1933702
## [1] 1
## [1] 0.84375
## [1] 0.4736842
## [1] 0.9596774
## [1] 0.6067416
So long as we pass 1 in as the positive value, the caret package returns the same sensitivity and specificity as our functions.
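One quick way to see the agreement, before looking at the full confusionMatrix() output, is to call caret's sensitivity() and specificity() helpers next to our own functions. This is a minimal sketch; it assumes classification_data (with factor class and scored.class columns) and the *_defined functions from the code appendix are already in the workspace.
#caret helpers vs. our functions; all four calls should agree when 1 is the positive class
sensitivity(classification_data$scored.class, classification_data$class, positive = "1")
sensitivity_defined(classification_data_ints)
specificity(classification_data$scored.class, classification_data$class, negative = "0")
specificity_defined(classification_data_ints)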
confusionMatrix(classification_data$scored.class,classification_data$class,positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 119 30
## 1 5 27
##
## Accuracy : 0.8066
## 95% CI : (0.7415, 0.8615)
## No Information Rate : 0.6851
## P-Value [Acc > NIR] : 0.0001712
##
## Kappa : 0.4916
## Mcnemar's Test P-Value : 4.976e-05
##
## Sensitivity : 0.4737
## Specificity : 0.9597
## Pos Pred Value : 0.8438
## Neg Pred Value : 0.7987
## Prevalence : 0.3149
## Detection Rate : 0.1492
## Detection Prevalence : 0.1768
## Balanced Accuracy : 0.7167
##
## 'Positive' Class : 1
##
## $AUC
## sum(a, na.rm = TRUE)
## 1 0.8417516
## $plot
(ROC curve plot from ROC_CURVE_illustrated rendered here.)
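The $AUC value above is computed inside ROC_CURVE_illustrated by a trapezoid approximation over the ROC points; the odd column name sum(a, na.rm = TRUE) is simply the label dplyr gives the summarized column. A minimal standalone sketch of the same calculation on made-up (false positive rate, true positive rate) points, ordered from 1 down to 0 as in the function:
library(dplyr)
#Hypothetical ROC points; the real function sweeps 192 thresholds
roc_points <- data.frame(x = c(1, 0.6, 0.3, 0.1, 0), y = c(1, 0.9, 0.7, 0.4, 0))
roc_points %>%
  mutate(z = lag(x) - x, a = (lag(y) + y) * .5 * z) %>%  #width times average height of each trapezoid
  summarize(auc = sum(a, na.rm = TRUE))                  #0.75 for these toy points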
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(caret)))
suppressWarnings(suppressMessages(library(pROC)))
library(rmarkdown)
classification_data<-read.csv('C:/Users/dawig/Desktop/Data621/classification-output-data.csv')
roc_tester<-classification_data
classification_data<-classification_data[,c(9:11)]
classification_data_ints<-classification_data
classification_data[,1]<- as.factor(classification_data[,1])
classification_data[,2]<- as.factor(classification_data[,2])
diffusion_categories<-function(df){
#Code each (class, scored.class) pair: 2 = true negative, 3 = false positive, 4 = false negative, 5 = true positive
df %>%
mutate(real_factor_helper =(class +1)*2) %>%
mutate(real_factor = real_factor_helper+scored.class)->df
diffusion_c<- list(1,2,3,4,5)
diffusion_c[[1]]<-length(df$real_factor)             #total observations
diffusion_c[[2]]<-length(which(df$real_factor == 2)) #true negatives
diffusion_c[[3]]<-length(which(df$real_factor == 3)) #false positives
diffusion_c[[4]]<-length(which(df$real_factor == 4)) #false negatives
diffusion_c[[5]]<-length(which(df$real_factor == 5)) #true positives
return(diffusion_c)
}
Table_Set<-diffusion_categories(classification_data_ints)
Table_Set<-matrix(c(Table_Set[2],Table_Set[3],Table_Set[4],Table_Set[5]),nrow =2)
table(classification_data$scored.class,classification_data$class)
print("number in class 0")
print(length(classification_data[which(classification_data$class==0),1]))
#Counts from diffusion_categories: [[1]] total, [[2]] TN, [[3]] FP, [[4]] FN, [[5]] TP
accuracy_defined<-function(df){d<-diffusion_categories(df); return((d[[5]]+d[[2]])/d[[1]])}
classification_error_rate_defined<-function(df){d<-diffusion_categories(df); return((d[[3]]+d[[4]])/d[[1]])}
precision_defined<-function(df){d<-diffusion_categories(df); return(d[[5]]/(d[[5]]+d[[3]]))}
sensitivity_defined<-function(df){d<-diffusion_categories(df); return(d[[5]]/(d[[5]]+d[[4]]))}
specificity_defined<-function(df){d<-diffusion_categories(df); return(d[[2]]/(d[[2]]+d[[3]]))}
F1_score_defined<-function(df){p<-precision_defined(df); s<-sensitivity_defined(df); return((2*p*s)/(p+s))}
#Using our functions:
accuracy_defined(classification_data_ints)
classification_error_rate_defined(classification_data_ints)
accuracy_defined(classification_data_ints) + classification_error_rate_defined(classification_data_ints)
precision_defined(classification_data_ints)
sensitivity_defined(classification_data_ints)
specificity_defined(classification_data_ints)
F1_score_defined(classification_data_ints)
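#A quick arithmetic check, not part of the original calls: the F1 value printed in the report body
#can be recovered by hand from the printed precision and sensitivity.
p <- 0.84375          #precision_defined() output
s <- 0.4736842        #sensitivity_defined() output
2 * p * s / (p + s)   #about 0.6067, matching F1_score_defined()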
ROC_CURVE_illustrated<-function(df) {
#This function requires the actual class to be stored as 'class', the predicted class as 'scored.class',
#and the predicted probability as 'scored.probability'.
x<-rep(4,192)
y<-rep(0,192)
sets<-cbind(x,y)
#Sweep 192 thresholds from 0.025 to 0.98, recording (1 - specificity, sensitivity) at each one.
for(i in 1:192){
threshold<-(.02+i*.005)
indices<-which(df$scored.probability > threshold)
df$scored.class[indices]<-1
df$scored.class[-indices]<-0
sets[i,1]<-1-specificity_defined(df)
sets[i,2]<-sensitivity_defined(df)
}
sets<-as.data.frame(sets)
#Trapezoid rule over the ROC points gives the area under the curve.
sets %>% mutate(z = lag(x)-x) %>% mutate(a = (lag(y)+y)*.5*z) %>%
summarize(sum(a,na.rm = TRUE))->area_under_curve
title.color <- element_text(face = "bold", color = '#4941a3',size=18)
plotA<-ggplot(data=sets)+
geom_path(x=sets$x,y=sets$y,color='#4941a3',size=1)+
scale_x_continuous(limit = c(0,1),breaks=c(seq(0,1,.1)))+
scale_y_continuous(limit = c(0,1),breaks=c(seq(0,1,.1)))+
theme(axis.text.x = element_text(angle = 70, hjust = .3, vjust = .4,size=14,color='#4941a3'),
axis.text.y = element_text(size=14,color='#4941a3'),axis.title = element_text(size=12))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_rect(fill = '#d7e6f7'),title = title.color)+
labs(title="ROC Curve",y='True Positive',x='False Positive')+
geom_abline(slope=1,color='white')
function_return<-list(plot=plotA,AUC=area_under_curve)
return(function_return)
}
ROC_Curve_Set<-ROC_CURVE_illustrated(roc_tester)
confusionMatrix(classification_data$scored.class,classification_data$class,positive="1")
print(ROC_Curve_Set[2])
print(ROC_Curve_Set[1])
roc_function_object<-roc(roc_tester$class,roc_tester$scored.probability)
plot(roc_function_object)
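#Optional comparison, not in the original appendix: pROC's own AUC for the same roc object,
#to set against the trapezoidal estimate from ROC_CURVE_illustrated().
auc(roc_function_object)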