#Load Packages Into Working Memory
library("ridittools")
library("Amelia")
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library("ggvis")
library("lattice")
library("ggplot2")
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:ggvis':
##
## resolution
library("e1071")
library("caret")
library("FactoMineR")
library("ROCR")
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library("arules")
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:ggvis':
##
## band
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library("mlr")
## Loading required package: ParamHelpers
##
## Attaching package: 'mlr'
## The following object is masked from 'package:ROCR':
##
## performance
## The following object is masked from 'package:caret':
##
## train
## The following object is masked from 'package:e1071':
##
## impute
## The following object is masked from 'package:ridittools':
##
## acc
library("Matrix")
library("psych")
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:arules':
##
## intersect, recode, setdiff, setequal, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("corrplot")
## corrplot 0.84 loaded
#############################################################################
# Part II: Data Import and Transformation
#############################################################################
#Read the Raw CSV Into Working Memory (Adjust the Path as Needed)
mammMasses <- read.csv(
  file = "c:/users/Joshu/documents/datasets/mammMasses.csv"
)
#Swap the First Two Columns so "Age" is First and "Class" is Last
mammMasses <- mammMasses[, c(2, 1, 3, 4, 5, 6)]
#Inspect Column Types and the First Few Values
str(mammMasses)
## 'data.frame': 829 obs. of 6 variables:
## $ Age : int 67 58 28 57 76 42 36 60 54 52 ...
## $ BI_RADS: int 5 5 4 5 5 3 4 4 4 3 ...
## $ Shape : int 3 4 1 1 1 2 3 2 1 3 ...
## $ Margin : int 5 5 1 5 4 1 1 1 1 4 ...
## $ Density: int 3 3 3 3 3 3 2 2 3 3 ...
## $ Class : int 1 1 0 1 1 1 0 0 0 0 ...
#Create Pairwise Product Features
#Each new variable is the row-by-row product of two of the five predictor
#columns, binned into a small number of groups. The products are computed
#directly with `*`: building the full n x n matrix with tcrossprod() and
#keeping only its diag() yields the same vector at O(n^2) time and memory.
#Create a Copy of mammMasses, "cross"
cross <- mammMasses
#Make Each Predictor Column Into a Unique Vector
#(Column Order: Age, BI_RADS, Shape, Margin, Density)
dot1 <- cross[, 1]
dot2 <- cross[, 2]
dot3 <- cross[, 3]
dot4 <- cross[, 4]
dot5 <- cross[, 5]
#Helper: multiply two columns element-wise (as doubles, matching what
#tcrossprod() produced), bin the product into `breaks` groups with
#discretize() using its default method, and return each row's bin index
discretize_product <- function(x, y, breaks) {
  product <- as.numeric(x) * as.numeric(y)
  as.integer(discretize(product, breaks = breaks))
}
#The First Six Products Are Binned Into 5 Groups
dotp1 <- discretize_product(dot1, dot2, breaks = 5)
dotp2 <- discretize_product(dot1, dot3, breaks = 5)
dotp3 <- discretize_product(dot1, dot4, breaks = 5)
dotp4 <- discretize_product(dot1, dot5, breaks = 5)
dotp5 <- discretize_product(dot2, dot3, breaks = 5)
dotp6 <- discretize_product(dot2, dot4, breaks = 5)
#The Last Four Products Are Binned Into 3 Groups
dotp7 <- discretize_product(dot2, dot5, breaks = 3)
dotp8 <- discretize_product(dot3, dot4, breaks = 3)
dotp9 <- discretize_product(dot3, dot5, breaks = 3)
dotp10 <- discretize_product(dot4, dot5, breaks = 3)
#Bind New Variables Into a Dataset, "cross2", Named After the Column Pairs
cross2 <- data.frame(
  age_birads = dotp1,
  age_shape = dotp2,
  age_margin = dotp3,
  age_density = dotp4,
  birads_shape = dotp5,
  birads_margin = dotp6,
  birads_density = dotp7,
  shape_margin = dotp8,
  shape_density = dotp9,
  margin_density = dotp10
)
#Cbind cross2 onto mammMasses as mammMasses2
mammMasses2 <- cbind(mammMasses, cross2)
#Principal Component Analysis & CorrPlot
#Run PCA() on every column of mammMasses2 (the target "Class" is included)
#and draw the default graphs. TRUE is spelled out because T is an ordinary
#variable that can be reassigned.
pca1 <- PCA(mammMasses2, graph = TRUE)


pca1
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 829 individuals, described by 16 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
#PCA Eigenvalues
#One row per component: eigenvalue, percentage of variance, and cumulative
#percentage of variance
pca1$eig
## eigenvalue percentage of variance
## comp 1 9.51428195 59.4642622
## comp 2 1.83649903 11.4781189
## comp 3 1.44938826 9.0586766
## comp 4 1.19992961 7.4995601
## comp 5 0.91726785 5.7329241
## comp 6 0.47778656 2.9861660
## comp 7 0.14444314 0.9027696
## comp 8 0.09218602 0.5761626
## comp 9 0.08020689 0.5012931
## comp 10 0.06574653 0.4109158
## comp 11 0.05631033 0.3519396
## comp 12 0.04819639 0.3012275
## comp 13 0.03651406 0.2282128
## comp 14 0.03154426 0.1971516
## comp 15 0.02772238 0.1732649
## comp 16 0.02197674 0.1373547
## cumulative percentage of variance
## comp 1 59.46426
## comp 2 70.94238
## comp 3 80.00106
## comp 4 87.50062
## comp 5 93.23354
## comp 6 96.21971
## comp 7 97.12248
## comp 8 97.69864
## comp 9 98.19993
## comp 10 98.61085
## comp 11 98.96279
## comp 12 99.26402
## comp 13 99.49223
## comp 14 99.68938
## comp 15 99.86265
## comp 16 100.00000
#Descriptive Stats for PCs
#For each dimension, lists the variables correlated with it together with
#the correlation and its p-value
dimdesc(pca1)
## $Dim.1
## $Dim.1$quanti
## correlation p.value
## age_margin 0.9051527 1.725999e-309
## age_shape 0.8981288 2.549486e-297
## birads_margin 0.8777307 1.789348e-266
## Margin 0.8647586 1.325292e-249
## birads_shape 0.8614019 1.596551e-245
## Shape 0.8498026 3.309454e-232
## shape_density 0.8486520 6.012753e-231
## shape_margin 0.8371301 6.888295e-219
## age_birads 0.7792566 5.275521e-170
## margin_density 0.7650969 2.885561e-160
## Class 0.7137701 4.786398e-130
## birads_density 0.6936864 5.669078e-120
## age_density 0.6608153 3.635665e-105
## Age 0.6472360 1.394858e-99
## BI_RADS 0.6240130 1.147573e-90
## Density 0.2380871 3.788557e-12
##
##
## $Dim.2
## $Dim.2$quanti
## correlation p.value
## age_density 0.65153216 2.564356e-101
## Age 0.60861890 3.683379e-85
## age_birads 0.52185252 4.305369e-59
## birads_density 0.26379977 1.152999e-14
## Density 0.23825756 3.653567e-12
## BI_RADS 0.15053613 1.345468e-05
## Class 0.08197549 1.824143e-02
## margin_density -0.13388294 1.104498e-04
## birads_margin -0.18739477 5.460630e-08
## Margin -0.25640462 6.527642e-14
## birads_shape -0.27246623 1.409126e-15
## shape_density -0.31275583 2.868384e-20
## Shape -0.37267541 1.037324e-28
## shape_margin -0.42451122 1.345538e-37
##
##
## $Dim.3
## $Dim.3$quanti
## correlation p.value
## Density 0.85966501 1.874660e-243
## birads_density 0.47577662 4.719915e-48
## margin_density 0.42644766 5.829272e-38
## age_margin -0.07291163 3.582355e-02
## Class -0.08946469 9.960938e-03
## birads_shape -0.09622983 5.555224e-03
## Shape -0.14997710 1.449491e-05
## age_birads -0.22862694 2.716067e-11
## age_shape -0.25772116 4.813420e-14
## Age -0.34707472 7.053441e-25
#PCA Loadings
#Fit princomp() on the first 12 columns and print the loadings. All rows are
#taken with an empty row index instead of a hard-coded 1:829, so the call
#keeps working if the number of observations changes.
#NOTE(review): mammMasses2 has 16 columns at this point, so 1:12 includes
#"Class" and leaves out the last four product features - confirm intended.
model <- princomp(~ ., mammMasses2[, 1:12], na.action = na.omit)
model$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Age 0.985 0.126
## BI_RADS -0.129 0.587 0.222
## Shape -0.385 0.424 -0.294 -0.124
## Margin -0.504 -0.443 -0.205
## Density 0.366 0.245
## Class 0.102 -0.247 0.940
## age_birads -0.129 0.584 0.105 -0.629 -0.259
## age_shape -0.325 0.423 -0.256 -0.304 -0.134 0.541
## age_margin -0.345 -0.345 -0.111 0.124 -0.261 -0.476
## age_density 0.889 0.161
## birads_shape -0.369 0.437 0.190 -0.106 0.378 -0.427
## birads_margin -0.424 -0.343 0.239 -0.117 0.314 0.518
## Comp.9 Comp.10 Comp.11 Comp.12
## Age
## BI_RADS -0.106 -0.423 -0.324 0.519
## Shape 0.268 0.431 0.546
## Margin 0.542 -0.446
## Density 0.226 -0.692 0.410 -0.324
## Class -0.146
## age_birads 0.301 0.191 -0.106
## age_shape -0.269 -0.266 -0.255 -0.161
## age_margin -0.609 -0.160 0.134 0.108
## age_density 0.340 -0.176 0.152
## birads_shape 0.190 -0.173 -0.474
## birads_margin -0.156 0.266 0.396
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.083 0.083 0.083 0.083 0.083 0.083 0.083 0.083
## Cumulative Var 0.083 0.167 0.250 0.333 0.417 0.500 0.583 0.667
## Comp.9 Comp.10 Comp.11 Comp.12
## SS loadings 1.000 1.000 1.000 1.000
## Proportion Var 0.083 0.083 0.083 0.083
## Cumulative Var 0.750 0.833 0.917 1.000
#CorrPlot
#Plot the pairwise correlation matrix of all columns of mammMasses2
corrplot(cor(mammMasses2), method = "circle")

#Remove the variables that are strongly correlated with variables that are
#*not* one of their derived counterparts (Shape, Density, Margin).
#Helps smooth the independence assumption of the NB classifier.
#shape_density was removed after the false-positive/negative analysis.
#Columns are selected by name rather than by position; this is the same set
#and order as the index c(1,2,7,8,9,10,11,12,13,14,16,6), with "Class" last,
#but it no longer depends on the upstream column order.
mammMasses2 <- mammMasses2[c(
  "Age", "BI_RADS",
  "age_birads", "age_shape", "age_margin", "age_density",
  "birads_shape", "birads_margin", "birads_density",
  "shape_margin", "margin_density",
  "Class"
)]
str(mammMasses2)
## 'data.frame': 829 obs. of 12 variables:
## $ Age : int 67 58 28 57 76 42 36 60 54 52 ...
## $ BI_RADS : int 5 5 4 5 5 3 4 4 4 3 ...
## $ age_birads : int 5 4 1 4 5 1 1 3 2 1 ...
## $ age_shape : int 4 4 1 1 2 2 2 3 1 3 ...
## $ age_margin : int 5 5 1 5 5 1 1 2 2 3 ...
## $ age_density : int 4 3 1 3 5 2 1 2 3 3 ...
## $ birads_shape : int 3 5 2 2 2 2 3 3 2 3 ...
## $ birads_margin : int 5 5 2 5 5 1 2 2 2 3 ...
## $ birads_density: int 3 3 2 3 3 1 1 1 2 1 ...
## $ shape_margin : int 2 3 1 2 2 2 2 2 1 2 ...
## $ margin_density: int 3 3 2 3 3 2 1 1 2 3 ...
## $ Class : int 1 1 0 1 1 1 0 0 0 0 ...
#Export mammMasses2 (Adjust Your Working Directory as Needed)
#Note: write.csv() writes row names by default, so the file gains a leading
#index column; account for it when reading the file back in
write.csv(mammMasses2, "mammMasses2.csv")
##########################################################################
#Part III: Data Exploration
##########################################################################
#Use describe() from the "psych" Package for Summary Statistics
#(n, mean, sd, median, trimmed mean, mad, min, max, range, skew, kurtosis and
#standard error for every column)
describe(mammMasses2)
## vars n mean sd median trimmed mad min max range
## Age 1 829 55.79 14.68 57 56.15 14.83 18 96 78
## BI_RADS 2 829 4.33 0.69 4 4.37 0.00 0 6 6
## age_birads 3 829 3.03 1.42 3 3.04 1.48 1 5 4
## age_shape 4 829 3.02 1.41 3 3.02 1.48 1 5 4
## age_margin 5 829 3.00 1.41 3 3.00 1.48 1 5 4
## age_density 6 829 3.05 1.41 3 3.06 1.48 1 5 4
## birads_shape 7 829 3.49 1.20 3 3.51 1.48 1 5 4
## birads_margin 8 829 3.35 1.34 3 3.35 1.48 1 5 4
## birads_density 9 829 2.25 0.65 2 2.32 0.00 1 3 2
## shape_margin 10 829 2.15 0.73 2 2.19 1.48 1 3 2
## margin_density 11 829 2.39 0.57 2 2.42 0.00 1 3 2
## Class 12 829 0.48 0.50 0 0.48 0.00 0 1 1
## skew kurtosis se
## Age -0.22 -0.32 0.51
## BI_RADS -1.61 8.74 0.02
## age_birads -0.04 -1.30 0.05
## age_shape -0.03 -1.30 0.05
## age_margin 0.00 -1.30 0.05
## age_density -0.05 -1.32 0.05
## birads_shape 0.00 -1.30 0.04
## birads_margin 0.08 -1.57 0.05
## birads_density -0.31 -0.74 0.02
## shape_margin -0.24 -1.10 0.03
## margin_density -0.28 -0.76 0.02
## Class 0.06 -2.00 0.02
#Distribution Plots (filter these later for what are most useful)
#Age is drawn as a density curve; every other predictor is drawn as a bar
#chart. "Class" is not plotted.
mammMasses2 %>% ggvis(~Age) %>% layer_densities()
mammMasses2 %>% ggvis(~BI_RADS) %>% layer_bars()
mammMasses2 %>% ggvis(~age_birads) %>% layer_bars()
mammMasses2 %>% ggvis(~age_shape) %>% layer_bars()
mammMasses2 %>% ggvis(~age_margin) %>% layer_bars()
mammMasses2 %>% ggvis(~age_density) %>% layer_bars()
mammMasses2 %>% ggvis(~birads_shape) %>% layer_bars()
mammMasses2 %>% ggvis(~birads_margin) %>% layer_bars()
mammMasses2 %>% ggvis(~birads_density) %>% layer_bars()
mammMasses2 %>% ggvis(~shape_margin) %>% layer_bars()
mammMasses2 %>% ggvis(~margin_density) %>% layer_bars()
##########################################################################
#Part IV: Naive-Bayesian Modeling
##########################################################################
#Fit a Naive Bayes Model With "Class" as the Response and Every Other Column
#of mammMasses2 as a Predictor, Then Print the Fitted Model
NBModel <- naiveBayes(Class ~ ., data = mammMasses2)
print(NBModel)
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## 0 1
## 0.5150784 0.4849216
##
## Conditional probabilities:
## Age
## Y [,1] [,2]
## 0 49.29742 13.70268
## 1 62.69403 12.35463
##
## BI_RADS
## Y [,1] [,2]
## 0 3.983607 0.5282739
## 1 4.703980 0.6429623
##
## age_birads
## Y [,1] [,2]
## 0 2.227166 1.126885
## 1 3.888060 1.166835
##
## age_shape
## Y [,1] [,2]
## 0 2.189696 1.152294
## 1 3.900498 1.100918
##
## age_margin
## Y [,1] [,2]
## 0 2.168618 1.162662
## 1 3.890547 1.077061
##
## age_density
## Y [,1] [,2]
## 0 2.435597 1.275426
## 1 3.694030 1.252793
##
## birads_shape
## Y [,1] [,2]
## 0 2.805621 0.8866068
## 1 4.211443 1.0605454
##
## birads_margin
## Y [,1] [,2]
## 0 2.583138 1.054610
## 1 4.164179 1.104473
##
## birads_density
## Y [,1] [,2]
## 0 1.913349 0.4742028
## 1 2.614428 0.6181711
##
## shape_margin
## Y [,1] [,2]
## 0 1.791569 0.6686291
## 1 2.532338 0.5825381
##
## margin_density
## Y [,1] [,2]
## 0 2.133489 0.5249438
## 1 2.659204 0.4951408
#Create a Classification Task For the Model
#mlr is already attached at the top of the script; library() (unlike
#require()) stops with an error if the package is missing
library(mlr)
task <- makeClassifTask(data = mammMasses2, target = "Class")
#Initialize the NB Classifier
selected_model <- makeLearner("classif.naiveBayes")
#Train the Model:
#train() exists in both caret and mlr (mlr masks caret's when attached last);
#naming the mlr one explicitly removes the dependence on attach order
NBPred <- mlr::train(selected_model, task)
NBPred
## Model for learner.id=classif.naiveBayes; learner.class=classif.naiveBayes
## Trained on: task.id = mammMasses2; obs = 829; features = 11
## Hyperparameters:
#Apply Predictive Model to mammMasses2 Without Passing on the Target Variable
#(columns 1:11 are the predictors; "Class" is column 12)
predictions_mlr <- as.data.frame(predict(NBPred, newdata = mammMasses2[1:11]))
#Create a Confusion Matrix
#caret is already attached; library() fails loudly if it is missing, whereas
#require() would only return FALSE
library(caret)
#Rows are the predicted labels, columns are the actual "Class" values
table1 <- table(predictions_mlr[,1], mammMasses2$Class)
table1
##
## 0 1
## 0 352 64
## 1 75 338
#Express the confusion matrix as proportions of all observations
table2 <- prop.table(table1)
table2
##
## 0 1
## 0 0.42460796 0.07720145
## 1 0.09047045 0.40772014
#Accuracy, Kappa, Sensitivity/Specificity etc. for the confusion matrix
#NOTE(review): the output reports "0" as the positive class, so Sensitivity
#and Specificity are relative to Class == 0; pass positive = "1" if Class == 1
#is the outcome of interest - confirm. The model is scored on the same rows
#it was trained on, so these figures are in-sample.
confusionMatrix(table1)
## Confusion Matrix and Statistics
##
##
## 0 1
## 0 352 64
## 1 75 338
##
## Accuracy : 0.8323
## 95% CI : (0.8051, 0.8571)
## No Information Rate : 0.5151
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6646
## Mcnemar's Test P-Value : 0.3963
##
## Sensitivity : 0.8244
## Specificity : 0.8408
## Pos Pred Value : 0.8462
## Neg Pred Value : 0.8184
## Prevalence : 0.5151
## Detection Rate : 0.4246
## Detection Prevalence : 0.5018
## Balanced Accuracy : 0.8326
##
## 'Positive' Class : 0
##
#Save the trained mlr model as an RDS file in the working directory.
#With a new dataset, the saved model can be re-loaded and passed to
#predict():
saveRDS(NBPred, file = "initialNaiveBayesModel.rds")
#to restore: readRDS(file = "initialNaiveBayesModel.rds")
##########################################################################
#Part V: Principal Component Analysis, Collinearity Tests, Investigation of Errors
###########################################################################
#Principal Component Analysis & CorrPlot
#Repeat the PCA on the reduced 12-column mammMasses2 ("Class" included) and
#draw the default graphs. This overwrites the earlier pca1. TRUE is spelled
#out because T is an ordinary variable that can be reassigned.
pca1 <- PCA(mammMasses2, graph = TRUE)


pca1
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 829 individuals, described by 12 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
#PCA Eigenvalues
#One row per component: eigenvalue, percentage of variance, and cumulative
#percentage of variance
pca1$eig
## eigenvalue percentage of variance
## comp 1 7.40580591 61.7150493
## comp 2 1.51329451 12.6107876
## comp 3 1.13373013 9.4477511
## comp 4 0.71656487 5.9713739
## comp 5 0.48299930 4.0249942
## comp 6 0.35894776 2.9912313
## comp 7 0.10456047 0.8713372
## comp 8 0.07813585 0.6511321
## comp 9 0.07413212 0.6177677
## comp 10 0.05548710 0.4623925
## comp 11 0.04705571 0.3921309
## comp 12 0.02928626 0.2440522
## cumulative percentage of variance
## comp 1 61.71505
## comp 2 74.32584
## comp 3 83.77359
## comp 4 89.74496
## comp 5 93.76996
## comp 6 96.76119
## comp 7 97.63252
## comp 8 98.28366
## comp 9 98.90142
## comp 10 99.36382
## comp 11 99.75595
## comp 12 100.00000
#Descriptive Stats for PCs
#For each dimension, lists the variables correlated with it together with
#the correlation and its p-value
dimdesc(pca1)
## $Dim.1
## $Dim.1$quanti
## correlation p.value
## age_margin 0.9045089 2.462463e-308
## age_shape 0.8754777 2.075583e-263
## birads_margin 0.8650794 5.327234e-250
## age_birads 0.8483931 1.150708e-230
## birads_shape 0.8226945 4.780083e-205
## shape_margin 0.7765891 4.086616e-168
## margin_density 0.7351575 8.877094e-142
## Class 0.7319167 6.288946e-140
## birads_density 0.7214297 4.028703e-134
## age_density 0.7173997 5.834177e-132
## Age 0.7143900 2.265915e-130
## BI_RADS 0.6719548 5.715066e-110
##
##
## $Dim.2
## $Dim.2$quanti
## correlation p.value
## Age 0.65173544 2.119139e-101
## age_density 0.61422116 3.958359e-87
## age_birads 0.42228127 3.502068e-37
## birads_density -0.09139484 8.463065e-03
## BI_RADS -0.16739481 1.255595e-06
## margin_density -0.30915980 8.075185e-20
## birads_shape -0.33096064 1.218149e-22
## birads_margin -0.35039693 2.347539e-25
## shape_margin -0.40395879 6.947770e-34
##
##
## $Dim.3
## $Dim.3$quanti
## correlation p.value
## BI_RADS 0.66400772 1.603686e-106
## birads_density 0.52008188 1.230822e-58
## age_birads 0.17340011 5.082710e-07
## Class 0.15532157 7.035278e-06
## birads_shape 0.07856782 2.368274e-02
## age_density -0.07752222 2.561236e-02
## birads_margin -0.08488172 1.449845e-02
## Age -0.12162913 4.485386e-04
## age_shape -0.22465041 6.059748e-11
## age_margin -0.27488264 7.735424e-16
## margin_density -0.28175084 1.361043e-16
## shape_margin -0.35854341 1.494855e-26
#PCA Loadings
#Fit princomp() on the 11 predictor columns ("Class", column 12, is left
#out) and print the loadings. All rows are taken with an empty row index
#instead of a hard-coded 1:829, so the call keeps working if the number of
#observations changes. This overwrites the earlier `model`.
model <- princomp(~ ., mammMasses2[, 1:11], na.action = na.omit)
model$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Age 0.987 -0.119
## BI_RADS 0.183 -0.234 0.461 0.232 0.125 0.108 0.115
## age_birads 0.186 -0.260 0.450 -0.673 -0.339 -0.179
## age_shape 0.385 -0.348 -0.494 -0.204 -0.276 0.584
## age_margin 0.401 0.498 -0.377 0.599 -0.184
## age_density 0.195 -0.854 -0.229
## birads_shape 0.457 -0.487 -0.130 0.110 0.295 0.265 -0.425
## birads_margin 0.513 0.419 0.179 0.244 0.160 -0.330 0.176
## birads_density 0.166 -0.133 0.418 -0.216 0.322 0.271 0.346
## shape_margin 0.272 -0.271 0.204 -0.426 -0.410
## margin_density 0.184 0.278 -0.207 0.219 -0.260 0.165
## Comp.9 Comp.10 Comp.11
## Age
## BI_RADS -0.344 0.389 -0.579
## age_birads 0.253 -0.132
## age_shape
## age_margin 0.141 -0.119
## age_density -0.399
## birads_shape -0.428
## birads_margin -0.375 -0.120 0.384
## birads_density 0.514 0.193 0.366
## shape_margin 0.306 0.604
## margin_density 0.377 -0.448 -0.599
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.091 0.091 0.091 0.091 0.091 0.091 0.091 0.091
## Cumulative Var 0.091 0.182 0.273 0.364 0.455 0.545 0.636 0.727
## Comp.9 Comp.10 Comp.11
## SS loadings 1.000 1.000 1.000
## Proportion Var 0.091 0.091 0.091
## Cumulative Var 0.818 0.909 1.000
#CorrPlot
#Plot the pairwise correlation matrix of the 12 remaining columns
corrplot(cor(mammMasses2), method = "circle")

#Create mammMasses3
#Append Predictions to Dataset
#First, print the predicted labels (a factor with levels 0 and 1)
predictions_mlr[,1]
## [1] 1 1 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 1 0 1 0 1
## [36] 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 1
## [71] 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 1
## [106] 1 0 1 0 0 1 0 1 1 0 1 0 0 1 1 1 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0
## [141] 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0
## [176] 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 0 1 1 0 1
## [211] 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 1 1 1 0 0 0 0 0
## [246] 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 1 1 0 1 0 0
## [281] 0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1
## [316] 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1 0 1
## [351] 1 0 0 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0
## [386] 0 0 1 0 1 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 1
## [421] 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1
## [456] 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0
## [491] 1 1 0 0 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 1 0
## [526] 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 0 0 0
## [561] 0 1 0 1 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1
## [596] 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 1 0 1 1
## [631] 1 1 1 0 1 1 1 0 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0
## [666] 0 0 1 1 1 0 0 1 1 0 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1
## [701] 1 0 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1
## [736] 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 1 1
## [771] 1 0 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## [806] 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 1 0 0 1 0 1 1 1 0
## Levels: 0 1
#Convert the predicted factor to numeric 0/1 through its level labels.
#as.integer() on a factor returns the internal codes (1, 2), which then had
#to be mapped back with ifelse(); going through as.character() yields the
#labels directly and does not depend on the order of the factor levels.
Preds <- as.numeric(as.character(predictions_mlr[,1]))
Preds #numeric vector of 0s and 1s matching the predicted labels
## [1] 1 1 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 1 0 1 0 1
## [36] 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 1
## [71] 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 1
## [106] 1 0 1 0 0 1 0 1 1 0 1 0 0 1 1 1 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0
## [141] 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0
## [176] 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 0 1 1 0 1
## [211] 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 1 1 1 0 0 0 0 0
## [246] 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 1 1 0 1 0 0
## [281] 0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1
## [316] 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1 0 1
## [351] 1 0 0 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0
## [386] 0 0 1 0 1 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 1
## [421] 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1
## [456] 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0
## [491] 1 1 0 0 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 1 0
## [526] 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 0 0 0
## [561] 0 1 0 1 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1
## [596] 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 1 0 1 1
## [631] 1 1 1 0 1 1 1 0 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0
## [666] 0 0 1 1 1 0 0 1 1 0 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1
## [701] 1 0 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1
## [736] 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 1 1
## [771] 1 0 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## [806] 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 1 0 0 1 0 1 1 1 0
#Count how many observations were predicted as 0 and as 1
table(Preds)
## Preds
## 0 1
## 416 413
#Start From a Copy of mammMasses2 and Add the Predictions as a Final Column
mammMasses3 <- mammMasses2
mammMasses3$Preds <- Preds
#Confirm the New 13-Column Structure
str(mammMasses3)
## 'data.frame': 829 obs. of 13 variables:
## $ Age : int 67 58 28 57 76 42 36 60 54 52 ...
## $ BI_RADS : int 5 5 4 5 5 3 4 4 4 3 ...
## $ age_birads : int 5 4 1 4 5 1 1 3 2 1 ...
## $ age_shape : int 4 4 1 1 2 2 2 3 1 3 ...
## $ age_margin : int 5 5 1 5 5 1 1 2 2 3 ...
## $ age_density : int 4 3 1 3 5 2 1 2 3 3 ...
## $ birads_shape : int 3 5 2 2 2 2 3 3 2 3 ...
## $ birads_margin : int 5 5 2 5 5 1 2 2 2 3 ...
## $ birads_density: int 3 3 2 3 3 1 1 1 2 1 ...
## $ shape_margin : int 2 3 1 2 2 2 2 2 1 2 ...
## $ margin_density: int 3 3 2 3 3 2 1 1 2 3 ...
## $ Class : int 1 1 0 1 1 1 0 0 0 0 ...
## $ Preds : num 1 1 0 1 1 0 0 0 0 0 ...
#Create Error Table
#A Subset of mammMasses3 Where Class != Preds (the misclassified rows)
#Copy mammMasses3
errorTable <- mammMasses3
#Keep Only the Misclassified Rows, Assign to errortable2
#Columns are referenced by bare name inside filter() rather than through
#errorTable$..., so the condition is evaluated on the data being piped in.
#dplyr::filter is named explicitly because it shares its name with
#stats::filter.
errortable2 <- errorTable %>% dplyr::filter(Class != Preds)
str(errortable2)
## 'data.frame': 139 obs. of 13 variables:
## $ Age : int 42 59 54 55 57 52 74 67 67 68 ...
## $ BI_RADS : int 3 4 4 5 4 5 5 5 4 3 ...
## $ age_birads : int 1 3 2 4 3 3 5 5 4 2 ...
## $ age_shape : int 2 3 1 4 2 4 2 3 5 2 ...
## $ age_margin : int 1 2 2 4 5 4 2 4 5 2 ...
## $ age_density : int 2 4 3 3 3 3 2 1 4 5 ...
## $ birads_shape : int 2 3 2 5 3 5 2 3 4 1 ...
## $ birads_margin : int 1 2 2 5 5 5 2 5 5 1 ...
## $ birads_density: int 1 2 2 3 2 3 1 1 2 1 ...
## $ shape_margin : int 2 2 1 3 2 3 1 2 3 1 ...
## $ margin_density: int 2 2 2 3 3 3 1 2 3 2 ...
## $ Class : int 1 1 1 0 0 0 1 0 0 1 ...
## $ Preds : num 0 0 0 1 1 1 0 1 1 0 ...
#Summary statistics for the misclassified rows only
describe(errortable2)
## vars n mean sd median trimmed mad min max range
## Age 1 139 56.66 11.80 57 56.91 13.34 28 86 58
## BI_RADS 2 139 4.30 0.64 4 4.31 0.00 0 6 6
## age_birads 3 139 3.01 1.17 3 3.01 1.48 1 5 4
## age_shape 4 139 3.18 1.14 3 3.20 1.48 1 5 4
## age_margin 5 139 3.17 1.17 3 3.20 1.48 1 5 4
## age_density 6 139 3.17 1.29 3 3.21 1.48 1 5 4
## birads_shape 7 139 3.50 1.03 4 3.52 1.48 1 5 4
## birads_margin 8 139 3.55 1.23 4 3.58 1.48 1 5 4
## birads_density 9 139 2.21 0.60 2 2.26 0.00 1 3 2
## shape_margin 10 139 2.27 0.69 2 2.34 1.48 1 3 2
## margin_density 11 139 2.50 0.56 3 2.54 0.00 1 3 2
## Class 12 139 0.46 0.50 0 0.45 0.00 0 1 1
## Preds 13 139 0.54 0.50 1 0.55 0.00 0 1 1
## skew kurtosis se
## Age -0.18 -0.50 1.00
## BI_RADS -1.82 12.49 0.05
## age_birads -0.09 -0.85 0.10
## age_shape -0.12 -0.74 0.10
## age_margin -0.15 -0.86 0.10
## age_density -0.14 -1.15 0.11
## birads_shape -0.13 -0.86 0.09
## birads_margin -0.21 -1.35 0.10
## birads_density -0.10 -0.46 0.05
## shape_margin -0.41 -0.89 0.06
## margin_density -0.51 -0.84 0.05
## Class 0.16 -1.99 0.04
## Preds -0.16 -1.99 0.04
#Re-Classify Using mammMasses3
#Fit Naive Bayes on mammMasses3, so the First Model's Predictions ("Preds")
#Enter as an Additional Predictor Next to the Original 11; Print the Fit
NBModel2 <- naiveBayes(Class ~ ., data = mammMasses3)
print(NBModel2)
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## 0 1
## 0.5150784 0.4849216
##
## Conditional probabilities:
## Age
## Y [,1] [,2]
## 0 49.29742 13.70268
## 1 62.69403 12.35463
##
## BI_RADS
## Y [,1] [,2]
## 0 3.983607 0.5282739
## 1 4.703980 0.6429623
##
## age_birads
## Y [,1] [,2]
## 0 2.227166 1.126885
## 1 3.888060 1.166835
##
## age_shape
## Y [,1] [,2]
## 0 2.189696 1.152294
## 1 3.900498 1.100918
##
## age_margin
## Y [,1] [,2]
## 0 2.168618 1.162662
## 1 3.890547 1.077061
##
## age_density
## Y [,1] [,2]
## 0 2.435597 1.275426
## 1 3.694030 1.252793
##
## birads_shape
## Y [,1] [,2]
## 0 2.805621 0.8866068
## 1 4.211443 1.0605454
##
## birads_margin
## Y [,1] [,2]
## 0 2.583138 1.054610
## 1 4.164179 1.104473
##
## birads_density
## Y [,1] [,2]
## 0 1.913349 0.4742028
## 1 2.614428 0.6181711
##
## shape_margin
## Y [,1] [,2]
## 0 1.791569 0.6686291
## 1 2.532338 0.5825381
##
## margin_density
## Y [,1] [,2]
## 0 2.133489 0.5249438
## 1 2.659204 0.4951408
##
## Preds
## Y [,1] [,2]
## 0 0.175644 0.3809634
## 1 0.840796 0.3663221
#Create a Classification Task For the Model
#mlr is already attached at the top of the script; library() (unlike
#require()) stops with an error if the package is missing
library(mlr)
task2 <- makeClassifTask(data = mammMasses3, target = "Class")
#Initialize the NB Classifier
selected_model2 <- makeLearner("classif.naiveBayes")
#Train the Model:
#train() exists in both caret and mlr (mlr masks caret's when attached last);
#naming the mlr one explicitly removes the dependence on attach order
NBPred2 <- mlr::train(selected_model2, task2)
NBPred2
## Model for learner.id=classif.naiveBayes; learner.class=classif.naiveBayes
## Trained on: task.id = mammMasses3; obs = 829; features = 12
## Hyperparameters:
#Apply Predictive Model to mammMasses3 Without Passing on the Target Variable
#NBPred2 was trained on 12 features, the 12th being "Preds" (column 13), so
#newdata must carry that column too. Passing only columns 1:11 dropped it;
#the confusion matrix recorded for that run is identical to the first
#model's, consistent with "Preds" never reaching predict(). Every column
#except "Class" is passed instead.
predictions_mlr2 <- as.data.frame(
  predict(NBPred2, newdata = mammMasses3[names(mammMasses3) != "Class"])
)
#Create a Confusion Matrix
#caret is already attached; library() fails loudly if it is missing, whereas
#require() would only return FALSE
library(caret)
#Rows are the predicted labels, columns are the actual "Class" values
table3 <- table(predictions_mlr2[,1], mammMasses3$Class)
table3
##
## 0 1
## 0 352 64
## 1 75 338
#Express the second confusion matrix as proportions of all observations
table4 <- prop.table(table3)
table4
##
## 0 1
## 0 0.42460796 0.07720145
## 1 0.09047045 0.40772014
#Accuracy, Kappa, Sensitivity/Specificity etc. for the second model
#NOTE(review): the output reports "0" as the positive class, so Sensitivity
#and Specificity are relative to Class == 0; pass positive = "1" if Class == 1
#is the outcome of interest - confirm. Scoring is again on the training rows.
confusionMatrix(table3)
## Confusion Matrix and Statistics
##
##
## 0 1
## 0 352 64
## 1 75 338
##
## Accuracy : 0.8323
## 95% CI : (0.8051, 0.8571)
## No Information Rate : 0.5151
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6646
## Mcnemar's Test P-Value : 0.3963
##
## Sensitivity : 0.8244
## Specificity : 0.8408
## Pos Pred Value : 0.8462
## Neg Pred Value : 0.8184
## Prevalence : 0.5151
## Detection Rate : 0.4246
## Detection Prevalence : 0.5018
## Balanced Accuracy : 0.8326
##
## 'Positive' Class : 0
##
#Repeat CorrPlot and PCA for mammMasses3
#Run PCA() on all 13 columns ("Class" and "Preds" included) and draw the
#default graphs. TRUE is spelled out because T is an ordinary variable that
#can be reassigned.
pca2 <- PCA(mammMasses3, graph = TRUE)


pca2
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 829 individuals, described by 13 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
#PCA Eigenvalues
#One row per component: eigenvalue, percentage of variance, and cumulative
#percentage of variance
pca2$eig
## eigenvalue percentage of variance
## comp 1 8.19227698 63.0175152
## comp 2 1.51967894 11.6898380
## comp 3 1.13373023 8.7210018
## comp 4 0.71664498 5.5126537
## comp 5 0.48544256 3.7341736
## comp 6 0.36086399 2.7758769
## comp 7 0.21802267 1.6770974
## comp 8 0.09281985 0.7139989
## comp 9 0.07805217 0.6004013
## comp 10 0.07261489 0.5585761
## comp 11 0.05386614 0.4143550
## comp 12 0.04670883 0.3592987
## comp 13 0.02927776 0.2252135
## cumulative percentage of variance
## comp 1 63.01752
## comp 2 74.70735
## comp 3 83.42835
## comp 4 88.94101
## comp 5 92.67518
## comp 6 95.45106
## comp 7 97.12816
## comp 8 97.84216
## comp 9 98.44256
## comp 10 99.00113
## comp 11 99.41549
## comp 12 99.77479
## comp 13 100.00000
# Describe each leading dimension by the variables correlated with it
dimdesc(res = pca2)
## $Dim.1
## $Dim.1$quanti
## correlation p.value
## age_margin 0.9057727 1.310226e-310
## Preds 0.8987110 2.704215e-298
## age_shape 0.8757269 9.575348e-264
## birads_margin 0.8682581 5.613188e-254
## age_birads 0.8437193 1.151611e-225
## birads_shape 0.8243256 1.511704e-206
## shape_margin 0.7764800 4.876520e-168
## margin_density 0.7363932 1.720095e-142
## Class 0.7338906 4.729922e-141
## birads_density 0.7202396 1.767090e-133
## age_density 0.7078561 5.438749e-127
## Age 0.7047593 2.018494e-125
## BI_RADS 0.6721192 4.836705e-110
##
##
## $Dim.2
## $Dim.2$quanti
## correlation p.value
## Age 0.66086311 3.470493e-105
## age_density 0.62374653 1.437848e-90
## age_birads 0.43175082 5.735273e-39
## Preds -0.08082056 1.994868e-02
## birads_density -0.08191164 1.833237e-02
## BI_RADS -0.15896109 4.240490e-06
## margin_density -0.29951534 1.208984e-18
## birads_shape -0.32058946 2.863134e-21
## birads_margin -0.34026032 6.465781e-24
## shape_margin -0.39275273 5.750172e-32
##
##
## $Dim.3
## $Dim.3$quanti
## correlation p.value
## BI_RADS 0.66396423 1.673764e-106
## birads_density 0.52003765 1.263442e-58
## age_birads 0.17339321 5.088084e-07
## Class 0.15529743 7.058672e-06
## birads_shape 0.07850660 2.379214e-02
## age_density -0.07752021 2.561618e-02
## birads_margin -0.08494221 1.442832e-02
## Age -0.12162179 4.488988e-04
## age_shape -0.22467958 6.024513e-11
## age_margin -0.27491468 7.673831e-16
## margin_density -0.28180949 1.340715e-16
## shape_margin -0.35861493 1.458627e-26
# PCA loadings from stats::princomp on the first 11 columns (the feature
# columns; Class and Preds are left out). Every row is used rather than a
# hard-coded 1:829 range, so the call stays correct if the number of rows in
# mammMasses3 changes. Rows containing NA are dropped via na.omit.
# NOTE(review): princomp() works on the covariance matrix unless cor = TRUE,
# so these loadings are for the unscaled variables -- confirm that is the
# intended comparison with the PCA() results above.
model <- princomp(~ ., data = mammMasses3[, 1:11], na.action = na.omit)
model$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Age 0.987 -0.119
## BI_RADS 0.183 -0.234 0.461 0.232 0.125 0.108 0.115
## age_birads 0.186 -0.260 0.450 -0.673 -0.339 -0.179
## age_shape 0.385 -0.348 -0.494 -0.204 -0.276 0.584
## age_margin 0.401 0.498 -0.377 0.599 -0.184
## age_density 0.195 -0.854 -0.229
## birads_shape 0.457 -0.487 -0.130 0.110 0.295 0.265 -0.425
## birads_margin 0.513 0.419 0.179 0.244 0.160 -0.330 0.176
## birads_density 0.166 -0.133 0.418 -0.216 0.322 0.271 0.346
## shape_margin 0.272 -0.271 0.204 -0.426 -0.410
## margin_density 0.184 0.278 -0.207 0.219 -0.260 0.165
## Comp.9 Comp.10 Comp.11
## Age
## BI_RADS -0.344 0.389 -0.579
## age_birads 0.253 -0.132
## age_shape
## age_margin 0.141 -0.119
## age_density -0.399
## birads_shape -0.428
## birads_margin -0.375 -0.120 0.384
## birads_density 0.514 0.193 0.366
## shape_margin 0.306 0.604
## margin_density 0.377 -0.448 -0.599
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.091 0.091 0.091 0.091 0.091 0.091 0.091 0.091
## Cumulative Var 0.091 0.182 0.273 0.364 0.455 0.545 0.636 0.727
## Comp.9 Comp.10 Comp.11
## SS loadings 1.000 1.000 1.000
## Proportion Var 0.091 0.091 0.091
## Cumulative Var 0.818 0.909 1.000
# Correlation plot (circle glyphs) over every column of mammMasses3
mammMasses3 %>%
  cor() %>%
  corrplot(method = "circle")

# Split the misclassified rows of errortable2 into two tables:
#   falsePos: Class == 0 but Preds == 1
#   falseNeg: Class == 1 but Preds == 0
# Bare column names are used so filter() evaluates them against the piped
# data rather than reaching back to the global errortable2 via `$`.
# NOTE(review): the confusionMatrix output earlier reports 'Positive' Class : 0,
# whereas these names treat 1 as the positive class -- confirm the labelling.
falsePos <- errortable2 %>%
  filter(Class == 0, Preds == 1)
falseNeg <- errortable2 %>%
  filter(Class == 1, Preds == 0)
str(falsePos)
## 'data.frame': 75 obs. of 13 variables:
## $ Age : int 55 57 52 67 67 62 59 84 33 86 ...
## $ BI_RADS : int 5 4 5 5 4 5 5 5 5 5 ...
## $ age_birads : int 4 3 3 5 4 4 4 5 1 5 ...
## $ age_shape : int 4 2 4 3 5 4 4 5 3 5 ...
## $ age_margin : int 4 5 4 4 5 4 4 5 3 5 ...
## $ age_density : int 3 3 3 1 4 4 1 5 1 5 ...
## $ birads_shape : int 5 3 5 3 4 5 5 5 5 5 ...
## $ birads_margin : int 5 5 5 5 5 5 5 5 5 5 ...
## $ birads_density: int 3 2 3 1 2 3 1 3 3 3 ...
## $ shape_margin : int 3 2 3 2 3 3 3 3 3 3 ...
## $ margin_density: int 3 3 3 2 3 3 2 3 3 3 ...
## $ Class : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Preds : num 1 1 1 1 1 1 1 1 1 1 ...
utils::str(falseNeg)
## 'data.frame': 64 obs. of 13 variables:
## $ Age : int 42 59 54 74 68 68 41 43 43 54 ...
## $ BI_RADS : int 3 4 4 5 3 4 4 4 5 5 ...
## $ age_birads : int 1 3 2 5 2 4 1 2 2 4 ...
## $ age_shape : int 2 3 1 2 2 2 1 3 2 2 ...
## $ age_margin : int 1 2 2 2 2 2 1 3 1 2 ...
## $ age_density : int 2 4 3 2 5 5 2 2 2 3 ...
## $ birads_shape : int 2 3 2 2 1 2 2 4 3 3 ...
## $ birads_margin : int 1 2 2 2 1 2 2 3 2 2 ...
## $ birads_density: int 1 2 2 1 1 2 2 2 3 3 ...
## $ shape_margin : int 2 2 1 1 1 1 1 2 2 2 ...
## $ margin_density: int 2 2 2 1 2 2 2 2 2 2 ...
## $ Class : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Preds : num 0 0 0 0 0 0 0 0 0 0 ...
# Summary statistics and histograms for the false-positive and false-negative
# tables; start with the per-column summary of the false positives
psych::describe(falsePos)
## vars n mean sd median trimmed mad min max range skew
## Age 1 75 60.71 9.50 60 60.67 8.90 33 86 53 -0.01
## BI_RADS 2 75 4.44 0.53 4 4.41 0.00 4 6 2 0.51
## age_birads 3 75 3.52 0.94 3 3.54 1.48 1 5 4 -0.11
## age_shape 4 75 3.85 0.87 4 3.90 1.48 2 5 3 -0.34
## age_margin 5 75 3.96 0.80 4 4.00 1.48 2 5 3 -0.41
## age_density 6 75 3.61 1.08 4 3.69 1.48 1 5 4 -0.55
## birads_shape 7 75 4.01 0.81 4 4.05 1.48 2 5 3 -0.32
## birads_margin 8 75 4.27 0.84 4 4.38 1.48 2 5 3 -0.92
## birads_density 9 75 2.37 0.54 2 2.38 0.00 1 3 2 0.00
## shape_margin 10 75 2.61 0.52 3 2.66 0.00 1 3 2 -0.74
## margin_density 11 75 2.81 0.39 3 2.89 0.00 2 3 1 -1.58
## Class 12 75 0.00 0.00 0 0.00 0.00 0 0 0 NaN
## Preds 13 75 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## kurtosis se
## Age 0.48 1.10
## BI_RADS -1.16 0.06
## age_birads -0.49 0.11
## age_shape -0.62 0.10
## age_margin -0.34 0.09
## age_density -0.39 0.12
## birads_shape -0.76 0.09
## birads_margin 0.04 0.10
## birads_density -1.02 0.06
## shape_margin -0.83 0.06
## margin_density 0.49 0.05
## Class NaN 0.00
## Preds NaN 0.00
psych::describe(falseNeg)
## vars n mean sd median trimmed mad min max range
## Age 1 64 51.92 12.53 50 51.77 13.34 28 74 46
## BI_RADS 2 64 4.14 0.73 4 4.19 0.00 0 5 5
## age_birads 3 64 2.41 1.14 2 2.35 1.48 1 5 4
## age_shape 4 64 2.39 0.88 2 2.38 1.48 1 5 4
## age_margin 5 64 2.25 0.80 2 2.27 1.48 1 4 3
## age_density 6 64 2.66 1.34 2 2.58 1.48 1 5 4
## birads_shape 7 64 2.91 0.94 3 2.85 1.48 1 5 4
## birads_margin 8 64 2.70 1.06 2 2.58 0.00 1 5 4
## birads_density 9 64 2.02 0.60 2 2.02 0.00 1 3 2
## shape_margin 10 64 1.88 0.65 2 1.85 0.00 1 3 2
## margin_density 11 64 2.14 0.50 2 2.13 0.00 1 3 2
## Class 12 64 1.00 0.00 1 1.00 0.00 1 1 0
## Preds 13 64 0.00 0.00 0 0.00 0.00 0 0 0
## skew kurtosis se
## Age 0.16 -1.12 1.57
## BI_RADS -2.61 13.77 0.09
## age_birads 0.39 -0.88 0.14
## age_shape 0.12 -0.14 0.11
## age_margin -0.09 -0.84 0.10
## age_density 0.48 -1.02 0.17
## birads_shape 0.41 -0.31 0.12
## birads_margin 0.83 -0.28 0.13
## birads_density -0.01 -0.30 0.08
## shape_margin 0.13 -0.74 0.08
## margin_density 0.27 0.48 0.06
## Class NaN NaN 0.00
## Preds NaN NaN 0.00
# Distribution plots for the false-positive rows: a density curve for Age and
# one bar chart for each of the other ten feature columns. Each plot is
# rendered exactly once, and age_margin is charted alongside the other
# age_* features so that all eleven features are covered.
falsePos %>% ggvis(~Age) %>% layer_densities()
falsePos %>% ggvis(~BI_RADS) %>% layer_bars()
falsePos %>% ggvis(~age_birads) %>% layer_bars()
falsePos %>% ggvis(~age_shape) %>% layer_bars()
falsePos %>% ggvis(~age_margin) %>% layer_bars()
falsePos %>% ggvis(~age_density) %>% layer_bars()
falsePos %>% ggvis(~birads_shape) %>% layer_bars()
falsePos %>% ggvis(~birads_margin) %>% layer_bars()
falsePos %>% ggvis(~birads_density) %>% layer_bars()
falsePos %>% ggvis(~shape_margin) %>% layer_bars()
falsePos %>% ggvis(~margin_density) %>% layer_bars()