#Load Packages Into Working Memory
library("ridittools")
library("Amelia")
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library("ggvis")
library("lattice")
library("ggplot2")
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:ggvis':
## 
##     resolution
library("e1071")
library("caret")
library("FactoMineR")
library("ROCR")
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library("arules")
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:ggvis':
## 
##     band
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library("mlr")
## Loading required package: ParamHelpers
## 
## Attaching package: 'mlr'
## The following object is masked from 'package:ROCR':
## 
##     performance
## The following object is masked from 'package:caret':
## 
##     train
## The following object is masked from 'package:e1071':
## 
##     impute
## The following object is masked from 'package:ridittools':
## 
##     acc
library("Matrix")
library("psych")
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:arules':
## 
##     intersect, recode, setdiff, setequal, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("corrplot")
## corrplot 0.84 loaded
#############################################################################
# Part II: Data Import and Transformation
#############################################################################

#Read the mammMasses CSV File Into a Data Frame
mammMasses <- read.csv(file = "c:/users/Joshu/documents/datasets/mammMasses.csv")

#Swap the First Two Columns so "Age" Is First; "Class" Stays Last
mammMasses <- mammMasses[, c(2, 1, 3:6)]

#Inspect the Column Types and First Few Values
str(object = mammMasses)
## 'data.frame':    829 obs. of  6 variables:
##  $ Age    : int  67 58 28 57 76 42 36 60 54 52 ...
##  $ BI_RADS: int  5 5 4 5 5 3 4 4 4 3 ...
##  $ Shape  : int  3 4 1 1 1 2 3 2 1 3 ...
##  $ Margin : int  5 5 1 5 4 1 1 1 1 4 ...
##  $ Density: int  3 3 3 3 3 3 2 2 3 3 ...
##  $ Class  : int  1 1 0 1 1 1 0 0 0 0 ...
#Create Cross-Product (Interaction) Features
#Create a Copy of mammMasses, "cross"
cross <- mammMasses

#Make Each of the First Five Columns Into Its Own Vector
#(Column 6 Is Not Used Here)
dot1 <- cross[, 1]
dot2 <- cross[, 2]
dot3 <- cross[, 3]
dot4 <- cross[, 4]
dot5 <- cross[, 5]

#Row-Wise Product of Every Pair of Columns.
#The previous version built an n x n matrix with tcrossprod() and then kept
#only its diagonal; element i of that diagonal is x[i] * y[i], so the
#vectorized product yields the same values without allocating the matrix.
#as.numeric() keeps the result a double, as tcrossprod()/diag() returned.
dotp1 <- as.numeric(dot1) * dot2
dotp2 <- as.numeric(dot1) * dot3
dotp3 <- as.numeric(dot1) * dot4
dotp4 <- as.numeric(dot1) * dot5
dotp5 <- as.numeric(dot2) * dot3
dotp6 <- as.numeric(dot2) * dot4
dotp7 <- as.numeric(dot2) * dot5
dotp8 <- as.numeric(dot3) * dot4
dotp9 <- as.numeric(dot3) * dot5
dotp10 <- as.numeric(dot4) * dot5

#Discretize the Cross Products and Store Each Bin as an Integer Code
#Helper: coerce to a plain numeric vector, discretize() into n_bins bins,
#then convert the result to integers
bin_product <- function(values, n_bins) {
  binned <- discretize(as.numeric(unlist(values)), breaks = n_bins)
  as.integer(binned)
}

#Products 1-6 Are Cut Into Five Bins
dotp1 <- bin_product(dotp1, 5)
dotp2 <- bin_product(dotp2, 5)
dotp3 <- bin_product(dotp3, 5)
dotp4 <- bin_product(dotp4, 5)
dotp5 <- bin_product(dotp5, 5)
dotp6 <- bin_product(dotp6, 5)

#Products 7-10 Are Cut Into Three Bins
dotp7 <- bin_product(dotp7, 3)
dotp8 <- bin_product(dotp8, 3)
dotp9 <- bin_product(dotp9, 3)
dotp10 <- bin_product(dotp10, 3)


#Assemble the Binned Products Into "cross2", Naming Each Column After the
#Pair of Variables It Was Built From
cross2 <- data.frame(
  age_birads = dotp1,
  age_shape = dotp2,
  age_margin = dotp3,
  age_density = dotp4,
  birads_shape = dotp5,
  birads_margin = dotp6,
  birads_density = dotp7,
  shape_margin = dotp8,
  shape_density = dotp9,
  margin_density = dotp10
)

#Append the New Columns to the Original Data as mammMasses2
mammMasses2 <- cbind(mammMasses, cross2)

#Principal Component Analysis & CorrPlot

#Run FactoMineR's PCA() on Every Column of mammMasses2 and Draw Its Graphs.
#TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca1 <- PCA(mammMasses2, graph = TRUE)

pca1
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 829 individuals, described by 16 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
#PCA Eigenvalues: Per-Component Eigenvalue, Percentage of Variance, and
#Cumulative Percentage of Variance
pca1$eig
##         eigenvalue percentage of variance
## comp 1  9.51428195             59.4642622
## comp 2  1.83649903             11.4781189
## comp 3  1.44938826              9.0586766
## comp 4  1.19992961              7.4995601
## comp 5  0.91726785              5.7329241
## comp 6  0.47778656              2.9861660
## comp 7  0.14444314              0.9027696
## comp 8  0.09218602              0.5761626
## comp 9  0.08020689              0.5012931
## comp 10 0.06574653              0.4109158
## comp 11 0.05631033              0.3519396
## comp 12 0.04819639              0.3012275
## comp 13 0.03651406              0.2282128
## comp 14 0.03154426              0.1971516
## comp 15 0.02772238              0.1732649
## comp 16 0.02197674              0.1373547
##         cumulative percentage of variance
## comp 1                           59.46426
## comp 2                           70.94238
## comp 3                           80.00106
## comp 4                           87.50062
## comp 5                           93.23354
## comp 6                           96.21971
## comp 7                           97.12248
## comp 8                           97.69864
## comp 9                           98.19993
## comp 10                          98.61085
## comp 11                          98.96279
## comp 12                          99.26402
## comp 13                          99.49223
## comp 14                          99.68938
## comp 15                          99.86265
## comp 16                         100.00000
#Descriptive Stats for PCs: for Each Dimension, the Variables Correlated
#With It and the Associated p-values
dimdesc(pca1)
## $Dim.1
## $Dim.1$quanti
##                correlation       p.value
## age_margin       0.9051527 1.725999e-309
## age_shape        0.8981288 2.549486e-297
## birads_margin    0.8777307 1.789348e-266
## Margin           0.8647586 1.325292e-249
## birads_shape     0.8614019 1.596551e-245
## Shape            0.8498026 3.309454e-232
## shape_density    0.8486520 6.012753e-231
## shape_margin     0.8371301 6.888295e-219
## age_birads       0.7792566 5.275521e-170
## margin_density   0.7650969 2.885561e-160
## Class            0.7137701 4.786398e-130
## birads_density   0.6936864 5.669078e-120
## age_density      0.6608153 3.635665e-105
## Age              0.6472360  1.394858e-99
## BI_RADS          0.6240130  1.147573e-90
## Density          0.2380871  3.788557e-12
## 
## 
## $Dim.2
## $Dim.2$quanti
##                correlation       p.value
## age_density     0.65153216 2.564356e-101
## Age             0.60861890  3.683379e-85
## age_birads      0.52185252  4.305369e-59
## birads_density  0.26379977  1.152999e-14
## Density         0.23825756  3.653567e-12
## BI_RADS         0.15053613  1.345468e-05
## Class           0.08197549  1.824143e-02
## margin_density -0.13388294  1.104498e-04
## birads_margin  -0.18739477  5.460630e-08
## Margin         -0.25640462  6.527642e-14
## birads_shape   -0.27246623  1.409126e-15
## shape_density  -0.31275583  2.868384e-20
## Shape          -0.37267541  1.037324e-28
## shape_margin   -0.42451122  1.345538e-37
## 
## 
## $Dim.3
## $Dim.3$quanti
##                correlation       p.value
## Density         0.85966501 1.874660e-243
## birads_density  0.47577662  4.719915e-48
## margin_density  0.42644766  5.829272e-38
## age_margin     -0.07291163  3.582355e-02
## Class          -0.08946469  9.960938e-03
## birads_shape   -0.09622983  5.555224e-03
## Shape          -0.14997710  1.449491e-05
## age_birads     -0.22862694  2.716067e-11
## age_shape      -0.25772116  4.813420e-14
## Age            -0.34707472  7.053441e-25
#PCA Loadings
#All rows are used instead of the hard-coded 1:829, which would silently
#truncate a larger dataset and pad a smaller one with NA rows.
#NOTE(review): columns 1:12 of the 16-column frame include Class and leave
#out the last four cross products - confirm this selection is intended.
model <- princomp(~ ., data = mammMasses2[, 1:12], na.action = na.omit)
model$loadings
## 
## Loadings:
##               Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Age            0.985  0.126                                          
## BI_RADS              -0.129         0.587         0.222              
## Shape                -0.385  0.424 -0.294                      -0.124
## Margin               -0.504 -0.443 -0.205                            
## Density                                    0.366  0.245              
## Class                               0.102        -0.247  0.940       
## age_birads           -0.129         0.584  0.105 -0.629 -0.259       
## age_shape            -0.325  0.423 -0.256        -0.304 -0.134  0.541
## age_margin           -0.345 -0.345 -0.111  0.124 -0.261        -0.476
## age_density                                0.889  0.161              
## birads_shape         -0.369  0.437  0.190 -0.106  0.378        -0.427
## birads_margin        -0.424 -0.343  0.239 -0.117  0.314         0.518
##               Comp.9 Comp.10 Comp.11 Comp.12
## Age                                         
## BI_RADS       -0.106 -0.423  -0.324   0.519 
## Shape          0.268          0.431   0.546 
## Margin         0.542         -0.446         
## Density        0.226 -0.692   0.410  -0.324 
## Class                                -0.146 
## age_birads     0.301          0.191  -0.106 
## age_shape     -0.269 -0.266  -0.255  -0.161 
## age_margin    -0.609 -0.160   0.134   0.108 
## age_density           0.340  -0.176   0.152 
## birads_shape          0.190  -0.173  -0.474 
## birads_margin -0.156  0.266   0.396         
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.083  0.083  0.083  0.083  0.083  0.083  0.083  0.083
## Cumulative Var  0.083  0.167  0.250  0.333  0.417  0.500  0.583  0.667
##                Comp.9 Comp.10 Comp.11 Comp.12
## SS loadings     1.000   1.000   1.000   1.000
## Proportion Var  0.083   0.083   0.083   0.083
## Cumulative Var  0.750   0.833   0.917   1.000
#CorrPlot: Pairwise Correlations of All mammMasses2 Columns, Drawn as Circles
corrplot(cor(mammMasses2), method = "circle")

#REMOVE THE VARIABLES THAT ARE STRONGLY CORRELATED WITH VARIABLES THAT ARE *NOT*
#ONE OF THEIR DERIVED COUNTERPARTS(SHAPE, DENSITY, MARGIN)
#HELPS SMOOTH ASSUMPTIONS OF INDEPENDENCE FOR NB CLASSIFIER

#REMOVED SHAPE DENSITY AFTER FALSEPOS/NEG ANALYSIS

#Keep the Retained Variables in Their Final Order, With "Class" Last.
#Columns are selected by name instead of by position so the selection cannot
#silently shift if the column layout changes. Shape, Margin, Density and
#shape_density are the columns dropped.
mammMasses2 <- mammMasses2[c(
  "Age", "BI_RADS",
  "age_birads", "age_shape", "age_margin", "age_density",
  "birads_shape", "birads_margin", "birads_density",
  "shape_margin", "margin_density",
  "Class"
)]
str(mammMasses2)
## 'data.frame':    829 obs. of  12 variables:
##  $ Age           : int  67 58 28 57 76 42 36 60 54 52 ...
##  $ BI_RADS       : int  5 5 4 5 5 3 4 4 4 3 ...
##  $ age_birads    : int  5 4 1 4 5 1 1 3 2 1 ...
##  $ age_shape     : int  4 4 1 1 2 2 2 3 1 3 ...
##  $ age_margin    : int  5 5 1 5 5 1 1 2 2 3 ...
##  $ age_density   : int  4 3 1 3 5 2 1 2 3 3 ...
##  $ birads_shape  : int  3 5 2 2 2 2 3 3 2 3 ...
##  $ birads_margin : int  5 5 2 5 5 1 2 2 2 3 ...
##  $ birads_density: int  3 3 2 3 3 1 1 1 2 1 ...
##  $ shape_margin  : int  2 3 1 2 2 2 2 2 1 2 ...
##  $ margin_density: int  3 3 2 3 3 2 1 1 2 3 ...
##  $ Class         : int  1 1 0 1 1 1 0 0 0 0 ...
#Export mammMasses2 to the Working Directory (Adjust Your Working Directory as Needed)
#NOTE(review): write.csv() writes the row names as an extra leading column by
#default; pass row.names = FALSE if that column is not wanted - confirm what
#downstream readers of this file expect.
write.csv(mammMasses2, "mammMasses2.csv")

##########################################################################
#Part III: Data Exploration
##########################################################################
#Use describe() From the "psych" Package for Per-Column Summary Statistics
describe(mammMasses2)
##                vars   n  mean    sd median trimmed   mad min max range
## Age               1 829 55.79 14.68     57   56.15 14.83  18  96    78
## BI_RADS           2 829  4.33  0.69      4    4.37  0.00   0   6     6
## age_birads        3 829  3.03  1.42      3    3.04  1.48   1   5     4
## age_shape         4 829  3.02  1.41      3    3.02  1.48   1   5     4
## age_margin        5 829  3.00  1.41      3    3.00  1.48   1   5     4
## age_density       6 829  3.05  1.41      3    3.06  1.48   1   5     4
## birads_shape      7 829  3.49  1.20      3    3.51  1.48   1   5     4
## birads_margin     8 829  3.35  1.34      3    3.35  1.48   1   5     4
## birads_density    9 829  2.25  0.65      2    2.32  0.00   1   3     2
## shape_margin     10 829  2.15  0.73      2    2.19  1.48   1   3     2
## margin_density   11 829  2.39  0.57      2    2.42  0.00   1   3     2
## Class            12 829  0.48  0.50      0    0.48  0.00   0   1     1
##                 skew kurtosis   se
## Age            -0.22    -0.32 0.51
## BI_RADS        -1.61     8.74 0.02
## age_birads     -0.04    -1.30 0.05
## age_shape      -0.03    -1.30 0.05
## age_margin      0.00    -1.30 0.05
## age_density    -0.05    -1.32 0.05
## birads_shape    0.00    -1.30 0.04
## birads_margin   0.08    -1.57 0.05
## birads_density -0.31    -0.74 0.02
## shape_margin   -0.24    -1.10 0.03
## margin_density -0.28    -0.76 0.02
## Class           0.06    -2.00 0.02
#Distribution Plots (Filter These Later for What Is Most Useful)
#Age Gets a Density Layer; Every Other Predictor Gets a Bar Layer
ggvis(mammMasses2, ~Age) %>% layer_densities()
ggvis(mammMasses2, ~BI_RADS) %>% layer_bars()
ggvis(mammMasses2, ~age_birads) %>% layer_bars()
ggvis(mammMasses2, ~age_shape) %>% layer_bars()
ggvis(mammMasses2, ~age_margin) %>% layer_bars()
ggvis(mammMasses2, ~age_density) %>% layer_bars()
ggvis(mammMasses2, ~birads_shape) %>% layer_bars()
ggvis(mammMasses2, ~birads_margin) %>% layer_bars()
ggvis(mammMasses2, ~birads_density) %>% layer_bars()
ggvis(mammMasses2, ~shape_margin) %>% layer_bars()
ggvis(mammMasses2, ~margin_density) %>% layer_bars()
##########################################################################
#Part IV: Naive-Bayesian Modeling
##########################################################################

#Fit naiveBayes() With Class as the Response and All Other Columns of
#mammMasses2 as Predictors, Then Show the Fitted Model
NBModel <- naiveBayes(Class ~ ., data = mammMasses2)
NBModel
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##         0         1 
## 0.5150784 0.4849216 
## 
## Conditional probabilities:
##    Age
## Y       [,1]     [,2]
##   0 49.29742 13.70268
##   1 62.69403 12.35463
## 
##    BI_RADS
## Y       [,1]      [,2]
##   0 3.983607 0.5282739
##   1 4.703980 0.6429623
## 
##    age_birads
## Y       [,1]     [,2]
##   0 2.227166 1.126885
##   1 3.888060 1.166835
## 
##    age_shape
## Y       [,1]     [,2]
##   0 2.189696 1.152294
##   1 3.900498 1.100918
## 
##    age_margin
## Y       [,1]     [,2]
##   0 2.168618 1.162662
##   1 3.890547 1.077061
## 
##    age_density
## Y       [,1]     [,2]
##   0 2.435597 1.275426
##   1 3.694030 1.252793
## 
##    birads_shape
## Y       [,1]      [,2]
##   0 2.805621 0.8866068
##   1 4.211443 1.0605454
## 
##    birads_margin
## Y       [,1]     [,2]
##   0 2.583138 1.054610
##   1 4.164179 1.104473
## 
##    birads_density
## Y       [,1]      [,2]
##   0 1.913349 0.4742028
##   1 2.614428 0.6181711
## 
##    shape_margin
## Y       [,1]      [,2]
##   0 1.791569 0.6686291
##   1 2.532338 0.5825381
## 
##    margin_density
## Y       [,1]      [,2]
##   0 2.133489 0.5249438
##   1 2.659204 0.4951408
#Create a Classification Task for the Model
#mlr is already attached by library("mlr") at the top of the script, so the
#redundant require() (which only returns FALSE instead of failing) is dropped
#and the mlr functions are namespace-qualified.
task <- mlr::makeClassifTask(data = mammMasses2, target = "Class")

#Initialize the NB Classifier
selected_model <- mlr::makeLearner("classif.naiveBayes")

#Train the Model. train() exists in both caret and mlr; the qualified call
#makes sure mlr's version is the one used regardless of attach order.
NBPred <- mlr::train(selected_model, task)
NBPred
## Model for learner.id=classif.naiveBayes; learner.class=classif.naiveBayes
## Trained on: task.id = mammMasses2; obs = 829; features = 11
## Hyperparameters:
#Apply Predictive Model to mammMasses2 Without Passing on the Target Variable.
#The target column is excluded by name rather than through the hard-coded
#1:11 column range, so the call stays correct if columns are added or moved.
predictions_mlr <- as.data.frame(
  predict(NBPred, newdata = mammMasses2[names(mammMasses2) != "Class"])
)

#Create a Confusion Matrix: Predicted Class (Rows) vs. Actual Class (Columns)
#library() is used instead of require() so a missing package fails loudly
#here rather than returning FALSE and failing later.
library("caret")
table1 <- table(predictions_mlr[, 1], mammMasses2$Class)
table1
##    
##       0   1
##   0 352  64
##   1  75 338
#Express the Confusion-Matrix Counts as Proportions of All Observations
table2 <- prop.table(table1)
table2
##    
##              0          1
##   0 0.42460796 0.07720145
##   1 0.09047045 0.40772014
#Accuracy, Kappa, Sensitivity, Specificity and Related Statistics for table1
#NOTE(review): the printed output reports 'Positive' Class : 0, so sensitivity
#and specificity are computed with class 0 as the positive class - confirm
#that this is the intended reference class.
confusionMatrix(table1)
## Confusion Matrix and Statistics
## 
##    
##       0   1
##   0 352  64
##   1  75 338
##                                           
##                Accuracy : 0.8323          
##                  95% CI : (0.8051, 0.8571)
##     No Information Rate : 0.5151          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.6646          
##  Mcnemar's Test P-Value : 0.3963          
##                                           
##             Sensitivity : 0.8244          
##             Specificity : 0.8408          
##          Pos Pred Value : 0.8462          
##          Neg Pred Value : 0.8184          
##              Prevalence : 0.5151          
##          Detection Rate : 0.4246          
##    Detection Prevalence : 0.5018          
##       Balanced Accuracy : 0.8326          
##                                           
##        'Positive' Class : 0               
## 
#Persist the Trained Model to Disk as an RDS File so It Can Be Reloaded
#Later and Passed to predict() With a New Dataset
saveRDS(object = NBPred, file = "initialNaiveBayesModel.rds")
#To restore: readRDS(file = "initialNaiveBayesModel.rds")

##########################################################################
#Part V: Principal Component Analysis, Collinearity Tests, Investigation of Errors
###########################################################################
#Principal Component Analysis & CorrPlot

#Re-Run PCA() on the Reduced 12-Column mammMasses2 and Draw Its Graphs.
#TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca1 <- PCA(mammMasses2, graph = TRUE)

pca1
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 829 individuals, described by 12 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
#PCA Eigenvalues: Per-Component Eigenvalue, Percentage of Variance, and
#Cumulative Percentage of Variance
pca1$eig
##         eigenvalue percentage of variance
## comp 1  7.40580591             61.7150493
## comp 2  1.51329451             12.6107876
## comp 3  1.13373013              9.4477511
## comp 4  0.71656487              5.9713739
## comp 5  0.48299930              4.0249942
## comp 6  0.35894776              2.9912313
## comp 7  0.10456047              0.8713372
## comp 8  0.07813585              0.6511321
## comp 9  0.07413212              0.6177677
## comp 10 0.05548710              0.4623925
## comp 11 0.04705571              0.3921309
## comp 12 0.02928626              0.2440522
##         cumulative percentage of variance
## comp 1                           61.71505
## comp 2                           74.32584
## comp 3                           83.77359
## comp 4                           89.74496
## comp 5                           93.76996
## comp 6                           96.76119
## comp 7                           97.63252
## comp 8                           98.28366
## comp 9                           98.90142
## comp 10                          99.36382
## comp 11                          99.75595
## comp 12                         100.00000
#Descriptive Stats for PCs: for Each Dimension, the Variables Correlated
#With It and the Associated p-values
dimdesc(pca1)
## $Dim.1
## $Dim.1$quanti
##                correlation       p.value
## age_margin       0.9045089 2.462463e-308
## age_shape        0.8754777 2.075583e-263
## birads_margin    0.8650794 5.327234e-250
## age_birads       0.8483931 1.150708e-230
## birads_shape     0.8226945 4.780083e-205
## shape_margin     0.7765891 4.086616e-168
## margin_density   0.7351575 8.877094e-142
## Class            0.7319167 6.288946e-140
## birads_density   0.7214297 4.028703e-134
## age_density      0.7173997 5.834177e-132
## Age              0.7143900 2.265915e-130
## BI_RADS          0.6719548 5.715066e-110
## 
## 
## $Dim.2
## $Dim.2$quanti
##                correlation       p.value
## Age             0.65173544 2.119139e-101
## age_density     0.61422116  3.958359e-87
## age_birads      0.42228127  3.502068e-37
## birads_density -0.09139484  8.463065e-03
## BI_RADS        -0.16739481  1.255595e-06
## margin_density -0.30915980  8.075185e-20
## birads_shape   -0.33096064  1.218149e-22
## birads_margin  -0.35039693  2.347539e-25
## shape_margin   -0.40395879  6.947770e-34
## 
## 
## $Dim.3
## $Dim.3$quanti
##                correlation       p.value
## BI_RADS         0.66400772 1.603686e-106
## birads_density  0.52008188  1.230822e-58
## age_birads      0.17340011  5.082710e-07
## Class           0.15532157  7.035278e-06
## birads_shape    0.07856782  2.368274e-02
## age_density    -0.07752222  2.561236e-02
## birads_margin  -0.08488172  1.449845e-02
## Age            -0.12162913  4.485386e-04
## age_shape      -0.22465041  6.059748e-11
## age_margin     -0.27488264  7.735424e-16
## margin_density -0.28175084  1.361043e-16
## shape_margin   -0.35854341  1.494855e-26
#PCA Loadings for the 11 Predictors (Columns 1:11; Class, Column 12, Is Left Out)
#All rows are used instead of the hard-coded 1:829, which would silently
#truncate a larger dataset and pad a smaller one with NA rows.
model <- princomp(~ ., data = mammMasses2[, 1:11], na.action = na.omit)
model$loadings
## 
## Loadings:
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Age             0.987 -0.119                                          
## BI_RADS                0.183 -0.234  0.461  0.232  0.125  0.108  0.115
## age_birads             0.186 -0.260  0.450        -0.673 -0.339 -0.179
## age_shape              0.385 -0.348 -0.494 -0.204 -0.276         0.584
## age_margin             0.401  0.498               -0.377  0.599 -0.184
## age_density                          0.195 -0.854               -0.229
## birads_shape           0.457 -0.487 -0.130  0.110  0.295  0.265 -0.425
## birads_margin          0.513  0.419  0.179  0.244  0.160 -0.330  0.176
## birads_density         0.166 -0.133  0.418 -0.216  0.322  0.271  0.346
## shape_margin           0.272        -0.271         0.204 -0.426 -0.410
## margin_density         0.184  0.278        -0.207  0.219 -0.260  0.165
##                Comp.9 Comp.10 Comp.11
## Age                                  
## BI_RADS        -0.344  0.389  -0.579 
## age_birads      0.253 -0.132         
## age_shape                            
## age_margin             0.141  -0.119 
## age_density    -0.399                
## birads_shape          -0.428         
## birads_margin  -0.375 -0.120   0.384 
## birads_density  0.514  0.193   0.366 
## shape_margin    0.306  0.604         
## margin_density  0.377 -0.448  -0.599 
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.091  0.091  0.091  0.091  0.091  0.091  0.091  0.091
## Cumulative Var  0.091  0.182  0.273  0.364  0.455  0.545  0.636  0.727
##                Comp.9 Comp.10 Comp.11
## SS loadings     1.000   1.000   1.000
## Proportion Var  0.091   0.091   0.091
## Cumulative Var  0.818   0.909   1.000
#CorrPlot: Pairwise Correlations of All mammMasses2 Columns, Drawn as Circles
corrplot(cor(mammMasses2), method = "circle")

#Create mammMasses3

#Append Predictions to Dataset
#First, Print the Predicted Labels: the First Column of predictions_mlr,
#a Factor With Levels 0 and 1
predictions_mlr[,1]
##   [1] 1 1 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 1 0 1 0 1
##  [36] 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 1
##  [71] 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 1
## [106] 1 0 1 0 0 1 0 1 1 0 1 0 0 1 1 1 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0
## [141] 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0
## [176] 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 0 1 1 0 1
## [211] 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 1 1 1 0 0 0 0 0
## [246] 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 1 1 0 1 0 0
## [281] 0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1
## [316] 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1 0 1
## [351] 1 0 0 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0
## [386] 0 0 1 0 1 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 1
## [421] 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1
## [456] 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0
## [491] 1 1 0 0 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 1 0
## [526] 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 0 0 0
## [561] 0 1 0 1 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1
## [596] 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 1 0 1 1
## [631] 1 1 1 0 1 1 1 0 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0
## [666] 0 0 1 1 1 0 0 1 1 0 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1
## [701] 1 0 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1
## [736] 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 1 1
## [771] 1 0 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## [806] 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 1 0 0 1 0 1 1 1 0
## Levels: 0 1
#Convert the Predicted Factor to Numeric 0/1 by Decoding Its Labels.
#as.integer() on a factor returns the internal level codes (1, 2), which then
#had to be remapped with ifelse(); going through as.character() reads the
#labels "0"/"1" directly and does not depend on the order of the levels.
#as.numeric() keeps Preds a double, as the ifelse() version produced.
Preds <- as.numeric(as.character(predictions_mlr[, 1]))
Preds
##   [1] 1 1 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 1 0 1 0 1
##  [36] 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 1
##  [71] 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 1
## [106] 1 0 1 0 0 1 0 1 1 0 1 0 0 1 1 1 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0
## [141] 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0
## [176] 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 0 1 1 0 1
## [211] 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 1 1 1 0 0 0 0 0
## [246] 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 1 1 0 1 0 0
## [281] 0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1
## [316] 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1 0 1
## [351] 1 0 0 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0
## [386] 0 0 1 0 1 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 1
## [421] 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1
## [456] 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 0 0
## [491] 1 1 0 0 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 1 0
## [526] 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 0 0 0
## [561] 0 1 0 1 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1
## [596] 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 1 0 1 1
## [631] 1 1 1 0 1 1 1 0 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0
## [666] 0 0 1 1 1 0 0 1 1 0 0 0 0 1 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1
## [701] 1 0 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1
## [736] 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 1 1
## [771] 1 0 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## [806] 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 1 0 0 1 0 1 1 1 0
#Count How Many Observations Were Predicted as Each Class
table(Preds)
## Preds
##   0   1 
## 416 413
#Add Preds as a New Last Column of mammMasses2, Giving mammMasses3
mammMasses3 <- mammMasses2
mammMasses3$Preds <- Preds
str(mammMasses3)
## 'data.frame':    829 obs. of  13 variables:
##  $ Age           : int  67 58 28 57 76 42 36 60 54 52 ...
##  $ BI_RADS       : int  5 5 4 5 5 3 4 4 4 3 ...
##  $ age_birads    : int  5 4 1 4 5 1 1 3 2 1 ...
##  $ age_shape     : int  4 4 1 1 2 2 2 3 1 3 ...
##  $ age_margin    : int  5 5 1 5 5 1 1 2 2 3 ...
##  $ age_density   : int  4 3 1 3 5 2 1 2 3 3 ...
##  $ birads_shape  : int  3 5 2 2 2 2 3 3 2 3 ...
##  $ birads_margin : int  5 5 2 5 5 1 2 2 2 3 ...
##  $ birads_density: int  3 3 2 3 3 1 1 1 2 1 ...
##  $ shape_margin  : int  2 3 1 2 2 2 2 2 1 2 ...
##  $ margin_density: int  3 3 2 3 3 2 1 1 2 3 ...
##  $ Class         : int  1 1 0 1 1 1 0 0 0 0 ...
##  $ Preds         : num  1 1 0 1 1 0 0 0 0 0 ...
#Create Error Table: the Subset of mammMasses3 Where Class != Preds
#Copy mammMasses3
errorTable <- mammMasses3

#Keep Only the Misclassified Rows and Store Them in errortable2.
#Columns are referenced by bare name inside filter() (the dplyr idiom) rather
#than through errorTable$..., so the condition is always evaluated on the
#data being piped in. dplyr:: is explicit because stats::filter also exists.
errortable2 <- errorTable %>% dplyr::filter(Class != Preds)
str(errortable2)
## 'data.frame':    139 obs. of  13 variables:
##  $ Age           : int  42 59 54 55 57 52 74 67 67 68 ...
##  $ BI_RADS       : int  3 4 4 5 4 5 5 5 4 3 ...
##  $ age_birads    : int  1 3 2 4 3 3 5 5 4 2 ...
##  $ age_shape     : int  2 3 1 4 2 4 2 3 5 2 ...
##  $ age_margin    : int  1 2 2 4 5 4 2 4 5 2 ...
##  $ age_density   : int  2 4 3 3 3 3 2 1 4 5 ...
##  $ birads_shape  : int  2 3 2 5 3 5 2 3 4 1 ...
##  $ birads_margin : int  1 2 2 5 5 5 2 5 5 1 ...
##  $ birads_density: int  1 2 2 3 2 3 1 1 2 1 ...
##  $ shape_margin  : int  2 2 1 3 2 3 1 2 3 1 ...
##  $ margin_density: int  2 2 2 3 3 3 1 2 3 2 ...
##  $ Class         : int  1 1 1 0 0 0 1 0 0 1 ...
##  $ Preds         : num  0 0 0 1 1 1 0 1 1 0 ...
#Summary Statistics for Every Column of the Misclassified Rows
psych::describe(errortable2)
##                vars   n  mean    sd median trimmed   mad min max range
## Age               1 139 56.66 11.80     57   56.91 13.34  28  86    58
## BI_RADS           2 139  4.30  0.64      4    4.31  0.00   0   6     6
## age_birads        3 139  3.01  1.17      3    3.01  1.48   1   5     4
## age_shape         4 139  3.18  1.14      3    3.20  1.48   1   5     4
## age_margin        5 139  3.17  1.17      3    3.20  1.48   1   5     4
## age_density       6 139  3.17  1.29      3    3.21  1.48   1   5     4
## birads_shape      7 139  3.50  1.03      4    3.52  1.48   1   5     4
## birads_margin     8 139  3.55  1.23      4    3.58  1.48   1   5     4
## birads_density    9 139  2.21  0.60      2    2.26  0.00   1   3     2
## shape_margin     10 139  2.27  0.69      2    2.34  1.48   1   3     2
## margin_density   11 139  2.50  0.56      3    2.54  0.00   1   3     2
## Class            12 139  0.46  0.50      0    0.45  0.00   0   1     1
## Preds            13 139  0.54  0.50      1    0.55  0.00   0   1     1
##                 skew kurtosis   se
## Age            -0.18    -0.50 1.00
## BI_RADS        -1.82    12.49 0.05
## age_birads     -0.09    -0.85 0.10
## age_shape      -0.12    -0.74 0.10
## age_margin     -0.15    -0.86 0.10
## age_density    -0.14    -1.15 0.11
## birads_shape   -0.13    -0.86 0.09
## birads_margin  -0.21    -1.35 0.10
## birads_density -0.10    -0.46 0.05
## shape_margin   -0.41    -0.89 0.06
## margin_density -0.51    -0.84 0.05
## Class           0.16    -1.99 0.04
## Preds          -0.16    -1.99 0.04
#Re-Classify Using mammMasses3
#Fit naiveBayes() With Class as the Response and Every Other Column of
#mammMasses3 (Including Preds) as a Predictor
NBModel2 <- naiveBayes(Class ~ ., data = mammMasses3)
#Show the Fitted A-Priori and Conditional Probability Tables
NBModel2
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##         0         1 
## 0.5150784 0.4849216 
## 
## Conditional probabilities:
##    Age
## Y       [,1]     [,2]
##   0 49.29742 13.70268
##   1 62.69403 12.35463
## 
##    BI_RADS
## Y       [,1]      [,2]
##   0 3.983607 0.5282739
##   1 4.703980 0.6429623
## 
##    age_birads
## Y       [,1]     [,2]
##   0 2.227166 1.126885
##   1 3.888060 1.166835
## 
##    age_shape
## Y       [,1]     [,2]
##   0 2.189696 1.152294
##   1 3.900498 1.100918
## 
##    age_margin
## Y       [,1]     [,2]
##   0 2.168618 1.162662
##   1 3.890547 1.077061
## 
##    age_density
## Y       [,1]     [,2]
##   0 2.435597 1.275426
##   1 3.694030 1.252793
## 
##    birads_shape
## Y       [,1]      [,2]
##   0 2.805621 0.8866068
##   1 4.211443 1.0605454
## 
##    birads_margin
## Y       [,1]     [,2]
##   0 2.583138 1.054610
##   1 4.164179 1.104473
## 
##    birads_density
## Y       [,1]      [,2]
##   0 1.913349 0.4742028
##   1 2.614428 0.6181711
## 
##    shape_margin
## Y       [,1]      [,2]
##   0 1.791569 0.6686291
##   1 2.532338 0.5825381
## 
##    margin_density
## Y       [,1]      [,2]
##   0 2.133489 0.5249438
##   1 2.659204 0.4951408
## 
##    Preds
## Y       [,1]      [,2]
##   0 0.175644 0.3809634
##   1 0.840796 0.3663221
#Create a Classification Task For the Model
#library() Stops With an Error When mlr Is Unavailable, Whereas require()
#Would Only Return FALSE and Let the Script Fail Later
library(mlr)
task2 <- makeClassifTask(data = mammMasses3, target = "Class")

#Initialize the NB Classifier
selected_model2 <- makeLearner("classif.naiveBayes")

#Train the Model:
#train() Is Exported by Both caret and mlr, so the mlr Version Is Named
#Explicitly Instead of Relying on Package Attach Order
NBPred2 <- mlr::train(selected_model2, task2)
NBPred2
## Model for learner.id=classif.naiveBayes; learner.class=classif.naiveBayes
## Trained on: task.id = mammMasses3; obs = 829; features = 12
## Hyperparameters:
#Apply Predictive Model to mammMasses3 Without Passing on the Target Variable
#Only Class Is Dropped. The Model Was Trained on 12 Features Including Preds,
#but Columns 1:11 Leave Preds Out; the Recorded Confusion Matrix (75 and 64
#Errors) Then Just Repeats the First-Pass False Positive/Negative Counts.
#NOTE: the Recorded Output Below Came From the Old 1:11 Call and Will Change
predictions_mlr2 <- as.data.frame(
  predict(
    NBPred2,
    newdata = mammMasses3[, names(mammMasses3) != "Class", drop = FALSE]
  )
)

#Create a Confusion Matrix (Rows = Predicted Class, Columns = Actual Class)
library(caret)
table3 <- table(predictions_mlr2[["response"]], mammMasses3$Class)
table3
##    
##       0   1
##   0 352  64
##   1  75 338
#Express Each Confusion Matrix Cell as a Share of All Observations
table4 <- table3 / sum(table3)
table4
##    
##              0          1
##   0 0.42460796 0.07720145
##   1 0.09047045 0.40772014
#Confusion Matrix Statistics With Class 1 as the Positive Class
#Without positive = "1" caret Uses the First Level ("0"), Which Disagrees With
#This Script's Own False Positive Definition (Class 0 Predicted as 1)
#NOTE: the Recorded Output Below Was Produced With 'Positive' Class : 0, so
#Sensitivity/Specificity and Pos/Neg Pred Value Swap Once This Is Re-Run
confusionMatrix(table3, positive = "1")
## Confusion Matrix and Statistics
## 
##    
##       0   1
##   0 352  64
##   1  75 338
##                                           
##                Accuracy : 0.8323          
##                  95% CI : (0.8051, 0.8571)
##     No Information Rate : 0.5151          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.6646          
##  Mcnemar's Test P-Value : 0.3963          
##                                           
##             Sensitivity : 0.8244          
##             Specificity : 0.8408          
##          Pos Pred Value : 0.8462          
##          Neg Pred Value : 0.8184          
##              Prevalence : 0.5151          
##          Detection Rate : 0.4246          
##    Detection Prevalence : 0.5018          
##       Balanced Accuracy : 0.8326          
##                                           
##        'Positive' Class : 0               
## 
#Repeat CorrPlot and PCA for mammMasses3
#Every Column of mammMasses3 Is Passed to PCA(), Class and Preds Included

#TRUE Is Spelled Out Because T Is an Ordinary Variable That Can Be Reassigned
pca2 <- PCA(mammMasses3, graph = TRUE)

pca2
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 829 individuals, described by 13 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
#PCA Eigenvalues
#Eigenvalue, Percentage of Variance and Cumulative Percentage per Component
pca2[["eig"]]
##         eigenvalue percentage of variance
## comp 1  8.19227698             63.0175152
## comp 2  1.51967894             11.6898380
## comp 3  1.13373023              8.7210018
## comp 4  0.71664498              5.5126537
## comp 5  0.48544256              3.7341736
## comp 6  0.36086399              2.7758769
## comp 7  0.21802267              1.6770974
## comp 8  0.09281985              0.7139989
## comp 9  0.07805217              0.6004013
## comp 10 0.07261489              0.5585761
## comp 11 0.05386614              0.4143550
## comp 12 0.04670883              0.3592987
## comp 13 0.02927776              0.2252135
##         cumulative percentage of variance
## comp 1                           63.01752
## comp 2                           74.70735
## comp 3                           83.42835
## comp 4                           88.94101
## comp 5                           92.67518
## comp 6                           95.45106
## comp 7                           97.12816
## comp 8                           97.84216
## comp 9                           98.44256
## comp 10                          99.00113
## comp 11                          99.41549
## comp 12                          99.77479
## comp 13                         100.00000
#Descriptive Stats for PCs
#Correlation (With p-Value) Between the Variables and Each Dimension
FactoMineR::dimdesc(pca2)
## $Dim.1
## $Dim.1$quanti
##                correlation       p.value
## age_margin       0.9057727 1.310226e-310
## Preds            0.8987110 2.704215e-298
## age_shape        0.8757269 9.575348e-264
## birads_margin    0.8682581 5.613188e-254
## age_birads       0.8437193 1.151611e-225
## birads_shape     0.8243256 1.511704e-206
## shape_margin     0.7764800 4.876520e-168
## margin_density   0.7363932 1.720095e-142
## Class            0.7338906 4.729922e-141
## birads_density   0.7202396 1.767090e-133
## age_density      0.7078561 5.438749e-127
## Age              0.7047593 2.018494e-125
## BI_RADS          0.6721192 4.836705e-110
## 
## 
## $Dim.2
## $Dim.2$quanti
##                correlation       p.value
## Age             0.66086311 3.470493e-105
## age_density     0.62374653  1.437848e-90
## age_birads      0.43175082  5.735273e-39
## Preds          -0.08082056  1.994868e-02
## birads_density -0.08191164  1.833237e-02
## BI_RADS        -0.15896109  4.240490e-06
## margin_density -0.29951534  1.208984e-18
## birads_shape   -0.32058946  2.863134e-21
## birads_margin  -0.34026032  6.465781e-24
## shape_margin   -0.39275273  5.750172e-32
## 
## 
## $Dim.3
## $Dim.3$quanti
##                correlation       p.value
## BI_RADS         0.66396423 1.673764e-106
## birads_density  0.52003765  1.263442e-58
## age_birads      0.17339321  5.088084e-07
## Class           0.15529743  7.058672e-06
## birads_shape    0.07850660  2.379214e-02
## age_density    -0.07752021  2.561618e-02
## birads_margin  -0.08494221  1.442832e-02
## Age            -0.12162179  4.488988e-04
## age_shape      -0.22467958  6.024513e-11
## age_margin     -0.27491468  7.673831e-16
## margin_density -0.28180949  1.340715e-16
## shape_margin   -0.35861493  1.458627e-26
#PCA Loadings
#Separate princomp() Fit on the First 11 Columns (Class and Preds Excluded)
#All Rows Are Selected Rather Than a Hard-Coded 1:829, so the Call Keeps
#Working if the Number of Observations Changes
#NOTE(review): princomp() Defaults to the Covariance Matrix (cor = FALSE), so
#These Loadings Are Unscaled and Come From a Different Fit Than pca2, Which
#Used All 13 Columns - Confirm This Is Intended
model <- princomp(~ ., data = mammMasses3[, 1:11], na.action = na.omit)
model$loadings
## 
## Loadings:
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Age             0.987 -0.119                                          
## BI_RADS                0.183 -0.234  0.461  0.232  0.125  0.108  0.115
## age_birads             0.186 -0.260  0.450        -0.673 -0.339 -0.179
## age_shape              0.385 -0.348 -0.494 -0.204 -0.276         0.584
## age_margin             0.401  0.498               -0.377  0.599 -0.184
## age_density                          0.195 -0.854               -0.229
## birads_shape           0.457 -0.487 -0.130  0.110  0.295  0.265 -0.425
## birads_margin          0.513  0.419  0.179  0.244  0.160 -0.330  0.176
## birads_density         0.166 -0.133  0.418 -0.216  0.322  0.271  0.346
## shape_margin           0.272        -0.271         0.204 -0.426 -0.410
## margin_density         0.184  0.278        -0.207  0.219 -0.260  0.165
##                Comp.9 Comp.10 Comp.11
## Age                                  
## BI_RADS        -0.344  0.389  -0.579 
## age_birads      0.253 -0.132         
## age_shape                            
## age_margin             0.141  -0.119 
## age_density    -0.399                
## birads_shape          -0.428         
## birads_margin  -0.375 -0.120   0.384 
## birads_density  0.514  0.193   0.366 
## shape_margin    0.306  0.604         
## margin_density  0.377 -0.448  -0.599 
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.091  0.091  0.091  0.091  0.091  0.091  0.091  0.091
## Cumulative Var  0.091  0.182  0.273  0.364  0.455  0.545  0.636  0.727
##                Comp.9 Comp.10 Comp.11
## SS loadings     1.000   1.000   1.000
## Proportion Var  0.091   0.091   0.091
## Cumulative Var  0.818   0.909   1.000
#CorrPlot
#Pairwise Correlations of All mammMasses3 Columns, Drawn as Circles
mammMasses3 %>%
  cor() %>%
  corrplot(method = "circle")

#Make errortable2 Into False Positives and False Negatives:
#False Positive = Class 0 Predicted as 1; False Negative = Class 1 Predicted as 0
#Columns Are Referenced Through Data Masking Instead of errortable2$..., so
#Each Condition Is Evaluated on the Rows Being Piped In
falsePos <- errortable2 %>%
  dplyr::filter(Class == 0, Preds == 1)
falseNeg <- errortable2 %>%
  dplyr::filter(Class == 1, Preds == 0)
str(falsePos)
## 'data.frame':    75 obs. of  13 variables:
##  $ Age           : int  55 57 52 67 67 62 59 84 33 86 ...
##  $ BI_RADS       : int  5 4 5 5 4 5 5 5 5 5 ...
##  $ age_birads    : int  4 3 3 5 4 4 4 5 1 5 ...
##  $ age_shape     : int  4 2 4 3 5 4 4 5 3 5 ...
##  $ age_margin    : int  4 5 4 4 5 4 4 5 3 5 ...
##  $ age_density   : int  3 3 3 1 4 4 1 5 1 5 ...
##  $ birads_shape  : int  5 3 5 3 4 5 5 5 5 5 ...
##  $ birads_margin : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ birads_density: int  3 2 3 1 2 3 1 3 3 3 ...
##  $ shape_margin  : int  3 2 3 2 3 3 3 3 3 3 ...
##  $ margin_density: int  3 3 3 2 3 3 2 3 3 3 ...
##  $ Class         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Preds         : num  1 1 1 1 1 1 1 1 1 1 ...
#Inspect the Structure of the False Negative Table
utils::str(falseNeg)
## 'data.frame':    64 obs. of  13 variables:
##  $ Age           : int  42 59 54 74 68 68 41 43 43 54 ...
##  $ BI_RADS       : int  3 4 4 5 3 4 4 4 5 5 ...
##  $ age_birads    : int  1 3 2 5 2 4 1 2 2 4 ...
##  $ age_shape     : int  2 3 1 2 2 2 1 3 2 2 ...
##  $ age_margin    : int  1 2 2 2 2 2 1 3 1 2 ...
##  $ age_density   : int  2 4 3 2 5 5 2 2 2 3 ...
##  $ birads_shape  : int  2 3 2 2 1 2 2 4 3 3 ...
##  $ birads_margin : int  1 2 2 2 1 2 2 3 2 2 ...
##  $ birads_density: int  1 2 2 1 1 2 2 2 3 3 ...
##  $ shape_margin  : int  2 2 1 1 1 1 1 2 2 2 ...
##  $ margin_density: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ Class         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Preds         : num  0 0 0 0 0 0 0 0 0 0 ...
#Get Summary Statistics and Histograms for False Pos and False Neg Tables
#Summary Statistics for the False Positive Rows
psych::describe(falsePos)
##                vars  n  mean   sd median trimmed  mad min max range  skew
## Age               1 75 60.71 9.50     60   60.67 8.90  33  86    53 -0.01
## BI_RADS           2 75  4.44 0.53      4    4.41 0.00   4   6     2  0.51
## age_birads        3 75  3.52 0.94      3    3.54 1.48   1   5     4 -0.11
## age_shape         4 75  3.85 0.87      4    3.90 1.48   2   5     3 -0.34
## age_margin        5 75  3.96 0.80      4    4.00 1.48   2   5     3 -0.41
## age_density       6 75  3.61 1.08      4    3.69 1.48   1   5     4 -0.55
## birads_shape      7 75  4.01 0.81      4    4.05 1.48   2   5     3 -0.32
## birads_margin     8 75  4.27 0.84      4    4.38 1.48   2   5     3 -0.92
## birads_density    9 75  2.37 0.54      2    2.38 0.00   1   3     2  0.00
## shape_margin     10 75  2.61 0.52      3    2.66 0.00   1   3     2 -0.74
## margin_density   11 75  2.81 0.39      3    2.89 0.00   2   3     1 -1.58
## Class            12 75  0.00 0.00      0    0.00 0.00   0   0     0   NaN
## Preds            13 75  1.00 0.00      1    1.00 0.00   1   1     0   NaN
##                kurtosis   se
## Age                0.48 1.10
## BI_RADS           -1.16 0.06
## age_birads        -0.49 0.11
## age_shape         -0.62 0.10
## age_margin        -0.34 0.09
## age_density       -0.39 0.12
## birads_shape      -0.76 0.09
## birads_margin      0.04 0.10
## birads_density    -1.02 0.06
## shape_margin      -0.83 0.06
## margin_density     0.49 0.05
## Class               NaN 0.00
## Preds               NaN 0.00
#Summary Statistics for the False Negative Rows
psych::describe(falseNeg)
##                vars  n  mean    sd median trimmed   mad min max range
## Age               1 64 51.92 12.53     50   51.77 13.34  28  74    46
## BI_RADS           2 64  4.14  0.73      4    4.19  0.00   0   5     5
## age_birads        3 64  2.41  1.14      2    2.35  1.48   1   5     4
## age_shape         4 64  2.39  0.88      2    2.38  1.48   1   5     4
## age_margin        5 64  2.25  0.80      2    2.27  1.48   1   4     3
## age_density       6 64  2.66  1.34      2    2.58  1.48   1   5     4
## birads_shape      7 64  2.91  0.94      3    2.85  1.48   1   5     4
## birads_margin     8 64  2.70  1.06      2    2.58  0.00   1   5     4
## birads_density    9 64  2.02  0.60      2    2.02  0.00   1   3     2
## shape_margin     10 64  1.88  0.65      2    1.85  0.00   1   3     2
## margin_density   11 64  2.14  0.50      2    2.13  0.00   1   3     2
## Class            12 64  1.00  0.00      1    1.00  0.00   1   1     0
## Preds            13 64  0.00  0.00      0    0.00  0.00   0   0     0
##                 skew kurtosis   se
## Age             0.16    -1.12 1.57
## BI_RADS        -2.61    13.77 0.09
## age_birads      0.39    -0.88 0.14
## age_shape       0.12    -0.14 0.11
## age_margin     -0.09    -0.84 0.10
## age_density     0.48    -1.02 0.17
## birads_shape    0.41    -0.31 0.12
## birads_margin   0.83    -0.28 0.13
## birads_density -0.01    -0.30 0.08
## shape_margin    0.13    -0.74 0.08
## margin_density  0.27     0.48 0.06
## Class            NaN      NaN 0.00
## Preds            NaN      NaN 0.00
#Distribution Plots for Every Predictor in the False Positive Table
#Each Plot Is Drawn Once (Every Statement Was Previously Issued Twice), and
#age_margin, the Only Predictor Without a Plot, Is Added to the Sequence
#Age Gets a Density Curve; the Remaining Predictors Get Bar Charts
falsePos %>% ggvis(~Age) %>% layer_densities()
falsePos %>% ggvis(~BI_RADS) %>% layer_bars()
falsePos %>% ggvis(~age_birads) %>% layer_bars()
falsePos %>% ggvis(~age_shape) %>% layer_bars()
falsePos %>% ggvis(~age_margin) %>% layer_bars()
falsePos %>% ggvis(~age_density) %>% layer_bars()
falsePos %>% ggvis(~birads_shape) %>% layer_bars()
falsePos %>% ggvis(~birads_margin) %>% layer_bars()
falsePos %>% ggvis(~birads_density) %>% layer_bars()
falsePos %>% ggvis(~shape_margin) %>% layer_bars()
falsePos %>% ggvis(~margin_density) %>% layer_bars()