#2 Go through and run every algorithm
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
data("iris")
summary(iris[,1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
preprocessParams <- preProcess(iris[,1:4], method = c("scale"))
print(preprocessParams)
## Created from 150 samples and 4 variables
##
## Pre-processing:
## - ignored (0)
## - scaled (4)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :5.193 Min. : 4.589 Min. :0.5665 Min. :0.1312
## 1st Qu.:6.159 1st Qu.: 6.424 1st Qu.:0.9064 1st Qu.:0.3936
## Median :7.004 Median : 6.883 Median :2.4642 Median :1.7055
## Mean :7.057 Mean : 7.014 Mean :2.1288 Mean :1.5734
## 3rd Qu.:7.729 3rd Qu.: 7.571 3rd Qu.:2.8890 3rd Qu.:2.3615
## Max. :9.540 Max. :10.095 Max. :3.9087 Max. :3.2798
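# Added sanity check (not part of the original assignment): the "scale" method simply
# divides each column by its sample standard deviation, so this manual version should
# reproduce the summary of `transformed` above. The name manual_scaled is illustrative.
manual_scaled <- sweep(iris[,1:4], 2, sapply(iris[,1:4], sd), "/")
summary(manual_scaled)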
library(caret)
data("iris")
summary(iris[,1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
preprocessParams <- preProcess(iris[,1:4], method = c("center"))
print(preprocessParams)
## Created from 150 samples and 4 variables
##
## Pre-processing:
## - centered (4)
## - ignored (0)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :-1.54333 Min. :-1.05733 Min. :-2.758 Min. :-1.0993
## 1st Qu.:-0.74333 1st Qu.:-0.25733 1st Qu.:-2.158 1st Qu.:-0.8993
## Median :-0.04333 Median :-0.05733 Median : 0.592 Median : 0.1007
## Mean : 0.00000 Mean : 0.00000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 0.55667 3rd Qu.: 0.24267 3rd Qu.: 1.342 3rd Qu.: 0.6007
## Max. : 2.05667 Max. : 1.34267 Max. : 3.142 Max. : 1.3007
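# Added sanity check (illustrative helper name): "center" subtracts each column's mean,
# so the summary below should agree with the caret output above (all means become 0).
manual_centered <- sweep(iris[,1:4], 2, colMeans(iris[,1:4]), "-")
summary(manual_centered)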
library(caret)
data("iris")
summary(iris[,1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
preprocessParams <- preProcess(iris[,1:4], method = c("center","scale"))
print(preprocessParams)
## Created from 150 samples and 4 variables
##
## Pre-processing:
## - centered (4)
## - ignored (0)
## - scaled (4)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :-1.86378 Min. :-2.4258 Min. :-1.5623 Min. :-1.4422
## 1st Qu.:-0.89767 1st Qu.:-0.5904 1st Qu.:-1.2225 1st Qu.:-1.1799
## Median :-0.05233 Median :-0.1315 Median : 0.3354 Median : 0.1321
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.67225 3rd Qu.: 0.5567 3rd Qu.: 0.7602 3rd Qu.: 0.7880
## Max. : 2.48370 Max. : 3.0805 Max. : 1.7799 Max. : 1.7064
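# Added sanity check (illustrative helper name): base R scale() centers and scales in
# one step and should reproduce the standardized values above (mean 0, sd 1 per column).
manual_standardized <- scale(iris[,1:4])
summary(manual_standardized)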
library(caret)
data("iris")
summary(iris[,1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
preprocessParams <- preProcess(iris[,1:4], method = c("range"))
print(preprocessParams)
## Created from 150 samples and 4 variables
##
## Pre-processing:
## - ignored (0)
## - re-scaling to [0, 1] (4)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.2222 1st Qu.:0.3333 1st Qu.:0.1017 1st Qu.:0.08333
## Median :0.4167 Median :0.4167 Median :0.5678 Median :0.50000
## Mean :0.4287 Mean :0.4406 Mean :0.4675 Mean :0.45806
## 3rd Qu.:0.5833 3rd Qu.:0.5417 3rd Qu.:0.6949 3rd Qu.:0.70833
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
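# Added sanity check (illustrative helper name): "range" is plain min-max normalization,
# (x - min) / (max - min), which maps every column onto [0, 1] as in the output above.
manual_range <- as.data.frame(lapply(iris[,1:4], function(x) (x - min(x)) / (max(x) - min(x))))
summary(manual_range)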
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.3.3
library(caret)
data(PimaIndiansDiabetes)
summary(PimaIndiansDiabetes[,7:8])
## pedigree age
## Min. :0.0780 Min. :21.00
## 1st Qu.:0.2437 1st Qu.:24.00
## Median :0.3725 Median :29.00
## Mean :0.4719 Mean :33.24
## 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :2.4200 Max. :81.00
preprocessParams <- preProcess(PimaIndiansDiabetes[,7:8], method = c("BoxCox"))
print(preprocessParams)
## Created from 768 samples and 2 variables
##
## Pre-processing:
## - Box-Cox transformation (2)
## - ignored (0)
##
## Lambda estimates for Box-Cox transformation:
## -0.1, -1.1
transformed <- predict(preprocessParams, PimaIndiansDiabetes[,7:8])
summary(transformed)
## pedigree age
## Min. :-2.5510 Min. :0.8772
## 1st Qu.:-1.4116 1st Qu.:0.8815
## Median :-0.9875 Median :0.8867
## Mean :-0.9599 Mean :0.8874
## 3rd Qu.:-0.4680 3rd Qu.:0.8938
## Max. : 0.8838 Max. :0.9019
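# Added illustration (a sketch, not the original assignment): the Box-Cox transform is
# y = (x^lambda - 1) / lambda for lambda != 0 and y = log(x) when lambda is (close to)
# zero. caret treats estimated lambdas near zero as a log transform, which is why
# pedigree (lambda ~ -0.1) above is effectively log-transformed while age uses the power
# formula with lambda ~ -1.1. The 0.2 cutoff below is an assumption mirroring caret's
# fudge tolerance, and boxcox_manual is an illustrative helper.
boxcox_manual <- function(x, lambda, fudge = 0.2) {
  if (abs(lambda) < fudge) log(x) else (x^lambda - 1) / lambda
}
summary(boxcox_manual(PimaIndiansDiabetes$pedigree, -0.1))
summary(boxcox_manual(PimaIndiansDiabetes$age, -1.1))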
library(mlbench)
library(caret)
data(PimaIndiansDiabetes)
summary(PimaIndiansDiabetes[,7:8])
## pedigree age
## Min. :0.0780 Min. :21.00
## 1st Qu.:0.2437 1st Qu.:24.00
## Median :0.3725 Median :29.00
## Mean :0.4719 Mean :33.24
## 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :2.4200 Max. :81.00
preprocessParams <- preProcess(PimaIndiansDiabetes[,7:8], method = c("YeoJohnson"))
print(preprocessParams)
## Created from 768 samples and 2 variables
##
## Pre-processing:
## - ignored (0)
## - Yeo-Johnson transformation (2)
##
## Lambda estimates for Yeo-Johnson transformation:
## -2.25, -1.15
transformed <- predict(preprocessParams, PimaIndiansDiabetes[,7:8])
summary(transformed)
## pedigree age
## Min. :0.0691 Min. :0.8450
## 1st Qu.:0.1724 1st Qu.:0.8484
## Median :0.2265 Median :0.8524
## Mean :0.2317 Mean :0.8530
## 3rd Qu.:0.2956 3rd Qu.:0.8580
## Max. :0.4164 Max. :0.8644
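# Added illustration (a sketch with rounded lambdas): for non-negative x the Yeo-Johnson
# transform is ((x + 1)^lambda - 1) / lambda for lambda != 0, i.e. a Box-Cox transform of
# x + 1, which is how it copes with zeros; a separate branch handles negative values.
# yj_nonneg is an illustrative helper name.
yj_nonneg <- function(x, lambda) ((x + 1)^lambda - 1) / lambda
summary(yj_nonneg(PimaIndiansDiabetes$pedigree, -2.25))
summary(yj_nonneg(PimaIndiansDiabetes$age, -1.15))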
library(mlbench)
data("iris")
preprocessParams <- preProcess(iris, method = c("center","scale","pca"))
print(preprocessParams)
## Created from 150 samples and 5 variables
##
## Pre-processing:
## - centered (4)
## - ignored (1)
## - principal component signal extraction (4)
## - scaled (4)
##
## PCA needed 2 components to capture 95 percent of the variance
transformed <- predict(preprocessParams, iris)
transformed
## Species PC1 PC2
## 1 setosa -2.25714118 -0.478423832
## 2 setosa -2.07401302 0.671882687
## 3 setosa -2.35633511 0.340766425
## 4 setosa -2.29170679 0.595399863
## 5 setosa -2.38186270 -0.644675659
## 6 setosa -2.06870061 -1.484205297
## 7 setosa -2.43586845 -0.047485118
## 8 setosa -2.22539189 -0.222403002
## 9 setosa -2.32684533 1.111603700
## 10 setosa -2.17703491 0.467447569
## 11 setosa -2.15907699 -1.040205867
## 12 setosa -2.31836413 -0.132633999
## 13 setosa -2.21104370 0.726243183
## 14 setosa -2.62430902 0.958296347
## 15 setosa -2.19139921 -1.853846555
## 16 setosa -2.25466121 -2.677315230
## 17 setosa -2.20021676 -1.478655729
## 18 setosa -2.18303613 -0.487206131
## 19 setosa -1.89223284 -1.400327567
## 20 setosa -2.33554476 -1.124083597
## 21 setosa -1.90793125 -0.407490576
## 22 setosa -2.19964383 -0.921035871
## 23 setosa -2.76508142 -0.456813301
## 24 setosa -1.81259716 -0.085272854
## 25 setosa -2.21972701 -0.136796175
## 26 setosa -1.94532930 0.623529705
## 27 setosa -2.04430277 -0.241354991
## 28 setosa -2.16133650 -0.525389422
## 29 setosa -2.13241965 -0.312172005
## 30 setosa -2.25769799 0.336604248
## 31 setosa -2.13297647 0.502856075
## 32 setosa -1.82547925 -0.422280389
## 33 setosa -2.60621687 -1.787587272
## 34 setosa -2.43800983 -2.143546796
## 35 setosa -2.10292986 0.458665270
## 36 setosa -2.20043723 0.205419224
## 37 setosa -2.03831765 -0.659349230
## 38 setosa -2.51889339 -0.590315163
## 39 setosa -2.42152026 0.901161067
## 40 setosa -2.16246625 -0.267981199
## 41 setosa -2.27884081 -0.440240541
## 42 setosa -1.85191836 2.329610745
## 43 setosa -2.54511203 0.477501017
## 44 setosa -1.95788857 -0.470749613
## 45 setosa -2.12992356 -1.138415464
## 46 setosa -2.06283361 0.708678586
## 47 setosa -2.37677076 -1.116688691
## 48 setosa -2.38638171 0.384957230
## 49 setosa -2.22200263 -0.994627669
## 50 setosa -2.19647504 -0.009185585
## 51 versicolor 1.09810244 -0.860091033
## 52 versicolor 0.72889556 -0.592629362
## 53 versicolor 1.23683580 -0.614239894
## 54 versicolor 0.40612251 1.748546197
## 55 versicolor 1.07188379 0.207725147
## 56 versicolor 0.38738955 0.591302717
## 57 versicolor 0.74403715 -0.770438272
## 58 versicolor -0.48569562 1.846243998
## 59 versicolor 0.92480346 -0.032118478
## 60 versicolor 0.01138804 1.030565784
## 61 versicolor -0.10982834 2.645211115
## 62 versicolor 0.43922201 0.063083852
## 63 versicolor 0.56023148 1.758832129
## 64 versicolor 0.71715934 0.185602819
## 65 versicolor -0.03324333 0.437537419
## 66 versicolor 0.87248429 -0.507364239
## 67 versicolor 0.34908221 0.195656268
## 68 versicolor 0.15827980 0.789451008
## 69 versicolor 1.22100316 1.616827281
## 70 versicolor 0.16436725 1.298259939
## 71 versicolor 0.73521959 -0.395247446
## 72 versicolor 0.47469691 0.415926887
## 73 versicolor 1.23005729 0.930209441
## 74 versicolor 0.63074514 0.414997441
## 75 versicolor 0.70031506 0.063200094
## 76 versicolor 0.87135454 -0.249956017
## 77 versicolor 1.25231375 0.076998069
## 78 versicolor 1.35386953 -0.330205463
## 79 versicolor 0.66258066 0.225173502
## 80 versicolor -0.04012419 1.055183583
## 81 versicolor 0.13035846 1.557055553
## 82 versicolor 0.02337438 1.567225244
## 83 versicolor 0.24073180 0.774661195
## 84 versicolor 1.05755171 0.631726901
## 85 versicolor 0.22323093 0.286812663
## 86 versicolor 0.42770626 -0.842758920
## 87 versicolor 1.04522645 -0.520308714
## 88 versicolor 1.04104379 1.378371048
## 89 versicolor 0.06935597 0.218770433
## 90 versicolor 0.28253073 1.324886147
## 91 versicolor 0.27814596 1.116288852
## 92 versicolor 0.62248441 -0.024839814
## 93 versicolor 0.33540673 0.985103828
## 94 versicolor -0.36097409 2.012495825
## 95 versicolor 0.28762268 0.852873116
## 96 versicolor 0.09105561 0.180587142
## 97 versicolor 0.22695654 0.383634868
## 98 versicolor 0.57446378 0.154356489
## 99 versicolor -0.44617230 1.538637456
## 100 versicolor 0.25587339 0.596852285
## 101 virginica 1.83841002 -0.867515056
## 102 virginica 1.15401555 0.696536401
## 103 virginica 2.19790361 -0.560133976
## 104 virginica 1.43534213 0.046830701
## 105 virginica 1.86157577 -0.294059697
## 106 virginica 2.74268509 -0.797736709
## 107 virginica 0.36579225 1.556289178
## 108 virginica 2.29475181 -0.418663020
## 109 virginica 1.99998633 0.709063226
## 110 virginica 2.25223216 -1.914596301
## 111 virginica 1.35962064 -0.690443405
## 112 virginica 1.59732747 0.420292431
## 113 virginica 1.87761053 -0.417849815
## 114 virginica 1.25590769 1.158379741
## 115 virginica 1.46274487 0.440794883
## 116 virginica 1.58476820 -0.673986887
## 117 virginica 1.46651849 -0.254768327
## 118 virginica 2.41822770 -2.548124795
## 119 virginica 3.29964148 -0.017721580
## 120 virginica 1.25954707 1.701046715
## 121 virginica 2.03091256 -0.907427443
## 122 virginica 0.97471535 0.569855257
## 123 virginica 2.88797650 -0.412259950
## 124 virginica 1.32878064 0.480202496
## 125 virginica 1.69505530 -1.010536476
## 126 virginica 1.94780139 -1.004412720
## 127 virginica 1.17118007 0.315338060
## 128 virginica 1.01754169 -0.064131184
## 129 virginica 1.78237879 0.186735633
## 130 virginica 1.85742501 -0.560413289
## 131 virginica 2.42782030 -0.258418706
## 132 virginica 2.29723178 -2.617554417
## 133 virginica 1.85648383 0.177953334
## 134 virginica 1.11042770 0.291944582
## 135 virginica 1.19845835 0.808606364
## 136 virginica 2.78942561 -0.853942542
## 137 virginica 1.57099294 -1.065013214
## 138 virginica 1.34179696 -0.421020154
## 139 virginica 0.92173701 -0.017165594
## 140 virginica 1.84586124 -0.673870645
## 141 virginica 2.00808316 -0.611835930
## 142 virginica 1.89543421 -0.687273065
## 143 virginica 1.15401555 0.696536401
## 144 virginica 2.03374499 -0.864624030
## 145 virginica 1.99147547 -1.045665670
## 146 virginica 1.86425786 -0.385674038
## 147 virginica 1.55935649 0.893692855
## 148 virginica 1.51609145 -0.268170747
## 149 virginica 1.36820418 -1.007877934
## 150 virginica 0.95744849 0.024250427
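# Added sanity check (illustrative helper name): the same component scores, up to sign,
# come from base R prcomp() on the centered and scaled predictors; preProcess keeps
# however many components are needed to reach its variance threshold (0.95 by default).
pca_base <- prcomp(iris[,1:4], center = TRUE, scale. = TRUE)
head(pca_base$x[, 1:2])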
library(fastICA)
## Warning: package 'fastICA' was built under R version 4.3.3
library(mlbench)
library(caret)
data("PimaIndiansDiabetes")
summary(PimaIndiansDiabetes[,1:8])
## pregnant glucose pressure triceps
## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 62.00 1st Qu.: 0.00
## Median : 3.000 Median :117.0 Median : 72.00 Median :23.00
## Mean : 3.845 Mean :120.9 Mean : 69.11 Mean :20.54
## 3rd Qu.: 6.000 3rd Qu.:140.2 3rd Qu.: 80.00 3rd Qu.:32.00
## Max. :17.000 Max. :199.0 Max. :122.00 Max. :99.00
## insulin mass pedigree age
## Min. : 0.0 Min. : 0.00 Min. :0.0780 Min. :21.00
## 1st Qu.: 0.0 1st Qu.:27.30 1st Qu.:0.2437 1st Qu.:24.00
## Median : 30.5 Median :32.00 Median :0.3725 Median :29.00
## Mean : 79.8 Mean :31.99 Mean :0.4719 Mean :33.24
## 3rd Qu.:127.2 3rd Qu.:36.60 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :846.0 Max. :67.10 Max. :2.4200 Max. :81.00
preprocessParams <- preProcess(PimaIndiansDiabetes[,1:8], method = c("center","scale","ica"), n.comp = 5)
print(preprocessParams)
## Created from 768 samples and 8 variables
##
## Pre-processing:
## - centered (8)
## - independent component signal extraction (8)
## - ignored (0)
## - scaled (8)
##
## ICA used 5 components
transformed <- predict(preprocessParams, PimaIndiansDiabetes[,1:8])
summary(transformed)
## ICA1 ICA2 ICA3 ICA4
## Min. :-3.2241 Min. :-3.0656 Min. :-1.5766 Min. :-4.17293
## 1st Qu.:-0.6488 1st Qu.:-0.7690 1st Qu.:-0.6840 1st Qu.:-0.59446
## Median :-0.1382 Median : 0.2781 Median :-0.2598 Median :-0.02392
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.4702 3rd Qu.: 0.8442 3rd Qu.: 0.4291 3rd Qu.: 0.48274
## Max. : 5.5375 Max. : 1.4112 Max. : 6.0216 Max. : 4.89673
## ICA5
## Min. :-2.93288
## 1st Qu.:-0.72097
## Median :-0.07233
## Mean : 0.00000
## 3rd Qu.: 0.73693
## Max. : 2.38176
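# Added note (a rough sketch, not the exact caret internals): caret delegates the ICA
# step to fastICA(); calling it directly on the standardized predictors is sketched
# below. ICA is stochastic, so results depend on the random seed, and component order
# and signs can differ from the caret output above. ica_direct is an illustrative name.
set.seed(7)
ica_direct <- fastICA(scale(PimaIndiansDiabetes[,1:8]), n.comp = 5)
summary(ica_direct$S)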
#3 Which functions are being used? preProcess() and predict().
#4 Which package contains each function? preProcess() comes from caret; predict() is a
#  base R generic whose preProcess method is supplied by caret (mlbench and fastICA only
#  provide the PimaIndiansDiabetes data and the ICA engine).
#5 Describe what each algorithm/code/model is doing or trying to do:
#  Scale data - divide each attribute by its standard deviation.
#  Center data - calculate the mean of each attribute and subtract it from every value.
#  Standardize data - center and scale combined, so each attribute has mean = 0 and sd = 1.
#  Normalize data - rescale each attribute into the range [0, 1].
#  Box-Cox transform - power transform of positive values that reduces skew and makes the
#  distribution more Gaussian-like.
#  Yeo-Johnson transform - power transform like Box-Cox, but it also supports zero and
#  negative values.
#  Principal component analysis (PCA) transform - returns uncorrelated principal components.
#  Independent component analysis (ICA) transform - returns statistically independent components.
#6 What is standardization? Rescaling an attribute to mean = 0 and sd = 1. What is
#  normalization? Rescaling values into the range [0, 1].
#7 Name pre-processing methods: instance-based, regression.
#8 List all transform methods: data scaling, data centering, data standardization,
#  data normalization, Box-Cox, Yeo-Johnson, PCA and ICA.
#9 Which transform method is supposedly the best? It depends on the data.
#10 Explain what each transform does:
#  BoxCox - makes positive-valued data more Gaussian-like.
#  YeoJohnson - like Box-Cox, but supports raw values equal to zero and negative values.
#  expoTrans - applies an exponential/power transform, in the same family as Box-Cox and
#  Yeo-Johnson.
#  zv - removes attributes with zero variance.
#  nzv - removes attributes with near-zero variance.
#  center - subtracts the mean from each value.
#  scale - divides each value by the standard deviation.
#  range - normalizes values into [0, 1].
#  pca - returns principal components; useful in linear and generalized linear regression.
#  ica - returns independent components; useful in naive Bayes.
#  spatialSign - projects the data onto a unit circle (sphere).
#11 These data transforms are likely to be useful for which algorithms/models? The same
#  mapping as in #10: pca for linear and generalized linear regression, ica for naive
#  Bayes, and the centering/scaling and power transforms for models that are sensitive to
#  attribute scale or prefer Gaussian-like inputs. A short sketch of the methods not run
#  above follows.
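# Added sketch (pp_extra is an illustrative name): the methods from #10 that were not
# demonstrated earlier can be requested the same way. "zv"/"nzv" drop (near) zero-variance
# columns, and "spatialSign" projects the centered and scaled rows onto the unit sphere.
pp_extra <- preProcess(iris[,1:4], method = c("nzv","center","scale","spatialSign"))
head(predict(pp_extra, iris[,1:4]))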
#12 Define scaling: dividing each value by the attribute's standard deviation.
#13 What does the center transform do? It calculates the mean of each attribute and
#  subtracts it from every value.
#14 What is a Gaussian-like distribution? A distribution that is roughly symmetric about
#  the mean, where values near the mean occur more frequently than values far from it.
#15 Study PimaIndiansDiabetes and iris.
#  PimaIndiansDiabetes characteristics - 768 female patients with number of pregnancies,
#  glucose, blood pressure, triceps skin thickness, insulin, BMI, diabetes pedigree, age,
#  and a diabetes outcome.
#  iris characteristics - 150 flowers with sepal length/width and petal length/width for
#  three species (setosa, versicolor, virginica).
#16 Explain PCA and ICA. PCA returns principal components, keeping enough components to
#  reach the specified variance threshold. ICA returns independent components; the number
#  of components must be specified. A small sketch of the relevant arguments follows.
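# Added sketch: the variance threshold for "pca" is set with the thresh argument of
# preProcess() (0.95 by default); the ICA component count was set with n.comp = 5 in the
# earlier call.
preProcess(iris[,1:4], method = "pca", thresh = 0.99)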
#17 Explain pre-processing: preparing the data so that it better suits the learning
#  algorithm used for the analysis.