#1 Read all pages in this packet in their entirety

Completed

#2 Go through and run every algorithm

library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
data("iris")

summary(iris[,1:4])
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500
preprocessParams <- preProcess(iris[,1:4],method = c("scale"))

print(preprocessParams)
## Created from 150 samples and 4 variables
## 
## Pre-processing:
##   - ignored (0)
##   - scaled (4)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
##   Sepal.Length    Sepal.Width      Petal.Length     Petal.Width    
##  Min.   :5.193   Min.   : 4.589   Min.   :0.5665   Min.   :0.1312  
##  1st Qu.:6.159   1st Qu.: 6.424   1st Qu.:0.9064   1st Qu.:0.3936  
##  Median :7.004   Median : 6.883   Median :2.4642   Median :1.7055  
##  Mean   :7.057   Mean   : 7.014   Mean   :2.1288   Mean   :1.5734  
##  3rd Qu.:7.729   3rd Qu.: 7.571   3rd Qu.:2.8890   3rd Qu.:2.3615  
##  Max.   :9.540   Max.   :10.095   Max.   :3.9087   Max.   :3.2798
library(caret)
data("iris")

summary(iris[,1:4])
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500
preprocessParams <- preProcess(iris[,1:4],method = c("center"))

print(preprocessParams)
## Created from 150 samples and 4 variables
## 
## Pre-processing:
##   - centered (4)
##   - ignored (0)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
##   Sepal.Length       Sepal.Width        Petal.Length     Petal.Width     
##  Min.   :-1.54333   Min.   :-1.05733   Min.   :-2.758   Min.   :-1.0993  
##  1st Qu.:-0.74333   1st Qu.:-0.25733   1st Qu.:-2.158   1st Qu.:-0.8993  
##  Median :-0.04333   Median :-0.05733   Median : 0.592   Median : 0.1007  
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.000   Mean   : 0.0000  
##  3rd Qu.: 0.55667   3rd Qu.: 0.24267   3rd Qu.: 1.342   3rd Qu.: 0.6007  
##  Max.   : 2.05667   Max.   : 1.34267   Max.   : 3.142   Max.   : 1.3007
library(caret)
data("iris")

summary(iris[,1:4])
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500
preprocessParams <- preProcess(iris[,1:4],method = c("center","scale"))

print(preprocessParams)
## Created from 150 samples and 4 variables
## 
## Pre-processing:
##   - centered (4)
##   - ignored (0)
##   - scaled (4)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
##   Sepal.Length       Sepal.Width       Petal.Length      Petal.Width     
##  Min.   :-1.86378   Min.   :-2.4258   Min.   :-1.5623   Min.   :-1.4422  
##  1st Qu.:-0.89767   1st Qu.:-0.5904   1st Qu.:-1.2225   1st Qu.:-1.1799  
##  Median :-0.05233   Median :-0.1315   Median : 0.3354   Median : 0.1321  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.67225   3rd Qu.: 0.5567   3rd Qu.: 0.7602   3rd Qu.: 0.7880  
##  Max.   : 2.48370   Max.   : 3.0805   Max.   : 1.7799   Max.   : 1.7064
library(caret)
data("iris")

summary(iris[,1:4])
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500
preprocessParams <- preProcess(iris[,1:4],method = c("range"))

print(preprocessParams)
## Created from 150 samples and 4 variables
## 
## Pre-processing:
##   - ignored (0)
##   - re-scaling to [0, 1] (4)
transformed <- predict(preprocessParams, iris[,1:4])
summary(transformed)
##   Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
##  Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
##  Mean   :0.4287   Mean   :0.4406   Mean   :0.4675   Mean   :0.45806  
##  3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.3.3
library(caret)

data(PimaIndiansDiabetes)
summary(PimaIndiansDiabetes[,7:8])
##     pedigree           age       
##  Min.   :0.0780   Min.   :21.00  
##  1st Qu.:0.2437   1st Qu.:24.00  
##  Median :0.3725   Median :29.00  
##  Mean   :0.4719   Mean   :33.24  
##  3rd Qu.:0.6262   3rd Qu.:41.00  
##  Max.   :2.4200   Max.   :81.00
preprocessParams<-preProcess(PimaIndiansDiabetes[,7:8], method=c("BoxCox"))

print(preprocessParams)
## Created from 768 samples and 2 variables
## 
## Pre-processing:
##   - Box-Cox transformation (2)
##   - ignored (0)
## 
## Lambda estimates for Box-Cox transformation:
## -0.1, -1.1
transformed<-predict(preprocessParams,PimaIndiansDiabetes[,7:8])
summary(transformed)
##     pedigree            age        
##  Min.   :-2.5510   Min.   :0.8772  
##  1st Qu.:-1.4116   1st Qu.:0.8815  
##  Median :-0.9875   Median :0.8867  
##  Mean   :-0.9599   Mean   :0.8874  
##  3rd Qu.:-0.4680   3rd Qu.:0.8938  
##  Max.   : 0.8838   Max.   :0.9019
library(mlbench)
library(caret)

data(PimaIndiansDiabetes)
summary(PimaIndiansDiabetes[,7:8])
##     pedigree           age       
##  Min.   :0.0780   Min.   :21.00  
##  1st Qu.:0.2437   1st Qu.:24.00  
##  Median :0.3725   Median :29.00  
##  Mean   :0.4719   Mean   :33.24  
##  3rd Qu.:0.6262   3rd Qu.:41.00  
##  Max.   :2.4200   Max.   :81.00
preprocessParams<-preProcess(PimaIndiansDiabetes[,7:8], method=c("YeoJohnson"))

print(preprocessParams)
## Created from 768 samples and 2 variables
## 
## Pre-processing:
##   - ignored (0)
##   - Yeo-Johnson transformation (2)
## 
## Lambda estimates for Yeo-Johnson transformation:
## -2.25, -1.15
transformed<-predict(preprocessParams,PimaIndiansDiabetes[,7:8])
summary(transformed)
##     pedigree           age        
##  Min.   :0.0691   Min.   :0.8450  
##  1st Qu.:0.1724   1st Qu.:0.8484  
##  Median :0.2265   Median :0.8524  
##  Mean   :0.2317   Mean   :0.8530  
##  3rd Qu.:0.2956   3rd Qu.:0.8580  
##  Max.   :0.4164   Max.   :0.8644
library(mlbench)
data("iris")

preprocessParams<-preProcess(iris,method = c("center","scale","pca"))
print(preprocessParams)
## Created from 150 samples and 5 variables
## 
## Pre-processing:
##   - centered (4)
##   - ignored (1)
##   - principal component signal extraction (4)
##   - scaled (4)
## 
## PCA needed 2 components to capture 95 percent of the variance
transformed<-predict(preprocessParams,iris)

transformed
##        Species         PC1          PC2
## 1       setosa -2.25714118 -0.478423832
## 2       setosa -2.07401302  0.671882687
## 3       setosa -2.35633511  0.340766425
## 4       setosa -2.29170679  0.595399863
## 5       setosa -2.38186270 -0.644675659
## 6       setosa -2.06870061 -1.484205297
## 7       setosa -2.43586845 -0.047485118
## 8       setosa -2.22539189 -0.222403002
## 9       setosa -2.32684533  1.111603700
## 10      setosa -2.17703491  0.467447569
## 11      setosa -2.15907699 -1.040205867
## 12      setosa -2.31836413 -0.132633999
## 13      setosa -2.21104370  0.726243183
## 14      setosa -2.62430902  0.958296347
## 15      setosa -2.19139921 -1.853846555
## 16      setosa -2.25466121 -2.677315230
## 17      setosa -2.20021676 -1.478655729
## 18      setosa -2.18303613 -0.487206131
## 19      setosa -1.89223284 -1.400327567
## 20      setosa -2.33554476 -1.124083597
## 21      setosa -1.90793125 -0.407490576
## 22      setosa -2.19964383 -0.921035871
## 23      setosa -2.76508142 -0.456813301
## 24      setosa -1.81259716 -0.085272854
## 25      setosa -2.21972701 -0.136796175
## 26      setosa -1.94532930  0.623529705
## 27      setosa -2.04430277 -0.241354991
## 28      setosa -2.16133650 -0.525389422
## 29      setosa -2.13241965 -0.312172005
## 30      setosa -2.25769799  0.336604248
## 31      setosa -2.13297647  0.502856075
## 32      setosa -1.82547925 -0.422280389
## 33      setosa -2.60621687 -1.787587272
## 34      setosa -2.43800983 -2.143546796
## 35      setosa -2.10292986  0.458665270
## 36      setosa -2.20043723  0.205419224
## 37      setosa -2.03831765 -0.659349230
## 38      setosa -2.51889339 -0.590315163
## 39      setosa -2.42152026  0.901161067
## 40      setosa -2.16246625 -0.267981199
## 41      setosa -2.27884081 -0.440240541
## 42      setosa -1.85191836  2.329610745
## 43      setosa -2.54511203  0.477501017
## 44      setosa -1.95788857 -0.470749613
## 45      setosa -2.12992356 -1.138415464
## 46      setosa -2.06283361  0.708678586
## 47      setosa -2.37677076 -1.116688691
## 48      setosa -2.38638171  0.384957230
## 49      setosa -2.22200263 -0.994627669
## 50      setosa -2.19647504 -0.009185585
## 51  versicolor  1.09810244 -0.860091033
## 52  versicolor  0.72889556 -0.592629362
## 53  versicolor  1.23683580 -0.614239894
## 54  versicolor  0.40612251  1.748546197
## 55  versicolor  1.07188379  0.207725147
## 56  versicolor  0.38738955  0.591302717
## 57  versicolor  0.74403715 -0.770438272
## 58  versicolor -0.48569562  1.846243998
## 59  versicolor  0.92480346 -0.032118478
## 60  versicolor  0.01138804  1.030565784
## 61  versicolor -0.10982834  2.645211115
## 62  versicolor  0.43922201  0.063083852
## 63  versicolor  0.56023148  1.758832129
## 64  versicolor  0.71715934  0.185602819
## 65  versicolor -0.03324333  0.437537419
## 66  versicolor  0.87248429 -0.507364239
## 67  versicolor  0.34908221  0.195656268
## 68  versicolor  0.15827980  0.789451008
## 69  versicolor  1.22100316  1.616827281
## 70  versicolor  0.16436725  1.298259939
## 71  versicolor  0.73521959 -0.395247446
## 72  versicolor  0.47469691  0.415926887
## 73  versicolor  1.23005729  0.930209441
## 74  versicolor  0.63074514  0.414997441
## 75  versicolor  0.70031506  0.063200094
## 76  versicolor  0.87135454 -0.249956017
## 77  versicolor  1.25231375  0.076998069
## 78  versicolor  1.35386953 -0.330205463
## 79  versicolor  0.66258066  0.225173502
## 80  versicolor -0.04012419  1.055183583
## 81  versicolor  0.13035846  1.557055553
## 82  versicolor  0.02337438  1.567225244
## 83  versicolor  0.24073180  0.774661195
## 84  versicolor  1.05755171  0.631726901
## 85  versicolor  0.22323093  0.286812663
## 86  versicolor  0.42770626 -0.842758920
## 87  versicolor  1.04522645 -0.520308714
## 88  versicolor  1.04104379  1.378371048
## 89  versicolor  0.06935597  0.218770433
## 90  versicolor  0.28253073  1.324886147
## 91  versicolor  0.27814596  1.116288852
## 92  versicolor  0.62248441 -0.024839814
## 93  versicolor  0.33540673  0.985103828
## 94  versicolor -0.36097409  2.012495825
## 95  versicolor  0.28762268  0.852873116
## 96  versicolor  0.09105561  0.180587142
## 97  versicolor  0.22695654  0.383634868
## 98  versicolor  0.57446378  0.154356489
## 99  versicolor -0.44617230  1.538637456
## 100 versicolor  0.25587339  0.596852285
## 101  virginica  1.83841002 -0.867515056
## 102  virginica  1.15401555  0.696536401
## 103  virginica  2.19790361 -0.560133976
## 104  virginica  1.43534213  0.046830701
## 105  virginica  1.86157577 -0.294059697
## 106  virginica  2.74268509 -0.797736709
## 107  virginica  0.36579225  1.556289178
## 108  virginica  2.29475181 -0.418663020
## 109  virginica  1.99998633  0.709063226
## 110  virginica  2.25223216 -1.914596301
## 111  virginica  1.35962064 -0.690443405
## 112  virginica  1.59732747  0.420292431
## 113  virginica  1.87761053 -0.417849815
## 114  virginica  1.25590769  1.158379741
## 115  virginica  1.46274487  0.440794883
## 116  virginica  1.58476820 -0.673986887
## 117  virginica  1.46651849 -0.254768327
## 118  virginica  2.41822770 -2.548124795
## 119  virginica  3.29964148 -0.017721580
## 120  virginica  1.25954707  1.701046715
## 121  virginica  2.03091256 -0.907427443
## 122  virginica  0.97471535  0.569855257
## 123  virginica  2.88797650 -0.412259950
## 124  virginica  1.32878064  0.480202496
## 125  virginica  1.69505530 -1.010536476
## 126  virginica  1.94780139 -1.004412720
## 127  virginica  1.17118007  0.315338060
## 128  virginica  1.01754169 -0.064131184
## 129  virginica  1.78237879  0.186735633
## 130  virginica  1.85742501 -0.560413289
## 131  virginica  2.42782030 -0.258418706
## 132  virginica  2.29723178 -2.617554417
## 133  virginica  1.85648383  0.177953334
## 134  virginica  1.11042770  0.291944582
## 135  virginica  1.19845835  0.808606364
## 136  virginica  2.78942561 -0.853942542
## 137  virginica  1.57099294 -1.065013214
## 138  virginica  1.34179696 -0.421020154
## 139  virginica  0.92173701 -0.017165594
## 140  virginica  1.84586124 -0.673870645
## 141  virginica  2.00808316 -0.611835930
## 142  virginica  1.89543421 -0.687273065
## 143  virginica  1.15401555  0.696536401
## 144  virginica  2.03374499 -0.864624030
## 145  virginica  1.99147547 -1.045665670
## 146  virginica  1.86425786 -0.385674038
## 147  virginica  1.55935649  0.893692855
## 148  virginica  1.51609145 -0.268170747
## 149  virginica  1.36820418 -1.007877934
## 150  virginica  0.95744849  0.024250427
library(fastICA)
## Warning: package 'fastICA' was built under R version 4.3.3
library(mlbench)
library(caret)

data("PimaIndiansDiabetes")
summary(PimaIndiansDiabetes[,1:8])
##     pregnant         glucose         pressure         triceps     
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
##  Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
##  3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
##     insulin           mass          pedigree           age       
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780   Min.   :21.00  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437   1st Qu.:24.00  
##  Median : 30.5   Median :32.00   Median :0.3725   Median :29.00  
##  Mean   : 79.8   Mean   :31.99   Mean   :0.4719   Mean   :33.24  
##  3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262   3rd Qu.:41.00  
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200   Max.   :81.00
preprocessParams<-preProcess(PimaIndiansDiabetes[,1:8],method = c("center","scale","ica"),n.comp=5)

print(preprocessParams)
## Created from 768 samples and 8 variables
## 
## Pre-processing:
##   - centered (8)
##   - independent component signal extraction (8)
##   - ignored (0)
##   - scaled (8)
## 
## ICA used 5 components
transformed <- predict(preprocessParams, PimaIndiansDiabetes[,1:8])

summary(transformed)
##       ICA1              ICA2              ICA3              ICA4         
##  Min.   :-3.2241   Min.   :-3.0656   Min.   :-1.5766   Min.   :-4.17293  
##  1st Qu.:-0.6488   1st Qu.:-0.7690   1st Qu.:-0.6840   1st Qu.:-0.59446  
##  Median :-0.1382   Median : 0.2781   Median :-0.2598   Median :-0.02392  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.4702   3rd Qu.: 0.8442   3rd Qu.: 0.4291   3rd Qu.: 0.48274  
##  Max.   : 5.5375   Max.   : 1.4112   Max.   : 6.0216   Max.   : 4.89673  
##       ICA5         
##  Min.   :-2.93288  
##  1st Qu.:-0.72097  
##  Median :-0.07233  
##  Mean   : 0.00000  
##  3rd Qu.: 0.73693  
##  Max.   : 2.38176

#3 List which functions are being used? preProcess() and predict(), along with helpers such as library(), data(), summary(), and print().

#4 List which package contains each function? preProcess() is in caret; predict() is a base-R generic whose method for preProcess objects is supplied by caret. The PimaIndiansDiabetes data set comes from mlbench, and the ICA step relies on fastICA.

#5 Describe what each algorithm/code/model is doing or trying to do:
- Scale data: calculate the standard deviation of each attribute and divide each value by it.
- Center data: calculate the mean of each attribute and subtract it from each value.
- Standardize data: the combination of scale and center; each attribute ends up with mean = 0 and sd = 1.
- Normalize data: rescale each attribute into the range [0, 1].
- Box-Cox transform: reduces the skewness of Gaussian-like data, making it more Gaussian.
- Yeo-Johnson transform: a power transform like Box-Cox, but it also supports zero and negative values.
- Principal component analysis (PCA) transform: returns the principal components, which are uncorrelated.
- Independent component analysis (ICA) transform: returns the independent components.
A short skewness check for the Box-Cox and Yeo-Johnson claims follows this list.
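A quick way to see the "makes it more Gaussian" claim is to compare the skewness of an attribute before and after the transform. This is only a sketch; it assumes the e1071 package (which provides skewness()) is installed, and skewness closer to zero means a more symmetric, more Gaussian-like shape.

library(e1071)
library(caret)
library(mlbench)
data(PimaIndiansDiabetes)

ped <- PimaIndiansDiabetes[, "pedigree", drop = FALSE]
skewness(ped$pedigree)                                    # right-skewed raw attribute
bc  <- predict(preProcess(ped, method = "BoxCox"), ped)
yj  <- predict(preProcess(ped, method = "YeoJohnson"), ped)
skewness(bc$pedigree)                                     # should move much closer to zero
skewness(yj$pedigree)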

#6 What is standardization? It rescales an attribute so that it has mean = 0 and sd = 1. What is normalization? It rescales an attribute into the range [0, 1].
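As a sanity check on these definitions, both formulas can be applied by hand and compared with caret's output; a minimal sketch using the iris measurements already loaded above:

library(caret)
data(iris)
x <- iris[, 1:4]

# standardization by hand: (x - mean) / sd, so each column has mean 0 and sd 1
standardized_manual <- as.data.frame(scale(x))
standardized_caret  <- predict(preProcess(x, method = c("center", "scale")), x)
all.equal(standardized_manual, standardized_caret, check.attributes = FALSE)

# normalization by hand: (x - min) / (max - min), so each column lies in [0, 1]
normalized_manual <- as.data.frame(lapply(x, function(v) (v - min(v)) / (max(v) - min(v))))
normalized_caret  <- predict(preProcess(x, method = "range"), x)
all.equal(normalized_manual, normalized_caret, check.attributes = FALSE)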

#7 Name pre-processing methods: instance-based and regression.

#8 List all transform methods: data scaling, data centering, data standardization, data normalization, Box-Cox, Yeo-Johnson, PCA, and ICA.

#9 Which transform method is supposedly the best? It depends on the data; see the comparison sketch below.
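One way to see that the best transform depends on the data is to compare the same model under different pre-processing choices using resampling. This is a minimal sketch with a k-nearest-neighbour model on iris; the model choice and fold count here are only illustrative.

library(caret)
data(iris)
set.seed(7)

# use the same folds for every fit so the comparison is apples to apples
folds <- createFolds(iris$Species, k = 5, returnTrain = TRUE)
ctrl  <- trainControl(method = "cv", index = folds)

fit_raw   <- train(Species ~ ., data = iris, method = "knn", trControl = ctrl)
fit_std   <- train(Species ~ ., data = iris, method = "knn", trControl = ctrl,
                   preProcess = c("center", "scale"))
fit_range <- train(Species ~ ., data = iris, method = "knn", trControl = ctrl,
                   preProcess = "range")

# compare the cross-validated accuracy under each pre-processing choice
summary(resamples(list(raw = fit_raw, standardized = fit_std, normalized = fit_range)))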

#10 Explain what each transform does:
- BoxCox: makes the data more Gaussian.
- YeoJohnson: like Box-Cox, but supports raw values equal to zero and negative values.
- expoTrans: applies a power transform, like Box-Cox and Yeo-Johnson.
- zv: removes attributes with zero variance.
- nzv: removes attributes with near-zero variance (demonstrated in the sketch after this list).
- center: subtracts the mean from each value.
- scale: divides each value by the standard deviation.
- range: normalizes values into [0, 1].
- pca: transforms to principal components; useful for linear and generalized linear regression.
- ica: retains independent components; useful for Naive Bayes.
- spatialSign: projects the data onto a unit circle.
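A small sketch of the zv/nzv behaviour, using a hypothetical toy copy of the iris measurements with a constant column and a nearly constant column added; nearZeroVar() reports the diagnostics and preProcess(method = "nzv") drops the offending columns.

library(caret)
data(iris)

toy <- iris[, 1:4]
toy$constant <- 1                        # zero variance
toy$almost   <- c(rep(0, 149), 1)        # near-zero variance

nearZeroVar(toy, saveMetrics = TRUE)     # per-column freqRatio / percentUnique diagnostics
pp <- preProcess(toy, method = "nzv")
ncol(predict(pp, toy))                   # the two flagged columns are removed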

#11 These data transforms are likely to be useful for which algorithm/code/models?
- Box-Cox, Yeo-Johnson, and expoTrans make attributes more Gaussian, which helps models that assume roughly Gaussian inputs (for example linear regression and linear discriminant analysis).
- center, scale, and range put attributes on a comparable footing, which helps distance- and weight-based models such as kNN, SVMs, and neural networks.
- pca produces uncorrelated components and is useful for linear and generalized linear regression (see the sketch below).
- ica retains independent components and is useful for Naive Bayes.
- zv and nzv remove constant or near-constant attributes, which benefits any model destabilized by them.
- spatialSign projects the data onto a unit circle, which limits the influence of outliers on outlier-sensitive models.
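To connect the transforms to an actual model, caret's train() accepts a preProcess argument, so PCA-rotated predictors can feed a generalized linear model directly. A sketch using logistic regression on the Pima data; the specific choices here are only illustrative.

library(caret)
library(mlbench)
data(PimaIndiansDiabetes)
set.seed(7)

ctrl <- trainControl(method = "cv", number = 5)
# standardize, rotate to principal components, then fit a logistic regression
fit_pca <- train(diabetes ~ ., data = PimaIndiansDiabetes, method = "glm",
                 trControl = ctrl, preProcess = c("center", "scale", "pca"))
fit_pca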

#12 Define scaling: it divides each value by the attribute's standard deviation.

#13 What does the center transform do? It calculates the mean of each attribute and subtracts it from each value.

#14 What is a Gaussian-like distribution? A bell-shaped distribution symmetric about the mean, in which values near the mean occur more frequently than values far from the mean.
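A quick numerical illustration of that bell shape, using simulated draws (the sample size is arbitrary): roughly 68% of values fall within one standard deviation of the mean and roughly 95% within two.

set.seed(1)
x <- rnorm(10000)                      # draws from a standard Gaussian
mean(abs(x - mean(x)) <= sd(x))        # close to 0.68
mean(abs(x - mean(x)) <= 2 * sd(x))    # close to 0.95
hist(x, breaks = 50, main = "Gaussian-like (bell-shaped) distribution")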

#15 Study PimaIndiansDiabetes and iris.
Characteristics of PimaIndiansDiabetes: each row describes a female patient of Pima Indian heritage, recording the number of pregnancies, plasma glucose, blood pressure, triceps skin thickness, insulin, BMI (mass), diabetes pedigree, age, and the diabetes outcome.
Characteristics of iris: each row describes a flower, recording sepal length, sepal width, petal length, petal width, and the species.

PimaIndiansDiabetes has 768 rows.

Iris has 150 rows.
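Those row counts, and the column structure noted above, can be confirmed directly; a minimal sketch:

library(mlbench)
data(PimaIndiansDiabetes)
data(iris)

dim(PimaIndiansDiabetes)   # 768 rows, 9 columns (8 predictors plus the diabetes factor)
dim(iris)                  # 150 rows, 5 columns (4 measurements plus Species)
str(PimaIndiansDiabetes)
str(iris)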

#16 Explain PCA and ICA. PCA returns only the principal components; caret keeps as many components as are needed to reach the variance threshold (95% by default). ICA returns only the independent components; the number of components must be specified.
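The knobs that control this differ between the two: preProcess exposes thresh (the variance threshold, 0.95 by default) and pcaComp (a fixed component count that overrides thresh) for PCA, while ICA only takes n.comp. A minimal sketch on iris (fastICA must be installed for the ICA call, as above):

library(caret)
data(iris)

# PCA: keep components until 80% of the variance is captured, or fix the count outright
pp80 <- preProcess(iris[, 1:4], method = c("center", "scale", "pca"), thresh = 0.80)
pp3  <- preProcess(iris[, 1:4], method = c("center", "scale", "pca"), pcaComp = 3)
ncol(predict(pp80, iris[, 1:4]))
ncol(predict(pp3, iris[, 1:4]))

# ICA: no variance threshold; the number of components must be given explicitly
ppica <- preProcess(iris[, 1:4], method = c("center", "scale", "ica"), n.comp = 2)
ncol(predict(ppica, iris[, 1:4]))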

#17 Explain pre-processing. Pre-processing prepares the data so that it is in a suitable form for fitting a learning algorithm: the transform parameters are estimated with preProcess() and then applied with predict(), as in the workflow sketch below.
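In practice the pre-processing parameters should be estimated on the training data only and then applied to both splits, so no information leaks from the hold-out rows. A minimal sketch of that workflow:

library(caret)
data(iris)
set.seed(7)

in_train <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
train_x  <- iris[in_train, 1:4]
test_x   <- iris[-in_train, 1:4]

pp      <- preProcess(train_x, method = c("center", "scale"))  # parameters from training rows only
train_t <- predict(pp, train_x)
test_t  <- predict(pp, test_x)                                 # same means/sds reused on the test rows
summary(train_t)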