# Load the built-in iris data set into the workspace.
data("iris")
# View() opens a spreadsheet-style viewer and is only useful in an
# interactive session; skip it when the code is run or knitted in batch.
if (interactive()) {
  View(iris)
}
# Per-column summary statistics and the number of rows per species.
summary(iris)
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width          Species  
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   setosa    :50  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   versicolor:50  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300   virginica :50  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500                  
# Summary statistics restricted to the two width measurements.
summary(iris[, c("Petal.Width", "Sepal.Width")])
  Petal.Width     Sepal.Width   
 Min.   :0.100   Min.   :2.000  
 1st Qu.:0.300   1st Qu.:2.800  
 Median :1.300   Median :3.000  
 Mean   :1.199   Mean   :3.057  
 3rd Qu.:1.800   3rd Qu.:3.300  
 Max.   :2.500   Max.   :4.400  
# Summary statistics restricted to the two length measurements.
summary(iris[, c("Petal.Length", "Sepal.Length")])
  Petal.Length    Sepal.Length  
 Min.   :1.000   Min.   :4.300  
 1st Qu.:1.600   1st Qu.:5.100  
 Median :4.350   Median :5.800  
 Mean   :3.758   Mean   :5.843  
 3rd Qu.:5.100   3rd Qu.:6.400  
 Max.   :6.900   Max.   :7.900  
# Install ggvis only when it is missing, so re-running the code does not
# download and reinstall the package every time.
if (!requireNamespace("ggvis", quietly = TRUE)) {
  install.packages("ggvis")
}
library(ggvis)
# Scatter plot of sepal length against sepal width, filled by species.
iris %>%
  ggvis(~Sepal.Length, ~Sepal.Width, fill = ~Species) %>%
  layer_points()
# Install class only when it is missing so repeated runs skip the download.
if (!requireNamespace("class", quietly = TRUE)) {
  install.packages("class")
}
Installing package into ‘/Users/jaclynbazsika/Library/R/3.3/library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/class_7.3-14.tgz'
Content type 'application/x-gzip' length 87780 bytes (85 KB)
==================================================
downloaded 85 KB

The downloaded binary packages are in
    /var/folders/8s/6xnh03sd7zzc1nny5h0xzx3h0000gn/T//Rtmpc87WBd/downloaded_packages
library("class")
# Randomly assign every row to group 1 (training, probability 0.8) or
# group 2 (test, probability 0.2). The fixed seed makes the assignment
# reproducible. The original repeated this seed/sample/split sequence twice
# with identical code; one pass yields exactly the same objects.
set.seed(3465)
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.8, 0.2))
# Use the assignment vector, ind, to define the training and test sets:
# columns 1-4 hold the measurements, column 5 holds the Species label.
irisTrain <- iris[ind == 1, 1:4]
irisTest <- iris[ind == 2, 1:4]
irisTrainLabels <- iris[ind == 1, 5]
irisTestLabels <- iris[ind == 2, 5]
# k-nearest-neighbour classification of the test rows with k = 3.
iris_pred <- knn(
  train = irisTrain, test = irisTest,
  cl = irisTrainLabels, k = 3
)
iris_pred # view results of knn function
 [1] setosa     setosa     setosa     setosa     setosa     setosa     setosa     setosa     versicolor versicolor versicolor versicolor versicolor versicolor
[15] versicolor versicolor virginica  virginica  virginica  virginica 
Levels: setosa versicolor virginica
# Install gmodels only when it is missing so repeated runs skip the download.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
Installing package into ‘/Users/jaclynbazsika/Library/R/3.3/library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/gmodels_2.16.2.tgz'
Content type 'application/x-gzip' length 72626 bytes (70 KB)
==================================================
downloaded 70 KB

The downloaded binary packages are in
    /var/folders/8s/6xnh03sd7zzc1nny5h0xzx3h0000gn/T//Rtmpc87WBd/downloaded_packages
library(gmodels)

# Cross-tabulate the true test labels (rows) against the kNN predictions
# (columns). Every proportion option is switched off so only counts are
# printed. FALSE is spelled out because F is an ordinary variable that can
# be reassigned.
CrossTable(
  x = irisTestLabels, y = iris_pred,
  prop.chisq = FALSE, prop.r = FALSE, prop.c = FALSE, prop.t = FALSE
)

 
   Cell Contents
|-------------------------|
|                       N |
|-------------------------|

 
Total Observations in Table:  20 

 
               | iris_pred 
irisTestLabels |     setosa | versicolor |  virginica |  Row Total | 
---------------|------------|------------|------------|------------|
        setosa |          8 |          0 |          0 |          8 | 
---------------|------------|------------|------------|------------|
    versicolor |          0 |          8 |          0 |          8 | 
---------------|------------|------------|------------|------------|
     virginica |          0 |          0 |          4 |          4 | 
---------------|------------|------------|------------|------------|
  Column Total |          8 |          8 |          4 |         20 | 
---------------|------------|------------|------------|------------|

 
# Install caret only when it is missing. Calling install.packages() on a
# package that is already loaded in the session is what produces the
# "Updating loaded packages" error.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
Error in install.packages : Updating loaded packages
library(caret)
Loading required package: lattice
Error: unexpected symbol in "Loading required"
# Min-max rescale a numeric vector so its smallest value maps to 0 and its
# largest value maps to 1.
#
# x:     numeric vector to rescale.
# na.rm: if TRUE, NA values are ignored when finding the minimum and
#        maximum (NA entries stay NA in the result). The default, FALSE,
#        keeps the original behaviour: any NA in x makes every result NA.
#
# Returns a numeric vector the same length as x. A vector whose values are
# all identical has zero range; it is returned as zeros instead of the
# NaN values that 0 / 0 would produce. An empty input yields numeric(0)
# without the warnings min()/max() emit on empty vectors.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  rng <- range(x, na.rm = na.rm)
  num <- x - rng[1]
  denom <- rng[2] - rng[1]
  # isTRUE() keeps an NA range (NA in x with na.rm = FALSE) on the normal
  # path below, where it propagates to an all-NA result as before.
  if (isTRUE(denom == 0)) {
    return(num)
  }
  num / denom
}
# Apply the normalize() function to each of the four measurement columns
# (the Species column is left out) and collect the results in a data frame.
iris_x <- as.data.frame(lapply(iris[, 1:4], normalize))
# Summary of the original, unscaled data.
summary(iris)
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width          Species  
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   setosa    :50  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   versicolor:50  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300   virginica :50  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500                  
# Summarise the rescaled measurements in iris_x for comparison with the
# summary of the original iris data.
summary(iris_x)
  Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
 1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
 Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
 Mean   :0.4287   Mean   :0.4406   Mean   :0.4675   Mean   :0.45806  
 3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  

Discussion Questions

  1. How do supervised learning algorithms solve regression and classification problems?

  2. What packages in R perform supervised learning?

mice, rpart, party, caret, nnet

  3. How would we compare the results of two different models, or sets of hyperparameters for one model?

You would use the F-test in a two-way ANOVA to compare the results of two different models.

LS0tDQp0aXRsZTogIlN1cGVydmlzZWQgTGVhcm5pbmciDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmRhdGEoImlyaXMiKQ0KVmlldyhpcmlzKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShpcmlzKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShpcmlzW2MoIlBldGFsLldpZHRoIiwgIlNlcGFsLldpZHRoIildKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShpcmlzW2MoIlBldGFsLkxlbmd0aCIsICJTZXBhbC5MZW5ndGgiKV0pDQpgYGANCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCdnZ3ZpcycpDQpsaWJyYXJ5KGdndmlzKQ0KYGBgDQoNCmBgYHtyfQ0KaXJpcyAlPiUgZ2d2aXMoflNlcGFsLkxlbmd0aCwgflNlcGFsLldpZHRoLCBmaWxsID0gflNwZWNpZXMpICU+JSBsYXllcl9wb2ludHMoKQ0KYGBgDQoNCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygiY2xhc3MiKQ0KbGlicmFyeSgiY2xhc3MiKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMzQ2NSkNCmluZCA8LSBzYW1wbGUoMiwgbnJvdyhpcmlzKSwgcmVwbGFjZT1UUlVFLCBwcm9iPWMoMC44LCAwLjIpKQ0KIyB1c2UgdGhlIGFycmF5LCBpbmQsIHRvIGRlZmluZSB0aGUgdHJhaW5pbmcgYW5kIHRlc3Qgc2V0cyANCmlyaXNUcmFpbiA8LSBpcmlzW2luZD09MSwgMTo0XQ0KaXJpc1Rlc3QgPC0gaXJpc1tpbmQ9PTIsIDE6NF0NCmlyaXNUcmFpbkxhYmVscyA8LSBpcmlzW2luZD09MSwgNV0NCmlyaXNUZXN0TGFiZWxzIDwtIGlyaXNbaW5kPT0yLCA1XQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMzQ2NSkNCmluZCA8LSBzYW1wbGUoMiwgbnJvdyhpcmlzKSwgcmVwbGFjZT1UUlVFLCBwcm9iPWMoMC44LCAwLjIpKQ0KIyB1c2UgdGhlIGFycmF5LCBpbmQsIHRvIGRlZmluZSB0aGUgdHJhaW5pbmcgYW5kIHRlc3Qgc2V0cyANCmlyaXNUcmFpbiA8LSBpcmlzW2luZD09MSwgMTo0XQ0KaXJpc1Rlc3QgPC0gaXJpc1tpbmQ9PTIsIDE6NF0NCmlyaXNUcmFpbkxhYmVscyA8LSBpcmlzW2luZD09MSwgNV0NCmlyaXNUZXN0TGFiZWxzIDwtIGlyaXNbaW5kPT0yLCA1XQ0KYGBgDQoNCg0KYGBge3J9DQppcmlzX3ByZWQgPC0ga25uKHRyYWluPWlyaXNUcmFpbiwgdGVzdD1pcmlzVGVzdCwgY2w9aXJpc1RyYWluTGFiZWxzLCBrPTMpDQppcmlzX3ByZWQgIyB2aWV3IHJlc3VsdHMgb2Yga25uIGZ1bmN0aW9uIA0KYGBgDQoNCmBgYHtyfQ0KDQpgYGANCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJnbW9kZWxzIikNCmxpYnJhcnkoZ21vZGVscykNCiANCkNyb3NzVGFibGUoeD1pcmlzVGVzdExhYmVscywgeT1pcmlzX3ByZWQsIHByb3AuY2hpc3E9RiwgcHJvcC5yPUYsIHByb3AuYz1GLCBwcm9wLnQ9RikNCmBgYA0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCdjYXJldCcpDQpsaWJyYXJ5KGNhcmV0KQ0KTG9hZGluZyByZXF1aXJlZCBwYWNrYWdlOiBsYXR0aWNlDQpMb2FkaW5nIHJlcXVpcmVkIHBhY2thZ2U6IGdncGxvdDINCiMgIFRoaXMgc2V0cyBvdXIgcmFuZG9t
IHNlZWQsIA0KIyBzbyByYW5kb21seSBnZW5lcmF0ZWQgbnVtYmVycyB3aWxsIGJlIHRoZSANCiMgc2FtZSBldmVyeSB0aW1lIA0Kc2V0LnNlZWQoMzQ1NikNCnRyYWluSW5kZXggPC0gY3JlYXRlRGF0YVBhcnRpdGlvbihpcmlzJFNwZWNpZXMsIHA9MC44LCBsaXN0PUYsIHRpbWVzPTEpDQpoZWFkKHRyYWluSW5kZXgpDQpgYGANCg0KYGBge3J9DQpub3JtYWxpemUgPC0gZnVuY3Rpb24oeCkgew0KbnVtIDwtIHggLSBtaW4oeCkNCmRlbm9tIDwtIG1heCh4KSAtIG1pbih4KQ0KcmV0dXJuIChudW0vZGVub20pDQp9DQpgYGANCg0KYGBge3J9DQojIGFwcGx5IHRoaXMgbmV3bHkgY3JlYXRlZCBub3JtYWxpemUgYXJndW1lbnQgdG8gdGhlIGRhdGFzZXQgDQppcmlzX3ggPC0gYXMuZGF0YS5mcmFtZShsYXBwbHkoaXJpc1sxOjRdLCBub3JtYWxpemUpKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShpcmlzKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShpcmlzX3gpDQpgYGANCg0KIyMgRGlzY3Vzc2lvbiBRdWVzdGlvbnMNCg0KMS4gSG93IGRvIHN1cGVydmlzZWQgbGVhcm5pbmcgYWxnb3JpdGhtcyBzb2x2ZSByZWdyZXNzaW9uIGFuZCBjbGFzc2lmaWNhdGlvbiBwcm9ibGVtcz8NCg0KMi4gV2hhdCBwYWNrYWdlcyBpbiBSIHBlcmZvcm0gc3VwZXJ2aXNlZCBsZWFybmluZz8gDQoNCk1JQ0UscnBhcnQsUEFSVFksQ0FSRVQsbm5ldA0KDQozLkhvdyB3b3VsZCB3ZSBjb21wYXJlIHRoZSByZXN1bHRzIG9mIHR3byBkaWZmZXJlbnQgbW9kZWxzLCBvciBzZXRzIG9mIGh5cGVycGFyYW1ldGVycyBmb3Igb25lIG1vZGVsPw0KDQpZb3Ugd291bGQgdXNlIHRoZSBGLXRlc3QgaW4gYSB0d28gd2F5IEFOT1ZBIHRvIGNvbXBhcmUgdGhlIHJlc3VsdHMgb2YgdHdvIGRpZmZlcmVudCBtb2RlbHMuDQoNCg0K