#KNN Algorithm
# Print the complete built-in iris data set: 150 rows with four numeric
# measurements and a Species column.
iris
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            5.1         3.5          1.4         0.2     setosa
## 2            4.9         3.0          1.4         0.2     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 5            5.0         3.6          1.4         0.2     setosa
## 6            5.4         3.9          1.7         0.4     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 14           4.3         3.0          1.1         0.1     setosa
## 15           5.8         4.0          1.2         0.2     setosa
## 16           5.7         4.4          1.5         0.4     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 34           5.5         4.2          1.4         0.2     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 49           5.3         3.7          1.5         0.2     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 51           7.0         3.2          4.7         1.4 versicolor
## 52           6.4         3.2          4.5         1.5 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 54           5.5         2.3          4.0         1.3 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 56           5.7         2.8          4.5         1.3 versicolor
## 57           6.3         3.3          4.7         1.6 versicolor
## 58           4.9         2.4          3.3         1.0 versicolor
## 59           6.6         2.9          4.6         1.3 versicolor
## 60           5.2         2.7          3.9         1.4 versicolor
## 61           5.0         2.0          3.5         1.0 versicolor
## 62           5.9         3.0          4.2         1.5 versicolor
## 63           6.0         2.2          4.0         1.0 versicolor
## 64           6.1         2.9          4.7         1.4 versicolor
## 65           5.6         2.9          3.6         1.3 versicolor
## 66           6.7         3.1          4.4         1.4 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 68           5.8         2.7          4.1         1.0 versicolor
## 69           6.2         2.2          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 77           6.8         2.8          4.8         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 85           5.4         3.0          4.5         1.5 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 99           5.1         2.5          3.0         1.1 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 102          5.8         2.7          5.1         1.9  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 105          6.5         3.0          5.8         2.2  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 107          4.9         2.5          4.5         1.7  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 109          6.7         2.5          5.8         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 112          6.4         2.7          5.3         1.9  virginica
## 113          6.8         3.0          5.5         2.1  virginica
## 114          5.7         2.5          5.0         2.0  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 120          6.0         2.2          5.0         1.5  virginica
## 121          6.9         3.2          5.7         2.3  virginica
## 122          5.6         2.8          4.9         2.0  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 128          6.1         3.0          4.9         1.8  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 132          7.9         3.8          6.4         2.0  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 150          5.9         3.0          5.1         1.8  virginica
# Preview the first six rows of iris.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Show the structure of iris: the type of each column and the levels of the
# Species factor.
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary: min/quartiles/mean/max for the numeric columns and the
# number of rows per Species level.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Visualization ----
library(ggvis)

# Petal length against petal width, points filled by species.
iris %>%
  ggvis(~Petal.Length, ~Petal.Width, fill = ~factor(Species)) %>%
  layer_points()

# Sepal width against sepal length, points filled by species.
iris %>%
  ggvis(~Sepal.Width, ~Sepal.Length, fill = ~factor(Species)) %>%
  layer_points()
# Min-max normalization.
#
# Linearly rescales a numeric vector so that its minimum maps to 0 and its
# maximum maps to 1.
#
# Arguments:
#   x     Numeric vector to rescale.
#   na.rm If TRUE, missing values are ignored when finding the range and stay
#         NA in the result. If FALSE (the default), any NA in `x` makes the
#         whole result NA, because the range itself is NA.
#
# Returns a numeric vector of the same length as `x`.
min_max_normalizer <- function(x, na.rm = FALSE) {
  # Nothing to rescale; also avoids the warnings min()/max() emit on empty input.
  if (length(x) == 0L) {
    return(numeric(0))
  }

  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]

  # A constant vector has zero range; dividing would yield NaN (0 / 0).
  # Map every non-missing value to 0 instead. isTRUE() keeps an NA span
  # (NA present and na.rm = FALSE) on the normal path below.
  if (isTRUE(span == 0)) {
    return(ifelse(is.na(x), NA_real_, 0))
  }

  (x - bounds[1]) / span
}

# Rescale each of the four numeric iris columns with min_max_normalizer(),
# then summarise the rescaled columns.
normalized_iris <- iris[1:4]
normalized_iris[] <- lapply(normalized_iris, min_max_normalizer)
summary(normalized_iris)
##   Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
##  Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
##  Mean   :0.4287   Mean   :0.4406   Mean   :0.4675   Mean   :0.45806  
##  3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000
# Outcome distribution: number of rows per species.
table(iris[["Species"]])
## 
##     setosa versicolor  virginica 
##         50         50         50
# Random train/test split ----
# Each row is independently assigned group 1 (probability 0.67) or group 2
# (probability 0.33); the seed makes the assignment repeatable.
set.seed(1234)
random_samples <- sample(
  x = 2,
  size = nrow(iris),
  replace = TRUE,
  prob = c(0.67, 0.33)
)

# Group 1 is the training set: the four measurement columns of the raw
# (un-normalized) iris data, with the species kept separately as labels.
iris.training <- iris[random_samples == 1, -5]
iris.trainLabels <- iris[["Species"]][random_samples == 1]

# Group 2 is the test set, split the same way.
iris.test <- iris[random_samples == 2, -5]
iris.testLabels <- iris[["Species"]][random_samples == 2]
# k-nearest-neighbour classification (k = 3) ----
library(class)

# iris_model holds the predicted species for each row of iris.test, based on
# the labelled rows in iris.training.
iris_model <- knn(
  train = iris.training,
  test = iris.test,
  cl = iris.trainLabels,
  k = 3
)
iris_model
##  [1] setosa     setosa     setosa     setosa     setosa     setosa    
##  [7] setosa     setosa     setosa     setosa     setosa     setosa    
## [13] versicolor versicolor versicolor versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] virginica  virginica  virginica  virginica  versicolor virginica 
## [31] virginica  virginica  virginica  virginica  virginica  virginica 
## [37] virginica  virginica  virginica  virginica 
## Levels: setosa versicolor virginica
# Evaluation ----
library(gmodels)

# Cross-tabulate the true test labels (rows) against the predicted labels
# (columns), without the per-cell chi-square contribution.
CrossTable(
  x = iris.testLabels,
  y = iris_model,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  40 
## 
##  
##                 | iris_model 
## iris.testLabels |     setosa | versicolor |  virginica |  Row Total | 
## ----------------|------------|------------|------------|------------|
##          setosa |         12 |          0 |          0 |         12 | 
##                 |      1.000 |      0.000 |      0.000 |      0.300 | 
##                 |      1.000 |      0.000 |      0.000 |            | 
##                 |      0.300 |      0.000 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##      versicolor |          0 |         12 |          0 |         12 | 
##                 |      0.000 |      1.000 |      0.000 |      0.300 | 
##                 |      0.000 |      0.923 |      0.000 |            | 
##                 |      0.000 |      0.300 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##       virginica |          0 |          1 |         15 |         16 | 
##                 |      0.000 |      0.062 |      0.938 |      0.400 | 
##                 |      0.000 |      0.077 |      1.000 |            | 
##                 |      0.000 |      0.025 |      0.375 |            | 
## ----------------|------------|------------|------------|------------|
##    Column Total |         12 |         13 |         15 |         40 | 
##                 |      0.300 |      0.325 |      0.375 |            | 
## ----------------|------------|------------|------------|------------|
## 
## 
#Apriori Algorithm
library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:ggvis':
## 
##     band
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
# Load the Adult transactions data set and print its summary.
data("Adult")
summary(Adult)
## transactions as itemMatrix in sparse format with
##  48842 rows (elements/itemsets/transactions) and
##  115 columns (items) and a density of 0.1089939 
## 
## most frequent items:
##            capital-loss=None            capital-gain=None 
##                        46560                        44807 
## native-country=United-States                   race=White 
##                        43832                        41762 
##            workclass=Private                      (Other) 
##                        33906                       401333 
## 
## element (itemset/transaction) length distribution:
## sizes
##     9    10    11    12    13 
##    19   971  2067 15623 30162 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   12.00   13.00   12.53   13.00   13.00 
## 
## includes extended item information - examples:
##            labels variables      levels
## 1       age=Young       age       Young
## 2 age=Middle-aged       age Middle-aged
## 3      age=Senior       age      Senior
## 
## includes extended transaction information - examples:
##   transactionID
## 1             1
## 2             2
## 3             3
# Show the first five transactions. R indexing is 1-based, so the range starts
# at 1; a 0 index selects nothing and was only silently ignored.
inspect(Adult[1:5])
##     items                               transactionID
## [1] {age=Middle-aged,                                
##      workclass=State-gov,                            
##      education=Bachelors,                            
##      marital-status=Never-married,                   
##      occupation=Adm-clerical,                        
##      relationship=Not-in-family,                     
##      race=White,                                     
##      sex=Male,                                       
##      capital-gain=Low,                               
##      capital-loss=None,                              
##      hours-per-week=Full-time,                       
##      native-country=United-States,                   
##      income=small}                                  1
## [2] {age=Senior,                                     
##      workclass=Self-emp-not-inc,                     
##      education=Bachelors,                            
##      marital-status=Married-civ-spouse,              
##      occupation=Exec-managerial,                     
##      relationship=Husband,                           
##      race=White,                                     
##      sex=Male,                                       
##      capital-gain=None,                              
##      capital-loss=None,                              
##      hours-per-week=Part-time,                       
##      native-country=United-States,                   
##      income=small}                                  2
## [3] {age=Middle-aged,                                
##      workclass=Private,                              
##      education=HS-grad,                              
##      marital-status=Divorced,                        
##      occupation=Handlers-cleaners,                   
##      relationship=Not-in-family,                     
##      race=White,                                     
##      sex=Male,                                       
##      capital-gain=None,                              
##      capital-loss=None,                              
##      hours-per-week=Full-time,                       
##      native-country=United-States,                   
##      income=small}                                  3
## [4] {age=Senior,                                     
##      workclass=Private,                              
##      education=11th,                                 
##      marital-status=Married-civ-spouse,              
##      occupation=Handlers-cleaners,                   
##      relationship=Husband,                           
##      race=Black,                                     
##      sex=Male,                                       
##      capital-gain=None,                              
##      capital-loss=None,                              
##      hours-per-week=Full-time,                       
##      native-country=United-States,                   
##      income=small}                                  4
## [5] {age=Middle-aged,                                
##      workclass=Private,                              
##      education=Bachelors,                            
##      marital-status=Married-civ-spouse,              
##      occupation=Prof-specialty,                      
##      relationship=Wife,                              
##      race=Black,                                     
##      sex=Female,                                     
##      capital-gain=None,                              
##      capital-loss=None,                              
##      hours-per-week=Full-time,                       
##      native-country=Cuba,                            
##      income=small}                                  5
# Mine association rules from Adult with minimum support 50% and minimum
# confidence 80%.
rules <- apriori(
  Adult,
  parameter = list(
    support = 0.5,
    confidence = 0.8,
    target = "rules"
  )
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5     0.5      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 24421 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.03s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [84 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Summarise the mined rule set: rule-length distribution and the quality
# measures (support, confidence, lift, count).
summary(rules)
## set of 84 rules
## 
## rule length distribution (lhs + rhs):sizes
##  1  2  3  4 
##  4 23 38 19 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   2.857   3.000   4.000 
## 
## summary of quality measures:
##     support         confidence          lift            count      
##  Min.   :0.5084   Min.   :0.8504   Min.   :0.9789   Min.   :24832  
##  1st Qu.:0.5415   1st Qu.:0.8888   1st Qu.:0.9943   1st Qu.:26447  
##  Median :0.5897   Median :0.9132   Median :0.9988   Median :28803  
##  Mean   :0.6433   Mean   :0.9110   Mean   :1.0034   Mean   :31422  
##  3rd Qu.:0.7490   3rd Qu.:0.9422   3rd Qu.:1.0077   3rd Qu.:36585  
##  Max.   :0.9533   Max.   :0.9583   Max.   :1.0586   Max.   :46560  
## 
## mining info:
##   data ntransactions support confidence
##  Adult         48842     0.5        0.8
#top three rules, ranked by confidence and then by support
# Sort the rules by confidence (ties broken by support), keep the first
# three, and convert them to a data frame for display.
as(
  head(
    sort(rules, by = c("confidence", "support")),
    n = 3
  ),
  "data.frame"
)
##                                                                      rules
## 7                        {hours-per-week=Full-time} => {capital-loss=None}
## 15                              {workclass=Private} => {capital-loss=None}
## 50 {workclass=Private,native-country=United-States} => {capital-loss=None}
##      support confidence     lift count
## 7  0.5606650  0.9582531 1.005219 27384
## 15 0.6639982  0.9564974 1.003377 32431
## 50 0.5897179  0.9554818 1.002312 28803
# k-means clustering ----
# Cluster on the four numeric measurements only; the Species label is dropped
# so the clustering is unsupervised.
kmean_iris <- iris[names(iris) != "Species"]

# kmeans() starts from randomly chosen centres, so without a seed the cluster
# numbering (and occasionally the solution itself) changes between runs.
# Fix the seed for repeatability and try several random starts, keeping the
# best one, to avoid a poor local optimum.
set.seed(1234)
clusters <- kmeans(kmean_iris, centers = 3, nstart = 25)
clusters
## K-means clustering with 3 clusters of sizes 38, 50, 62
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     6.850000    3.073684     5.742105    2.071053
## 2     5.006000    3.428000     1.462000    0.246000
## 3     5.901613    2.748387     4.393548    1.433871
## 
## Clustering vector:
##   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [71] 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 1 1
## [106] 1 3 1 1 1 1 1 1 3 3 1 1 1 1 3 1 3 1 3 1 1 3 3 1 1 1 1 1 3 1 1 1 1 3 1
## [141] 1 1 3 1 1 1 3 1 1 3
## 
## Within cluster sum of squares by cluster:
## [1] 23.87947 15.15100 39.82097
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Compare the true species (rows) with the assigned cluster (columns).
table(iris[["Species"]], clusters[["cluster"]])
##             
##               1  2  3
##   setosa      0 50  0
##   versicolor  2  0 48
##   virginica  36  0 14
# Plot sepal length against sepal width; both the colour and the plotting
# symbol of each point are chosen by its cluster number.
plot(
  kmean_iris[c("Sepal.Length", "Sepal.Width")],
  col = clusters$cluster,
  pch = c(15, 16, 17)[as.numeric(clusters$cluster)]
)

# Overlay the three cluster centres as enlarged symbols, coloured 1 to 3 to
# match their clusters.
points(
  clusters$centers[, c("Sepal.Length", "Sepal.Width")],
  col = 1:3,
  pch = 8,
  cex = 2
)