# datamining.R

# Session diagnostics ----
# Report memory use, the working directory and the workspace contents before
# the analysis starts. Lines beginning with "##" are console output captured
# from the original run of this script.

# memory.size() and memory.limit() are Windows-specific, and from R 4.2.0 they
# are unsupported stubs, so they are only called where they report real
# numbers. print() is used because only the last value of a braced `if` would
# be auto-printed.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  print(memory.size())
  print(memory.limit())
}

## [1] 14.28
## [1] 1535

getwd()

## [1] "C:/Users/dell/Desktop"

ls()

## character(0)

# The workspace is deliberately not wiped with rm(list = ls()): no statement in
# this script needs an empty global environment, and clearing it would destroy
# the caller's objects whenever the script is sourced into a live session.
gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 291003  7.8     592000 15.9   350000  9.4
## Vcells 321668  2.5     786432  6.0   677564  5.2

# List the CSV files in the current working directory. The directory is passed
# explicitly instead of calling setwd() on a machine-specific absolute path,
# which stops the script on any computer that lacks that folder.
dir(path = getwd(), pattern = "\\.(csv|CSV)$")

## [1] "Analytics decisionstats.com Audience Overview 20110617-20120717.csv"
## [2] "BigDiamonds.csv"                                                    
## [3] "Boston.csv"                                                         
## [4] "ccFraud.csv"                                                        
## [5] "test.csv"

library(arules)

## Loading required package: Matrix
## 
## Attaching package: 'arules'
## 
## The following objects are masked from 'package:base':
## 
##     %in%, write

library(arulesViz)

## Loading required package: grid
## 
## Attaching package: 'arulesViz'
## 
## The following object is masked from 'package:base':
## 
##     abbreviate

# Groceries transactions: first look ----
# Bring the Groceries data set into the workspace and show its dimensions.
data(Groceries)
Groceries

## transactions in sparse format with
##  9835 transactions (rows) and
##  169 items (columns)

# Print the item sets of the first five transactions.
inspect(Groceries[seq_len(5)])

##   items                     
## 1 {citrus fruit,            
##    semi-finished bread,     
##    margarine,               
##    ready soups}             
## 2 {tropical fruit,          
##    yogurt,                  
##    coffee}                  
## 3 {whole milk}              
## 4 {pip fruit,               
##    yogurt,                  
##    cream cheese ,           
##    meat spreads}            
## 5 {other vegetables,        
##    whole milk,              
##    condensed milk,          
##    long life bakery product}

# Bar chart of the 20 most frequent items, using absolute counts.
itemFrequencyPlot(Groceries, type = "absolute", topN = 20)

# Association rules on Groceries ----
# Mine rules with apriori, keeping those with support of at least 0.001 and
# confidence of at least 0.8. The parameter names are spelled out in full
# (they match the "support" and "confidence" columns of the parameter
# specification printed below) rather than relying on abbreviation.
rules.all <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.8)
)

## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##         0.8    0.1    1 none FALSE            TRUE   0.001      1     10
##  target   ext
##   rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09)        (c) 1996-2004   Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.01s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 done [0.07s].
## writing ... [410 rule(s)] done [0.01s].
## creating S4 object  ... done [0.01s].

# Show the first five rules in the order apriori returned them (not sorted by
# any quality measure).
inspect(rules.all[1:5])

##   lhs                 rhs                support confidence      lift
## 1 {liquor,                                                           
##    red/blush wine} => {bottled beer} 0.001931876  0.9047619 11.235269
## 2 {curd,                                                             
##    cereals}        => {whole milk}   0.001016777  0.9090909  3.557863
## 3 {yogurt,                                                           
##    cereals}        => {whole milk}   0.001728521  0.8095238  3.168192
## 4 {butter,                                                           
##    jam}            => {whole milk}   0.001016777  0.8333333  3.261374
## 5 {soups,                                                            
##    bottled beer}   => {whole milk}   0.001118454  0.9166667  3.587512

# Static graph of the first five rules. TRUE/FALSE are written out because the
# shorthands T and F are ordinary variables that can be reassigned.
plot(rules.all[1:5], method = "graph", interactive = FALSE)

# Graph of the first fifteen rules: interactive when a user is at the console,
# static otherwise, so a batch run (Rscript, knitting) still gets a plot.
# NOTE(review): newer arulesViz releases may prefer an `engine` argument over
# `interactive` -- confirm against the installed version.
plot(rules.all[1:15], method = "graph", interactive = interactive())

library(ggplot2)
# K-means clustering of diamonds on carat and price ----
data(diamonds)
head(diamonds)

##   carat       cut color clarity depth table price    x    y    z
## 1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
## 3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
## 4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
## 5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48

# Keep only the two numeric columns used for clustering.
diamonds2 <- diamonds[c("carat", "price")]

# kmeans() chooses its starting centres at random, so the seed is fixed to make
# the cluster numbering and sizes repeatable from run to run.
set.seed(2015)
clus <- kmeans(diamonds2, centers = 7)
summary(clus)

##              Length Class  Mode   
## cluster      53940  -none- numeric
## centers         14  -none- numeric
## totss            1  -none- numeric
## withinss         7  -none- numeric
## tot.withinss     1  -none- numeric
## betweenss        1  -none- numeric
## size             7  -none- numeric
## iter             1  -none- numeric
## ifault           1  -none- numeric

# Number of diamonds assigned to each cluster. The counts recorded below come
# from the original unseeded run, so a fresh run may label or size the clusters
# differently.
table(clus$cluster)

## 
##     1     2     3     4     5     6     7 
##  1821  8902  2452 20698  5337  3366 11364

# Scatter plot of price against carat, coloured by cluster membership.
# NOTE(review): the columns are clustered on their raw scales (carat below 1,
# price in the hundreds in the rows shown above), so price presumably dominates
# the distance -- consider scale(diamonds2) if that is not intended.
plot(diamonds2$carat, diamonds2$price, col = clus$cluster)

library(rpart, quietly=TRUE)
library(rattle)

## Loading required package: RGtk2
## Rattle: A free graphical interface for data mining with R.
## Version 3.5.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.

# Decision tree for RainTomorrow on the weather data ----
data("weather")

# Minimum number of observations a node must contain before rpart attempts to
# split it. It is passed to rpart() through rpart.control(); previously it was
# assigned but never used, and the call carried a stray empty argument.
minsplit <- 10
treemodel <- rpart(
  RainTomorrow ~ .,
  data = weather,
  control = rpart.control(minsplit = minsplit)
)

# Print the fitted tree as a list of rules.
# NOTE(review): the rules recorded below separate the classes perfectly using
# RISK_MM alone (prob=1.00 / prob=0.00), which suggests RISK_MM encodes the
# target -- confirm against the dataset documentation and consider removing it
# from the predictors.
asRules(treemodel)

## 
##  Rule number: 3 [RainTomorrow=Yes cover=66 (18%) prob=1.00]
##    RISK_MM>=1.1
## 
##  Rule number: 2 [RainTomorrow=No cover=300 (82%) prob=0.00]
##    RISK_MM< 1.1

# Draw the fitted tree.
library(rpart.plot)
fancyRpartPlot(treemodel, main = "Decision Tree weather $ RainTomorrow")

# datamining.R
#
# dell
#
# Sun Oct 25 18:39:17 2015