# Read the housing data file with read.table()'s default settings.
housing <- read.table("housing.data")

# Name the fourteen columns; MDEV (the last one) is the response that the
# models later in this script are fitted to.
colnames(housing) <- c(
  "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
  "DIS", "RAD", "TAX", "PRATIO", "B", "LSTAT", "MDEV"
)

# Print per-column summary statistics.
summary(housing)
##       CRIM                ZN             INDUS            CHAS        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08204   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       NOX               RM             AGE              DIS        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       RAD              TAX            PRATIO            B         
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   :  0.32  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.:375.38  
##  Median : 5.000   Median :330.0   Median :19.05   Median :391.44  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :356.67  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:396.23  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :396.90  
##      LSTAT            MDEV      
##  Min.   : 1.73   Min.   : 5.00  
##  1st Qu.: 6.95   1st Qu.:17.02  
##  Median :11.36   Median :21.20  
##  Mean   :12.65   Mean   :22.53  
##  3rd Qu.:16.95   3rd Qu.:25.00  
##  Max.   :37.97   Max.   :50.00
# Scatterplot matrix of every pair of columns.
plot(housing)

# Correlation plot: pairwise correlations of all columns, drawn as numbers.
library(corrplot)
corrplot(cor(housing), method = "number", tl.cex = 0.5)

# Partitioning: first sort the rows by the response value.
housing <- housing[order(housing$MDEV), ]
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2

# Fix the RNG seed so the random split below is repeatable.
set.seed(3277)

# Split on MDEV with p = 0.75: the selected rows become the training set,
# the remaining rows the test set.
trainingIndices <- createDataPartition(housing$MDEV, p = 0.75, list = FALSE)
housingTraining <- housing[trainingIndices, ]
housingTesting <- housing[-trainingIndices, ]
nrow(housingTraining)
## [1] 381
nrow(housingTesting)
## [1] 125
# Linear model: ordinary least squares of MDEV on all thirteen predictors,
# fitted on the training rows only.
linearModel <- lm(
  MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX +
    PRATIO + B + LSTAT,
  data = housingTraining
)
summary(linearModel)
## 
## Call:
## lm(formula = MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + 
##     DIS + RAD + TAX + PRATIO + B + LSTAT, data = housingTraining)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.1317  -2.6258  -0.5413   1.5656  26.2551 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  41.196069   5.609316   7.344 1.35e-12 ***
## CRIM         -0.122053   0.032598  -3.744 0.000210 ***
## ZN            0.052261   0.015412   3.391 0.000772 ***
## INDUS         0.032047   0.068200   0.470 0.638709    
## CHAS          2.385849   0.959308   2.487 0.013324 *  
## NOX         -17.566444   4.273389  -4.111 4.87e-05 ***
## RM            3.485134   0.463397   7.521 4.23e-13 ***
## AGE          -0.003562   0.014443  -0.247 0.805317    
## DIS          -1.545347   0.221048  -6.991 1.30e-11 ***
## RAD           0.333380   0.076002   4.386 1.51e-05 ***
## TAX          -0.014973   0.004317  -3.468 0.000586 ***
## PRATIO       -0.995370   0.145592  -6.837 3.39e-11 ***
## B             0.006718   0.002832   2.373 0.018180 *  
## LSTAT        -0.521544   0.054005  -9.657  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.549 on 367 degrees of freedom
## Multiple R-squared:  0.7605, Adjusted R-squared:  0.752 
## F-statistic: 89.63 on 13 and 367 DF,  p-value: < 2.2e-16
# Prediction: apply the linear model to the held-out test rows.
predicted <- predict(linearModel, newdata = housingTesting)

# Compare the distribution of the predictions with that of the observed
# response.
summary(predicted)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8783 17.8442 21.0676 22.4296 27.2561 42.8923
summary(housingTesting$MDEV)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8.10   17.10   21.20   22.89   25.00   50.00

# Predicted values on the x-axis, observed MDEV on the y-axis.
plot(predicted, housingTesting$MDEV)
# Sum of squared values of a numeric vector. The script applies it to
# prediction errors to obtain a residual sum of squares (it does not
# compute R^2 itself).
#
# x: numeric values.
# Returns a single number, sum(x^2).
sumofsquares <- function(x) {
  sum(x^2)
}
# Residual sum of squares of the linear model on the test set:
# element-wise prediction error, then the sum of its squares.
# (Note: the name `diff` shadows base::diff for the rest of the session.)
diff <- predicted - housingTesting$MDEV
sumofsquares(diff)
## [1] 3555.882
# Generalised linear model -- NOT a logistic regression: glm() is called
# without a `family` argument, the recorded summary reports the gaussian
# family, and the coefficients are identical to those of linearModel.
lr <- glm(MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX + PRATIO + B + LSTAT, data=housingTraining)
summary(lr)
## 
## Call:
## glm(formula = MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + 
##     DIS + RAD + TAX + PRATIO + B + LSTAT, data = housingTraining)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -14.1317   -2.6258   -0.5413    1.5656   26.2551  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  41.196069   5.609316   7.344 1.35e-12 ***
## CRIM         -0.122053   0.032598  -3.744 0.000210 ***
## ZN            0.052261   0.015412   3.391 0.000772 ***
## INDUS         0.032047   0.068200   0.470 0.638709    
## CHAS          2.385849   0.959308   2.487 0.013324 *  
## NOX         -17.566444   4.273389  -4.111 4.87e-05 ***
## RM            3.485134   0.463397   7.521 4.23e-13 ***
## AGE          -0.003562   0.014443  -0.247 0.805317    
## DIS          -1.545347   0.221048  -6.991 1.30e-11 ***
## RAD           0.333380   0.076002   4.386 1.51e-05 ***
## TAX          -0.014973   0.004317  -3.468 0.000586 ***
## PRATIO       -0.995370   0.145592  -6.837 3.39e-11 ***
## B             0.006718   0.002832   2.373 0.018180 *  
## LSTAT        -0.521544   0.054005  -9.657  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 20.69378)
## 
##     Null deviance: 31707.3  on 380  degrees of freedom
## Residual deviance:  7594.6  on 367  degrees of freedom
## AIC: 2251.3
## 
## Number of Fisher Scoring iterations: 2
# Test-set predictions from the glm fit, summarised and plotted against
# the observed response.
predicted <- predict(lr, newdata = housingTesting)
summary(predicted)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8783 17.8442 21.0676 22.4296 27.2561 42.8923
plot(predicted, housingTesting$MDEV)

# Residual sum of squares of the glm fit on the test set.
diff <- predicted - housingTesting$MDEV
sumofsquares(diff)
## [1] 3555.882
# Residual plot: residuals of the linear model against observation index.
plot(resid(linearModel))

# Least-squares line of the predictions (Y) on the observed test values (x),
# computed from the closed-form slope and intercept.
x <- housingTesting$MDEV
Y <- predicted
b1 <- sum((x - mean(x)) * (Y - mean(Y))) / sum((x - mean(x))^2)
b0 <- mean(Y) - b1 * mean(x)
c(b0, b1)
## [1] 7.2106245 0.6648381

# Scatter of observed vs. predicted with the fitted line overlaid.
plot(x, Y)
abline(a = b0, b = b1, col = "blue", lwd = 2)

#relative importance
library(relaimpo)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked _by_ '.GlobalEnv':
## 
##     housing
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
## 
##     melanoma
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
## 
##     aml
## The following object is masked from 'package:caret':
## 
##     cluster
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
## Loading required package: mitools
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# Relative importance of each regressor in linearModel under four metrics;
# with rela = TRUE each metric is normalised to sum to 100%.
calc.relimp(
  linearModel,
  type = c("lmg", "last", "first", "pratt"),
  rela = TRUE
)
## Warning in rev(variances[[p]]) - variances[[p + 1]]: Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.
## Response variable: MDEV 
## Total response variance: 83.44019 
## Analysis based on 381 observations 
## 
## 13 Regressors: 
## CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PRATIO B LSTAT 
## Proportion of variance explained by model: 76.05%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##               lmg         last      first        pratt
## CRIM   0.04378500 0.0423236380 0.05959783  0.069549551
## ZN     0.04085431 0.0347151937 0.05480466  0.072324623
## INDUS  0.04927578 0.0006666234 0.08442062 -0.015510766
## CHAS   0.02068028 0.0186745066 0.01195166  0.016098916
## NOX    0.04611049 0.0510155167 0.06866322  0.129797308
## RM     0.23110043 0.1707701764 0.16468562  0.239015600
## AGE    0.03211959 0.0001836714 0.05639641  0.005826449
## DIS    0.04282755 0.1475559786 0.02469774 -0.125578499
## RAD    0.03552896 0.0580913573 0.05929346 -0.172215184
## TAX    0.05313897 0.0363198971 0.08310082  0.175938820
## PRATIO 0.11235443 0.1411152591 0.09803364  0.165972509
## B      0.02614223 0.0169947393 0.03917392  0.031322939
## LSTAT  0.26608199 0.2815734421 0.19518041  0.407457734
## 
## Average coefficients for different model sizes: 
## 
##                  1X          2Xs          3Xs          4Xs          5Xs
## CRIM    -0.39658057  -0.27179045  -0.21108113  -0.17716944  -0.15605272
## ZN       0.15016161   0.10008617   0.07724633   0.06573547   0.05920013
## INDUS   -0.66137913  -0.49760611  -0.38326657  -0.29972518  -0.23603446
## CHAS     6.71617551   6.28502633   5.84865357   5.37351604   4.90522742
## NOX    -35.23627433 -24.37290112 -18.14707801 -14.72054067 -13.02167728
## RM       9.10534876   7.97900568   7.33195455   6.85127499   6.43337784
## AGE     -0.13074649  -0.08136606  -0.05484617  -0.03952753  -0.03003803
## DIS      1.15243247   0.14262752  -0.45525720  -0.82823438  -1.06957328
## RAD     -0.43523357  -0.23718077  -0.11226188  -0.02648010   0.03740961
## TAX     -0.02681594  -0.02111878  -0.01748128  -0.01513793  -0.01363214
## PRATIO  -2.22931346  -1.79620241  -1.57371014  -1.43633047  -1.33810121
## B        0.03185870   0.02040032   0.01517751   0.01236138   0.01063506
## LSTAT   -0.94731052  -0.89595398  -0.85129784  -0.81015368  -0.77115301
##                  6Xs           7Xs           8Xs           9Xs
## CRIM    -0.142114473  -0.132753351  -0.126571106  -0.122731539
## ZN       0.055115133   0.052452316   0.050767854   0.049851838
## INDUS   -0.185438964  -0.143646311  -0.107867355  -0.076227096
## CHAS     4.467781227   4.070243661   3.713818215   3.395429620
## NOX    -12.395682217 -12.433747908 -12.880656870 -13.579740358
## RM       6.041597852   5.661331637   5.286757133   4.916192653
## AGE     -0.023741516  -0.019260870  -0.015833831  -0.013014735
## DIS     -1.229160682  -1.335838738  -1.407531199  -1.456002582
## RAD      0.088325237   0.131164783   0.168894108   0.203489063
## TAX     -0.012698631  -0.012186554  -0.012010689  -0.012121634
## PRATIO  -1.261487462  -1.199051858  -1.147196205  -1.103918452
## B        0.009490813   0.008693917   0.008119381   0.007693426
## LSTAT   -0.733817032  -0.698052018  -0.663912120  -0.631502580
##                 10Xs          11Xs          12Xs          13Xs
## CRIM    -0.120692182  -0.120074946  -0.120600039  -0.122052773
## ZN       0.049593499   0.049928963   0.050822551   0.052261271
## INDUS   -0.047386409  -0.020303414   0.005907552   0.032047013
## CHAS     3.109771129   2.850585116   2.611402361   2.385848772
## NOX    -14.437938014 -15.403070024 -16.449059540 -17.566443917
## RM       4.549912278   4.188852718   3.833836628   3.485134376
## AGE     -0.010529199  -0.008200006  -0.005906958  -0.003562336
## DIS     -1.489241258  -1.512781857  -1.530531776  -1.545346737
## RAD      0.236344006   0.268469491   0.300618320   0.333379915
## TAX     -0.012489018  -0.013093345  -0.013922910  -0.014972759
## PRATIO  -1.067936637  -1.038321954  -1.014337069  -0.995369987
## B        0.007368736   0.007112789   0.006901908   0.006718093
## LSTAT   -0.600942864  -0.572352417  -0.545846642  -0.521544146
# Stepwise regression: AIC-driven search starting from the full linear
# model, allowed to both drop and re-add terms.
# (Note: the name `step` shadows stats::step for the rest of the session.)
library(MASS)
step <- stepAIC(linearModel, direction = "both")
## Start:  AIC=1168.1
## MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + 
##     TAX + PRATIO + B + LSTAT
## 
##          Df Sum of Sq    RSS    AIC
## - AGE     1      1.26 7595.9 1166.2
## - INDUS   1      4.57 7599.2 1166.3
## <none>                7594.6 1168.1
## - B       1    116.49 7711.1 1171.9
## - CHAS    1    128.00 7722.6 1172.5
## - ZN      1    237.95 7832.6 1177.9
## - TAX     1    248.95 7843.6 1178.4
## - CRIM    1    290.10 7884.7 1180.4
## - NOX     1    349.67 7944.3 1183.2
## - RAD     1    398.17 7992.8 1185.6
## - PRATIO  1    967.24 8561.9 1211.8
## - DIS     1   1011.39 8606.0 1213.7
## - RM      1   1170.50 8765.1 1220.7
## - LSTAT   1   1929.98 9524.6 1252.4
## 
## Step:  AIC=1166.17
## MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + DIS + RAD + TAX + 
##     PRATIO + B + LSTAT
## 
##          Df Sum of Sq    RSS    AIC
## - INDUS   1      4.53 7600.4 1164.4
## <none>                7595.9 1166.2
## + AGE     1      1.26 7594.6 1168.1
## - B       1    115.79 7711.7 1169.9
## - CHAS    1    127.38 7723.3 1170.5
## - ZN      1    248.43 7844.3 1176.4
## - TAX     1    250.17 7846.0 1176.5
## - CRIM    1    290.16 7886.0 1178.5
## - NOX     1    390.00 7985.9 1183.2
## - RAD     1    402.64 7998.5 1183.8
## - PRATIO  1    971.24 8567.1 1210.0
## - DIS     1   1065.15 8661.0 1214.2
## - RM      1   1189.61 8785.5 1219.6
## - LSTAT   1   2153.07 9748.9 1259.2
## 
## Step:  AIC=1164.39
## MDEV ~ CRIM + ZN + CHAS + NOX + RM + DIS + RAD + TAX + PRATIO + 
##     B + LSTAT
## 
##          Df Sum of Sq    RSS    AIC
## <none>                7600.4 1164.4
## + INDUS   1      4.53 7595.9 1166.2
## + AGE     1      1.22 7599.2 1166.3
## - B       1    114.05 7714.5 1168.1
## - CHAS    1    132.23 7732.6 1169.0
## - ZN      1    244.48 7844.9 1174.5
## - TAX     1    272.90 7873.3 1175.8
## - CRIM    1    293.20 7893.6 1176.8
## - NOX     1    398.54 7998.9 1181.9
## - RAD     1    410.88 8011.3 1182.5
## - PRATIO  1    968.88 8569.3 1208.1
## - DIS     1   1148.81 8749.2 1216.0
## - RM      1   1185.73 8786.1 1217.6
## - LSTAT   1   2151.58 9752.0 1257.4
# KNN Algorithm
# Nearest-neighbour classification that treats every distinct MDEV value
# as its own class.
#
# The response column is excluded from both feature sets. Passing the full
# data frames (as was done before) leaves MDEV among the distance features,
# so the value being predicted leaks into the neighbour search.
# NOTE: the recorded output below was captured with MDEV still included in
# the features; re-running this code will produce different counts.
library(class)
knnPredictors <- setdiff(names(housingTraining), "MDEV")
knnModel <- knn(
  train = housingTraining[, knnPredictors],
  test = housingTesting[, knnPredictors],
  cl = housingTraining$MDEV
)
summary(knnModel)
##    20.8    14.9      21    18.6    18.7    19.3    11.5    13.4    13.8 
##       5       4       4       3       3       3       2       2       2 
##    14.1      18    18.9    19.4      20    20.4    20.6    20.9    21.4 
##       2       2       2       2       2       2       2       2       2 
##    21.5    22.8    22.9    23.1    24.6    24.8    25.3    27.5    28.4 
##       2       2       2       2       2       2       2       2       2 
##      29    33.2      50     6.3       7    10.2    11.7    12.7    13.1 
##       2       2       2       1       1       1       1       1       1 
##    13.2    13.3    15.2    15.4    15.6    16.1    16.2    16.3    16.6 
##       1       1       1       1       1       1       1       1       1 
##    16.7      17    17.1    17.7    18.2    18.3    18.4      19    19.1 
##       1       1       1       1       1       1       1       1       1 
##    19.2    19.9    20.3    20.5    21.2    21.7      22    22.4    23.8 
##       1       1       1       1       1       1       1       1       1 
##    23.9    24.2    24.3    24.4      25    26.6    28.5    29.6    29.8 
##       1       1       1       1       1       1       1       1       1 
##    30.1    32.2    32.4    32.9    33.1    33.8    35.1    35.2    36.2 
##       1       1       1       1       1       1       1       1       1 
##    37.2    37.9      46    48.8       5     5.6     7.2     7.4     7.5 
##       1       1       1       1       0       0       0       0       0 
##     8.3     8.4     8.5     8.7     8.8     9.5     9.6     9.7    10.4 
##       0       0       0       0       0       0       0       0       0 
## (Other) 
##       0
# Plot the classes predicted by the KNN model.
plot(knnModel)

# Naive Bayes Algorithm
# Fit on the training rows with MDEV as the class variable and all thirteen
# other columns as predictors.
library(e1071)
nb <- naiveBayes(
  MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX +
    PRATIO + B + LSTAT,
  data = housingTraining
)

# Per-class table for the TAX predictor (one row per distinct MDEV value).
nb$tables$TAX
##       TAX
## Y          [,1]        [,2]
##   5    666.0000   0.0000000
##   5.6  666.0000          NA
##   6.3  666.0000          NA
##   7    688.5000  31.8198052
##   7.2  666.0000   0.0000000
##   7.4  666.0000          NA
##   7.5  666.0000          NA
##   8.3  666.0000   0.0000000
##   8.4  666.0000          NA
##   8.5  666.0000          NA
##   8.7  666.0000          NA
##   8.8  666.0000          NA
##   9.5  666.0000          NA
##   9.6  666.0000          NA
##   9.7  666.0000          NA
##   10.2 666.0000   0.0000000
##   10.4 666.0000   0.0000000
##   10.5 666.0000   0.0000000
##   10.9 666.0000   0.0000000
##   11.3 666.0000          NA
##   11.5 666.0000          NA
##   11.7 666.0000   0.0000000
##   11.8 666.0000          NA
##   11.9 273.0000          NA
##   12   666.0000          NA
##   12.1 666.0000          NA
##   12.3 666.0000          NA
##   12.6 666.0000          NA
##   12.7 546.3333 207.2687466
##   12.8 666.0000          NA
##   13.1 546.3333 207.2687466
##   13.2 307.0000          NA
##   13.3 589.6667 132.2132116
##   13.4 534.5000 185.9690835
##   13.5 666.0000          NA
##   13.6 509.0000 285.6711396
##   13.8 600.2500 131.5000000
##   13.9 486.5000 253.8513344
##   14   437.0000          NA
##   14.1 666.0000   0.0000000
##   14.3 666.0000          NA
##   14.4 437.0000          NA
##   14.5 426.6667 207.2687466
##   14.6 534.5000 185.9690835
##   14.8 307.0000          NA
##   14.9 666.0000   0.0000000
##   15   666.0000          NA
##   15.2 561.3333 221.4053598
##   15.3 403.0000          NA
##   15.4 534.5000 185.9690835
##   15.6 382.3333  67.4190873
##   15.7 188.0000          NA
##   16   284.0000          NA
##   16.1 666.0000          NA
##   16.2 437.0000          NA
##   16.3 666.0000          NA
##   16.5 311.0000          NA
##   16.6 270.0000  52.3259018
##   16.7 666.0000          NA
##   16.8 391.0000          NA
##   17   403.0000          NA
##   17.1 370.5000  94.0452019
##   17.2 509.0000 222.0315293
##   17.3 188.0000          NA
##   17.4 403.0000          NA
##   17.5 329.5000  31.8198052
##   17.6 330.0000          NA
##   17.7 666.0000          NA
##   17.8 509.7500 184.8934378
##   17.9 666.0000          NA
##   18   437.0000          NA
##   18.2 320.5000  19.0918831
##   18.3 391.0000          NA
##   18.4 437.0000          NA
##   18.5 381.0000  72.1248917
##   18.6 397.5000  19.0918831
##   18.7 254.0000  42.4264069
##   18.9 300.3333  18.4752086
##   19   445.0000 312.5411973
##   19.1 534.5000 185.9690835
##   19.2 434.5000   3.5355339
##   19.3 351.2000  91.2288332
##   19.4 327.5000  61.6725222
##   19.5 384.0000   0.0000000
##   19.6 447.5000 159.7550208
##   19.7 317.0000 104.6518036
##   19.8 372.6667  63.7599665
##   19.9 666.0000   0.0000000
##   20   370.6000 176.1371625
##   20.1 397.7500 188.2133098
##   20.2 486.5000 253.8513344
##   20.3 330.3333  59.0790431
##   20.4 367.6667  73.8669976
##   20.5 253.6667  71.5984171
##   20.6 358.2500 206.3885898
##   20.7 223.5000   0.7071068
##   20.8 576.6667 154.7298721
##   20.9 330.0000  21.2132034
##   21   296.6667  15.3731367
##   21.1 223.0000          NA
##   21.2 496.3333 148.3587993
##   21.4 438.8000 220.4023593
##   21.5 339.5000  89.8025612
##   21.6 242.0000          NA
##   21.7 390.2000 158.5298079
##   21.8 528.5000 194.4543648
##   21.9 549.0000 202.6499445
##   22   266.2000  45.9913035
##   22.2 264.5000  35.1804870
##   22.3 293.0000          NA
##   22.4 277.0000          NA
##   22.5 277.0000          NA
##   22.6 273.5000   4.9497475
##   22.7 534.5000 185.9690835
##   22.8 333.3333  87.7572409
##   22.9 299.2500  29.4660822
##   23   362.0000 106.0660172
##   23.1 371.7143 134.8378215
##   23.2 541.6667 215.3516504
##   23.3 403.0000          NA
##   23.4 274.0000  43.8406204
##   23.6 283.0000  18.3847763
##   23.7 292.3333  13.2790562
##   23.8 349.2500  62.4519816
##   23.9 296.6667  64.8254065
##   24   301.5000   7.7781746
##   24.1 385.6667  33.2615895
##   24.2 305.0000          NA
##   24.3 346.6667  50.1231816
##   24.4 276.6667  53.5007788
##   24.5 391.0000          NA
##   24.6 291.5000   6.3639610
##   24.7 296.0000  24.0416306
##   24.8 315.5000  35.1804870
##   25   358.8333 160.2316032
##   25.2 223.0000          NA
##   25.3 233.0000          NA
##   26.4 193.0000          NA
##   26.5 384.0000          NA
##   26.6 291.0000  62.2253967
##   26.7 307.0000          NA
##   27   403.0000          NA
##   27.1 311.0000          NA
##   27.5 505.7500 187.6919018
##   28   281.0000          NA
##   28.1 277.0000          NA
##   28.4 249.0000  38.1837662
##   28.5 245.0000          NA
##   28.6 289.0000          NA
##   28.7 249.0000  38.1837662
##   29   332.5000  36.0624458
##   29.1 265.0000          NA
##   29.4 296.0000          NA
##   29.6 261.5000  96.8736290
##   29.8 532.0000 189.5046174
##   29.9 296.0000          NA
##   30.1 252.6667  60.7974780
##   30.3 329.0000          NA
##   30.5 398.0000          NA
##   30.7 264.0000          NA
##   30.8 252.0000          NA
##   31.1 265.0000          NA
##   31.5 307.0000          NA
##   31.6 281.5000  36.0624458
##   31.7 307.0000          NA
##   32   326.0000 101.8233765
##   32.2 285.0000          NA
##   32.4 254.0000          NA
##   32.5 193.0000          NA
##   32.9 402.0000          NA
##   33.1 291.5000  53.0330086
##   33.2 265.0000  15.5563492
##   33.3 329.0000          NA
##   33.8 264.0000          NA
##   34.6 329.0000          NA
##   34.9 325.0000 103.2375901
##   35.1 216.0000          NA
##   35.2 223.0000          NA
##   35.4 226.0000          NA
##   36   264.0000          NA
##   36.1 222.0000          NA
##   36.2 207.5000  20.5060967
##   36.4 398.0000          NA
##   36.5 264.0000          NA
##   37   398.0000          NA
##   37.2 193.0000          NA
##   37.3 245.0000          NA
##   37.6 307.0000          NA
##   37.9 193.0000          NA
##   41.3 403.0000          NA
##   41.7 307.0000          NA
##   42.3 348.0000          NA
##   43.1 264.0000          NA
##   43.8 276.0000          NA
##   44   244.0000          NA
##   44.8 307.0000          NA
##   46   216.0000          NA
##   46.7 307.0000          NA
##   48.3 307.0000          NA
##   48.5 224.0000          NA
##   48.8 264.0000          NA
##   50   419.1000 185.6223466
# Plot the `apriori` component of the naive Bayes fit.
plot(nb$apriori)

# SVM Algorithm
# Load the Pima Indians diabetes data. The file has no header line (the
# column names are assigned by hand just below), so header = FALSE is
# required: read.csv() defaults to header = TRUE, which silently consumes
# the first record as column names and drops it from the data.
# NOTE: the recorded output below (e.g. "767 obs.") was captured before
# this fix, i.e. with the first record missing.
pima <- read.csv("pima-indians-diabetes.data", header = FALSE)
colnames(pima) <- c(
  "pregnancies", "glucose", "bp", "triceps", "insulin",
  "bmi", "pedigree", "age", "class"
)
summary(pima)
##   pregnancies        glucose            bp           triceps     
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.0   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.0   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.0   Median :23.00  
##  Mean   : 3.842   Mean   :120.9   Mean   : 69.1   Mean   :20.52  
##  3rd Qu.: 6.000   3rd Qu.:140.0   3rd Qu.: 80.0   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.0   Max.   :99.00  
##     insulin           bmi           pedigree           age       
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780   Min.   :21.00  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2435   1st Qu.:24.00  
##  Median : 32.0   Median :32.00   Median :0.3710   Median :29.00  
##  Mean   : 79.9   Mean   :31.99   Mean   :0.4717   Mean   :33.22  
##  3rd Qu.:127.5   3rd Qu.:36.60   3rd Qu.:0.6250   3rd Qu.:41.00  
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200   Max.   :81.00  
##      class       
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.3481  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
# Show the structure (dimensions and column types) of the loaded data.
str(pima)
## 'data.frame':    767 obs. of  9 variables:
##  $ pregnancies: int  1 8 1 0 5 3 10 2 8 4 ...
##  $ glucose    : int  85 183 89 137 116 78 115 197 125 110 ...
##  $ bp         : int  66 64 66 40 74 50 0 70 96 92 ...
##  $ triceps    : int  29 0 23 35 0 32 0 45 0 0 ...
##  $ insulin    : int  0 0 94 168 0 88 0 543 0 0 ...
##  $ bmi        : num  26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 37.6 ...
##  $ pedigree   : num  0.351 0.672 0.167 2.288 0.201 ...
##  $ age        : int  31 32 21 33 30 26 29 53 54 30 ...
##  $ class      : int  0 1 0 1 0 1 0 1 1 0 ...
# Convert the outcome from integer to factor.
pima$class <- as.factor(pima$class)

# Repeatable split on the class label with p = 0.75: the selected rows form
# the training set, the remaining rows the test set.
set.seed(3277)
library(caret)
pimaIndices <- createDataPartition(pima$class, p = 0.75, list = FALSE)
pimaTraining <- pima[pimaIndices, ]
pimaTesting <- pima[-pimaIndices, ]

library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
# Resampling set-up with 20 resamples (the recorded output reports them as
# bootstrap repetitions).
bootControl <- trainControl(number = 20)

# Tune a radial-basis SVM: columns 1-8 are the predictors, column 9 the
# class label.
# NOTE(review): the recorded results show Kappa = 0 for every C, and the
# confusion matrix further down predicts only class 0. `scaled = FALSE` is
# presumably forwarded to the underlying SVM fit and may be the cause --
# confirm before relying on this model.
svmFit <- train(
  pimaTraining[, -9],
  pimaTraining[, 9],
  method = "svmRadial",
  tuneLength = 5,
  trControl = bootControl,
  scaled = FALSE
)
svmFit
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 576 samples
##   8 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Bootstrapped (20 reps) 
## Summary of sample sizes: 576, 576, 576, 576, 576, 576, ... 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa
##   0.25  0.6493652  0    
##   0.50  0.6493652  0    
##   1.00  0.6493652  0    
##   2.00  0.6493652  0    
##   4.00  0.6493652  0    
## 
## Tuning parameter 'sigma' was held constant at a value of 0.1106031
## Accuracy was used to select the optimal model using  the largest value.
## The final values used for the model were sigma = 0.1106031 and C = 0.25.
# Predict test-set classes directly from the final fitted model object.
predicted <- predict(svmFit$finalModel, newdata = pimaTesting[, -9])
plot(pimaTesting$class, predicted)

# Confusion matrix: predicted class in rows, true class in columns.
table(pred = predicted, true = pimaTesting[, 9])
##     true
## pred   0   1
##    0 125  66
##    1   0   0

# Print the details of the final model.
svmFit$finalModel
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 0.25 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.110603110858516 
## 
## Number of Support Vectors : 576 
## 
## Objective Function Value : -90.823 
## Training error : 0.348958
# k means clustering
# Split iris on Species with p = 0.75 into training and test rows.
data(iris)
irisIndices <- createDataPartition(iris$Species, p = 0.75, list = FALSE)
irisTraining <- iris[irisIndices, ]
irisTesting <- iris[-irisIndices, ]

# bootControl is re-created here but is not passed to kmeans() below.
bootControl <- trainControl(number = 20)

# Cluster the four measurement columns into three groups.
km <- kmeans(irisTraining[, 1:4], centers = 3)
km
## K-means clustering with 3 clusters of sizes 48, 38, 28
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.906250    2.741667     4.402083   1.4354167
## 2     5.010526    3.426316     1.468421   0.2447368
## 3     6.878571    3.042857     5.810714   2.0035714
## 
## Clustering vector:
##   1   2   3   4   6   7   8  10  11  12  13  14  16  17  18  19  21  23 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  25  26  27  28  30  32  33  34  36  37  39  40  42  43  44  45  46  47 
##   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
##  49  50  51  52  53  54  55  56  57  58  59  61  63  64  65  66  67  68 
##   2   2   1   1   3   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  70  72  73  74  75  76  77  81  82  83  84  85  87  88  89  90  91  95 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  96  97  99 100 102 104 105 106 107 108 109 111 112 113 114 115 116 117 
##   1   1   1   1   1   3   3   3   1   3   3   3   3   3   1   1   3   3 
## 118 119 122 123 124 125 126 127 129 130 131 132 133 135 136 137 138 139 
##   3   3   1   3   1   3   3   1   3   3   3   3   3   3   3   3   3   1 
## 140 143 144 147 149 150 
##   3   1   3   1   3   1 
## 
## Within cluster sum of squares by cluster:
## [1] 31.72437 12.36553 19.69214
##  (between_SS / total_SS =  87.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Assign each test row (Species column dropped) to one of the fitted
# clusters.
library(clue)
cl_predict(km, irisTesting[, -5])
## Class ids:
##  [1] 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 3 1 1 1 1 1 1 1 3 3 3 1 3 1 1 3 3 3 3
## [36] 3

# True species of the same rows, for visual comparison with the ids above.
irisTesting[, 5]
##  [1] setosa     setosa     setosa     setosa     setosa     setosa    
##  [7] setosa     setosa     setosa     setosa     setosa     setosa    
## [13] versicolor versicolor versicolor versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] virginica  virginica  virginica  virginica  virginica  virginica 
## [31] virginica  virginica  virginica  virginica  virginica  virginica 
## Levels: setosa versicolor virginica
# Decision Tree
# Regression tree (method = "anova") for MDEV on all thirteen predictors.
library(rpart)
housingFit <- rpart(
  MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX +
    PRATIO + B + LSTAT,
  method = "anova",
  data = housingTraining
)

# Draw the tree and label its nodes.
plot(housingFit)
text(housingFit, use.n = TRUE, all = TRUE, cex = 0.8)

# Residual sum of squares of the tree on the test set.
treePredict <- predict(housingFit, newdata = housingTesting)
diff <- treePredict - housingTesting$MDEV
sumofsquares <- function(x) {
  sum(x^2)
}
sumofsquares(diff)
## [1] 3926.297
# AdaBoost
# Boosted classifier on the Pima data: columns 1-8 are the predictors,
# `class` the label; the test split is supplied alongside the training
# data via test.x / test.y.
library(ada)
adaModel <- ada(
  x = pimaTraining[, -9],
  y = pimaTraining$class,
  test.x = pimaTesting[, -9],
  test.y = pimaTesting$class
)
adaModel
## Call:
## ada(pimaTraining[, -9], y = pimaTraining$class, test.x = pimaTesting[, 
##     -9], test.y = pimaTesting$class)
## 
## Loss: exponential Method: discrete   Iteration: 50 
## 
## Final Confusion Matrix for Data:
##           Final Prediction
## True value   0   1
##          0 351  24
##          1  32 169
## 
## Train Error: 0.097 
## 
## Out-Of-Bag Error:  0.134  iteration= 48 
## 
## Additional Estimates of number of iterations:
## 
## train.err1 train.kap1  test.err2  test.kap2 
##         50         50          3          3
# Training accuracy of the AdaBoost model: correctly classified cases (the
# diagonal of the final confusion matrix) divided by all cases.
# Computed from the fitted object rather than typed in by hand: the
# previously hard-coded counts (358, 168, 33, 17) did not match the
# confusion matrix printed above (351, 24, 32, 169).
sum(diag(adaModel$confusion)) / sum(adaModel$confusion)
## [1] 0.9027778
# neural network
library(neuralnet)

# First fit: hidden = 10, threshold = 0.01.
# NOTE(review): this object is overwritten by the next call before it is
# ever used -- confirm whether it is still needed.
nnet <- neuralnet(
  MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX +
    PRATIO + B + LSTAT,
  housingTraining,
  hidden = 10,
  threshold = 0.01
)

# Second fit with the package defaults; this is the network plotted and
# evaluated below.
nnet <- neuralnet(
  MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX +
    PRATIO + B + LSTAT,
  housingTraining
)
plot(nnet, rep = "best")

# Residual sum of squares on the test set (column 14, MDEV, is dropped from
# the inputs).
results <- compute(nnet, housingTesting[, -14])
diff <- results$net.result - housingTesting$MDEV
sumofsquares(diff)
## [1] 11016.02175
#random forests Algorithm
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Random forest for MDEV on all thirteen predictors, fitted on the training
# rows.
forestFit <- randomForest(
  MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX +
    PRATIO + B + LSTAT,
  data = housingTraining
)

# Residual sum of squares of the forest on the test set.
forestPredict <- predict(forestFit, newdata = housingTesting)
diff <- forestPredict - housingTesting$MDEV
sumofsquares(diff)
## [1] 2322.754856
#Decision Tree
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.1.0 Copyright (c) 2006-2017 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
## 
## Attaching package: 'rattle'
## The following object is masked from 'package:randomForest':
## 
##     importance
# Load the weather observations from the working directory and print
# per-column summaries.
weather <- read.csv(file = "weather.csv")
summary(weather)
##          Date         Location      MinTemp             MaxTemp        
##  2007-11-01:  1   Canberra:366   Min.   :-5.300000   Min.   : 7.60000  
##  2007-11-02:  1                  1st Qu.: 2.300000   1st Qu.:15.02500  
##  2007-11-03:  1                  Median : 7.450000   Median :19.65000  
##  2007-11-04:  1                  Mean   : 7.265574   Mean   :20.55027  
##  2007-11-05:  1                  3rd Qu.:12.500000   3rd Qu.:25.50000  
##  2007-11-06:  1                  Max.   :20.900000   Max.   :35.80000  
##  (Other)   :360                                                        
##     Rainfall          Evaporation           Sunshine          WindGustDir 
##  Min.   : 0.000000   Min.   : 0.200000   Min.   : 0.000000   NW     : 73  
##  1st Qu.: 0.000000   1st Qu.: 2.200000   1st Qu.: 5.950000   NNW    : 44  
##  Median : 0.000000   Median : 4.200000   Median : 8.600000   E      : 37  
##  Mean   : 1.428415   Mean   : 4.521858   Mean   : 7.909366   WNW    : 35  
##  3rd Qu.: 0.200000   3rd Qu.: 6.400000   3rd Qu.:10.500000   ENE    : 30  
##  Max.   :39.800000   Max.   :13.800000   Max.   :13.600000   (Other):144  
##                                          NA's   :3           NA's   :  3  
##  WindGustSpeed        WindDir9am    WindDir3pm   WindSpeed9am      
##  Min.   :13.00000   SE     : 47   NW     : 61   Min.   : 0.000000  
##  1st Qu.:31.00000   SSE    : 40   WNW    : 61   1st Qu.: 6.000000  
##  Median :39.00000   NNW    : 36   NNW    : 47   Median : 7.000000  
##  Mean   :39.84066   N      : 31   N      : 30   Mean   : 9.651811  
##  3rd Qu.:46.00000   NW     : 30   ESE    : 27   3rd Qu.:13.000000  
##  Max.   :98.00000   (Other):151   (Other):139   Max.   :41.000000  
##  NA's   :2          NA's   : 31   NA's   :  1   NA's   :7          
##   WindSpeed3pm       Humidity9am        Humidity3pm      
##  Min.   : 0.00000   Min.   :36.00000   Min.   :13.00000  
##  1st Qu.:11.00000   1st Qu.:64.00000   1st Qu.:32.25000  
##  Median :17.00000   Median :72.00000   Median :43.00000  
##  Mean   :17.98634   Mean   :72.03552   Mean   :44.51913  
##  3rd Qu.:24.00000   3rd Qu.:81.00000   3rd Qu.:55.00000  
##  Max.   :52.00000   Max.   :99.00000   Max.   :96.00000  
##                                                          
##   Pressure9am        Pressure3pm          Cloud9am          Cloud3pm      
##  Min.   : 996.500   Min.   : 996.800   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:1015.350   1st Qu.:1012.800   1st Qu.:1.00000   1st Qu.:1.00000  
##  Median :1020.150   Median :1017.400   Median :3.50000   Median :4.00000  
##  Mean   :1019.709   Mean   :1016.810   Mean   :3.89071   Mean   :4.02459  
##  3rd Qu.:1024.475   3rd Qu.:1021.475   3rd Qu.:7.00000   3rd Qu.:7.00000  
##  Max.   :1035.700   Max.   :1033.200   Max.   :8.00000   Max.   :8.00000  
##                                                                           
##     Temp9am            Temp3pm         RainToday    RISK_MM         
##  Min.   : 0.10000   Min.   : 5.10000   No :300   Min.   : 0.000000  
##  1st Qu.: 7.62500   1st Qu.:14.15000   Yes: 66   1st Qu.: 0.000000  
##  Median :12.55000   Median :18.55000             Median : 0.000000  
##  Mean   :12.35847   Mean   :19.23087             Mean   : 1.428415  
##  3rd Qu.:17.00000   3rd Qu.:24.00000             3rd Qu.: 0.200000  
##  Max.   :24.70000   Max.   :34.50000             Max.   :39.800000  
##                                                                     
##  RainTomorrow
##  No :300     
##  Yes: 66     
##              
##              
##              
##              
## 
# Remove columns that must not be used as predictors:
#   - RISK_MM  : excluded by the original analysis as well
#   - Date     : one distinct value per row, so a tree can split on it and
#                memorise the training data (zero training loss, no
#                generalisation)
#   - Location : a single constant value, carries no information
# NOTE: the summary transcript recorded below was produced while Date was
# still included; re-run to refresh it.
weather2 <- subset(weather, select = -c(RISK_MM, Date, Location))
library(rpart)
# Classification tree for RainTomorrow on all remaining columns.
model <- rpart(formula = RainTomorrow ~ ., data = weather2, method = "class")
summary(model)
## Call:
## rpart(formula = RainTomorrow ~ ., data = weather2, method = "class")
##   n= 366 
## 
##     CP nsplit rel error       xerror         xstd
## 1 1.00      0         1 1.0000000000 0.1114417997
## 2 0.01      1         0 0.8333333333 0.1035802753
## 
## Variable importance
##          Date   Humidity3pm      Cloud3pm WindGustSpeed       MinTemp 
##            64            13             8             6             5 
##      Sunshine 
##             5 
## 
## Node number 1: 366 observations,    complexity param=1
##   predicted class=No   expected loss=0.1803278689  P(node) =1
##     class counts:   300    66
##    probabilities: 0.820 0.180 
##   left son=2 (300 obs) right son=3 (66 obs)
##   Primary splits:
##       Date        splits as  RRRRLLLLRLLLLLLLRLLLLRLLLLLLLRRLRLLLRLLLLLLLRRLLRRRRLRLLRLLLLLLLLLLLLLLLRLLLRLRRLLLLLLLLLLRRLLRRLLRLLLRRLLLLLLLRRLLLLLLRLLLLLLLRLLLLLLLLLLLLLLLLRRLLLLLLLRLLLLLLLLLRRLLLLLLLLLLLLLRLLLLLLLLLLLLLLLLLRLLLLLLLLLRRLLLLLLLRLLLLLRRLLLLLLLLRLLLLLLLLLLRLLLLLLRLLRLLLLLLLRLRLLLLLLRLLLLRLLLRLRLLLLLLLLLLLLLLLLLLLLLLRRLLLLLLLLLLLLRRRLLLLLLRLLLLLLLLLLRRLRLLLLLLRLLLLLLLLLLLLLLLLLL, improve=108.19672130, (0 missing)
##       Humidity3pm < 71.5   to the left,  improve= 18.31012675, (0 missing)
##       Pressure3pm < 1011.9 to the right, improve= 17.35279669, (0 missing)
##       Cloud3pm    < 6.5    to the left,  improve= 16.14203133, (0 missing)
##       Sunshine    < 6.45   to the right, improve= 15.36363823, (3 missing)
##   Surrogate splits:
##       Humidity3pm   < 71.5   to the left,  agree=0.855, adj=0.197, (0 split)
##       Cloud3pm      < 7.5    to the left,  agree=0.842, adj=0.121, (0 split)
##       WindGustSpeed < 64     to the left,  agree=0.836, adj=0.091, (0 split)
##       MinTemp       < 17.55  to the left,  agree=0.833, adj=0.076, (0 split)
##       Sunshine      < 0.25   to the right, agree=0.833, adj=0.076, (0 split)
## 
## Node number 2: 300 observations
##   predicted class=No   expected loss=0  P(node) =0.8196721311
##     class counts:   300     0
##    probabilities: 1.000 0.000 
## 
## Node number 3: 66 observations
##   predicted class=Yes  expected loss=0  P(node) =0.1803278689
##     class counts:     0    66
##    probabilities: 0.000 1.000
library(rpart.plot)
# Draw the fitted classification tree with a title.
fancyRpartPlot(model, main = "Rain Tomorrow")

#regression
# Load the forest-fires data from the working directory and print per-column
# summaries.
forestfires <- read.csv(file = "forestfires.csv")
summary(forestfires)
##        X                  Y                month      day    
##  Min.   :1.000000   Min.   :2.000000   aug    :184   fri:85  
##  1st Qu.:3.000000   1st Qu.:4.000000   sep    :172   mon:74  
##  Median :4.000000   Median :4.000000   mar    : 54   sat:84  
##  Mean   :4.669246   Mean   :4.299807   jul    : 32   sun:95  
##  3rd Qu.:7.000000   3rd Qu.:5.000000   feb    : 20   thu:61  
##  Max.   :9.000000   Max.   :9.000000   jun    : 17   tue:64  
##                                        (Other): 38   wed:54  
##       FFMC               DMC                 DC        
##  Min.   :18.70000   Min.   :  1.1000   Min.   :  7.90  
##  1st Qu.:90.20000   1st Qu.: 68.6000   1st Qu.:437.70  
##  Median :91.60000   Median :108.3000   Median :664.20  
##  Mean   :90.64468   Mean   :110.8723   Mean   :547.94  
##  3rd Qu.:92.90000   3rd Qu.:142.4000   3rd Qu.:713.90  
##  Max.   :96.20000   Max.   :291.3000   Max.   :860.60  
##                                                        
##       ISI                 temp                RH          
##  Min.   : 0.000000   Min.   : 2.20000   Min.   : 15.0000  
##  1st Qu.: 6.500000   1st Qu.:15.50000   1st Qu.: 33.0000  
##  Median : 8.400000   Median :19.30000   Median : 42.0000  
##  Mean   : 9.021663   Mean   :18.88917   Mean   : 44.2882  
##  3rd Qu.:10.800000   3rd Qu.:22.80000   3rd Qu.: 53.0000  
##  Max.   :56.100000   Max.   :33.30000   Max.   :100.0000  
##                                                           
##       wind               rain                 area           
##  Min.   :0.400000   Min.   :0.00000000   Min.   :   0.00000  
##  1st Qu.:2.700000   1st Qu.:0.00000000   1st Qu.:   0.00000  
##  Median :4.000000   Median :0.00000000   Median :   0.52000  
##  Mean   :4.017602   Mean   :0.02166344   Mean   :  12.84729  
##  3rd Qu.:4.900000   3rd Qu.:0.00000000   3rd Qu.:   6.57000  
##  Max.   :9.400000   Max.   :6.40000000   Max.   :1090.84000  
## 
# Linear model of burned area on month, temperature, wind and rain.
model <- lm(
  formula = area ~ month + temp + wind + rain,
  data = forestfires
)
summary(model)
## 
## Call:
## lm(formula = area ~ month + temp + wind + rain, data = forestfires)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
##  -33.20126  -14.92936   -9.09864   -1.65641 1063.58600 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)  
## (Intercept) -17.389663  24.531469 -0.70887 0.478733  
## monthaug    -10.342265  22.761152 -0.45438 0.649750  
## monthdec     11.533891  30.895985  0.37331 0.709073  
## monthfeb      2.606651  25.795917  0.10105 0.919552  
## monthjan      5.988231  50.493480  0.11859 0.905644  
## monthjul     -8.821520  25.068066 -0.35190 0.725059  
## monthjun    -15.469199  26.973935 -0.57349 0.566572  
## monthmar     -6.630304  23.057135 -0.28756 0.773802  
## monthmay      6.602782  50.052985  0.13192 0.895104  
## monthnov     -8.243943  67.451398 -0.12222 0.902773  
## monthoct     -8.267770  27.236986 -0.30355 0.761597  
## monthsep     -1.070170  22.487923 -0.04759 0.962063  
## temp          1.569244   0.673020  2.33165 0.020114 *
## wind          1.581449   1.710697  0.92445 0.355698  
## rain         -3.179054   9.595489 -0.33131 0.740551  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.98916 on 502 degrees of freedom
## Multiple R-squared:  0.01691589, Adjusted R-squared:  -0.0105008 
## F-statistic: 0.6169923 on 14 and 502 DF,  p-value: 0.8518111
# Same linear model with the temperature term removed; overwrites `model`.
model <- lm(
  formula = area ~ month + wind + rain,
  data = forestfires
)
summary(model)
## 
## Call:
## lm(formula = area ~ month + wind + rain, data = forestfires)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
##  -22.16857  -14.38907  -10.45919   -3.87373 1072.43486 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)
## (Intercept)  4.0125953 22.8495756  0.17561  0.86067
## monthaug     4.3131721 21.9723517  0.19630  0.84445
## monthdec     1.3259312 30.7188113  0.04316  0.96559
## monthfeb    -1.6630582 25.8440653 -0.06435  0.94872
## monthjan    -6.1033878 50.4474881 -0.12098  0.90375
## monthjul     6.4647558 24.3020651  0.26602  0.79034
## monthjun    -2.4944398 26.5098551 -0.09409  0.92507
## monthmar    -4.8431458 23.1457972 -0.20925  0.83434
## monthmay    10.5753914 50.2441062  0.21048  0.83338
## monthnov    -8.7168784 67.7479071 -0.12867  0.89767
## monthoct    -0.9916663 27.1766956 -0.03649  0.97091
## monthsep    10.2109631 22.0579191  0.46292  0.64362
## wind         1.0453962  1.7026381  0.61399  0.53950
## rain        -1.8504292  9.6207046 -0.19234  0.84755
## 
## Residual standard error: 64.27074 on 503 degrees of freedom
## Multiple R-squared:  0.006269255,    Adjusted R-squared:  -0.01941365 
## F-statistic: 0.2441023 on 13 and 503 DF,  p-value: 0.9971217
# Diagnostic plots for the most recently fitted linear model (`model`).
plot(model)
## Warning: not plotting observations with leverage one:
##   517

## Warning: not plotting observations with leverage one:
##   517

#ANN
# The data file carries no header row (column names are assigned right
# below), but read.csv() defaults to header = TRUE and would silently turn
# the first record into column names, dropping one observation.
# NOTE: the summary transcript recorded below was produced before this fix
# and is therefore one row short; re-run to refresh it.
bupa <- read.csv("bupa.data", header = FALSE)
colnames(bupa) <- c("mcv", "alkphos", "alamine", "aspartate", "glutamyl",
                    "drinks", "selector")
summary(bupa)
##       mcv               alkphos             alamine         
##  Min.   : 65.00000   Min.   : 23.00000   Min.   :  4.00000  
##  1st Qu.: 87.00000   1st Qu.: 57.00000   1st Qu.: 19.00000  
##  Median : 90.00000   Median : 67.00000   Median : 26.00000  
##  Mean   : 90.17442   Mean   : 69.80523   Mean   : 30.36337  
##  3rd Qu.: 93.00000   3rd Qu.: 80.00000   3rd Qu.: 34.00000  
##  Max.   :103.00000   Max.   :138.00000   Max.   :155.00000  
##    aspartate           glutamyl             drinks         
##  Min.   : 5.00000   Min.   :  5.00000   Min.   : 0.000000  
##  1st Qu.:19.00000   1st Qu.: 15.00000   1st Qu.: 0.500000  
##  Median :23.00000   Median : 24.50000   Median : 3.000000  
##  Mean   :24.63663   Mean   : 38.30523   Mean   : 3.465116  
##  3rd Qu.:27.00000   3rd Qu.: 46.25000   3rd Qu.: 6.000000  
##  Max.   :82.00000   Max.   :297.00000   Max.   :20.000000  
##     selector       
##  Min.   :1.000000  
##  1st Qu.:1.000000  
##  Median :2.000000  
##  Mean   :1.581395  
##  3rd Qu.:2.000000  
##  Max.   :2.000000
# With linear.output = FALSE the activation function is also applied to the
# output node, so fitted values are confined to (0, 1). `selector` is coded
# 1/2 (see the summary above), which such an output can never reach; the
# target is therefore shifted to 0/1 in the copy of the data used for
# training. `bupa` itself is left untouched.
# NOTE: the result matrix recorded below was produced before this fix.
nn <- neuralnet(
  selector ~ mcv + alkphos + alamine + aspartate + glutamyl + drinks,
  data = transform(bupa, selector = selector - 1),
  linear.output = FALSE,
  hidden = 2
)
# Error, convergence information and fitted weights of the network.
nn$result.matrix
##                                     1
## error                 100.00134410555
## reached.threshold       0.00245301475
## steps                  47.00000000000
## Intercept.to.1layhid1   0.41602844406
## mcv.to.1layhid1        -0.10920845301
## alkphos.to.1layhid1     0.53367967640
## alamine.to.1layhid1     2.65321469994
## aspartate.to.1layhid1   0.49399792808
## glutamyl.to.1layhid1    0.50537073025
## drinks.to.1layhid1      0.03765883152
## Intercept.to.1layhid2   0.25098970910
## mcv.to.1layhid2         0.47460785268
## alkphos.to.1layhid2    -0.54659321629
## alamine.to.1layhid2     0.21558022509
## aspartate.to.1layhid2  -0.13052791775
## glutamyl.to.1layhid2    0.69013092149
## drinks.to.1layhid2     -0.95369381418
## Intercept.to.selector   4.19048690517
## 1layhid.1.to.selector   5.29556987846
## 1layhid.2.to.selector   2.81924720338
# Draw the fitted network `nn`.
plot(nn)

#SVM
library(kernlab)
# Load the `spam` data set into the workspace and print per-column summaries.
data(spam)
summary(spam)
##       make              address                all           
##  Min.   :0.0000000   Min.   : 0.0000000   Min.   :0.0000000  
##  1st Qu.:0.0000000   1st Qu.: 0.0000000   1st Qu.:0.0000000  
##  Median :0.0000000   Median : 0.0000000   Median :0.0000000  
##  Mean   :0.1045534   Mean   : 0.2130146   Mean   :0.2806564  
##  3rd Qu.:0.0000000   3rd Qu.: 0.0000000   3rd Qu.:0.4200000  
##  Max.   :4.5400000   Max.   :14.2800000   Max.   :5.1000000  
##      num3d                  our                  over           
##  Min.   : 0.00000000   Min.   : 0.0000000   Min.   :0.00000000  
##  1st Qu.: 0.00000000   1st Qu.: 0.0000000   1st Qu.:0.00000000  
##  Median : 0.00000000   Median : 0.0000000   Median :0.00000000  
##  Mean   : 0.06542491   Mean   : 0.3122234   Mean   :0.09590089  
##  3rd Qu.: 0.00000000   3rd Qu.: 0.3800000   3rd Qu.:0.00000000  
##  Max.   :42.81000000   Max.   :10.0000000   Max.   :5.88000000  
##      remove             internet              order           
##  Min.   :0.0000000   Min.   : 0.0000000   Min.   :0.00000000  
##  1st Qu.:0.0000000   1st Qu.: 0.0000000   1st Qu.:0.00000000  
##  Median :0.0000000   Median : 0.0000000   Median :0.00000000  
##  Mean   :0.1142078   Mean   : 0.1052945   Mean   :0.09006738  
##  3rd Qu.:0.0000000   3rd Qu.: 0.0000000   3rd Qu.:0.00000000  
##  Max.   :7.2700000   Max.   :11.1100000   Max.   :5.26000000  
##       mail               receive                will          
##  Min.   : 0.0000000   Min.   :0.00000000   Min.   :0.0000000  
##  1st Qu.: 0.0000000   1st Qu.:0.00000000   1st Qu.:0.0000000  
##  Median : 0.0000000   Median :0.00000000   Median :0.1000000  
##  Mean   : 0.2394132   Mean   :0.05982395   Mean   :0.5417018  
##  3rd Qu.: 0.1600000   3rd Qu.:0.00000000   3rd Qu.:0.8000000  
##  Max.   :18.1800000   Max.   :2.61000000   Max.   :9.6700000  
##      people               report              addresses         
##  Min.   :0.00000000   Min.   : 0.00000000   Min.   :0.00000000  
##  1st Qu.:0.00000000   1st Qu.: 0.00000000   1st Qu.:0.00000000  
##  Median :0.00000000   Median : 0.00000000   Median :0.00000000  
##  Mean   :0.09392958   Mean   : 0.05862639   Mean   :0.04920452  
##  3rd Qu.:0.00000000   3rd Qu.: 0.00000000   3rd Qu.:0.00000000  
##  Max.   :5.55000000   Max.   :10.00000000   Max.   :4.41000000  
##       free               business             email          
##  Min.   : 0.0000000   Min.   :0.0000000   Min.   :0.0000000  
##  1st Qu.: 0.0000000   1st Qu.:0.0000000   1st Qu.:0.0000000  
##  Median : 0.0000000   Median :0.0000000   Median :0.0000000  
##  Mean   : 0.2488481   Mean   :0.1425864   Mean   :0.1847446  
##  3rd Qu.: 0.1000000   3rd Qu.:0.0000000   3rd Qu.:0.0000000  
##  Max.   :20.0000000   Max.   :7.1400000   Max.   :9.0900000  
##       you              credit                 your           
##  Min.   : 0.0000   Min.   : 0.00000000   Min.   : 0.0000000  
##  1st Qu.: 0.0000   1st Qu.: 0.00000000   1st Qu.: 0.0000000  
##  Median : 1.3100   Median : 0.00000000   Median : 0.2200000  
##  Mean   : 1.6621   Mean   : 0.08557705   Mean   : 0.8097609  
##  3rd Qu.: 2.6400   3rd Qu.: 0.00000000   3rd Qu.: 1.2700000  
##  Max.   :18.7500   Max.   :18.18000000   Max.   :11.1100000  
##       font                num000              money            
##  Min.   : 0.0000000   Min.   :0.0000000   Min.   : 0.00000000  
##  1st Qu.: 0.0000000   1st Qu.:0.0000000   1st Qu.: 0.00000000  
##  Median : 0.0000000   Median :0.0000000   Median : 0.00000000  
##  Mean   : 0.1212019   Mean   :0.1016453   Mean   : 0.09426864  
##  3rd Qu.: 0.0000000   3rd Qu.:0.0000000   3rd Qu.: 0.00000000  
##  Max.   :17.1000000   Max.   :5.4500000   Max.   :12.50000000  
##        hp                  hpl                 george          
##  Min.   : 0.0000000   Min.   : 0.0000000   Min.   : 0.0000000  
##  1st Qu.: 0.0000000   1st Qu.: 0.0000000   1st Qu.: 0.0000000  
##  Median : 0.0000000   Median : 0.0000000   Median : 0.0000000  
##  Mean   : 0.5495045   Mean   : 0.2653836   Mean   : 0.7673049  
##  3rd Qu.: 0.0000000   3rd Qu.: 0.0000000   3rd Qu.: 0.0000000  
##  Max.   :20.8300000   Max.   :16.6600000   Max.   :33.3300000  
##      num650               lab                   labs          
##  Min.   :0.0000000   Min.   : 0.00000000   Min.   :0.0000000  
##  1st Qu.:0.0000000   1st Qu.: 0.00000000   1st Qu.:0.0000000  
##  Median :0.0000000   Median : 0.00000000   Median :0.0000000  
##  Mean   :0.1248446   Mean   : 0.09891545   Mean   :0.1028516  
##  3rd Qu.:0.0000000   3rd Qu.: 0.00000000   3rd Qu.:0.0000000  
##  Max.   :9.0900000   Max.   :14.28000000   Max.   :5.8800000  
##      telnet                num857                data            
##  Min.   : 0.00000000   Min.   :0.00000000   Min.   : 0.00000000  
##  1st Qu.: 0.00000000   1st Qu.:0.00000000   1st Qu.: 0.00000000  
##  Median : 0.00000000   Median :0.00000000   Median : 0.00000000  
##  Mean   : 0.06475331   Mean   :0.04704847   Mean   : 0.09722886  
##  3rd Qu.: 0.00000000   3rd Qu.:0.00000000   3rd Qu.: 0.00000000  
##  Max.   :12.50000000   Max.   :4.76000000   Max.   :18.18000000  
##      num415               num85              technology        
##  Min.   :0.00000000   Min.   : 0.0000000   Min.   :0.00000000  
##  1st Qu.:0.00000000   1st Qu.: 0.0000000   1st Qu.:0.00000000  
##  Median :0.00000000   Median : 0.0000000   Median :0.00000000  
##  Mean   :0.04783525   Mean   : 0.1054119   Mean   :0.09747664  
##  3rd Qu.:0.00000000   3rd Qu.: 0.0000000   3rd Qu.:0.00000000  
##  Max.   :4.76000000   Max.   :20.0000000   Max.   :7.69000000  
##     num1999              parts                  pm             
##  Min.   :0.0000000   Min.   :0.00000000   Min.   : 0.00000000  
##  1st Qu.:0.0000000   1st Qu.:0.00000000   1st Qu.: 0.00000000  
##  Median :0.0000000   Median :0.00000000   Median : 0.00000000  
##  Mean   :0.1369528   Mean   :0.01320148   Mean   : 0.07862856  
##  3rd Qu.:0.0000000   3rd Qu.:0.00000000   3rd Qu.: 0.00000000  
##  Max.   :6.8900000   Max.   :8.33000000   Max.   :11.11000000  
##      direct                 cs                meeting          
##  Min.   :0.00000000   Min.   :0.00000000   Min.   : 0.0000000  
##  1st Qu.:0.00000000   1st Qu.:0.00000000   1st Qu.: 0.0000000  
##  Median :0.00000000   Median :0.00000000   Median : 0.0000000  
##  Mean   :0.06483373   Mean   :0.04366659   Mean   : 0.1323386  
##  3rd Qu.:0.00000000   3rd Qu.:0.00000000   3rd Qu.: 0.0000000  
##  Max.   :4.76000000   Max.   :7.14000000   Max.   :14.2800000  
##     original             project                  re            
##  Min.   :0.00000000   Min.   : 0.00000000   Min.   : 0.0000000  
##  1st Qu.:0.00000000   1st Qu.: 0.00000000   1st Qu.: 0.0000000  
##  Median :0.00000000   Median : 0.00000000   Median : 0.0000000  
##  Mean   :0.04609867   Mean   : 0.07919583   Mean   : 0.3012236  
##  3rd Qu.:0.00000000   3rd Qu.: 0.00000000   3rd Qu.: 0.1100000  
##  Max.   :3.57000000   Max.   :20.00000000   Max.   :21.4200000  
##       edu                table               conference         
##  Min.   : 0.000000   Min.   :0.000000000   Min.   : 0.00000000  
##  1st Qu.: 0.000000   1st Qu.:0.000000000   1st Qu.: 0.00000000  
##  Median : 0.000000   Median :0.000000000   Median : 0.00000000  
##  Mean   : 0.179824   Mean   :0.005444469   Mean   : 0.03186916  
##  3rd Qu.: 0.000000   3rd Qu.:0.000000000   3rd Qu.: 0.00000000  
##  Max.   :22.050000   Max.   :2.170000000   Max.   :10.00000000  
##  charSemicolon        charRoundbracket    charSquarebracket   
##  Min.   :0.00000000   Min.   :0.0000000   Min.   :0.00000000  
##  1st Qu.:0.00000000   1st Qu.:0.0000000   1st Qu.:0.00000000  
##  Median :0.00000000   Median :0.0650000   Median :0.00000000  
##  Mean   :0.03857466   Mean   :0.1390304   Mean   :0.01697588  
##  3rd Qu.:0.00000000   3rd Qu.:0.1880000   3rd Qu.:0.00000000  
##  Max.   :4.38500000   Max.   :9.7520000   Max.   :4.08100000  
##  charExclamation        charDollar            charHash          
##  Min.   : 0.0000000   Min.   :0.00000000   Min.   : 0.00000000  
##  1st Qu.: 0.0000000   1st Qu.:0.00000000   1st Qu.: 0.00000000  
##  Median : 0.0000000   Median :0.00000000   Median : 0.00000000  
##  Mean   : 0.2690708   Mean   :0.07581069   Mean   : 0.04423821  
##  3rd Qu.: 0.3150000   3rd Qu.:0.05200000   3rd Qu.: 0.00000000  
##  Max.   :32.4780000   Max.   :6.00300000   Max.   :19.82900000  
##    capitalAve           capitalLong          capitalTotal       
##  Min.   :   1.000000   Min.   :   1.00000   Min.   :    1.0000  
##  1st Qu.:   1.588000   1st Qu.:   6.00000   1st Qu.:   35.0000  
##  Median :   2.276000   Median :  15.00000   Median :   95.0000  
##  Mean   :   5.191515   Mean   :  52.17279   Mean   :  283.2893  
##  3rd Qu.:   3.706000   3rd Qu.:  43.00000   3rd Qu.:  266.0000  
##  Max.   :1102.500000   Max.   :9989.00000   Max.   :15841.0000  
##       type     
##  nonspam:2788  
##  spam   :1813  
##                
##                
##                
## 
# Class balance of the response column.
table(spam[["type"]])
## 
## nonspam    spam 
##    2788    1813
# Hold out a random third of the rows as the test set; the rest is used for
# training. seq_len() is used instead of 1:nrow() so an empty data frame
# yields an empty index rather than c(1, 0).
index <- seq_len(nrow(spam))
testindex <- sample(index, trunc(length(index) / 3))
testset <- spam[testindex, ]
trainingset <- spam[-testindex, ]

library(e1071)
# RBF-kernel C-classification SVM on all predictors. The SVM flavour is
# selected through svm()'s `type` argument; the previous `method = ...` is
# not an svm() argument and was silently absorbed by `...`.
# NOTE: the call echoed in the transcript below still shows the old argument.
model <- svm(
  type ~ .,
  data = trainingset,
  type = "C-classification",
  kernel = "radial",
  cost = 10,
  gamma = 0.1
)
summary(model)
## 
## Call:
## svm(formula = type ~ ., data = trainingset, method = "C-classification", 
##     kernel = "radial", cost = 10, gamma = 0.1)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  10 
##       gamma:  0.1 
## 
## Number of Support Vectors:  1534
## 
##  ( 638 896 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  nonspam spam
# Predict the held-out rows and cross-tabulate predicted (rows) against
# actual (columns) classes.
pred <- predict(model, newdata = testset)
table(pred, testset[["type"]])
##          
## pred      nonspam spam
##   nonspam     882   94
##   spam         32  525
# Test-set accuracy computed directly from the predictions instead of
# hand-copied counts; the previous literals did not match the confusion
# table printed above.
mean(pred == testset$type)
## [1] 0.9178082192
#Random forests Algorithm
library(randomForest)
# Random forest for `type` on all other columns, fitted on the full spam
# data; printing the object shows the fit summary.
fit <- randomForest(
  type ~ .,
  data = spam
)
fit
## 
## Call:
##  randomForest(formula = type ~ ., data = spam) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 7
## 
##         OOB estimate of  error rate: 4.78%
## Confusion matrix:
##         nonspam spam   class.error
## nonspam    2706   82 0.02941176471
## spam        138 1675 0.07611693326
#Hidden Markov Model
library(HMM)
# Two-state weather HMM. Both probability matrices are written one state per
# row, so they must be filled row-wise (byrow = TRUE) and have one row per
# state. The previous column-major fill (and the 3 x 2 shape of the emission
# matrix) scrambled the entries, giving rows that summed to 1.1 and 0.9.
# NOTE: the transcripts recorded below were produced before this fix.
hmm <- initHMM(
  c("Rainy", "Sunny"),
  c("walk", "shop", "clean"),
  c(.6, .4),
  # transition probabilities: rows = from state, columns = to state
  matrix(c(.7, .3,
           .4, .6), nrow = 2, byrow = TRUE),
  # emission probabilities: rows = state, columns = walk / shop / clean
  matrix(c(.1, .4, .5,
           .6, .3, .1), nrow = 2, byrow = TRUE)
)
hmm
## $States
## [1] "Rainy" "Sunny"
## 
## $Symbols
## [1] "walk"  "shop"  "clean"
## 
## $startProbs
## Rainy Sunny 
##   0.6   0.4 
## 
## $transProbs
##        to
## from    Rainy Sunny
##   Rainy   0.7   0.4
##   Sunny   0.3   0.6
## 
## $emissionProbs
##        symbols
## states  walk shop clean
##   Rainy  0.1  0.5   0.3
##   Sunny  0.4  0.6   0.1
# Forward pass of the HMM over the observed sequence walk -> shop -> clean;
# the result is a states x time-index matrix (see the transcript below).
future <- forward(
  hmm,
  c("walk", "shop", "clean")
)
future
##        index
## states             1            2            3
##   Rainy -2.813410717 -3.101092789 -4.139551155
##   Sunny -1.832581464 -2.631089160 -5.096193182
#Blind signal separation
library(FactoMineR)
# Load the `decathlon` data set into the workspace and print per-column
# summaries.
data("decathlon")
summary(decathlon)
##       100m            Long.jump       Shot.put          High.jump       
##  Min.   :10.44000   Min.   :6.61   Min.   :12.68000   Min.   :1.850000  
##  1st Qu.:10.85000   1st Qu.:7.03   1st Qu.:13.88000   1st Qu.:1.920000  
##  Median :10.98000   Median :7.30   Median :14.57000   Median :1.950000  
##  Mean   :10.99805   Mean   :7.26   Mean   :14.47707   Mean   :1.976829  
##  3rd Qu.:11.14000   3rd Qu.:7.48   3rd Qu.:14.97000   3rd Qu.:2.040000  
##  Max.   :11.64000   Max.   :7.96   Max.   :16.36000   Max.   :2.150000  
##       400m           110m.hurdle           Discus        
##  Min.   :46.81000   Min.   :13.97000   Min.   :37.92000  
##  1st Qu.:48.93000   1st Qu.:14.21000   1st Qu.:41.90000  
##  Median :49.40000   Median :14.48000   Median :44.41000  
##  Mean   :49.61634   Mean   :14.60585   Mean   :44.32561  
##  3rd Qu.:50.30000   3rd Qu.:14.98000   3rd Qu.:46.07000  
##  Max.   :53.20000   Max.   :15.67000   Max.   :51.65000  
##    Pole.vault          Javeline            1500m         
##  Min.   :4.200000   Min.   :50.31000   Min.   :262.1000  
##  1st Qu.:4.500000   1st Qu.:55.27000   1st Qu.:271.0200  
##  Median :4.800000   Median :58.36000   Median :278.0500  
##  Mean   :4.762439   Mean   :58.31659   Mean   :279.0249  
##  3rd Qu.:4.920000   3rd Qu.:60.89000   3rd Qu.:285.1000  
##  Max.   :5.400000   Max.   :70.52000   Max.   :317.0000  
##       Rank              Points           Competition
##  Min.   : 1.00000   Min.   :7313.000   Decastar:13  
##  1st Qu.: 6.00000   1st Qu.:7802.000   OlympicG:28  
##  Median :11.00000   Median :8021.000                
##  Mean   :12.12195   Mean   :8005.366                
##  3rd Qu.:18.00000   3rd Qu.:8122.000                
##  Max.   :28.00000   Max.   :8893.000
# Show the first six rows.
head(decathlon, n = 6L)
##          100m Long.jump Shot.put High.jump  400m 110m.hurdle Discus
## SEBRLE  11.04      7.58    14.83      2.07 49.81       14.69  43.75
## CLAY    10.76      7.40    14.26      1.86 49.37       14.05  50.72
## KARPOV  11.02      7.30    14.77      2.04 48.37       14.09  48.95
## BERNARD 11.02      7.23    14.25      1.92 48.93       14.99  40.87
## YURKOV  11.34      7.09    15.19      2.10 50.42       15.31  46.26
## WARNERS 11.11      7.60    14.31      1.98 48.68       14.23  41.10
##         Pole.vault Javeline 1500m Rank Points Competition
## SEBRLE        5.02    63.19 291.7    1   8217    Decastar
## CLAY          4.92    60.15 301.5    2   8122    Decastar
## KARPOV        4.92    50.31 300.2    3   8099    Decastar
## BERNARD       5.32    62.77 280.1    4   8067    Decastar
## YURKOV        4.72    63.44 276.4    5   8036    Decastar
## WARNERS       4.92    51.77 278.1    6   8030    Decastar
# PCA on the first ten columns (the event results), standardised to unit
# variance, keeping five components and drawing the default graphs.
# Uses `<-` for assignment and TRUE rather than the reassignable `T`.
res.pca <- PCA(decathlon[, 1:10], scale.unit = TRUE, ncp = 5, graph = TRUE)