The fireforest data set is being used for Support vector machine modelling by using the factors like wind ,rain, rh and temperature etc and predict the aize of burned area.

library(kernlab)
## Warning: package 'kernlab' was built under R version 3.5.1
library(caret)
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.1
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:kernlab':
## 
##     alpha
library(plyr)
## Warning: package 'plyr' was built under R version 3.5.1
# Read the data
FF <- read.csv(file.choose())
View(FF)
class(FF)
## [1] "data.frame"
str(FF)
## 'data.frame':    517 obs. of  31 variables:
##  $ month        : Factor w/ 12 levels "apr","aug","dec",..: 8 11 11 8 8 2 2 2 12 12 ...
##  $ day          : Factor w/ 7 levels "fri","mon","sat",..: 1 6 3 1 4 4 2 2 6 3 ...
##  $ FFMC         : num  86.2 90.6 90.6 91.7 89.3 92.3 92.3 91.5 91 92.5 ...
##  $ DMC          : num  26.2 35.4 43.7 33.3 51.3 ...
##  $ DC           : num  94.3 669.1 686.9 77.5 102.2 ...
##  $ ISI          : num  5.1 6.7 6.7 9 9.6 14.7 8.5 10.7 7 7.1 ...
##  $ temp         : num  8.2 18 14.6 8.3 11.4 22.2 24.1 8 13.1 22.8 ...
##  $ RH           : int  51 33 33 97 99 29 27 86 63 40 ...
##  $ wind         : num  6.7 0.9 1.3 4 1.8 5.4 3.1 2.2 5.4 4 ...
##  $ rain         : num  0 0 0 0.2 0 0 0 0 0 0 ...
##  $ area         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dayfri       : int  1 0 0 1 0 0 0 0 0 0 ...
##  $ daymon       : int  0 0 0 0 0 0 1 1 0 0 ...
##  $ daysat       : int  0 0 1 0 0 0 0 0 0 1 ...
##  $ daysun       : int  0 0 0 0 1 1 0 0 0 0 ...
##  $ daythu       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ daytue       : int  0 1 0 0 0 0 0 0 1 0 ...
##  $ daywed       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthapr     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthaug     : int  0 0 0 0 0 1 1 1 0 0 ...
##  $ monthdec     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthfeb     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthjan     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthjul     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthjun     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthmar     : int  1 0 0 1 1 0 0 0 0 0 ...
##  $ monthmay     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthnov     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ monthoct     : int  0 1 1 0 0 0 0 0 0 0 ...
##  $ monthsep     : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ size_category: Factor w/ 2 levels "large","small": 2 2 2 2 2 2 2 2 2 2 ...
# The area value has lots of zeros

hist(FF$area)
rug(FF$area)

# Transform the Area value to Y 

FF1 <- mutate(FF, y = log(area + 1))  # default is to the base e, y is lower case
hist(FF1$y)

summary(FF) # Confirms on the different scale and demands normalizing the data.
##      month      day          FFMC            DMC              DC       
##  aug    :184   fri:85   Min.   :18.70   Min.   :  1.1   Min.   :  7.9  
##  sep    :172   mon:74   1st Qu.:90.20   1st Qu.: 68.6   1st Qu.:437.7  
##  mar    : 54   sat:84   Median :91.60   Median :108.3   Median :664.2  
##  jul    : 32   sun:95   Mean   :90.64   Mean   :110.9   Mean   :547.9  
##  feb    : 20   thu:61   3rd Qu.:92.90   3rd Qu.:142.4   3rd Qu.:713.9  
##  jun    : 17   tue:64   Max.   :96.20   Max.   :291.3   Max.   :860.6  
##  (Other): 38   wed:54                                                  
##       ISI              temp             RH              wind      
##  Min.   : 0.000   Min.   : 2.20   Min.   : 15.00   Min.   :0.400  
##  1st Qu.: 6.500   1st Qu.:15.50   1st Qu.: 33.00   1st Qu.:2.700  
##  Median : 8.400   Median :19.30   Median : 42.00   Median :4.000  
##  Mean   : 9.022   Mean   :18.89   Mean   : 44.29   Mean   :4.018  
##  3rd Qu.:10.800   3rd Qu.:22.80   3rd Qu.: 53.00   3rd Qu.:4.900  
##  Max.   :56.100   Max.   :33.30   Max.   :100.00   Max.   :9.400  
##                                                                   
##       rain              area             dayfri           daymon      
##  Min.   :0.00000   Min.   :   0.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:   0.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :   0.52   Median :0.0000   Median :0.0000  
##  Mean   :0.02166   Mean   :  12.85   Mean   :0.1644   Mean   :0.1431  
##  3rd Qu.:0.00000   3rd Qu.:   6.57   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :6.40000   Max.   :1090.84   Max.   :1.0000   Max.   :1.0000  
##                                                                       
##      daysat           daysun           daythu          daytue      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.000   Median :0.0000  
##  Mean   :0.1625   Mean   :0.1838   Mean   :0.118   Mean   :0.1238  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :1.0000  
##                                                                    
##      daywed          monthapr          monthaug         monthdec      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.1044   Mean   :0.01741   Mean   :0.3559   Mean   :0.01741  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.00000  
##                                                                       
##     monthfeb          monthjan           monthjul         monthjun      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.0000   Median :0.00000  
##  Mean   :0.03868   Mean   :0.003868   Mean   :0.0619   Mean   :0.03288  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.000000   Max.   :1.0000   Max.   :1.00000  
##                                                                         
##     monthmar         monthmay           monthnov           monthoct      
##  Min.   :0.0000   Min.   :0.000000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000000   Median :0.000000   Median :0.00000  
##  Mean   :0.1044   Mean   :0.003868   Mean   :0.001934   Mean   :0.02901  
##  3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.000000   Max.   :1.000000   Max.   :1.00000  
##                                                                          
##     monthsep      size_category
##  Min.   :0.0000   large:139    
##  1st Qu.:0.0000   small:378    
##  Median :0.0000                
##  Mean   :0.3327                
##  3rd Qu.:1.0000                
##  Max.   :1.0000                
## 
# Prediction of Forest fires requires only prediction from 
# temperature, rain, relative humidity and wind speed

# Apply Normalization technique to the whole dataset :

normalize<-function(x){
  return ( (x-min(x))/(max(x)-min(x)))
}
FF$temp = normalize(FF$temp)
FF$RH   = normalize(FF$RH)
FF$wind = normalize(FF$wind)
FF$rain = normalize(FF$rain)
# We need to tweak this as a classification problem.lets base out the Size using this criteria :

attach(FF)
# Data Partition 
set.seed(123)
ind <- sample(2, nrow(FF), replace = TRUE, prob = c(0.7,0.3))
FF_train <- FF[ind==1,]
FF_test  <- FF[ind==2,]
# to train model
# e1071 package from LIBSVM library
# SVMlight algorithm klar package 

# kvsm() function uses gaussian RBF kernel 

# Building model 


model1<-ksvm(size_category~temp+rain+wind+RH, 
             data= FF_train,kernel = "vanilladot")
##  Setting default kernel parameters
model1
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Linear (vanilla) kernel function. 
## 
## Number of Support Vectors : 199 
## 
## Objective Function Value : -184 
## Training error : 0.247978
Area_pred <- predict(model1, FF_test)

table(Area_pred,FF_test$size_category)
##          
## Area_pred large small
##     large     0     0
##     small    47    99
agreement <- Area_pred == FF_test$size_category
table(agreement)
## agreement
## FALSE  TRUE 
##    47    99
prop.table(table(agreement))
## agreement
##     FALSE      TRUE 
## 0.3219178 0.6780822
# Different types of kernels 
# "rbfdot", "polydot", "tanhdot", "vanilladot", "laplacedot", 
# "besseldot", "anovadot", "splinedot", "matrix"

# kernel = rfdot 
model_rfdot<-ksvm(size_category~temp+rain+wind+RH, 
                  data= FF_train,kernel = "rbfdot")
pred_rfdot<-predict(model_rfdot,newdata=FF_test)
mean(pred_rfdot==FF_test$size_category) # 68.41
## [1] 0.6849315
# kernel = vanilladot
model_vanilla<-ksvm(size_category~temp+rain+wind+RH, 
                    data= FF_train,kernel = "vanilladot")
##  Setting default kernel parameters
pred_vanilla<-predict(model_vanilla,newdata=FF_test)
mean(pred_vanilla==FF_test$size_category) # 67.80
## [1] 0.6780822
# kernal = besseldot
model_besseldot<-ksvm(size_category~temp+rain+wind+RH, 
                      data= FF_train,kernel = "besseldot")
##  Setting default kernel parameters
pred_bessel<-predict(model_besseldot,newdata=FF_test)
mean(pred_bessel==FF_test$size_category) # 67.80
## [1] 0.6780822
# kernel = polydot

model_poly<-ksvm(size_category~temp+rain+wind+RH, 
                 data= FF_train,kernel = "polydot")
##  Setting default kernel parameters
pred_poly<-predict(model_poly,newdata = FF_test)
mean(pred_poly==FF_test$size_category) # 67.80
## [1] 0.6780822