library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(GGally)

Import Data

# Location of the raw CSV export of the Algerian forest fires dataset.
csv_path <- "~/Downloads/Algerian_forest_fires_dataset_UPDATE.csv"
raw <- readLines(csv_path)

# Drop blank lines and the "... Region ..." title lines so that only the
# column header and the comma-separated records remain.
raw <- raw[raw != ""]
raw <- raw[!grepl("Region", raw)]

# NOTE(review): the file appears to hold one table per region, each with its
# own header row -- confirm against the raw file. Any header after the first
# would be parsed as a data record and force every column to character, so
# only the first header line is kept.
is_header <- grepl("^[[:space:]]*day[[:space:]]*,", raw, ignore.case = TRUE)
raw <- raw[!(is_header & cumsum(is_header) > 1)]
raw_text <- paste(raw, collapse = "\n")

# strip.white = TRUE trims the padding around unquoted fields, so class labels
# such as "not fire   " are read without their trailing blanks.
data <- read.csv(
  text = raw_text,
  header = TRUE,
  stringsAsFactors = FALSE,
  strip.white = TRUE
)

head(data)
##   day month year Temperature RH Ws Rain FFMC DMC   DC ISI BUI FWI     Classes
## 1  01    06 2012          29 57 18    0 65.7 3.4  7.6 1.3 3.4 0.5 not fire   
## 2  02    06 2012          29 61 13  1.3 64.4 4.1  7.6   1 3.9 0.4 not fire   
## 3  03    06 2012          26 82 22 13.1 47.1 2.5  7.1 0.3 2.7 0.1 not fire   
## 4  04    06 2012          25 89 13  2.5 28.6 1.3  6.9   0 1.7   0 not fire   
## 5  05    06 2012          27 77 16    0 64.8   3 14.2 1.2 3.9 0.5 not fire   
## 6  06    06 2012          31 67 14    0 82.6 5.8 22.2 3.1   7 2.5     fire
# Show the dimensions and the storage type of every imported column.
utils::str(object = data)
## 'data.frame':    245 obs. of  14 variables:
##  $ day        : chr  "01" "02" "03" "04" ...
##  $ month      : chr  "06" "06" "06" "06" ...
##  $ year       : chr  "2012" "2012" "2012" "2012" ...
##  $ Temperature: chr  "29" "29" "26" "25" ...
##  $ RH         : chr  "57" "61" "82" "89" ...
##  $ Ws         : chr  "18" "13" "22" "13" ...
##  $ Rain       : chr  "0" "1.3" "13.1" "2.5" ...
##  $ FFMC       : chr  "65.7" "64.4" "47.1" "28.6" ...
##  $ DMC        : chr  "3.4" "4.1" "2.5" "1.3" ...
##  $ DC         : chr  "7.6" "7.6" "7.1" "6.9" ...
##  $ ISI        : chr  "1.3" "1" "0.3" "0" ...
##  $ BUI        : chr  "3.4" "3.9" "2.7" "1.7" ...
##  $ FWI        : chr  "0.5" "0.4" "0.1" "0" ...
##  $ Classes    : chr  "not fire   " "not fire   " "not fire   " "not fire   " ...

Data Preparation for PCA

library(dplyr)

# Build the numeric input table for the PCA:
#   1. keep the ten weather / fire-index columns,
#   2. coerce each of them to numeric -- entries that cannot be parsed become
#      NA and raise an "NAs introduced by coercion" warning,
#   3. drop every row that contains at least one NA.
# across(everything(), ...) replaces the superseded mutate_all().
data_num <- data %>%
  select(Temperature, RH, Ws, Rain, FFMC, DMC, DC, ISI, BUI, FWI) %>%
  mutate(across(everything(), as.numeric)) %>%
  na.omit()
## Warning: There were 10 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Temperature = .Primitive("as.double")(Temperature)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
# Per-column minimum, quartiles, mean and maximum of the PCA input variables.
base::summary(object = data_num)
##   Temperature          RH              Ws             Rain       
##  Min.   :22.00   Min.   :21.00   Min.   : 6.00   Min.   : 0.000  
##  1st Qu.:30.00   1st Qu.:52.50   1st Qu.:14.00   1st Qu.: 0.000  
##  Median :32.00   Median :63.00   Median :15.00   Median : 0.000  
##  Mean   :32.15   Mean   :62.04   Mean   :15.49   Mean   : 0.763  
##  3rd Qu.:35.00   3rd Qu.:73.50   3rd Qu.:17.00   3rd Qu.: 0.500  
##  Max.   :42.00   Max.   :90.00   Max.   :29.00   Max.   :16.800  
##       FFMC            DMC              DC              ISI        
##  Min.   :28.60   Min.   : 0.70   Min.   :  6.90   Min.   : 0.000  
##  1st Qu.:71.85   1st Qu.: 5.80   1st Qu.: 12.35   1st Qu.: 1.400  
##  Median :83.30   Median :11.30   Median : 33.10   Median : 3.500  
##  Mean   :77.84   Mean   :14.68   Mean   : 49.43   Mean   : 4.742  
##  3rd Qu.:88.30   3rd Qu.:20.80   3rd Qu.: 69.10   3rd Qu.: 7.250  
##  Max.   :96.00   Max.   :65.90   Max.   :220.40   Max.   :19.000  
##       BUI             FWI        
##  Min.   : 1.10   Min.   : 0.000  
##  1st Qu.: 6.00   1st Qu.: 0.700  
##  Median :12.40   Median : 4.200  
##  Mean   :16.69   Mean   : 7.035  
##  3rd Qu.:22.65   3rd Qu.:11.450  
##  Max.   :68.00   Max.   :31.100

PCA

# Centre each variable and divide it by its standard deviation, then run the
# principal component analysis on the standardised matrix.
data_scaled <- scale(data_num, center = TRUE, scale = TRUE)
pca <- stats::prcomp(x = data_scaled)

# Standard deviation and (cumulative) proportion of variance per component.
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3931 1.2583 0.95768 0.89248 0.62560 0.50021 0.47145
## Proportion of Variance 0.5727 0.1583 0.09171 0.07965 0.03914 0.02502 0.02223
## Cumulative Proportion  0.5727 0.7310 0.82273 0.90238 0.94152 0.96654 0.98877
##                            PC8     PC9    PC10
## Standard deviation     0.30385 0.12710 0.06202
## Proportion of Variance 0.00923 0.00162 0.00038
## Cumulative Proportion  0.99800 0.99962 1.00000

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Template example: summarise the built-in `cars` data set.
base::summary(object = datasets::cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.