library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(GGally)
# Load the forest-fire CSV. The file is not a plain rectangular table: it
# contains blank lines and title lines mentioning "Region", so it is cleaned
# line by line before being handed to read.csv().
raw <- readLines("~/Downloads/Algerian_forest_fires_dataset_UPDATE.csv")
raw <- raw[raw != ""]
raw <- raw[!grepl("Region", raw)]
# After the title lines are gone, the first remaining line is the column
# header. Any later line identical to it is a repeated header (the file appears
# to stack more than one table); left in place it becomes a bogus data row
# whose values ("Temperature", "RH", ...) turn into NA on numeric conversion.
# NOTE: output recorded below this chunk predates this fix (245 rows); re-knit
# to refresh it.
is_repeated_header <- seq_along(raw) > 1 & trimws(raw) == trimws(raw[1])
raw <- raw[!is_repeated_header]
raw_text <- paste(raw, collapse = "\n")
# Read every column as character on purpose: at least one row is malformed, so
# numeric conversion is done explicitly later, and this keeps the column types
# exactly as the rest of the script already expects them.
data <- read.csv(
  text = raw_text,
  header = TRUE,
  colClasses = "character",
  stringsAsFactors = FALSE
)
head(data)
## day month year Temperature RH Ws Rain FFMC DMC DC ISI BUI FWI Classes
## 1 01 06 2012 29 57 18 0 65.7 3.4 7.6 1.3 3.4 0.5 not fire
## 2 02 06 2012 29 61 13 1.3 64.4 4.1 7.6 1 3.9 0.4 not fire
## 3 03 06 2012 26 82 22 13.1 47.1 2.5 7.1 0.3 2.7 0.1 not fire
## 4 04 06 2012 25 89 13 2.5 28.6 1.3 6.9 0 1.7 0 not fire
## 5 05 06 2012 27 77 16 0 64.8 3 14.2 1.2 3.9 0.5 not fire
## 6 06 06 2012 31 67 14 0 82.6 5.8 22.2 3.1 7 2.5 fire
# Inspect the column types. The recorded output shows every column stored as
# character, so the measurements need an explicit numeric conversion.
str(data)
## 'data.frame': 245 obs. of 14 variables:
## $ day : chr "01" "02" "03" "04" ...
## $ month : chr "06" "06" "06" "06" ...
## $ year : chr "2012" "2012" "2012" "2012" ...
## $ Temperature: chr "29" "29" "26" "25" ...
## $ RH : chr "57" "61" "82" "89" ...
## $ Ws : chr "18" "13" "22" "13" ...
## $ Rain : chr "0" "1.3" "13.1" "2.5" ...
## $ FFMC : chr "65.7" "64.4" "47.1" "28.6" ...
## $ DMC : chr "3.4" "4.1" "2.5" "1.3" ...
## $ DC : chr "7.6" "7.6" "7.1" "6.9" ...
## $ ISI : chr "1.3" "1" "0.3" "0" ...
## $ BUI : chr "3.4" "3.9" "2.7" "1.7" ...
## $ FWI : chr "0.5" "0.4" "0.1" "0" ...
## $ Classes : chr "not fire " "not fire " "not fire " "not fire " ...
library(dplyr)
# Build the numeric analysis table: keep the ten measurement columns, convert
# each from character to numeric, and drop incomplete rows.
# as.numeric() turns unparseable text into NA (with a coercion warning), and
# na.omit() then removes every row holding at least one NA, so malformed input
# rows are discarded here rather than repaired.
data_num <- data %>%
  select(Temperature, RH, Ws, Rain, FFMC, DMC, DC, ISI, BUI, FWI) %>%
  # across() replaces the superseded mutate_all(); the result is the same.
  mutate(across(everything(), as.numeric)) %>%
  na.omit()
## Warning: There were 10 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Temperature = .Primitive("as.double")(Temperature)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
# Per-column summary statistics of the numeric table, computed after the
# incomplete rows were dropped.
summary(data_num)
## Temperature RH Ws Rain
## Min. :22.00 Min. :21.00 Min. : 6.00 Min. : 0.000
## 1st Qu.:30.00 1st Qu.:52.50 1st Qu.:14.00 1st Qu.: 0.000
## Median :32.00 Median :63.00 Median :15.00 Median : 0.000
## Mean :32.15 Mean :62.04 Mean :15.49 Mean : 0.763
## 3rd Qu.:35.00 3rd Qu.:73.50 3rd Qu.:17.00 3rd Qu.: 0.500
## Max. :42.00 Max. :90.00 Max. :29.00 Max. :16.800
## FFMC DMC DC ISI
## Min. :28.60 Min. : 0.70 Min. : 6.90 Min. : 0.000
## 1st Qu.:71.85 1st Qu.: 5.80 1st Qu.: 12.35 1st Qu.: 1.400
## Median :83.30 Median :11.30 Median : 33.10 Median : 3.500
## Mean :77.84 Mean :14.68 Mean : 49.43 Mean : 4.742
## 3rd Qu.:88.30 3rd Qu.:20.80 3rd Qu.: 69.10 3rd Qu.: 7.250
## Max. :96.00 Max. :65.90 Max. :220.40 Max. :19.000
## BUI FWI
## Min. : 1.10 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.: 0.700
## Median :12.40 Median : 4.200
## Mean :16.69 Mean : 7.035
## 3rd Qu.:22.65 3rd Qu.:11.450
## Max. :68.00 Max. :31.100
# Principal component analysis on standardised variables.
# Each column is centred and divided by its standard deviation first, so the
# components are not dominated by the variables with the largest raw ranges.
data_scaled <- scale(x = data_num, center = TRUE, scale = TRUE)
# The input is already standardised, so prcomp() only centres (its default)
# and does not rescale again.
pca <- prcomp(x = data_scaled, center = TRUE, scale. = FALSE)
# Standard deviation and (cumulative) proportion of variance per component.
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.3931 1.2583 0.95768 0.89248 0.62560 0.50021 0.47145
## Proportion of Variance 0.5727 0.1583 0.09171 0.07965 0.03914 0.02502 0.02223
## Cumulative Proportion 0.5727 0.7310 0.82273 0.90238 0.94152 0.96654 0.98877
## PC8 PC9 PC10
## Standard deviation 0.30385 0.12710 0.06202
## Proportion of Variance 0.00923 0.00162 0.00038
## Cumulative Proportion 0.99800 0.99962 1.00000
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk summarising the built-in `cars` data set; it does not use any
# object from the forest-fire analysis above.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.