# Loading packages & importing dataset
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)
# Read the forest fire records into a tibble; read_csv() guesses the column
# types and reports them (recorded output below).
forest_fires <- read_csv("forestfires.csv")
## Parsed with column specification:
## cols(
## X = col_double(),
## Y = col_double(),
## month = col_character(),
## day = col_character(),
## FFMC = col_double(),
## DMC = col_double(),
## DC = col_double(),
## ISI = col_double(),
## temp = col_double(),
## RH = col_double(),
## wind = col_double(),
## rain = col_double(),
## area = col_double()
## )
# Look at the entire dataset, forestfires.csv. View() needs a data viewer, so
# only call it in an interactive session; this keeps the script runnable with
# Rscript or when knitting.
if (interactive()) {
  View(forest_fires)
}
# X: X-axis spatial coordinate within the Montesinho park map: 1 to 9
# Y: Y-axis spatial coordinate within the Montesinho park map: 2 to 9
# month: Month of the year: 'jan' to 'dec'
# day: Day of the week: 'mon' to 'sun'
# FFMC: Fine Fuel Moisture Code index from the FWI system: 18.7 to 96.20
# DMC: Duff Moisture Code index from the FWI system: 1.1 to 291.3
# DC: Drought Code index from the FWI system: 7.9 to 860.6
# ISI: Initial Spread Index from the FWI system: 0.0 to 56.10
# temp: Temperature in Celsius degrees: 2.2 to 33.30
# RH: Relative humidity in percentage: 15.0 to 100
# wind: Wind speed in km/h: 0.40 to 9.40
# rain: Outside rain in mm/m2 : 0.0 to 6.4
# area: The burned area of the forest (in ha): 0.00 to 1090.84
# Question 1: During which months are forest fires most common?
# Count the rows (fires) recorded for each month; the result has one row per
# month with the count stored in total_fires.
fires_by_month <- summarize(
  group_by(forest_fires, month),
  total_fires = n()
)
## `summarise()` ungrouping output (override with `.groups` argument)
# applying n() function counting the number of observations in each group
# save the resulting summary as a new data frame
# Bar chart of the number of fires per month.
# geom_col() draws bars whose heights are the values already stored in
# total_fires; it is the direct equivalent of geom_bar(stat = "identity").
ggplot(data = fires_by_month, aes(x = month, y = total_fires)) +
  geom_col() +
  labs(x = "Month", y = "Number of fires") +
  theme(panel.background = element_rect(fill = "white"))

# use geom_col to create a bar chart from pre-computed counts
# Question 2: On which days of the week are forest fires most common?
# Count the rows (fires) recorded for each day of the week; the result has one
# row per day with the count stored in total_fires.
fires_by_day <- summarize(
  group_by(forest_fires, day),
  total_fires = n()
)
## `summarise()` ungrouping output (override with `.groups` argument)
# applying n() function counting the number of observations in each group
# Bar chart of the number of fires per day of the week.
# geom_col() draws bars whose heights are the values already stored in
# total_fires; it is the direct equivalent of geom_bar(stat = "identity").
ggplot(data = fires_by_day, aes(x = day, y = total_fires)) +
  geom_col() +
  labs(x = "Day of the week", y = "Number of fires") +
  theme(panel.background = element_rect(fill = "white"))

# We can specify a particular order for the values of a variable by changing its data type to factor.
# To change the data type of month to factor and specify the order of months
# write this code
# Turn month and day into factors whose levels follow calendar order, so that
# summaries and plots list them chronologically instead of alphabetically.
forest_fires <- mutate(
  forest_fires,
  # tolower(month.abb) is "jan", "feb", ..., "dec"
  month = factor(month, levels = tolower(month.abb)),
  day = factor(
    day,
    levels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
  )
)
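# After re-running the two summaries and bar charts above with the ordered factors, the new plots show that forest fires are most common in August and September.
# Forest fires are most common on weekends (Friday, Saturday, and Sunday), with Sunday having the most.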
# Create box plots to visualize the distribution of the following variables by month and by day of the week
# Draw a box plot of one column grouped by another.
#
# Args:
#   x: Name (a string) of the column mapped to the x axis.
#   y: Name (a string) of the column mapped to the y axis.
#   data: Data frame holding both columns; defaults to forest_fires, which is
#     looked up when the function is called.
#
# Returns:
#   A ggplot object with a box plot layer and a white panel background.
create_boxplots <- function(x, y, data = forest_fires) {
  # aes_string() is deprecated; .data[[name]] selects a column by its name
  # inside aes(). labs() keeps the axis titles equal to the column names.
  ggplot(data = data) +
    aes(x = .data[[x]], y = .data[[y]]) +
    geom_boxplot() +
    labs(x = x, y = y) +
    theme(panel.background = element_rect(fill = "white"))
}
# Assign x and y names
# Column names used as plot variables. They are spelled out instead of being
# picked by position so they stay correct if the column order ever changes.
x_var_month <- "month"
x_var_day <- "day"
y_var <- c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")
## use the map2() function to apply the function to the variables of interest;
## the single x name is recycled across every name in y_var
month_box <- map2(x_var_month, y_var, create_boxplots) ## visualize variables by month
day_box <- map2(x_var_day, y_var, create_boxplots) ## visualize variables by day
month_box
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

# Print the list of box plots grouped by day of the week (one plot per list entry)
day_box
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

# Create scatter plots to visualize how each of the following variables relates to the burned area
# Draw a scatter plot of one column against another.
#
# Args:
#   x: Name (a string) of the column mapped to the x axis.
#   y: Name (a string) of the column mapped to the y axis.
#   data: Data frame holding both columns; defaults to forest_fires, which is
#     looked up when the function is called.
#
# Returns:
#   A ggplot object with a point layer and a white panel background.
create_scatter <- function(x, y, data = forest_fires) {
  # aes_string() is deprecated; .data[[name]] selects a column by its name
  # inside aes(). labs() keeps the axis titles equal to the column names.
  ggplot(data = data) +
    aes(x = .data[[x]], y = .data[[y]]) +
    geom_point() +
    labs(x = x, y = y) +
    theme(panel.background = element_rect(fill = "white"))
}
# Assign x and y names
# Explanatory variables go on the x axis and the burned area on the y axis.
# Names are spelled out instead of being picked by column position.
x_var <- c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")
y_var <- "area"
## use the map2() function to apply the function to the variables of interest;
## the single y name is recycled across every name in x_var
scatter_plot <- map2(x_var, y_var, create_scatter)
scatter_plot
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]
