# Loading packages & importing dataset
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)

# Read the forest-fire records. Every column type is declared explicitly so
# the import does not depend on readr's type guessing.
forest_fires <- read_csv(
  "forestfires.csv",
  col_types = cols(
    X = col_double(),
    Y = col_double(),
    month = col_character(),
    day = col_character(),
    FFMC = col_double(),
    DMC = col_double(),
    DC = col_double(),
    ISI = col_double(),
    temp = col_double(),
    RH = col_double(),
    wind = col_double(),
    rain = col_double(),
    area = col_double()
  )
)

# Look at the entire dataset in the data viewer. View() needs an interactive
# session, so it is skipped when the script runs non-interactively
# (e.g. under Rscript).
if (interactive()) {
  View(forest_fires)
}
# X: X-axis spatial coordinate within the Montesinho park map: 1 to 9
# Y: Y-axis spatial coordinate within the Montesinho park map: 2 to 9
# month: Month of the year: 'jan' to 'dec'
# day: Day of the week: 'mon' to 'sun'
# FFMC: Fine Fuel Moisture Code index from the FWI system: 18.7 to 96.20
# DMC: Duff Moisture Code index from the FWI system: 1.1 to 291.3
# DC: Drought Code index from the FWI system: 7.9 to 860.6
# ISI: Initial Spread Index from the FWI system: 0.0 to 56.10
# temp: Temperature in Celsius degrees: 2.2 to 33.30
# RH: Relative humidity in percentage: 15.0 to 100
# wind: Wind speed in km/h: 0.40 to 9.40
# rain: Outside rain in mm/m2 : 0.0 to 6.4
# area: The burned area of the forest (in ha): 0.00 to 1090.84

# During which months are forest fires most common?

# Tally the number of recorded fires (rows) in each month and keep the result
# as its own summary data frame with a `total_fires` column.
fires_by_month <- forest_fires %>%
  count(month, name = "total_fires")

# Bar chart of the monthly totals: geom_col() takes the bar heights directly
# from the y values, and the panel background is set to white.
ggplot(fires_by_month, aes(x = month, y = total_fires)) +
  geom_col() +
  theme(panel.background = element_rect(fill = "white"))

# On which days of the week are forest fires most common?

# Tally the number of recorded fires (rows) for each day of the week and keep
# the result as its own summary data frame with a `total_fires` column.
fires_by_day <- forest_fires %>%
  count(day, name = "total_fires")

# Bar chart of the per-day totals: geom_col() takes the bar heights directly
# from the y values, and the panel background is set to white.
ggplot(fires_by_day, aes(x = day, y = total_fires)) +
  geom_col() +
  theme(panel.background = element_rect(fill = "white"))

# We can specify a particular order for the values of a variable by changing
# its data type to factor and listing the levels in the desired order.

# Convert month and day from character to factor, listing the levels
# explicitly so both variables are ordered jan-dec and sun-sat respectively.
forest_fires <- forest_fires %>%
  mutate(
    month = factor(
      month,
      levels = c(
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec"
      )
    ),
    day = factor(
      day,
      levels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
    )
  )

# After re-running the two bar charts above with the ordered factors, the new
# plots show that forest fires are most common in August and September.

# Forest fires are most common around the weekend (Friday, Saturday, Sunday),
# with Sunday having the most.

# Create box plots to visualize the distribution of the following variables by month and by day of the week

# Draw a box plot of one variable grouped by another.
#
# Args:
#   x: Name (string) of the column placed on the x-axis (grouping variable).
#   y: Name (string) of the column whose distribution is drawn on the y-axis.
#   data: Data frame holding both columns. Defaults to `forest_fires`, which
#     is looked up when the function is called.
#
# Returns:
#   A ggplot object with a white panel background.
create_boxplots <- function(x, y, data = forest_fires) {
  # aes_string() is deprecated; the .data pronoun looks columns up by name.
  ggplot(data = data, mapping = aes(x = .data[[x]], y = .data[[y]])) +
    geom_boxplot() +
    labs(x = x, y = y) + # keep the plain column names as axis titles
    theme(panel.background = element_rect(fill = "white"))
}
# Columns to plot, referred to by name rather than by position so the
# selection does not depend on the column order of the data frame.
x_var_month <- "month"
x_var_day <- "day"
y_var <- c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")

# Build one box plot per variable in y_var. map2() recycles the single x name
# across every y name and returns a list of ggplot objects.
month_box <- map2(x_var_month, y_var, create_boxplots) # grouped by month
day_box <- map2(x_var_day, y_var, create_boxplots) # grouped by day of week

# Auto-print the list so that each box plot by month is rendered in turn.
month_box
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

# Auto-print the list so that each box plot by day of week is rendered in turn.
day_box
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

# Create scatter plots to visualize the relationship between each of the
# following variables and the burned area

# Draw a scatter plot of one variable against another.
#
# Args:
#   x: Name (string) of the column placed on the x-axis.
#   y: Name (string) of the column placed on the y-axis.
#   data: Data frame holding both columns. Defaults to `forest_fires`, which
#     is looked up when the function is called.
#
# Returns:
#   A ggplot object with a white panel background.
create_scatter <- function(x, y, data = forest_fires) {
  # aes_string() is deprecated; the .data pronoun looks columns up by name.
  ggplot(data = data, mapping = aes(x = .data[[x]], y = .data[[y]])) +
    geom_point() +
    labs(x = x, y = y) + # keep the plain column names as axis titles
    theme(panel.background = element_rect(fill = "white"))
}
# Columns to plot, referred to by name rather than by position: the eight
# explanatory variables go on the x-axis and the burned area on the y-axis.
# NOTE: this overwrites the y_var that was defined for the box plots.
x_var <- c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")
y_var <- "area"

# Build one scatter plot per variable in x_var. map2() recycles the single y
# name across every x name and returns a list of ggplot objects.
scatter_plot <- map2(x_var, y_var, create_scatter)

# Auto-print the list so that each scatter plot against area is rendered in turn.
scatter_plot
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]