12/6/2020

Import libraries

library(plotly)
library(ggplot2)
library(dplyr)
library(imputeTS)
library(tidyr)

Data

# Load the built-in airquality dataset into the workspace and preview
# its first six rows.
data(list = "airquality")
head(airquality, n = 6L)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

Cleaning the data

## Number of missing (NA) entries in each column of airquality
vapply(airquality, function(column) sum(is.na(column)), integer(1))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0

Creating a datetime variable to plot a time series

# airquality stores only Month and Day; combine them with a fixed year
# (1973) to get one Date per observation for the time axis.
year <- rep(1973, nrow(airquality))
datetime <- as.Date(
  paste(year, airquality$Month, airquality$Day, sep = "-"),
  format = "%Y-%m-%d"
)

Modifying data to plot multiline timeseries

# Reshape the four measured parameters into long format
# (one row per parameter per day) so they can be drawn as separate lines.
#
# Missing Ozone and Solar.R readings are filled by interpolation
# (imputeTS::na_interpolation) so the plotted lines have no gaps.
#
# The date is attached to each row *before* reshaping and excluded from the
# gather, so every value keeps its own date regardless of how many columns
# are gathered or in which order they are stacked. (Previously the dates
# were appended afterwards with rep(datetime, 4), which silently depended on
# exactly four columns being selected.)
data <- airquality %>%
  select(Ozone, Solar.R, Wind, Temp) %>%
  mutate(
    Ozone = na_interpolation(Ozone),
    Solar.R = na_interpolation(Solar.R),
    time = datetime
  ) %>%
  gather(param, level, -time) %>%
  # Restore the original column order: param, level, time.
  select(param, level, time)
head(data)
##   param level       time
## 1 Ozone    41 1973-05-01
## 2 Ozone    36 1973-05-02
## 3 Ozone    12 1973-05-03
## 4 Ozone    18 1973-05-04
## 5 Ozone    23 1973-05-05
## 6 Ozone    28 1973-05-06

Visualizing

Comparing Ozone, Solar.R, Wind and Temp levels from May through September 1973

# Interactive multi-line chart: one line per parameter, with date on the
# x-axis and the measured level on the y-axis.
data %>%
  plot_ly(
    x = ~time,
    y = ~level,
    color = ~param,
    type = "scatter",
    mode = "lines"
  )

Thank You