#Time Series and Forecasting ____
#1) Select an interesting dataset of your
#choice that will allow you to permit time series forecasting ___
#loading Bejing air polluntion dataset and storing in the data2 variable.
data2 <- read.csv("PRSA.csv")
View(data2)
#2) Clean and transform the data into a more convenient form, if needed. ____
any(is.na(data2))
## [1] TRUE
summary(data2)
## No year month day
## Min. : 1 Min. :2013 Min. : 1.000 Min. : 1.00
## 1st Qu.: 8767 1st Qu.:2014 1st Qu.: 4.000 1st Qu.: 8.00
## Median :17532 Median :2015 Median : 7.000 Median :16.00
## Mean :17532 Mean :2015 Mean : 6.523 Mean :15.73
## 3rd Qu.:26298 3rd Qu.:2016 3rd Qu.:10.000 3rd Qu.:23.00
## Max. :35064 Max. :2017 Max. :12.000 Max. :31.00
##
## hour PM2.5 PM10 SO2
## Min. : 0.00 Min. : 3.00 Min. : 2.0 Min. : 0.2856
## 1st Qu.: 5.75 1st Qu.: 22.00 1st Qu.: 38.0 1st Qu.: 3.0000
## Median :11.50 Median : 58.00 Median : 87.0 Median : 9.0000
## Mean :11.50 Mean : 82.77 Mean :110.1 Mean : 17.3759
## 3rd Qu.:17.25 3rd Qu.:114.00 3rd Qu.:155.0 3rd Qu.: 21.0000
## Max. :23.00 Max. :898.00 Max. :984.0 Max. :341.0000
## NA's :925 NA's :718 NA's :935
## NO2 CO O3 TEMP
## Min. : 2.00 Min. : 100 Min. : 0.2142 Min. :-16.80
## 1st Qu.: 30.00 1st Qu.: 500 1st Qu.: 8.0000 1st Qu.: 3.10
## Median : 53.00 Median : 900 Median : 42.0000 Median : 14.50
## Mean : 59.31 Mean : 1263 Mean : 56.3534 Mean : 13.58
## 3rd Qu.: 82.00 3rd Qu.: 1500 3rd Qu.: 82.0000 3rd Qu.: 23.30
## Max. :290.00 Max. :10000 Max. :423.0000 Max. : 40.50
## NA's :1023 NA's :1776 NA's :1719 NA's :20
## PRES DEWP RAIN wd
## Min. : 985.9 Min. :-35.300 Min. : 0.00000 NE : 5140
## 1st Qu.:1003.3 1st Qu.: -8.100 1st Qu.: 0.00000 ENE : 3950
## Median :1011.4 Median : 3.800 Median : 0.00000 SW : 3359
## Mean :1011.8 Mean : 3.123 Mean : 0.06742 E : 2608
## 3rd Qu.:1020.1 3rd Qu.: 15.600 3rd Qu.: 0.00000 NNE : 2445
## Max. :1042.0 Max. : 28.500 Max. :72.50000 (Other):17481
## NA's :20 NA's :20 NA's :20 NA's : 81
## WSPM station
## Min. : 0.000 Aotizhongxin:35064
## 1st Qu.: 0.900
## Median : 1.400
## Mean : 1.708
## 3rd Qu.: 2.200
## Max. :11.200
## NA's :14
newdata2 <- na.omit(data2)
#3) Describe the problem statement as it relates to
#using time series analysis approach for prediction
#Which year has the maximum CO2 level in the environment?
#4) Perform exploratory time series analysis by producing a time plot
#of the data. Describe the time plot. ____
#5) To better assess the trends, remove any seasonal effects by
#aggregation and describe the plot obtained. ____
newdata2.month <- ts(newdata2$CO, start = c(2013,3), end = c(2017,2), frequency =12)
newdata2.annual <- aggregate(newdata2.month, FUN = mean)
#After taking frequency manthly we can that CO2 level has been increased from 2014 till 2017
#After removing seasonal effects,we can see in the year 2015 there is a huge inclined in CO,
#from the annual plot.
plot(newdata2.month, xlab="year", ylab = "CO2")

plot(newdata2.annual, xlab="year", ylab = "CO2")

#6) Obtain boxplots summarizing the observed values and explain your results ____
#We can see that the highest median is higher than the other months, we can also see that
#March, April, May, June and July has the lower median in CO level.
boxplot(newdata2.month~ cycle(newdata2.month))

#7) Decompose the series into a components trend, seasonal effect and residuals.
#Plot the decomposed series.Produce a plot of the trend with a superimposed seasonal effect. ____
newdata2.decom <- decompose(newdata2.month,type = "multiplicative")
plot(newdata2.decom)

seasonal <- newdata2.decom$seasonal
trend <- newdata2.decom$trend
ts.plot(cbind(seasonal,trend * seasonal),lty=1:2)

#8) Plot the correlogram of the residuals from step 7.
#Describe the plot and explain any “significant” correlations at significant lags. ____
#There is no significant correlations at significant lags as we can see from the plot.
plot(ts(newdata2.decom$random))

acf(newdata2.decom$random, na.action = na.pass)

#9) If patterns change over a time period, use appropriate methods (moving average,
#exponential smoothing, Holt-Winters method)to obtain an appropriately fitted model for
#forecasting. ____
#Holt-Winters method generalizes the simple exponential smoothing
#equation and takes into account seasonal effects and trends, hence we are using this methods
#as we have both trend and seasonal effects.
newdata2.HW <- HoltWinters(newdata2.month, seasonal = "multiplicative")
newdata2.HW
## Holt-Winters exponential smoothing with trend and multiplicative seasonal component.
##
## Call:
## HoltWinters(x = newdata2.month, seasonal = "multiplicative")
##
## Smoothing parameters:
## alpha: 0.3915301
## beta : 0.05345697
## gamma: 0.9280682
##
## Coefficients:
## [,1]
## a 1301.4001063
## b 30.8431014
## s1 1.0450765
## s2 0.7749670
## s3 0.7745522
## s4 0.7881087
## s5 0.9775579
## s6 1.0662227
## s7 0.9933308
## s8 1.1190524
## s9 1.1775109
## s10 1.2172574
## s11 1.2183904
## s12 1.3013351
plot(newdata2.HW)

#10) Using the fitted model, forecast values for a time period of your choice.
#Assess the validity of these forecasts. ____
# can see from the plot that its is showing increase in the CO level, due to more pollution.
newdata2.pred <- predict(newdata2.HW,n.ahead = 4*12)
newdata2.pred
## Jan Feb Mar Apr May Jun Jul
## 2017 1392.296 1056.347 1079.671 1122.876 1422.949
## 2018 1998.982 2175.204 1779.097 1343.176 1366.346 1414.568 1784.760
## 2019 2449.929 2656.851 2165.898 1630.004 1653.021 1706.261 2146.571
## 2020 2900.876 3138.497 2552.699 1916.833 1939.696 1997.953 2508.382
## 2021 3351.824 3620.144
## Aug Sep Oct Nov Dec
## 2017 1584.896 1507.183 1732.455 1859.276 1959.579
## 2018 1979.523 1874.831 2146.636 2295.093 2410.107
## 2019 2374.151 2242.480 2560.817 2730.910 2860.635
## 2020 2768.778 2610.129 2974.997 3166.727 3311.163
## 2021
ts.plot(newdata2.month,newdata2.pred,lty=1:2)

#11) What would your recommendations be based on the forecasts observed? ____
#This forecast showing increase in CO level.The HoltWinters methods shows increase in trends and fitted almost.
#which will lead to increase in the global warming from this we can understand that
#the use of sustainable practice is so much important in order to protect our climate.