John Watters
remove(list=ls())
library(rmarkdown)
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✔ ggplot2 3.4.0 ✔ fma 2.4
## ✔ forecast 8.19 ✔ expsmooth 2.3
##
library(ggplot2)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(rcompanion)
##
## Attaching package: 'rcompanion'
##
## The following object is masked from 'package:forecast':
##
## accuracy
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
library(reshape)
##
## Attaching package: 'reshape'
##
## The following object is masked from 'package:lubridate':
##
## stamp
##
## The following object is masked from 'package:dplyr':
##
## rename
##
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
library(gains)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(lattice)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tibble)
library(TSstudio)
# Importing the global CO2 emissions data:
emissions <- read.csv("E:/Cruise/API_EN.ATM.CO2E.KT_DS2_en_csv_v2_3888754.csv")
# Transposing the data:
emissions <- t(emissions)
#View(emissions)
#Making first row the header with janitor package:
emissions <- janitor::row_to_names(emissions, 1)
#View(emissions)
#Extracting World data column:
globalemissions <- as.data.frame( emissions[,'World'], drop=false)
str(globalemissions)
## 'data.frame': 62 obs. of 1 variable:
## $ emissions[, "World"]: chr "9463838.500" "9423934.4240" "9732505.8900" "1.027450e+07" ...
#View(globalemissions)
#Renaming column with base R function:
names(globalemissions)[1] <- "World"
#View(globalemissions)
dim(globalemissions)
## [1] 62 1
str(globalemissions)
## 'data.frame': 62 obs. of 1 variable:
## $ World: chr "9463838.500" "9423934.4240" "9732505.8900" "1.027450e+07" ...
#Creating a date column from labels with tibble:
head(globalemissions)
## World
## X1960 9463838.500
## X1961 9423934.4240
## X1962 9732505.8900
## X1963 1.027450e+07
## X1964 1.082025e+07
## X1965 1.140258e+07
globalemissions <- tibble::rownames_to_column(globalemissions, "Year")
head(globalemissions)
## Year World
## 1 X1960 9463838.500
## 2 X1961 9423934.4240
## 3 X1962 9732505.8900
## 4 X1963 1.027450e+07
## 5 X1964 1.082025e+07
## 6 X1965 1.140258e+07
#Removing the 'X' from Year column:
globalemissions$Year <- gsub("X", "", as.character(globalemissions$Year))
head(globalemissions)
## Year World
## 1 1960 9463838.500
## 2 1961 9423934.4240
## 3 1962 9732505.8900
## 4 1963 1.027450e+07
## 5 1964 1.082025e+07
## 6 1965 1.140258e+07
#Converting year into valid date with truncated dates with lubridate:
yrs <- c(1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970,
1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981,
1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992,
1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 2019, 2020, 2021)
lubridate::ymd(yrs, truncated = 2L)
## [1] "1960-01-01" "1961-01-01" "1962-01-01" "1963-01-01" "1964-01-01"
## [6] "1965-01-01" "1966-01-01" "1967-01-01" "1968-01-01" "1969-01-01"
## [11] "1970-01-01" "1971-01-01" "1972-01-01" "1973-01-01" "1974-01-01"
## [16] "1975-01-01" "1976-01-01" "1977-01-01" "1978-01-01" "1979-01-01"
## [21] "1980-01-01" "1981-01-01" "1982-01-01" "1983-01-01" "1984-01-01"
## [26] "1985-01-01" "1986-01-01" "1987-01-01" "1988-01-01" "1989-01-01"
## [31] "1990-01-01" "1991-01-01" "1992-01-01" "1993-01-01" "1994-01-01"
## [36] "1995-01-01" "1996-01-01" "1997-01-01" "1998-01-01" "1999-01-01"
## [41] "2000-01-01" "2001-01-01" "2002-01-01" "2003-01-01" "2004-01-01"
## [46] "2005-01-01" "2006-01-01" "2007-01-01" "2008-01-01" "2009-01-01"
## [51] "2010-01-01" "2011-01-01" "2012-01-01" "2013-01-01" "2014-01-01"
## [56] "2015-01-01" "2016-01-01" "2017-01-01" "2018-01-01" "2019-01-01"
## [61] "2020-01-01" "2021-01-01"
globalemissions$Year <- as.Date(as.character(yrs), format = "%Y")
head(globalemissions)
## Year World
## 1 1960-02-16 9463838.500
## 2 1961-02-16 9423934.4240
## 3 1962-02-16 9732505.8900
## 4 1963-02-16 1.027450e+07
## 5 1964-02-16 1.082025e+07
## 6 1965-02-16 1.140258e+07
#View(globalemissions)
#Creating a vector and time series out of the global emissions data:
emissions.ts <- matrix(globalemissions$World, nrow = 59, ncol = 1)
## Warning in matrix(globalemissions$World, nrow = 59, ncol = 1): data length [62]
## is not a sub-multiple or multiple of the number of rows [59]
emissions.ts <- as.vector(t(emissions.ts))
emissions.ts
## [1] "9463838.500" "9423934.4240" "9732505.8900" "1.027450e+07"
## [5] "1.082025e+07" "1.140258e+07" "1.198832e+07" "1.241335e+07"
## [9] "13087346.5700" "1.397601e+07" "1.527229e+07" "15913710.780"
## [13] "16680140.790" "17582029.200" "17563629.930" "17641960.090"
## [17] "18580873.040" "19183226.420" "19921141.710" "20459807.600"
## [21] "20396427.120" "19879068.570" "19815965.250" "20034812.120"
## [25] "20636636.760" "21429383.160" "21729229.480" "22365639.630"
## [29] "23289432.350" "23699896.020" "20607832.060" "20799343.280"
## [33] "20741660.86" "20874105.03" "20983787.41" "21582843.03"
## [37] "22039447.41" "22394054.68" "22509779.84" "22615862.92"
## [41] "23311805.66" "23707262.47" "24054448.1" "25143943.9"
## [45] "26319528.2" "27325257.2" "28223539.1" "29312213.5"
## [49] "29574920.8" "29253757.5" "31031799.0" "32004027.9"
## [53] "32444191.6" "33053846.5" "33085192.0" "32943427.8"
## [57] "32940650.1" "33351608.1" "34041046.0"
#Emissions by year are NOT a time series, as they are a list of data points indexed in time order.
#It should be relatively simple to display these as a line chart and analyze for trends.
emissions.ts <- ts(emissions.ts, start = c(1960), end = c(2018), frequency=1)
emissions.ts
## Time Series:
## Start = 1960
## End = 2018
## Frequency = 1
## [1] 9463838.500 9423934.4240 9732505.8900 1.027450e+07 1.082025e+07
## [6] 1.140258e+07 1.198832e+07 1.241335e+07 13087346.5700 1.397601e+07
## [11] 1.527229e+07 15913710.780 16680140.790 17582029.200 17563629.930
## [16] 17641960.090 18580873.040 19183226.420 19921141.710 20459807.600
## [21] 20396427.120 19879068.570 19815965.250 20034812.120 20636636.760
## [26] 21429383.160 21729229.480 22365639.630 23289432.350 23699896.020
## [31] 20607832.060 20799343.280 20741660.86 20874105.03 20983787.41
## [36] 21582843.03 22039447.41 22394054.68 22509779.84 22615862.92
## [41] 23311805.66 23707262.47 24054448.1 25143943.9 26319528.2
## [46] 27325257.2 28223539.1 29312213.5 29574920.8 29253757.5
## [51] 31031799.0 32004027.9 32444191.6 33053846.5 33085192.0
## [56] 32943427.8 32940650.1 33351608.1 34041046.0
plot(ts(as.vector(emissions.ts)), type='o', xlab = 'Time (1960-2018)', ylab='Global CO2 Emissions (kt)')

mape <- function(actual,pred){
mape <- mean(abs((actual - pred)/actual))*100
return (mape)
}
#We can see a dip in emissions in 1990. This correlates to massive structural changes in
#Western and European economic shifts away from manufacturing and towards service as well
#as a shift to lower coal consumption and higher gas consumption.
#Importing passenger count from 1990-2021:
passengers <- read.csv("E:/Cruise/PassengerCount.csv")
head(passengers)
## Year Passengers.x1000.
## 1 1990 3774
## 2 1991 4168
## 3 1992 4385
## 4 1993 4800
## 5 1994 4970
## 6 1995 4721
#View(passengers)
#Transforming year into valid date column:
yr2 <- c(1990, 1991, 1992,
1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 2019, 2020, 2021)
lubridate::ymd(yr2, truncated = 2L)
## [1] "1990-01-01" "1991-01-01" "1992-01-01" "1993-01-01" "1994-01-01"
## [6] "1995-01-01" "1996-01-01" "1997-01-01" "1998-01-01" "1999-01-01"
## [11] "2000-01-01" "2001-01-01" "2002-01-01" "2003-01-01" "2004-01-01"
## [16] "2005-01-01" "2006-01-01" "2007-01-01" "2008-01-01" "2009-01-01"
## [21] "2010-01-01" "2011-01-01" "2012-01-01" "2013-01-01" "2014-01-01"
## [26] "2015-01-01" "2016-01-01" "2017-01-01" "2018-01-01" "2019-01-01"
## [31] "2020-01-01" "2021-01-01"
passengers$ï..Year <- as.Date(as.character(yr2), format = "%Y")
head(passengers)
## Year Passengers.x1000. ï..Year
## 1 1990 3774 1990-02-16
## 2 1991 4168 1991-02-16
## 3 1992 4385 1992-02-16
## 4 1993 4800 1993-02-16
## 5 1994 4970 1994-02-16
## 6 1995 4721 1995-02-16
#View(passengers)
#Renaming columns:
names(passengers)[1] <- 'Year'
head(passengers)
## Year Passengers.x1000. ï..Year
## 1 1990 3774 1990-02-16
## 2 1991 4168 1991-02-16
## 3 1992 4385 1992-02-16
## 4 1993 4800 1993-02-16
## 5 1994 4970 1994-02-16
## 6 1995 4721 1995-02-16
names(globalemissions)[2] <- 'WorldCO'
#Multiplying passenger count by average carbon emissions per cruise:
#Average passengers emit 0.82 tonnes of CO2 equiv per cruise. Converted to
#KT is multiplied by .02.
passengers <- passengers %>% mutate(PassCO = ((Passengers.x1000.*1000)*.082)*.02)
#View(passengers)
#Plotting passenger emissions
plot
## function (x, y, ...)
## UseMethod("plot")
## <bytecode: 0x000001ccfec9c3d8>
## <environment: namespace:base>
plot(passengers$Year, y = passengers$PassCO, type = "o", col = "red", xlab = "Year", ylab = "Passenger Emissions (kt)")

#Trimming year and adding dataframe columns for modeling:
globalemissions1990 <- globalemissions[-c(1:30),]
#View(globalemissions1990)
passco <- cbind(globalemissions1990, passengers)
passco <- passco[,-3]
passco <- passco[-c(30:32),]
#Plotting the two variables:
plot(passco$Year, y = passco$WorldCO, type = "o", col = "red", xlab = "Year", ylab = "World Emissions (kt)")

plot(passco$Year, y = passco$PassCO, type = "o", col = "red", xlab = "Year", ylab = "Passenger Emissions (Kt)")

#Subsetting and modeling for passenger emissions:
glimpse(passco)
## Rows: 29
## Columns: 5
## $ Year <date> 1990-02-16, 1991-02-16, 1992-02-16, 1993-02-16, 199…
## $ WorldCO <chr> "20607832.060", "20799343.280", "20741660.86", "2087…
## $ Passengers.x1000. <int> 3774, 4168, 4385, 4800, 4970, 4721, 5380, 5868, 5995…
## $ ï..Year <date> 1990-02-16, 1991-02-16, 1992-02-16, 1993-02-16, 199…
## $ PassCO <dbl> 6189.36, 6835.52, 7191.40, 7872.00, 8150.80, 7742.44…
n = nrow(passco)
split = sample(c(TRUE, FALSE), n, replace = TRUE, prob=c(0.75, 0.25))
dat_train = passco[split, ]
dat_test = passco[!split, ]
nrow(dat_train); nrow(dat_test)
## [1] 23
## [1] 6
#Creating a vector and time series out of the recent passenger emissions data:
dat_ts <- ts(dat_train[, 4], start = c(1990), end = c(2018), frequency = 1)
#MAPE function:
mape <- function(actual,pred){
mape <- mean(abs((actual - pred)/actual))*100
return (mape)
}
#Creating alternate TS objects for global and passenger emissions:
emissions.ts2 <- matrix(passco$WorldCO, nrow = 29, ncol = 1)
emissions.ts2 <- as.vector(t(emissions.ts2))
emissions.ts2
## [1] "20607832.060" "20799343.280" "20741660.86" "20874105.03" "20983787.41"
## [6] "21582843.03" "22039447.41" "22394054.68" "22509779.84" "22615862.92"
## [11] "23311805.66" "23707262.47" "24054448.1" "25143943.9" "26319528.2"
## [16] "27325257.2" "28223539.1" "29312213.5" "29574920.8" "29253757.5"
## [21] "31031799.0" "32004027.9" "32444191.6" "33053846.5" "33085192.0"
## [26] "32943427.8" "32940650.1" "33351608.1" "34041046.0"
emissions.ts3 <- matrix(passco$PassCO, nrow = 29, ncol = 1)
emissions.ts3 <- as.vector(t(emissions.ts3))
emissions.ts3
## [1] 6189.36 6835.52 7191.40 7872.00 8150.80 7742.44 8823.20 9623.52
## [9] 9831.80 9856.40 10392.68 11830.96 12298.36 14177.80 15622.64 17154.40
## [17] 18335.20 19689.84 23985.00 25877.56 28234.24 30210.44 31778.28 33349.40
## [25] 34400.64 36995.12 39651.92 41291.92 43466.56
#Emissions by year are NOT a time series, as they are a list of data points indexed in time order.
#It should be relatively simple to display these as a line chart and analyze for trends.
emissions.ts2 <- ts(emissions.ts2,frequency=1)
emissions.ts2
## Time Series:
## Start = 1
## End = 29
## Frequency = 1
## [1] 20607832.060 20799343.280 20741660.86 20874105.03 20983787.41
## [6] 21582843.03 22039447.41 22394054.68 22509779.84 22615862.92
## [11] 23311805.66 23707262.47 24054448.1 25143943.9 26319528.2
## [16] 27325257.2 28223539.1 29312213.5 29574920.8 29253757.5
## [21] 31031799.0 32004027.9 32444191.6 33053846.5 33085192.0
## [26] 32943427.8 32940650.1 33351608.1 34041046.0
emissions.ts3 <- ts(emissions.ts3,frequency=1)
emissions.ts3
## Time Series:
## Start = 1
## End = 29
## Frequency = 1
## [1] 6189.36 6835.52 7191.40 7872.00 8150.80 7742.44 8823.20 9623.52
## [9] 9831.80 9856.40 10392.68 11830.96 12298.36 14177.80 15622.64 17154.40
## [17] 18335.20 19689.84 23985.00 25877.56 28234.24 30210.44 31778.28 33349.40
## [25] 34400.64 36995.12 39651.92 41291.92 43466.56
#Plotting the time series for emissions:
x = ts(rnorm(29), frequency = 1, start = c(1990, 1))
y = ts(rnorm(29), frequency = 1, start = c(1990, 1))
ts.plot(emissions.ts2, emissions.ts3, gpars = list(col = c("black", "red")))

#Ab lines for simple ts:
plot(emissions.ts2)
abline(reg1<-lm(emissions.ts2~time(emissions.ts2)), col="red")

#abline(reg=lm(emissions.ts2~time(emissions.ts2)), col="red")
coef(reg1)
## (Intercept) time(emissions.ts2)
## 18330552.3 562494.6
#Slope:
coef(reg1)[2]
## time(emissions.ts2)
## 562494.6
model1 <- lm(emissions.ts2~log(x))
## Warning in log(x): NaNs produced
summary(model1)
##
## Call:
## lm(formula = emissions.ts2 ~ log(x))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6344584 -3486693 -936222 4618708 5922434
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27403416 2150405 12.743 1.35e-06 ***
## log(x) 899477 1575730 0.571 0.584
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4913000 on 8 degrees of freedom
## (19 observations deleted due to missingness)
## Multiple R-squared: 0.03914, Adjusted R-squared: -0.08097
## F-statistic: 0.3258 on 1 and 8 DF, p-value: 0.5838
plot(model1)




plot(emissions.ts3)
abline(reg2<-lm(emissions.ts3~time(emissions.ts3)), col="red")

#abline(reg=lm(emissions.ts3~time(emissions.ts3)), col="red")
coef(reg2)
## (Intercept) time(emissions.ts3)
## -886.0403 1371.3898
#Slope:
#coef(reg)[2]
#Using forecasting for passenger emissions (non-COVID years):
holt_model <- holt(dat_ts)
summary(holt_model)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = dat_ts)
##
## Smoothing parameters:
## alpha = 0.8905
## beta = 1e-04
##
## Initial states:
## l = 8118.7961
## b = 67.628
##
## sigma: 2026.368
##
## AIC AICc BIC
## 544.9594 547.5681 551.7959
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -22.53991 1881.436 743.3498 -2.057968 8.325619 0.949751
## ACF1
## Training set 0.003471886
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2019 9573.011 6976.115 12169.91 5601.4018 13544.62
## 2020 9640.573 6163.021 13118.13 4322.1155 14959.03
## 2021 9708.136 5531.525 13884.75 3320.5606 16095.71
## 2022 9775.698 5001.184 14550.21 2473.7087 17077.69
## 2023 9843.261 4537.689 15148.83 1729.0882 17957.43
## 2024 9910.824 4122.611 15699.04 1058.5156 18763.13
## 2025 9978.386 3744.694 16212.08 444.7759 19512.00
## 2026 10045.949 3396.465 16695.43 -123.5598 20215.46
## 2027 10113.512 3072.662 17154.36 -654.5385 20881.56
## 2028 10181.074 2769.416 17592.73 -1154.0794 21516.23
plot(holt_model)

#Forecasting for this particular dataset is not particularly insightful.
#Exporting csv for dashboarding:
#write.csv(passco,"E:/Cruise/WorldPassEmissions.csv", row.names = FALSE)
#possible exploration for:
#Liquid fuel consumption emissions
#Emissions per person
#Emissions compared to LNG