Resources

Workshop Script Files

Day 1 - R Basics

# Part One - R is a calculator ----
1 + 1

sin(1+2*tan(34))

x <- 2
x*2

y <- 23
x*y

x <- 4

# Part Two - Vectors ----
x <- c(1,4,32,3)
x

x <- 1:40
x

x*2

x*x

y <- 1:21
x*y

# Boolean Logic
x # The numbers from 1 to 40
x < 10 # Still has 40 elements
(x < 10)*1
sum(x < 10)

x >= 10
x <= 10
x == 10 # is exactly equal to
# x = 10 is the same as x <- 10
x != 10

x == 10 | x > 30 # or statement
x > 10 & x < 30 # and statement

# Functions of Vectors
mean(x)
sd(x)
var(x)
range(x)
median(x)

# percentage of values less than 10
mean(x < 10)

# Subsets
x[33] # the 33rd value in x
x[40]

x[x < 10]

x[0] # there is no value here
x[-1] # removes the first element
x[42] # Not available

# weird quirks
x[0]^0
x[42]^0

# Dealing with NA's
y <- c(1,4,2,3,NA)
y

y^2
mean(y) # missing number, can't find the mean
mean(y, na.rm = TRUE)

# checking the help files
?mean
mean(x = y, trim = 0, na.rm = TRUE)
# R matches arguments by order
mean(y, 0, TRUE)

sd(y)
?sd
sd(y, TRUE)

# Alternate method
mean(y)
is.na(y)
!is.na(y)
y[!is.na(y)]
mean(y[!is.na(y)])




# Part Three - Plotting ----
dave <- seq(from = 2, to = 17, by = 0.5)
elon <- 2*dave^2 - 4*dave + rnorm(length(dave), 0, 1)

plot(x = dave, y = elon)

# Making pretty plots
plot(dave, elon,
     col = 2, # change the colour
     pch = 14, # change Plotting CHaracter
     main = "Dave Versus Elon",
     xlab = "Dave", ylab = "Elon",
     xlim = c(0, 20), # x limits from 0 to 20 
     las = 1) # rotate y values

# Plot types
plot(dave, elon, type = 'l') # line chart
plot(dave, elon, type = 'b') # both points and lines
plot(dave, elon, type = 'h') # histogram lines
plot(dave, elon, type = 's') # histogram steps

plot(dave, elon, type = 'n') # nothing
lines(dave, elon, type = 'h')
lines(dave, elon, type = 's')
points(dave, elon, pch = "?")
points(elon ~ dave)

# Formula notation y ~ x
plot(elon ~ dave)

# subsetting plots - must do both!
plot(x = dave[dave < 10],
     y = elon[dave < 10])



# Part Four - Dataframes
ed <- data.frame(Dave=dave, Elon = elon)
ed

# Still subset with square brackets
# now more dimensions!
ed[1,1] # first row, first column
ed[15, 2] # row 15, column 2
ed[14, 3] # same as integer(0)

ed[1, c(1,2)] # first row, both columns
ed[1:4, 1:2] # first four rows, first 2 columns
ed[1:4,] # all of the columns
ed[,1] # all of the rows

plot(ed)

# usually, you want the columns
ed$Elon
ed$Elon[ed$Dave < 10]

# More subset fun
subset(ed, Dave < 10)$Elon
with(ed, Elon[Dave < 10]) # saves me from dollar signs

plot(Elon ~ Dave, data = ed)
plot(Elon ~ Dave, data = subset(ed, Dave < 10))

# Other plots
hist(dave)
plot(density(dave))
boxplot(dave)

Cathy <- c(1,1,2,1,2,2,1,2,2,1,2,2,1,2)
table(Cathy)
barplot(table(Cathy))

# Doesn't work:
barplot(Cathy)

Day 1 - mtcars examples

# Analysis of mtcars data
data(mtcars)
?mtcars

str(mtcars)

# Do transmission type and displacement affect mpg?

# scatterplot of mpg versus displacement
# Clunky way to do it
plot(x=mtcars$disp, y=mtcars$mpg)
plot(mpg ~ disp, data = mtcars)

plot(mpg ~ disp, data = subset(mtcars, am == 0),
     xlim = c(50,500), ylim = c(0,40))
# add manual transmissions as red points
points(mpg ~ disp, data = subset(mtcars, am == 1),
       col = "hotpink")

plot(mpg ~ disp, col = am + 1, data = mtcars,
     pch = 16)
plot(mpg ~ disp, col = factor(am), data=mtcars)

# Create a boxplot of mpg
# Create a boxplot of mpg for automatic only
# Create a boxplot of mpg for manual only
# (and give them labels and stuff)
boxplot(mtcars$mpg)
boxplot(mtcars$mpg[mtcars$am == 1])
boxplot(mpg ~ am, data = mtcars,
        xlab = "Transmissopm", ylab = "mpg",
        names = c("Auto", "Man"))

# Recall the scatterplot
plot(mpg ~ disp, col = factor(am), data=mtcars)

# Fitting a linear model
mpg_both <- lm(mpg ~ disp, data = mtcars)
mpg_both

abline(a = 29.6, b = -0.041)
# A better way
abline(mpg_both, lwd = 2) # thcker line

plot(mpg_both$residuals ~ mpg_both$fitted.values)
abline(h = 0)

# Looking for significance
summary(mpg_both)

mpg_auto <- lm(mpg ~ disp, data = subset(mtcars, am == 0))
summary(mpg_auto)
mpg_man <- lm(mpg ~ disp, data = subset(mtcars, am == 1))
summary(mpg_man)

plot(mpg ~ disp, col = factor(am), data=mtcars)
abline(mpg_auto)
abline(mpg_man, col = 2)

# Multiple regression
# This changes the intercept but not the slope!!!
mpg_am <- lm(mpg ~ disp + am, data = mtcars)
summary(mpg_am)

mpg_full <- lm(mpg ~ disp * factor(am), data = mtcars)
summary(mpg_full)

Day 2 - Forecasting

# Fitting a basic time series model
# Using base packages as well as extra packages

data(co2)
?co2
## starting httpd help server ...
##  done
# for mtcars we did plot(mpg~disp, data=mtcars)
co2
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 1959 315.42 316.31 316.50 317.56 318.13 318.00 316.39 314.65 313.68 313.18
## 1960 316.27 316.81 317.42 318.87 319.87 319.43 318.01 315.74 314.00 313.68
## 1961 316.73 317.54 318.38 319.31 320.42 319.61 318.42 316.63 314.83 315.16
## 1962 317.78 318.40 319.53 320.42 320.85 320.45 319.45 317.25 316.11 315.27
## 1963 318.58 318.92 319.70 321.22 322.08 321.31 319.58 317.61 316.05 315.83
## 1964 319.41 320.07 320.74 321.40 322.06 321.73 320.27 318.54 316.54 316.71
## 1965 319.27 320.28 320.73 321.97 322.00 321.71 321.05 318.71 317.66 317.14
## 1966 320.46 321.43 322.23 323.54 323.91 323.59 322.24 320.20 318.48 317.94
## 1967 322.17 322.34 322.88 324.25 324.83 323.93 322.38 320.76 319.10 319.24
## 1968 322.40 322.99 323.73 324.86 325.40 325.20 323.98 321.95 320.18 320.09
## 1969 323.83 324.26 325.47 326.50 327.21 326.54 325.72 323.50 322.22 321.62
## 1970 324.89 325.82 326.77 327.97 327.91 327.50 326.18 324.53 322.93 322.90
## 1971 326.01 326.51 327.01 327.62 328.76 328.40 327.20 325.27 323.20 323.40
## 1972 326.60 327.47 327.58 329.56 329.90 328.92 327.88 326.16 324.68 325.04
## 1973 328.37 329.40 330.14 331.33 332.31 331.90 330.70 329.15 327.35 327.02
## 1974 329.18 330.55 331.32 332.48 332.92 332.08 331.01 329.23 327.27 327.21
## 1975 330.23 331.25 331.87 333.14 333.80 333.43 331.73 329.90 328.40 328.17
## 1976 331.58 332.39 333.33 334.41 334.71 334.17 332.89 330.77 329.14 328.78
## 1977 332.75 333.24 334.53 335.90 336.57 336.10 334.76 332.59 331.42 330.98
## 1978 334.80 335.22 336.47 337.59 337.84 337.72 336.37 334.51 332.60 332.38
## 1979 336.05 336.59 337.79 338.71 339.30 339.12 337.56 335.92 333.75 333.70
## 1980 337.84 338.19 339.91 340.60 341.29 341.00 339.39 337.43 335.72 335.84
## 1981 339.06 340.30 341.21 342.33 342.74 342.08 340.32 338.26 336.52 336.68
## 1982 340.57 341.44 342.53 343.39 343.96 343.18 341.88 339.65 337.81 337.69
## 1983 341.20 342.35 342.93 344.77 345.58 345.14 343.81 342.21 339.69 339.82
## 1984 343.52 344.33 345.11 346.88 347.25 346.62 345.22 343.11 340.90 341.18
## 1985 344.79 345.82 347.25 348.17 348.74 348.07 346.38 344.51 342.92 342.62
## 1986 346.11 346.78 347.68 349.37 350.03 349.37 347.76 345.73 344.68 343.99
## 1987 347.84 348.29 349.23 350.80 351.66 351.07 349.33 347.92 346.27 346.18
## 1988 350.25 351.54 352.05 353.41 354.04 353.62 352.22 350.27 348.55 348.72
## 1989 352.60 352.92 353.53 355.26 355.52 354.97 353.75 351.52 349.64 349.83
## 1990 353.50 354.55 355.23 356.04 357.00 356.07 354.67 352.76 350.82 351.04
## 1991 354.59 355.63 357.03 358.48 359.22 358.12 356.06 353.92 352.05 352.11
## 1992 355.88 356.63 357.72 359.07 359.58 359.17 356.94 354.92 352.94 353.23
## 1993 356.63 357.10 358.32 359.41 360.23 359.55 357.53 355.48 353.67 353.95
## 1994 358.34 358.89 359.95 361.25 361.67 360.94 359.55 357.49 355.84 356.00
## 1995 359.98 361.03 361.66 363.48 363.82 363.30 361.94 359.50 358.11 357.80
## 1996 362.09 363.29 364.06 364.76 365.45 365.01 363.70 361.54 359.51 359.65
## 1997 363.23 364.06 364.61 366.40 366.84 365.68 364.52 362.57 360.24 360.83
##         Nov    Dec
## 1959 314.66 315.43
## 1960 314.84 316.03
## 1961 315.94 316.85
## 1962 316.53 317.53
## 1963 316.91 318.20
## 1964 317.53 318.55
## 1965 318.70 319.25
## 1966 319.63 320.87
## 1967 320.56 321.80
## 1968 321.16 322.74
## 1969 322.69 323.95
## 1970 323.85 324.96
## 1971 324.63 325.85
## 1972 326.34 327.39
## 1973 327.99 328.48
## 1974 328.29 329.41
## 1975 329.32 330.59
## 1976 330.14 331.52
## 1977 332.24 333.68
## 1978 333.75 334.78
## 1979 335.12 336.56
## 1980 336.93 338.04
## 1981 338.19 339.44
## 1982 339.09 340.32
## 1983 340.98 342.82
## 1984 342.80 344.04
## 1985 344.06 345.38
## 1986 345.48 346.72
## 1987 347.64 348.78
## 1988 349.91 351.18
## 1989 351.14 352.37
## 1990 352.69 354.07
## 1991 353.64 354.89
## 1992 354.09 355.33
## 1993 355.30 356.78
## 1994 357.59 359.05
## 1995 359.61 360.74
## 1996 360.80 362.38
## 1997 362.49 364.34
plot(co2)

# this is because:
class(co2)
## [1] "ts"
?ts

# Some other exploratory things
str(co2)
##  Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...
max(co2)
## [1] 366.84
min(co2)
## [1] 313.18
which.max(co2) # the highest value is entry 461
## [1] 461
# time series specific functions
frequency(co2)
## [1] 12
time(co2)
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 1959 1959.000 1959.083 1959.167 1959.250 1959.333 1959.417 1959.500
## 1960 1960.000 1960.083 1960.167 1960.250 1960.333 1960.417 1960.500
## 1961 1961.000 1961.083 1961.167 1961.250 1961.333 1961.417 1961.500
## 1962 1962.000 1962.083 1962.167 1962.250 1962.333 1962.417 1962.500
## 1963 1963.000 1963.083 1963.167 1963.250 1963.333 1963.417 1963.500
## 1964 1964.000 1964.083 1964.167 1964.250 1964.333 1964.417 1964.500
## 1965 1965.000 1965.083 1965.167 1965.250 1965.333 1965.417 1965.500
## 1966 1966.000 1966.083 1966.167 1966.250 1966.333 1966.417 1966.500
## 1967 1967.000 1967.083 1967.167 1967.250 1967.333 1967.417 1967.500
## 1968 1968.000 1968.083 1968.167 1968.250 1968.333 1968.417 1968.500
## 1969 1969.000 1969.083 1969.167 1969.250 1969.333 1969.417 1969.500
## 1970 1970.000 1970.083 1970.167 1970.250 1970.333 1970.417 1970.500
## 1971 1971.000 1971.083 1971.167 1971.250 1971.333 1971.417 1971.500
## 1972 1972.000 1972.083 1972.167 1972.250 1972.333 1972.417 1972.500
## 1973 1973.000 1973.083 1973.167 1973.250 1973.333 1973.417 1973.500
## 1974 1974.000 1974.083 1974.167 1974.250 1974.333 1974.417 1974.500
## 1975 1975.000 1975.083 1975.167 1975.250 1975.333 1975.417 1975.500
## 1976 1976.000 1976.083 1976.167 1976.250 1976.333 1976.417 1976.500
## 1977 1977.000 1977.083 1977.167 1977.250 1977.333 1977.417 1977.500
## 1978 1978.000 1978.083 1978.167 1978.250 1978.333 1978.417 1978.500
## 1979 1979.000 1979.083 1979.167 1979.250 1979.333 1979.417 1979.500
## 1980 1980.000 1980.083 1980.167 1980.250 1980.333 1980.417 1980.500
## 1981 1981.000 1981.083 1981.167 1981.250 1981.333 1981.417 1981.500
## 1982 1982.000 1982.083 1982.167 1982.250 1982.333 1982.417 1982.500
## 1983 1983.000 1983.083 1983.167 1983.250 1983.333 1983.417 1983.500
## 1984 1984.000 1984.083 1984.167 1984.250 1984.333 1984.417 1984.500
## 1985 1985.000 1985.083 1985.167 1985.250 1985.333 1985.417 1985.500
## 1986 1986.000 1986.083 1986.167 1986.250 1986.333 1986.417 1986.500
## 1987 1987.000 1987.083 1987.167 1987.250 1987.333 1987.417 1987.500
## 1988 1988.000 1988.083 1988.167 1988.250 1988.333 1988.417 1988.500
## 1989 1989.000 1989.083 1989.167 1989.250 1989.333 1989.417 1989.500
## 1990 1990.000 1990.083 1990.167 1990.250 1990.333 1990.417 1990.500
## 1991 1991.000 1991.083 1991.167 1991.250 1991.333 1991.417 1991.500
## 1992 1992.000 1992.083 1992.167 1992.250 1992.333 1992.417 1992.500
## 1993 1993.000 1993.083 1993.167 1993.250 1993.333 1993.417 1993.500
## 1994 1994.000 1994.083 1994.167 1994.250 1994.333 1994.417 1994.500
## 1995 1995.000 1995.083 1995.167 1995.250 1995.333 1995.417 1995.500
## 1996 1996.000 1996.083 1996.167 1996.250 1996.333 1996.417 1996.500
## 1997 1997.000 1997.083 1997.167 1997.250 1997.333 1997.417 1997.500
##           Aug      Sep      Oct      Nov      Dec
## 1959 1959.583 1959.667 1959.750 1959.833 1959.917
## 1960 1960.583 1960.667 1960.750 1960.833 1960.917
## 1961 1961.583 1961.667 1961.750 1961.833 1961.917
## 1962 1962.583 1962.667 1962.750 1962.833 1962.917
## 1963 1963.583 1963.667 1963.750 1963.833 1963.917
## 1964 1964.583 1964.667 1964.750 1964.833 1964.917
## 1965 1965.583 1965.667 1965.750 1965.833 1965.917
## 1966 1966.583 1966.667 1966.750 1966.833 1966.917
## 1967 1967.583 1967.667 1967.750 1967.833 1967.917
## 1968 1968.583 1968.667 1968.750 1968.833 1968.917
## 1969 1969.583 1969.667 1969.750 1969.833 1969.917
## 1970 1970.583 1970.667 1970.750 1970.833 1970.917
## 1971 1971.583 1971.667 1971.750 1971.833 1971.917
## 1972 1972.583 1972.667 1972.750 1972.833 1972.917
## 1973 1973.583 1973.667 1973.750 1973.833 1973.917
## 1974 1974.583 1974.667 1974.750 1974.833 1974.917
## 1975 1975.583 1975.667 1975.750 1975.833 1975.917
## 1976 1976.583 1976.667 1976.750 1976.833 1976.917
## 1977 1977.583 1977.667 1977.750 1977.833 1977.917
## 1978 1978.583 1978.667 1978.750 1978.833 1978.917
## 1979 1979.583 1979.667 1979.750 1979.833 1979.917
## 1980 1980.583 1980.667 1980.750 1980.833 1980.917
## 1981 1981.583 1981.667 1981.750 1981.833 1981.917
## 1982 1982.583 1982.667 1982.750 1982.833 1982.917
## 1983 1983.583 1983.667 1983.750 1983.833 1983.917
## 1984 1984.583 1984.667 1984.750 1984.833 1984.917
## 1985 1985.583 1985.667 1985.750 1985.833 1985.917
## 1986 1986.583 1986.667 1986.750 1986.833 1986.917
## 1987 1987.583 1987.667 1987.750 1987.833 1987.917
## 1988 1988.583 1988.667 1988.750 1988.833 1988.917
## 1989 1989.583 1989.667 1989.750 1989.833 1989.917
## 1990 1990.583 1990.667 1990.750 1990.833 1990.917
## 1991 1991.583 1991.667 1991.750 1991.833 1991.917
## 1992 1992.583 1992.667 1992.750 1992.833 1992.917
## 1993 1993.583 1993.667 1993.750 1993.833 1993.917
## 1994 1994.583 1994.667 1994.750 1994.833 1994.917
## 1995 1995.583 1995.667 1995.750 1995.833 1995.917
## 1996 1996.583 1996.667 1996.750 1996.833 1996.917
## 1997 1997.583 1997.667 1997.750 1997.833 1997.917
abline(lm(co2 ~ time(co2)))

cycle(co2)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1959   1   2   3   4   5   6   7   8   9  10  11  12
## 1960   1   2   3   4   5   6   7   8   9  10  11  12
## 1961   1   2   3   4   5   6   7   8   9  10  11  12
## 1962   1   2   3   4   5   6   7   8   9  10  11  12
## 1963   1   2   3   4   5   6   7   8   9  10  11  12
## 1964   1   2   3   4   5   6   7   8   9  10  11  12
## 1965   1   2   3   4   5   6   7   8   9  10  11  12
## 1966   1   2   3   4   5   6   7   8   9  10  11  12
## 1967   1   2   3   4   5   6   7   8   9  10  11  12
## 1968   1   2   3   4   5   6   7   8   9  10  11  12
## 1969   1   2   3   4   5   6   7   8   9  10  11  12
## 1970   1   2   3   4   5   6   7   8   9  10  11  12
## 1971   1   2   3   4   5   6   7   8   9  10  11  12
## 1972   1   2   3   4   5   6   7   8   9  10  11  12
## 1973   1   2   3   4   5   6   7   8   9  10  11  12
## 1974   1   2   3   4   5   6   7   8   9  10  11  12
## 1975   1   2   3   4   5   6   7   8   9  10  11  12
## 1976   1   2   3   4   5   6   7   8   9  10  11  12
## 1977   1   2   3   4   5   6   7   8   9  10  11  12
## 1978   1   2   3   4   5   6   7   8   9  10  11  12
## 1979   1   2   3   4   5   6   7   8   9  10  11  12
## 1980   1   2   3   4   5   6   7   8   9  10  11  12
## 1981   1   2   3   4   5   6   7   8   9  10  11  12
## 1982   1   2   3   4   5   6   7   8   9  10  11  12
## 1983   1   2   3   4   5   6   7   8   9  10  11  12
## 1984   1   2   3   4   5   6   7   8   9  10  11  12
## 1985   1   2   3   4   5   6   7   8   9  10  11  12
## 1986   1   2   3   4   5   6   7   8   9  10  11  12
## 1987   1   2   3   4   5   6   7   8   9  10  11  12
## 1988   1   2   3   4   5   6   7   8   9  10  11  12
## 1989   1   2   3   4   5   6   7   8   9  10  11  12
## 1990   1   2   3   4   5   6   7   8   9  10  11  12
## 1991   1   2   3   4   5   6   7   8   9  10  11  12
## 1992   1   2   3   4   5   6   7   8   9  10  11  12
## 1993   1   2   3   4   5   6   7   8   9  10  11  12
## 1994   1   2   3   4   5   6   7   8   9  10  11  12
## 1995   1   2   3   4   5   6   7   8   9  10  11  12
## 1996   1   2   3   4   5   6   7   8   9  10  11  12
## 1997   1   2   3   4   5   6   7   8   9  10  11  12
# Average yearly co2
plot(aggregate(co2, FUN = mean))
lines(co2)

# a little bit nicer
plot(co2, col='grey')
lines(aggregate(co2, FUN = mean))

# months over the years
monthplot(co2) # describe this plot in your own words

#install.packages("forecast")
library(forecast) # load in the package
## Warning: package 'forecast' was built under R version 3.3.3

seasonplot(co2)

# I want this to be fancier
?rainbow
rainbow(6)
## [1] "#FF0000FF" "#FFFF00FF" "#00FF00FF" "#00FFFFFF" "#0000FFFF" "#FF00FFFF"
plot(1:60, col=rainbow(60))

nyears <- length(co2)/12
seasonplot(co2, col=rainbow(nyears))

# create my own color palette - black to red
blackred <- colorRampPalette(c("black", "red"))
# I still have to specify how many colours I want
seasonplot(co2, col = blackred(nyears))

# Modelling ----
# functions: arima, ARIMA, auto.arima

pacf(co2)

acf(co2)

Pacf(co2) # this one with multivariate time series

Acf(co2)

# automatic modelling
co2_arima <- auto.arima(co2)
co2_arima
## Series: co2 
## ARIMA(1,1,1)(1,1,2)[12]                    
## 
## Coefficients:
##          ar1      ma1     sar1     sma1     sma2
##       0.2569  -0.5847  -0.5489  -0.2620  -0.5123
## s.e.  0.1406   0.1204   0.5879   0.5701   0.4819
## 
## sigma^2 estimated as 0.08576:  log likelihood=-84.39
## AIC=180.78   AICc=180.97   BIC=205.5
class(co2_arima)
## [1] "ARIMA" "Arima"
plot(co2_arima)

plot(forecast(co2_arima, h = 4*12)) # 4 years

plot(forecast(co2_arima, h = 4*12, 
              level = 0.95), # 95% prediction interval
     xlim=c(1995, 2002), ylim=c(354, 375))

# Seasonality trend via Loess smoother
co2_stl <- stl(co2, s.window = "period")
plot(co2_stl)

# note the grey bars!!!

class(co2_stl)
## [1] "stl"
str(co2_stl)
## List of 8
##  $ time.series: Time-Series [1:468, 1:3] from 1959 to 1998: -0.061 0.595 1.329 2.469 2.957 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "seasonal" "trend" "remainder"
##  $ weights    : num [1:468] 1 1 1 1 1 1 1 1 1 1 ...
##  $ call       : language stl(x = co2, s.window = "period")
##  $ win        : Named num [1:3] 4681 19 13
##   ..- attr(*, "names")= chr [1:3] "s" "t" "l"
##  $ deg        : Named int [1:3] 0 1 1
##   ..- attr(*, "names")= chr [1:3] "s" "t" "l"
##  $ jump       : Named num [1:3] 469 2 2
##   ..- attr(*, "names")= chr [1:3] "s" "t" "l"
##  $ inner      : int 2
##  $ outer      : int 0
##  - attr(*, "class")= chr "stl"
head(co2_stl$time.series) # first few values
##             seasonal    trend   remainder
## Jan 1959 -0.06100103 315.1954  0.28564410
## Feb 1959  0.59463870 315.3023  0.41305456
## Mar 1959  1.32899651 315.4093 -0.23825306
## Apr 1959  2.46904706 315.5147 -0.42371128
## May 1959  2.95704630 315.6201 -0.44711820
## Jun 1959  2.31835208 315.7194 -0.03777682
plot(co2_stl$time.series)

# examining the remainder
hist(co2_stl$time.series[, "remainder"])

qqnorm(co2_stl$time.series[, 'remainder'])
qqline(co2_stl$time.series[, 'remainder'])

plot(forecast(co2_stl, h=4*12, level = 0.95))

arima_fore <- forecast(co2_arima, h = 19*12, 
                       level = 0.95)
stl_fore <- forecast(co2_stl, h = 19*12, 
                       level = 0.95)
class(arima_fore)
## [1] "forecast"
str(arima_fore)
## List of 10
##  $ method   : chr "ARIMA(1,1,1)(1,1,2)[12]"
##  $ model    :List of 18
##   ..$ coef     : Named num [1:5] 0.257 -0.585 -0.549 -0.262 -0.512
##   .. ..- attr(*, "names")= chr [1:5] "ar1" "ma1" "sar1" "sma1" ...
##   ..$ sigma2   : num 0.0858
##   ..$ var.coef : num [1:5, 1:5] 0.01976 -0.01596 -0.00217 0.00213 -0.00264 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:5] "ar1" "ma1" "sar1" "sma1" ...
##   .. .. ..$ : chr [1:5] "ar1" "ma1" "sar1" "sma1" ...
##   ..$ mask     : logi [1:5] TRUE TRUE TRUE TRUE TRUE
##   ..$ loglik   : num -84.4
##   ..$ aic      : num 181
##   ..$ arma     : int [1:7] 1 1 1 2 12 1 1
##   ..$ residuals: Time-Series [1:468] from 1959 to 1998: 0.1821 0.0821 0.0539 0.0411 0.0333 ...
##   ..$ call     : language auto.arima(y = co2, x = structure(list(x = structure(c(315.42, 316.31,  316.5, 317.56, 318.13, 318, 316.39, 314.65, 313.68, 313.18, 314.66,  ...
##   ..$ series   : chr "co2"
##   ..$ code     : int 0
##   ..$ n.cond   : int 0
##   ..$ nobs     : int 455
##   ..$ model    :List of 10
##   .. ..$ phi  : num [1:13] 0.257 0 0 0 0 ...
##   .. ..$ theta: num [1:25] -0.585 0 0 0 0 ...
##   .. ..$ Delta: num [1:13] 1 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ Z    : num [1:39] 1 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ a    : num [1:39] 0.27 -0.1075 -0.0477 0.2983 -0.4679 ...
##   .. ..$ P    : num [1:39, 1:39] 4.22e-15 -1.89e-15 3.96e-17 7.59e-20 7.23e-21 ...
##   .. ..$ T    : num [1:39, 1:39] 0.257 0 0 0 0 ...
##   .. ..$ V    : num [1:39, 1:39] 1 -0.585 0 0 0 ...
##   .. ..$ h    : num 0
##   .. ..$ Pn   : num [1:39, 1:39] 1.00 -5.85e-01 5.88e-08 -2.09e-13 -3.51e-13 ...
##   ..$ bic      : num 205
##   ..$ aicc     : num 181
##   ..$ x        : Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...
##   ..$ fitted   : Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...
##   ..- attr(*, "class")= chr [1:2] "ARIMA" "Arima"
##  $ level    : num 95
##  $ mean     : Time-Series [1:228] from 1998 to 2017: 365 366 367 368 369 ...
##  $ lower    : Time-Series [1:228, 1] from 1998 to 2017: 365 365 366 367 368 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "95%"
##  $ upper    : Time-Series [1:228, 1] from 1998 to 2017: 366 367 368 369 370 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "95%"
##  $ x        : Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...
##  $ series   : chr "co2"
##  $ fitted   : Time-Series [1:468] from 1959 to 1998: 315 316 316 318 318 ...
##  $ residuals: Time-Series [1:468] from 1959 to 1998: 0.1821 0.0821 0.0539 0.0411 0.0333 ...
##  - attr(*, "class")= chr "forecast"
plot(arima_fore$mean)

plot(co2, xlim=c(1996, 2017), ylim=c(359, 400))
lines(arima_fore$mean, col="purple")
lines(stl_fore$mean, col="darkgreen")
# add a legend 
legend('topleft', #x=1998, y = 400, 
       legend = c("Original Data", "ARIMA Forecast",
                  "STL Forecast"),
       col=c('black', 'purple', 'darkgreen'), 
       lty = 1) # line type = 1

?co2

newco2 <- read.csv("~/co2_2016.csv", header=FALSE)

head(newco2)
##        V1 V2       V3     V4     V5     V6 V7
## 1 1997  1 1997.042 363.09 363.09 362.88 31
## 2    1997  2 1997.125 364.03 364.03 363.22 27
## 3    1997  3 1997.208 364.51 364.51 362.88 31
## 4    1997  4 1997.292 366.35 366.35 363.68 21
## 5    1997  5 1997.375 366.64 366.64 363.74 29
## 6    1997  6 1997.458 365.59 365.59 363.41 27
length(newco2)
## [1] 7
newmatrix <- matrix(data = newco2, ncol = 7,
                    byrow = TRUE)
head(newco2)
##        V1 V2       V3     V4     V5     V6 V7
## 1 1997  1 1997.042 363.09 363.09 362.88 31
## 2    1997  2 1997.125 364.03 364.03 363.22 27
## 3    1997  3 1997.208 364.51 364.51 362.88 31
## 4    1997  4 1997.292 366.35 366.35 363.68 21
## 5    1997  5 1997.375 366.64 366.64 363.74 29
## 6    1997  6 1997.458 365.59 365.59 363.41 27
newts <- ts(newco2[,5], start = c(1997, 1),
          frequency = 12)

plot(newts) # this is the real data

plot(co2, xlim=c(1996, 2017), ylim=c(359, 400))
lines(arima_fore$mean, col="purple")
lines(stl_fore$mean, col="darkgreen")
lines(newts, col = 'orange')
# add a legend 
legend('topleft', #x=1998, y = 400, 
       legend = c("Original Data", "ARIMA Forecast",
                  "STL Forecast", "Actual"),
       col=c('black', 'purple', 'darkgreen', "orange"), 
       lty = 1) # line type = 1