library(ggplot2)
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibbledata 0.4.1
## ✔ dplyr 1.1.4 ✔ feasts 0.3.1
## ✔ tidyr 1.3.0 ✔ fable 0.3.3
## ✔ lubridate 1.9.3 ✔ fabletools 0.4.0
## ✔ tsibble 1.1.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ forecast 8.21.1 ✔ expsmooth 2.3
## ✔ fma 2.5
##
##
## Attaching package: 'fpp2'
## The following object is masked from 'package:fpp3':
##
## insurance
library(forecast)
head(elecdaily)
## Time Series:
## Start = c(1, 4)
## End = c(2, 2)
## Frequency = 7
##            Demand WorkDay Temperature
## 1.428571 174.8963       0        26.0
## 1.571429 188.5909       1        23.0
## 1.714286 188.9169       1        22.2
## 1.857143 173.8142       0        20.3
## 2.000000 169.5152       0        26.1
## 2.142857 195.7288       1        19.6
daily20 <- head(elecdaily,20)
autoplot(daily20)

# Fit a regression model
lm_model <- lm(Demand ~ Temperature, data = daily20)
# Print summary of the regression model
summary(lm_model)
##
## Call:
## lm(formula = Demand ~ Temperature, data = daily20)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -46.060  -7.117  -1.437  17.484  27.102
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  39.2117    17.9915   2.179   0.0428 *
## Temperature   6.7572     0.6114  11.052 1.88e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22 on 18 degrees of freedom
## Multiple R-squared: 0.8716, Adjusted R-squared: 0.8644
## F-statistic: 122.1 on 1 and 18 DF, p-value: 1.876e-09
# a) Why is there a positive relationship?
# There is a positive relationship between temperature and electricity demand: these 20 days fall
# in the Australian summer, so higher maximum temperatures drive more air-conditioning use.
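# As a quick numeric check of that relationship (a sketch added here, not part of the original
# answer), the sample correlation between the two series can be computed directly:
cor(daily20[, "Demand"], daily20[, "Temperature"])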
# Plotting the residuals
plot(lm_model, which = 1) # Residuals vs Fitted

# Checking for outliers or influential observations
plot(lm_model, which = 4) # Cook's distance plot

# b) Produce a residual plot. Is the model adequate? Are there any outliers or influential
# observations? There are several apparent outliers in the residual plot, and the Cook's distance
# plot suggests these points should be investigated further as potentially influential observations.
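# To make that visual check concrete (a sketch; the 4/n cutoff is only a common rule of thumb and
# not part of the original answer), the observations with a large Cook's distance can be listed:
cooks_d <- cooks.distance(lm_model)
which(cooks_d > 4 / length(cooks_d))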
# c) Use the model to forecast the electricity demand you would expect for the next day if the
# maximum temperature was 15°C, and compare it with the forecast if the maximum temperature was
# 35°C. Do you believe these forecasts?
# Given maximum temperature values
temp_15 <- 15
temp_35 <- 35
# Create data frames with new temperature values
new_data_15 <- data.frame(Temperature = temp_15)
new_data_35 <- data.frame(Temperature = temp_35)
# Forecasting electricity demand for the next day with 15°C maximum temperature
forecast_demand_15 <- forecast(lm_model, newdata = new_data_15)
# Forecasting electricity demand for the next day with 35°C maximum temperature
forecast_demand_35 <- forecast(lm_model, newdata = new_data_35)
# Print the forecasts
print(paste("Forecasted demand for the next day (Temperature = 15°C):", round(forecast_demand_15$mean, 2), "MW"))
## [1] "Forecasted demand for the next day (Temperature = 15°C): 140.57 MW"
print(paste("Forecasted demand for the next day (Temperature = 35°C):", round(forecast_demand_35$mean, 2), "MW"))
## [1] "Forecasted demand for the next day (Temperature = 35°C): 275.71 MW"
tr <- data.frame(Temperature=c(15,35))
forecast(lm_model, tr)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1 140.5701 108.6810 172.4591 90.21166 190.9285
## 2 275.7146 245.2278 306.2014 227.57056 323.8586
# d) Give prediction intervals for your forecasts. The following R code will get you started:
autoplot(daily20, facets=TRUE)

daily20 %>%
  as.data.frame() %>%
  ggplot(aes(x = Temperature, y = Demand)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

fit <- tslm(Demand ~ Temperature, data=daily20)
checkresiduals(fit)

##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
forecast(fit, newdata=data.frame(Temperature=c(15,35)))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 140.5701 108.6810 172.4591 90.21166 190.9285
## 4.428571 275.7146 245.2278 306.2014 227.57056 323.8586
# The tslm forecasts and prediction intervals match those from the lm model above. The 35°C
# forecast lies within the range of temperatures observed in daily20 and is believable; the 15°C
# forecast extrapolates below the observed temperature range, so it is less trustworthy.
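# As a cross-check (a sketch using base R rather than the forecast package), the same 95%
# prediction intervals can be obtained directly from predict() on the lm fit:
predict(lm_model, newdata = data.frame(Temperature = c(15, 35)),
        interval = "prediction", level = 0.95)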
# e) Plot Demand vs Temperature for all of the available data in elecdaily. What does this say
# about your model? Over the full year the relationship is U-shaped: demand also rises at low
# temperatures because of heating, so a straight line fitted to 20 summer days does not generalise
# to the whole series.
plot(Demand ~ Temperature, data = elecdaily)
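# One simple way to capture that U-shape (a sketch, not required by the original exercise) is to
# add a quadratic temperature term when fitting to the full data set:
fit_all <- lm(Demand ~ Temperature + I(Temperature^2), data = as.data.frame(elecdaily))
summary(fit_all)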

# Data set mens400 contains the winning times (in seconds) for the men's 400 meters final in each
# Olympic Games from 1896 to 2016.
# a) Plot the winning time against the year. Describe the main features of the plot.
autoplot(mens400)

# There are missing values (gaps in the plot for the Games not held in 1916, 1940 and 1944), and
# the winning times show a clear negative relationship with year, i.e. a downward trend.
# b) Fit a regression line to the data. Obviously the winning times have been decreasing, but at
# what average rate per year? The slope of the fitted line below is about -0.065, so winning times
# have fallen by roughly 0.065 seconds per year on average.
timemens400 <- time(mens400)
f <- tslm(mens400 ~ timemens400, data = mens400)
f
##
## Call:
## tslm(formula = mens400 ~ timemens400, data = mens400)
##
## Coefficients:
## (Intercept) timemens400
## 172.48148 -0.06457
autoplot(mens400) + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (`stat_smooth()`).

summary(f)
##
## Call:
## tslm(formula = mens400 ~ timemens400, data = mens400)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -1.6002 -0.5747 -0.2858  0.5751  4.1505
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) 172.481477  11.487522   15.02 2.52e-14 ***
## timemens400  -0.064574   0.005865  -11.01 2.75e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.136 on 26 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8234, Adjusted R-squared: 0.8166
## F-statistic: 121.2 on 1 and 26 DF, p-value: 2.752e-11
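# The average rate of decrease per year is just the slope coefficient; a 95% confidence interval
# for it (a small sketch, not part of the original answer) can be extracted with:
coef(f)["timemens400"]
confint(f, "timemens400")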
#c) Plot the residuals against the year. What does this indicate about the suitability of the fitted line?
a <- lm(mens400 ~ timemens400, data = mens400)
checkresiduals(a)

##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: Residuals
## LM test = 3.6082, df = 6, p-value = 0.7295
plot(a)




# The Breusch-Godfrey test (p-value = 0.73) shows no significant autocorrelation in the residuals.
# The residuals are skewed, with a few large positive values from the earliest Games, but overall
# the fitted line looks reasonable.
# 3) Type easter(ausbeer) and interpret what you see.
easter(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00
# easter(ausbeer) returns a quarterly dummy variable for 1956 Q1 to 2010 Q2 giving the proportion
# of the Easter holiday that falls in each quarter. The values lie between 0 and 1 and sum to 1
# within each year: Easter usually falls entirely in Q2, sometimes entirely in Q1, and occasionally
# is split across the two quarters (e.g. 0.67 in 1956 Q1 and 0.33 in 1956 Q2).
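# To show how this dummy is typically used (a sketch assuming the usual fpp2 workflow, not part of
# the original question), it can be added as an extra regressor in a tslm() model for ausbeer:
easter_dummy <- easter(ausbeer)
fit_beer <- tslm(ausbeer ~ trend + season + easter_dummy)
coef(fit_beer)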
# Data set huron gives the water level of Lake Huron in feet from 1875 to 1972. Plot the data and
# comment on its features. Fit a linear regression and compare this to a piecewise linear trend
# model with a knot at 1915. Generate forecasts from these two models for the period up to 1980
# and comment on these.
autoplot(huron)

# The water level falls fairly steadily until about 1915 and then fluctuates around a roughly
# constant mean, which motivates the piecewise trend with a knot at 1915 fitted below.
lr <- tslm(huron ~ trend)
lr
##
## Call:
## tslm(formula = huron ~ trend)
##
## Coefficients:
## (Intercept) trend
## 10.2020 -0.0242
# Time variable and knot at 1915 for the piecewise linear trend
time_huron <- time(huron)
# Piecewise term: zero up to 1915, then (year - 1915) afterwards
section_huron <- pmax(0, time_huron - 1915)
# Linear trend model and its forecasts
a <- tslm(huron ~ trend)
forecast(a, h=10)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1973 7.806127 6.317648 9.294605 5.516501 10.095752
## 1974 7.781926 6.292536 9.271315 5.490899 10.072952
## 1975 7.757724 6.267406 9.248043 5.465269 10.050179
## 1976 7.733523 6.242259 9.224788 5.439613 10.027434
## 1977 7.709322 6.217094 9.201550 5.413929 10.004715
## 1978 7.685121 6.191912 9.178331 5.388219 9.982024
## 1979 7.660920 6.166712 9.155128 5.362481 9.959359
## 1980 7.636719 6.141494 9.131943 5.336717 9.936721
## 1981 7.612518 6.116259 9.108776 5.310925 9.914110
## 1982 7.588317 6.091007 9.085626 5.285107 9.891526
# Fit the piecewise linear trend (linear time plus the knot term) and forecast up to 1980 by
# extending both regressors
pw <- tslm(huron ~ time_huron + section_huron)
new_time <- seq(1973, 1980)
newdata <- data.frame(time_huron = new_time,
                      section_huron = pmax(0, new_time - 1915))
forecast(pw, newdata = newdata)
# The linear trend model keeps projecting a slow decline in the level, whereas the piecewise model
# continues the roughly flat behaviour seen after 1915, which seems the more plausible forecast.
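# A visual comparison of the two sets of forecasts (a sketch using autolayer() from the forecast
# package, which is loaded with fpp2):
autoplot(huron) +
  autolayer(forecast(a, h = 8), series = "Linear trend", PI = FALSE) +
  autolayer(forecast(pw, newdata = newdata), series = "Piecewise trend", PI = FALSE)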