Q1: downloading the data and analyzing prices
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.2.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.2.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.2.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.2.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
getSymbols(c("^GSPC", "PEP", "WY", "RSG", "CPRT", "PNR", "ENPH"), from="2000-01-01", periodicity="monthly")
## [1] "GSPC" "PEP" "WY" "RSG" "CPRT" "PNR" "ENPH"
mydata <- merge(Ad(GSPC), Ad(PEP), Ad(WY), Ad(RSG), Ad(CPRT), Ad(PNR))
myret <- 100 * diff(log(mydata))
names(myret) <- c( "GSPC", "PEP", "WY", "RSG", "CPRT", "PNR")
head(myret)
## GSPC PEP WY RSG CPRT PNR
## 2000-01-01 NA NA NA NA NA NA
## 2000-02-01 -2.031300 -6.039628 -10.477671 -8.894742 21.279644 3.080632
## 2000-03-01 9.232375 8.213596 10.511749 1.729141 -18.380802 7.527608
## 2000-04-01 -3.127991 5.508383 -6.570955 22.428564 -1.438917 3.153774
## 2000-05-01 -2.215875 10.348512 -7.284699 18.687739 2.857294 2.998600
## 2000-06-01 2.365163 8.816271 -13.584258 -3.664819 -10.379589 -9.997828
tail(myret)
## GSPC PEP WY RSG CPRT PNR
## 2023-06-01 6.2718865 1.561633 15.634141 7.833032 4.049774 15.237198
## 2023-07-01 3.0663951 1.898259 2.293137 -1.026628 -3.140570 7.311239
## 2023-08-01 -1.7875204 -5.223150 -3.922078 -4.729235 1.426595 1.419819
## 2023-09-01 -4.9946168 -4.182475 -6.018734 -1.130365 -3.958664 -8.166898
## 2023-10-01 0.4757777 -5.551426 -2.777957 1.680289 3.958664 -2.264838
## 2023-10-06 0.0000000 0.000000 0.000000 0.000000 0.000000 0.000000
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.3
## corrplot 0.92 loaded
M<-cor(myret, use= 'complete.obs')
corrplot(M, method = "number")
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
myret.df<-data.frame(Date=time(myret), coredata(myret))
longdf<- myret.df %>% tidyr::gather(stocks, returns,-Date,-GSPC)
ggplot(longdf, aes(GSPC, returns))+geom_point()+geom_smooth(method="lm",se=TRUE)+facet_wrap(vars(stocks))+theme_grey()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values (`geom_point()`).
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.2.3
fit<-list()
fit$PEP<-lm(PEP~GSPC, myret)
fit$WY<-lm(WY~GSPC, myret)
fit$RSG<-lm(RSG~GSPC, myret)
fit$CPRT<-lm(CPRT~GSPC, myret)
fit$PNR<-lm(PNR~GSPC, myret)
names<-c("PEP","WY","RSG","CPRT","PNR")
reg.df<-data.frame(stock=names, beta= sapply(fit, function(x) beta = coeftest(x)[2]),
R2=sapply(fit, function(x) R2 = summary(x)$r.squared))
print(reg.df)
## stock beta R2
## PEP PEP 0.4919758 0.2299456
## WY WY 1.1852336 0.2147659
## RSG RSG 0.6077701 0.1857734
## CPRT CPRT 0.9686086 0.2251588
## PNR PNR 1.0863318 0.3638616
knitr::kable(as.data.frame(reg.df), digits=3)
| stock | beta | R2 | |
|---|---|---|---|
| PEP | PEP | 0.492 | 0.230 |
| WY | WY | 1.185 | 0.215 |
| RSG | RSG | 0.608 | 0.186 |
| CPRT | CPRT | 0.969 | 0.225 |
| PNR | PNR | 1.086 | 0.364 |
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p1<- ggplot(reg.df, aes(stock, beta)) +
geom_bar(stat = "Identity", fill="green", color = "darkgreen") +
geom_line(aes(x=stock, y=R2, group=1), data = reg.df, color = "blue2", size=1) +
theme_grey() + geom_hline(yintercept = 1, color = "black", linetype="dashed") +
labs(x="stock", y="beta", title="Beta and R square") +
theme(axis.text.x = element_text(angle=90))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p<-ggplotly(p1)
p
m1 <- lm(PEP~GSPC, myret)
m2 <- lm(WY~GSPC, myret)
m3 <- lm(RSG~GSPC, myret)
m4 <- lm(CPRT~GSPC, myret)
m5 <- lm(PNR~GSPC, myret)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(m1, m2, m3, m4, m5, type ="text")
##
## =================================================================================
## Dependent variable:
## --------------------------------------------------
## PEP WY RSG CPRT PNR
## (1) (2) (3) (4) (5)
## ---------------------------------------------------------------------------------
## GSPC 0.492*** 1.185*** 0.608*** 0.969*** 1.086***
## (0.053) (0.134) (0.076) (0.107) (0.085)
##
## Constant 0.552** -0.071 0.932*** 1.068** 0.336
## (0.239) (0.602) (0.338) (0.477) (0.382)
##
## ---------------------------------------------------------------------------------
## Observations 286 286 286 286 286
## R2 0.230 0.215 0.186 0.225 0.364
## Adjusted R2 0.227 0.212 0.183 0.222 0.362
## Residual Std. Error (df = 284) 4.030 10.144 5.695 8.043 6.429
## F Statistic (df = 1; 284) 84.805*** 77.676*** 64.797*** 82.527*** 162.444***
## =================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
p1 <- plot(mydata$GSPC.Adjusted, ylab = "price", col="blue")
p1
p2 <- plot(mydata$PEP.Adjusted, ylab = "price", col="blue")
p2
p3 <- plot(mydata$WY.Adjusted, ylab = "price", col="blue")
p3
p4 <- plot(mydata$RSG.Adjusted, ylab = "price", col="blue")
p4
p5 <- plot(mydata$CPRT.Adjusted, ylab = "price", col="blue")
p5
p6 <- plot(mydata$PNR.Adjusted, ylab = "price", col="blue")
p6
r1 <- plot(myret$GSPC, ylab = "rent", col="red")
r1
r2 <- plot(myret$PEP, ylab = "rent", col = "red")
r2
r3 <-plot(myret$WY, ylab = "rent", col = "red")
r3
r4 <-plot(myret$RSG, ylab = "rent", col = "red")
r4
r5 <-plot(myret$CPRT, ylab = "rent", col = "red")
r5
r6 <-plot(myret$PNR, ylab = "rent", col = "red")
r6
h1<-ggplot(myret.df, aes(GSPC))+geom_histogram(aes(y=..density..), bins = 50, color="blue", fill="lightblue") +
stat_function(fun=dnorm, color="red", size=1.5, args=list(0, 10)) +
labs(x="GSPC returns") + theme_bw()
ggplotly(h1)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## ℹ The deprecated feature was likely used in the ggplot2 package.
## Please report the issue at <]8;;https://github.com/tidyverse/ggplot2/issueshttps://github.com/tidyverse/ggplot2/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
h2<-ggplot(myret.df, aes(PEP))+geom_histogram(aes(y=..density..), bins = 50, color="blue", fill="lightblue") +
stat_function(fun=dnorm, color="red", size=1.5, args=list(0, 10)) +
labs(x="PEP returns") + theme_bw()
ggplotly(h2)
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
h3<-ggplot(myret.df, aes(WY))+geom_histogram(aes(y=..density..), bins = 50, color="blue", fill="lightblue") +
stat_function(fun=dnorm, color="red", size=1.5, args=list(0, 10)) +
labs(x="WY returns") + theme_bw()
ggplotly(h3)
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
h4<-ggplot(myret.df, aes(RSG))+geom_histogram(aes(y=..density..), bins = 50, color="blue", fill="lightblue") +
stat_function(fun=dnorm, color="red", size=1.5, args=list(0, 10)) +
labs(x="RSG returns") + theme_bw()
ggplotly(h4)
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
h5<-ggplot(myret.df, aes(CPRT))+geom_histogram(aes(y=..density..), bins = 50, color="blue", fill="lightblue") +
stat_function(fun=dnorm, color="red", size=1.5, args=list(0, 10)) +
labs(x="CPRT returns") + theme_bw()
ggplotly(h5)
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
h6<-ggplot(myret.df, aes(PNR))+geom_histogram(aes(y=..density..), bins = 50, color="blue", fill="lightblue") +
stat_function(fun=dnorm, color="red", size=1.5, args=list(0, 10)) +
labs(x="PNR returns") + theme_bw()
ggplotly(h6)
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
download.file("http://www.apradie.com/ec2004/salesbrands3.xlsx", "salesbrands3.xlsx", mode="wb")
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
sales_brands <- read_excel("salesbrands3.xlsx", sheet="Resultado")
# Always familiarize with the data
head(sales_brands, 5)
## # A tibble: 5 × 15
## date qbrand1tons salesbrand1 qbrand2tons salesbrand2
## <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 2015-01-01 00:00:00 107. 60781. 195. 60701.
## 2 2015-02-01 00:00:00 106. 60411. 216. 68289.
## 3 2015-03-01 00:00:00 88.8 53175. 220. 62530.
## 4 2015-04-01 00:00:00 86.7 50862. 171. 50576.
## 5 2015-05-01 00:00:00 87.2 51014. 177. 54327.
## # ℹ 10 more variables: qbrand3tons <dbl>, salesbrand3 <dbl>, qbrand4tons <dbl>,
## # salesbrand4 <dbl>, qbrand5tons <dbl>, salesbrand5 <dbl>, qbrand6tons <dbl>,
## # salesbrand6 <dbl>, tonstotal <dbl>, salestotal <dbl>
library(zoo)
library(xts)
# We will start by turning sales_brands into a data frame
# because data frames are easier to manipulate
sales_brands.df <- as.data.frame(sales_brands)
# Divide the index and the core data:
# I get the date in a new variable date:
fecha <- sales_brands.df$date
# Now I join both objects using xts() and assign the resulting objects to sales.xts
sales.xts <- xts(x = sales_brands.df, order.by = fecha)
# We will start by turning sales_brands into a data frame
# because data frames are easier to manipulate
sales_brands.df <- as.data.frame(sales_brands)
# Divide the index and the core data:
# I get the date in a new variable fecha:
fecha <- sales_brands.df$date
# The first column of the sales_brand.df is the date.
# I drop this column since I copied to fecha:
sales_brands.df <- sales_brands.df[,-1]
# Now I join both objects using xts() and assign the resulting object to sales.xts
sales.xts <- xts(x = sales_brands.df, order.by = fecha)
head(sales.xts, 5)
## Warning: object timezone (UTC) is different from system timezone ()
## NOTE: set 'options(xts_check_TZ = FALSE)'to disable this warning
## This note is displayed once per session
## qbrand1tons salesbrand1 qbrand2tons salesbrand2 qbrand3tons
## 2015-01-01 106.62 60780.72 195.46 60700.51 293.10
## 2015-02-01 105.82 60411.47 216.09 68289.15 494.46
## 2015-03-01 88.82 53174.89 220.15 62529.54 356.88
## 2015-04-01 86.69 50861.69 170.74 50575.68 291.07
## 2015-05-01 87.23 51014.23 177.17 54327.48 318.09
## salesbrand3 qbrand4tons salesbrand4 qbrand5tons salesbrand5
## 2015-01-01 75196.35 185.91 48228.51 28.94 2295.50
## 2015-02-01 105010.90 149.30 39922.14 24.66 1973.43
## 2015-03-01 90902.84 144.65 39788.22 23.75 1903.24
## 2015-04-01 72853.34 161.43 42692.59 21.00 1684.28
## 2015-05-01 75277.10 141.09 37167.52 23.44 1883.41
## qbrand6tons salesbrand6 tonstotal salestotal
## 2015-01-01 338.38 55561.26 1148.41 302762.8
## 2015-02-01 284.52 51469.74 1274.85 327076.8
## 2015-03-01 251.34 50395.48 1085.59 298694.2
## 2015-04-01 230.29 49124.61 961.22 267792.2
## 2015-05-01 250.45 46906.63 997.47 266576.4
head(sales.xts, 10)
## Warning: object timezone (UTC) is different from system timezone ()
## qbrand1tons salesbrand1 qbrand2tons salesbrand2 qbrand3tons
## 2015-01-01 106.62 60780.72 195.46 60700.51 293.10
## 2015-02-01 105.82 60411.47 216.09 68289.15 494.46
## 2015-03-01 88.82 53174.89 220.15 62529.54 356.88
## 2015-04-01 86.69 50861.69 170.74 50575.68 291.07
## 2015-05-01 87.23 51014.23 177.17 54327.48 318.09
## 2015-06-01 85.56 48564.68 125.77 40908.26 287.31
## 2015-07-01 87.26 49719.86 129.85 42382.65 246.32
## 2015-08-01 84.67 49711.92 114.58 38773.93 250.95
## 2015-09-01 78.22 46643.88 115.66 38986.41 249.93
## 2015-10-01 78.08 47272.66 135.01 45381.74 257.86
## salesbrand3 qbrand4tons salesbrand4 qbrand5tons salesbrand5
## 2015-01-01 75196.35 185.91 48228.51 28.94 2295.50
## 2015-02-01 105010.90 149.30 39922.14 24.66 1973.43
## 2015-03-01 90902.84 144.65 39788.22 23.75 1903.24
## 2015-04-01 72853.34 161.43 42692.59 21.00 1684.28
## 2015-05-01 75277.10 141.09 37167.52 23.44 1883.41
## 2015-06-01 70009.01 155.38 40400.14 20.00 1643.53
## 2015-07-01 65541.41 153.03 41292.25 20.83 1708.58
## 2015-08-01 67456.58 133.73 37079.06 20.55 1678.25
## 2015-09-01 68126.14 173.89 45931.25 20.04 1654.71
## 2015-10-01 70278.44 221.53 53964.69 23.85 1967.04
## qbrand6tons salesbrand6 tonstotal salestotal
## 2015-01-01 338.38 55561.26 1148.41 302762.8
## 2015-02-01 284.52 51469.74 1274.85 327076.8
## 2015-03-01 251.34 50395.48 1085.59 298694.2
## 2015-04-01 230.29 49124.61 961.22 267792.2
## 2015-05-01 250.45 46906.63 997.47 266576.4
## 2015-06-01 246.05 46206.43 920.07 247732.0
## 2015-07-01 232.36 48614.39 869.65 249259.1
## 2015-08-01 234.06 48782.40 838.54 243482.1
## 2015-09-01 210.62 44684.33 848.36 246026.7
## 2015-10-01 250.50 50244.17 966.83 269108.7
rm(fecha)
head(sales.xts, 5)
## Warning: object timezone (UTC) is different from system timezone ()
## qbrand1tons salesbrand1 qbrand2tons salesbrand2 qbrand3tons
## 2015-01-01 106.62 60780.72 195.46 60700.51 293.10
## 2015-02-01 105.82 60411.47 216.09 68289.15 494.46
## 2015-03-01 88.82 53174.89 220.15 62529.54 356.88
## 2015-04-01 86.69 50861.69 170.74 50575.68 291.07
## 2015-05-01 87.23 51014.23 177.17 54327.48 318.09
## salesbrand3 qbrand4tons salesbrand4 qbrand5tons salesbrand5
## 2015-01-01 75196.35 185.91 48228.51 28.94 2295.50
## 2015-02-01 105010.90 149.30 39922.14 24.66 1973.43
## 2015-03-01 90902.84 144.65 39788.22 23.75 1903.24
## 2015-04-01 72853.34 161.43 42692.59 21.00 1684.28
## 2015-05-01 75277.10 141.09 37167.52 23.44 1883.41
## qbrand6tons salesbrand6 tonstotal salestotal
## 2015-01-01 338.38 55561.26 1148.41 302762.8
## 2015-02-01 284.52 51469.74 1274.85 327076.8
## 2015-03-01 251.34 50395.48 1085.59 298694.2
## 2015-04-01 230.29 49124.61 961.22 267792.2
## 2015-05-01 250.45 46906.63 997.47 266576.4
df <- xts(sales.xts)
plot(df$qbrand1tons, size=1, col="blue")
plot(df$salesbrand1, size=1, col="blue")
plot(df$qbrand2tons, size=1, col="blue")
plot(df$salesbrand2, size=1, col="blue")
plot(df$qbrand3tons, size=1, col="blue")
plot(df$salesbrand3, size=1, col="blue")
plot(df$qbrand4tons, size=1, col="blue")
plot(df$salesbrand4, size=1, col="blue")
plot(df$qbrand5tons, size=1, col="blue")
plot(df$salesbrand5, size=1, col="blue")
plot(df$qbrand6tons, size=1, col="blue")
plot(df$salesbrand6, size=1, col="blue")
plot(df$tonstotal, size=1, col="blue")
plot(df$salestotal, size=1, col="blue")
ln_sales <- log(sales.xts)
plot(diff(ln_sales$salestotal), type ="l", col= "red")
# I create a variable for the seasonal difference of the log of sales, which is the annual %
# change of sales (in continuously compounded):
s12.lnqsalestotal = diff(ln_sales$salestotal, lag = 12)
# I plot this new variable, the annual % change:
plot(s12.lnqsalestotal, type = "l", col = "red")
library(tseries)
## Warning: package 'tseries' was built under R version 4.2.3
# Before running the Dicky-Fuller test, we need to drop NA (missing) values of
# the variable:
s12.lnqsalestotal = na.omit(s12.lnqsalestotal)
adf.test(s12.lnqsalestotal, alternative = "stationary",k=0)
## Warning in adf.test(s12.lnqsalestotal, alternative = "stationary", k = 0):
## p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: s12.lnqsalestotal
## Dickey-Fuller = -4.3032, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
Estacionaria.
library(astsa)
acf2(s12.lnqsalestotal)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## ACF 0.64 0.54 0.47 0.39 0.38 0.23 0.23 0.21
## PACF 0.64 0.21 0.10 0.02 0.10 -0.17 0.08 0.03
AR(1) phi > 0
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.3
##
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
##
## gas
# Use function Arima
m1 <- Arima(s12.lnqsalestotal, order = c(1,0,0))
summary(m1)
## Series: s12.lnqsalestotal
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.6870 0.0998
## s.e. 0.1148 0.0139
##
## sigma^2 = 0.000899: log likelihood = 88.41
## AIC=-170.82 AICc=-170.19 BIC=-165.61
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001153012 0.02926002 0.02362702 -9.243064 27.54316 0.22541
## ACF1
## Training set -0.1882822
# I create a variable for the seasonal difference of the log of sales, which is the annual %
# change of sales (in continuously compounded):
s12.lnqtonstotal = diff(ln_sales$salestotal, lag = 12)
# I plot this new variable, the annual % change:
plot(s12.lnqtonstotal, type = "l", col = "red")
# Before running the Dicky-Fuller test, we need to drop NA (missing) values of
# the variable:
s12.lnqtonstotal = na.omit(s12.lnqtonstotal)
adf.test(s12.lnqtonstotal, alternative = "stationary",k=0)
## Warning in adf.test(s12.lnqtonstotal, alternative = "stationary", k = 0):
## p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: s12.lnqtonstotal
## Dickey-Fuller = -4.3032, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
Stationary
acf2(s12.lnqtonstotal)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## ACF 0.64 0.54 0.47 0.39 0.38 0.23 0.23 0.21
## PACF 0.64 0.21 0.10 0.02 0.10 -0.17 0.08 0.03
Tiene estructura arma
m2 <- Arima(s12.lnqtonstotal, order = c(1,0,1))
summary(m2)
## Series: s12.lnqtonstotal
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.8797 -0.3785 0.0939
## s.e. 0.0926 0.1730 0.0207
##
## sigma^2 = 0.0008498: log likelihood = 90.03
## AIC=-172.05 AICc=-170.97 BIC=-165.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001576755 0.02809103 0.02208631 -7.943973 25.3786 0.2107111
## ACF1
## Training set -0.02920623