The interesting things are here.
library(ggplot2) #for plots
# Load the three input tables for the case study.
# All files live in one directory, so the directory is stated once and the
# individual paths are assembled with file.path().
data_dir <- "~/Dropbox/aktuellaKurser/case644/case2/rkod"
# read.delim2() reads tab-separated text that uses "," as the decimal mark.
redeepnona <- read.delim2(file.path(data_dir, "redeepnona.txt"))
# NOTE: the object name `cov` is also the name of stats::cov(); it is kept
# unchanged because later code may refer to it.
cov <- read.csv(file.path(data_dir, "cov.csv"))
comp <- read.delim2(file.path(data_dir, "comp.txt"))
# Column names of the raw return data.
names(redeepnona)
## [1] "Date" "No..of.Shares" "Price" "Dividends"
## [5] "Split" "Firm" "r_E" "r_M"
# Total number of missing cells in the whole data frame.
sum(is.na(redeepnona))
## [1] 0
# Quartiles of r_E: looks correct, no large values.
quantile(redeepnona[["r_E"]])
## 0% 25% 50% 75% 100%
## -0.5040 -0.0585 0.0150 0.1130 0.7740
# Density of r_E pooled over all firms.
ggplot(data = redeepnona, mapping = aes(x = r_E)) +
  ggtitle("density deepd") +
  geom_density()
# Number of rows per firm: roughly 100 of each.
table(redeepnona[["Firm"]])
##
## charlesschwab quickoreilly waterhouse
## 119 163 112
# Same density, drawn as one stacked panel per firm.
ggplot(data = redeepnona, mapping = aes(x = r_E)) +
  geom_density() +
  facet_wrap(~Firm, ncol = 1) +
  ggtitle("density deepd")
# Drop the second column (the number-of-shares variable); it is not wanted.
redeepnona <- redeepnona[, -2]
# Build a numeric `year` column from characters 1 to 4 of Date
# (a Date looks like 1992-01-31). substr() yields text, hence as.numeric().
redeepnona$year <- as.numeric(substr(redeepnona$Date, start = 1, stop = 4))
nrow(redeepnona) # row count before filtering
## [1] 394
# Keep only rows with year strictly below 1997 ...
redeepnona <- subset(redeepnona, subset = year < 1997)
nrow(redeepnona) # row count after the upper cut
## [1] 378
# ... and strictly above 1991.
redeepnona <- subset(redeepnona, subset = year > 1991)
# Original note (translated; figures copied as written, not re-verified):
# reasonable, since waterhouse, which is the largest and has been around the
# longest, has 286 - 398 = 102 rows.
nrow(redeepnona)
## [1] 177
# Monthly rate derived from 6.61 percent: log(1 + rate) divided by 12.
# `rfm` is a single number.
rfm <- log(1 + 6.61 / 100) / 12
# Store that number in every row of redeepnona (it is recycled).
redeepnona[["rfm"]] <- rfm
# Market return minus the monthly rate, row by row.
redeepnona[["rM.minus.rfm"]] <- redeepnona[["r_M"]] - redeepnona[["rfm"]]
Let us split the dataset redeepnona into a list of three smaller datasets, one for each firm.
# Split the dataset into a list with one data frame per value of Firm.
redeepnonasplit <- split(x = redeepnona, f = redeepnona[["Firm"]])
we went from this:
to this:
Note that we need to use the dollar sign to tell R which “dataframe” (we have three dataframes) in our “list” (we have one list) we want to use.
did we do it right? let us check.
# Show the first three rows of each firm's data frame to check the split.
redeepnonasplit[["charlesschwab"]][1:3, ]
## Date Price Dividends Split Firm r_E r_M year
## 52 1992-01-31 31.875 0.04 1 charlesschwab 0.051 -0.002 1992
## 53 1992-02-28 33.250 0.00 1 charlesschwab 0.043 0.013 1992
## 54 1992-03-31 34.625 0.00 1 charlesschwab 0.041 -0.024 1992
## rfm rM.minus.rfm
## 52 0.005333927 -0.007333927
## 53 0.005333927 0.007666073
## 54 0.005333927 -0.029333927
redeepnonasplit[["quickoreilly"]][1:3, ]
## Date Price Dividends Split Firm r_E r_M year
## 215 1992-01-31 27.500 0.00 1 quickoreilly -0.009 -0.002 1992
## 216 1992-02-28 28.500 0.18 1 quickoreilly 0.043 0.013 1992
## 217 1992-03-31 28.625 0.00 1 quickoreilly 0.004 -0.024 1992
## rfm rM.minus.rfm
## 215 0.005333927 -0.007333927
## 216 0.005333927 0.007666073
## 217 0.005333927 -0.029333927
redeepnonasplit[["waterhouse"]][1:3, ]
## Date Price Dividends Split Firm r_E r_M year
## 338 1992-01-31 26.500 0 1.0 waterhouse -0.036 -0.002 1992
## 339 1992-02-28 21.750 0 1.5 waterhouse 0.231 0.013 1992
## 340 1992-03-31 25.375 0 1.0 waterhouse 0.167 -0.024 1992
## rfm rM.minus.rfm
## 338 0.005333927 -0.007333927
## 339 0.005333927 0.007666073
## 340 0.005333927 -0.029333927
Users who are familiar with R can check this with the str() command, which shows the structure of the object.
# Print the structure of the split list.
# Original note (translated): the number of observations for charlesschwab
# matches the number of rows obtained in Excel when taking years 92 to 96,
# so the split seems to be right.
str(object = redeepnonasplit)
## List of 3
## $ charlesschwab:'data.frame': 60 obs. of 10 variables:
## ..$ Date : Factor w/ 163 levels "1984-02-29","1984-03-30",..: 96 97 98 99 100 101 102 103 104 105 ...
## ..$ Price : num [1:60] 31.9 33.2 34.6 28.5 28.9 ...
## ..$ Dividends : num [1:60] 0.04 0 0 0.06 0 0 0.06 0 0 0.06 ...
## ..$ Split : num [1:60] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ Firm : Factor w/ 3 levels "charlesschwab",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..$ r_E : num [1:60] 0.051 0.043 0.041 -0.175 0.013 -0.186 0.05 -0.086 -0.2 0.128 ...
## ..$ r_M : num [1:60] -0.002 0.013 -0.024 0.014 0.007 -0.019 0.04 -0.021 0.012 0.011 ...
## ..$ year : num [1:60] 1992 1992 1992 1992 1992 ...
## ..$ rfm : num [1:60] 0.00533 0.00533 0.00533 0.00533 0.00533 ...
## ..$ rM.minus.rfm: num [1:60] -0.00733 0.00767 -0.02933 0.00867 0.00167 ...
## $ quickoreilly :'data.frame': 60 obs. of 10 variables:
## ..$ Date : Factor w/ 163 levels "1984-02-29","1984-03-30",..: 96 97 98 99 100 101 102 103 104 105 ...
## ..$ Price : num [1:60] 27.5 28.5 28.6 21.5 21.5 ...
## ..$ Dividends : num [1:60] 0 0.18 0 0 0.08 0 0 0.08 0 0 ...
## ..$ Split : num [1:60] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ Firm : Factor w/ 3 levels "charlesschwab",..: 2 2 2 2 2 2 2 2 2 2 ...
## ..$ r_E : num [1:60] -0.009 0.043 0.004 -0.249 0.004 -0.023 -0.042 -0.021 0.025 0.043 ...
## ..$ r_M : num [1:60] -0.002 0.013 -0.024 0.014 0.007 -0.019 0.04 -0.021 0.012 0.011 ...
## ..$ year : num [1:60] 1992 1992 1992 1992 1992 ...
## ..$ rfm : num [1:60] 0.00533 0.00533 0.00533 0.00533 0.00533 ...
## ..$ rM.minus.rfm: num [1:60] -0.00733 0.00767 -0.02933 0.00867 0.00167 ...
## $ waterhouse :'data.frame': 57 obs. of 10 variables:
## ..$ Date : Factor w/ 163 levels "1984-02-29","1984-03-30",..: 96 97 98 99 100 101 102 103 104 105 ...
## ..$ Price : num [1:57] 26.5 21.8 25.4 20.6 21.8 ...
## ..$ Dividends : num [1:57] 0 0 0 0 0 0 0 0.2 0 0 ...
## ..$ Split : num [1:57] 1 1.5 1 1 1 1 1 1 1 1 ...
## ..$ Firm : Factor w/ 3 levels "charlesschwab",..: 3 3 3 3 3 3 3 3 3 3 ...
## ..$ r_E : num [1:57] -0.036 0.231 0.167 -0.187 0.055 -0.213 0.029 -0.209 0.045 0.087 ...
## ..$ r_M : num [1:57] -0.002 0.013 -0.024 0.014 0.007 -0.019 0.04 -0.021 0.012 0.011 ...
## ..$ year : num [1:57] 1992 1992 1992 1992 1992 ...
## ..$ rfm : num [1:57] 0.00533 0.00533 0.00533 0.00533 0.00533 ...
## ..$ rM.minus.rfm: num [1:57] -0.00733 0.00767 -0.02933 0.00867 0.00167 ...
# Checks that the split is correct (kept disabled):
# View(redeepnonasplit)
# range(redeepnonasplit$charlesschwab$year)
# Linear regression of r_E on rM.minus.rfm for charlesschwab.
# The data expression is written with `$` on purpose: the printed "Call:"
# line echoes it verbatim.
summary(
  lm(formula = r_E ~ rM.minus.rfm, data = redeepnonasplit$charlesschwab)
)
##
## Call:
## lm(formula = r_E ~ rM.minus.rfm, data = redeepnonasplit$charlesschwab)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.233843 -0.045392 0.004957 0.053656 0.195256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01851 0.01347 1.374 0.175
## rM.minus.rfm 2.29995 0.53051 4.335 5.87e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1009 on 58 degrees of freedom
## Multiple R-squared: 0.2447, Adjusted R-squared: 0.2317
## F-statistic: 18.8 on 1 and 58 DF, p-value: 5.87e-05
# Linear regression of r_E on rM.minus.rfm for quickoreilly.
# The data expression is written with `$` on purpose: the printed "Call:"
# line echoes it verbatim.
summary(
  lm(formula = r_E ~ rM.minus.rfm, data = redeepnonasplit$quickoreilly)
)
##
## Call:
## lm(formula = r_E ~ rM.minus.rfm, data = redeepnonasplit$quickoreilly)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27718 -0.03722 0.00293 0.05818 0.15165
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.009082 0.011906 0.763 0.449
## rM.minus.rfm 2.203988 0.468895 4.700 1.64e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08919 on 58 degrees of freedom
## Multiple R-squared: 0.2758, Adjusted R-squared: 0.2634
## F-statistic: 22.09 on 1 and 58 DF, p-value: 1.644e-05
# Linear regression of r_E on rM.minus.rfm for waterhouse.
# The data expression is written with `$` on purpose: the printed "Call:"
# line echoes it verbatim.
summary(
  lm(formula = r_E ~ rM.minus.rfm, data = redeepnonasplit$waterhouse)
)
##
## Call:
## lm(formula = r_E ~ rM.minus.rfm, data = redeepnonasplit$waterhouse)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24092 -0.11601 -0.01438 0.08880 0.37527
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02245 0.01985 1.131 0.263127
## rM.minus.rfm 3.18128 0.80503 3.952 0.000223 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1456 on 55 degrees of freedom
## Multiple R-squared: 0.2211, Adjusted R-squared: 0.207
## F-statistic: 15.62 on 1 and 55 DF, p-value: 0.0002233
Let us save the estimates in a vector called “rermrf”, to make it easier to compare them with the correct ones.
# Collect the slope on rM.minus.rfm (the second coefficient) from each
# firm's regression, in the order charlesschwab, quickoreilly, waterhouse.
# coef() is the accessor for fitted coefficients; the previous `$coef` only
# worked because `$` partially matches the `coefficients` component.
rermrf <- c(
  coef(lm(r_E ~ rM.minus.rfm, data = redeepnonasplit$charlesschwab))[2],
  coef(lm(r_E ~ rM.minus.rfm, data = redeepnonasplit$quickoreilly))[2],
  coef(lm(r_E ~ rM.minus.rfm, data = redeepnonasplit$waterhouse))[2]
)
# Print the three slopes; each element keeps the name "rM.minus.rfm".
rermrf
## rM.minus.rfm rM.minus.rfm rM.minus.rfm
## 2.299948 2.203988 3.181276
As a bonus, let us check whether they have positive alpha.
# Collect the intercept (the first coefficient) from each firm's regression,
# in the order charlesschwab, quickoreilly, waterhouse.
# coef() is the accessor for fitted coefficients; the previous `$coef` only
# worked because `$` partially matches the `coefficients` component.
intercepts <- c(
  coef(lm(r_E ~ rM.minus.rfm, data = redeepnonasplit$charlesschwab))[1],
  coef(lm(r_E ~ rM.minus.rfm, data = redeepnonasplit$quickoreilly))[1],
  coef(lm(r_E ~ rM.minus.rfm, data = redeepnonasplit$waterhouse))[1]
)
# Note that intercept = rf + alpha, since we modelled with r_E (not
# r_E minus rfm) as y.
intercepts > rfm # TRUE means a positive-alpha stock
## (Intercept) (Intercept) (Intercept)
## TRUE TRUE TRUE
her estimates are
# Her beta estimates from the lecture. The second one was not saved during
# the lecture, so it is recorded as missing.
herbetas <- c(2.2, NA_real_, 3.2)
how do we differ? and why?
# Element-wise difference between her betas and our estimates; a missing
# value in herbetas gives a missing difference at that position.
herbetas - rermrf
## rM.minus.rfm rM.minus.rfm rM.minus.rfm
## -0.09994803 NA 0.01872430
Is deep better or worse than internet?
# Density of comp$nasdaq, with the density of r_E overlaid.
# The argument is written as comp$nasdaq on purpose: the default plot title
# is taken from the density() call.
plot(density(comp$nasdaq))
lines(density(redeepnona[["r_E"]]), lty = "dashed")
# deep disc is the dashed line (line type 2)
# Same overlay against comp$investmentservice; note: a different dataset.
plot(density(comp$investmentservice))
lines(density(redeepnona[["r_E"]]), lty = "dashed")
Instead of plots we may want to use quantiles, as they are quicker to code.
# Quartiles of r_E for waterhouse only.
quantile(redeepnonasplit[["waterhouse"]][["r_E"]])
## 0% 25% 50% 75% 100%
## -0.225 -0.057 0.013 0.167 0.473
# Quartiles of r_E for all firms together.
quantile(redeepnona[["r_E"]])
## 0% 25% 50% 75% 100%
## -0.249 -0.037 0.029 0.113 0.473
# versus the comparison series
# Not better than nasdaq, because nasdaq is safer.
quantile(comp$nasdaq)
## 0% 25% 50% 75% 100%
## -0.224870 -0.009690 0.015075 0.039720 0.128370
quantile(comp$internet)
## 0% 25% 50% 75% 100%
## -0.41392000 -0.04937250 0.00863000 0.08613375 0.43129500
# Density of comp$internet, with the density of r_E overlaid as a dashed
# line. The argument is written as comp$internet on purpose: the default
# plot title is taken from the density() call.
plot(density(comp$internet))
lines(density(redeepnona[["r_E"]]), lty = "dashed")
# save as internet_vs_redeep
# We see that internet is not that much better than deep. Internet is the one
# that reaches highest, so what is better is that internet has less area on
# the negative side. On the other hand, it has more probability for positive
# returns.