Data

# NOTE(review): a hard-coded working directory ties this script to one machine.
# It is kept because the relative workbook path passed to read.xlsx() below is
# resolved against it; consider a project-relative path instead.
setwd("c:/R")
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(xlsx)
library(ggpmisc)
## Loading required package: ggplot2
## For news about 'ggpmisc', please, see https://www.r4photobiology.info/
## For on-line documentation see https://docs.r4photobiology.info/ggpmisc/
# Read the first worksheet of the workbook, passing encoding = "UTF-8"
# (the first column holds Korean stock names).
green <- read.xlsx(
  "2005-Greenblatt-data1.xlsx",
  sheetIndex = 1,
  encoding = "UTF-8"
)
# Overwrite the sheet's headers with the ten column names used by the rest of
# the analysis. `ln(1+r)` and `Total_Asset(10e9)` are non-syntactic names, so
# they must be back-quoted wherever they are referenced.
names(green) <- c(
  "종목", "Stock", "ROA", "ROA_rank", "PER",
  "PER_rank", "Score", "Return_2005percent", "ln(1+r)", "Total_Asset(10e9)"
)
# Preview the first six rows.
head(green)
##           종목 Stock  ROA ROA_rank  PER PER_rank Score Return_2005percent
## 1         방림     1 19.1        8 1.13        2    10              120.7
## 2     대한방직     2 18.5       10 0.99        1    11              108.3
## 3          CKF     3 34.5        1 2.97       13    14              322.5
## 4     현대제철     4 17.1       15 1.86        5    20               55.2
## 5   세아홀딩스     5 18.2       12 2.23        8    20              188.8
## 6 호남석유화학     6 20.0        5 3.11       17    22                3.4
##   ln(1+r) Total_Asset(10e9)
## 1 0.79163               253
## 2 0.73381               198
## 3 1.44102                31
## 4 0.43954              5707
## 5 1.06056               530
## 6 0.03343              2318

a)

library(ggplot2)
# Scatter plot of `ln(1+r)` against Score with a least-squares line, annotated
# with the fitted equation and R^2 by ggpmisc::stat_poly_eq().
my.formula <- y ~ x
# The y aesthetic names the column directly instead of `green$...`: inside
# aes() a `$` reference bypasses the `data` argument and leaks the expression
# into the axis label. The axis label therefore becomes "ln(1+r)".
ggplot(green, aes(x = Score, y = `ln(1+r)`)) +
  geom_point() +
  # Pass the same formula to the smoother so the drawn line and the printed
  # equation are guaranteed to come from the same model.
  geom_smooth(method = "lm", formula = my.formula) +
  ggtitle("Regression Plot") +
  stat_poly_eq(
    formula = my.formula, rr.digits = 2, coef.digits = 4,
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  )

# Correlation between `ln(1+r)` and Score under three definitions.
# Pearson (the default method of cor()).
with(green, cor(`ln(1+r)`, Score))
## [1] -0.1704437
# Spearman rank correlation.
with(green, cor(`ln(1+r)`, Score, method = "spearman"))
## [1] -0.167223
# Kendall rank correlation.
with(green, cor(`ln(1+r)`, Score, method = "kendall"))
## [1] -0.1106563

b)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Convert the percentage return into a decimal rate r.
# The column is referenced by name inside mutate() rather than through
# `green$`, so the expression always uses the data frame being piped.
green <- green %>% mutate(r = Return_2005percent / 100)
# V[m] = 1 + mean(r[1:m]): one plus the average return of the first m rows of
# `green`, i.e. an equally weighted portfolio of those m stocks.
# cumsum()/seq_along() replaces the fixed `for (i in 1:50)` loop that grew V
# element by element: it is sized from the data, so it no longer yields NA
# when the table does not have exactly 50 rows. Values agree with the loop up
# to floating-point rounding.
V <- 1 + cumsum(green$r) / seq_along(green$r)
vdata <- data.frame(m = seq_along(V), V = V)
# Portfolio value as a function of the number of stocks held.
ggplot(vdata, aes(x = m, y = V)) +
  geom_line(size = 1, color = "tan1") +
  geom_point(color = "tan1") +
  labs(title = "상위 m개의 종목에 분산투자한 포트폴리오의 수익률\n", x = "m", y = "수익률")

c)

# Simple regression of `ln(1+r)` on log(PER); print the coefficient table.
# The formula text is left untouched so the echoed Call and term labels match.
lm(green$`ln(1+r)`~log(green$PER)) %>%
  summary()
## 
## Call:
## lm(formula = green$`ln(1+r)` ~ log(green$PER))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9864 -0.4258 -0.0330  0.4083  1.4383 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    0.663951   0.231000   2.874  0.00602 **
## log(green$PER) 0.005803   0.162293   0.036  0.97162   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.55 on 48 degrees of freedom
## Multiple R-squared:  2.664e-05,  Adjusted R-squared:  -0.02081 
## F-statistic: 0.001279 on 1 and 48 DF,  p-value: 0.9716
# Simple regression of `ln(1+r)` on log(ROA); print the coefficient table.
# The formula text is left untouched so the echoed Call and term labels match.
lm(green$`ln(1+r)`~log(green$ROA)) %>%
  summary()
## 
## Call:
## lm(formula = green$`ln(1+r)` ~ log(green$ROA))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86055 -0.36032 -0.00892  0.36615  1.47348 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -0.3241     0.5652  -0.573   0.5690  
## log(green$ROA)   0.3909     0.2199   1.778   0.0818 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5328 on 48 degrees of freedom
## Multiple R-squared:  0.06178,    Adjusted R-squared:  0.04223 
## F-statistic: 3.161 on 1 and 48 DF,  p-value: 0.08176
# Simple regression of `ln(1+r)` on log(Total_Asset); print the coefficient
# table. The formula text is left untouched so the echoed Call and term labels
# match.
lm(green$`ln(1+r)`~log(green$`Total_Asset(10e9)`)) %>%
  summary()
## 
## Call:
## lm(formula = green$`ln(1+r)` ~ log(green$`Total_Asset(10e9)`))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76545 -0.30841 -0.05787  0.27407  1.31477 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.7595     0.2160   8.148 1.32e-10 ***
## log(green$`Total_Asset(10e9)`)  -0.1851     0.0352  -5.258 3.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4381 on 48 degrees of freedom
## Multiple R-squared:  0.3655, Adjusted R-squared:  0.3523 
## F-statistic: 27.65 on 1 and 48 DF,  p-value: 3.329e-06
# Scatter plot of `ln(1+r)` against log(Total_Asset) with a least-squares line,
# annotated with the fitted equation and R^2 by ggpmisc::stat_poly_eq().
my.formula <- y ~ x
# Columns are named directly inside aes() instead of through `green$...`,
# which bypasses the `data` argument and leaks the expression into the axis
# labels. The labels therefore change to the bare column expressions.
ggplot(green, aes(x = log(`Total_Asset(10e9)`), y = `ln(1+r)`)) +
  geom_point() +
  # Pass the same formula to the smoother so the drawn line and the printed
  # equation are guaranteed to come from the same model.
  geom_smooth(method = "lm", formula = my.formula) +
  ggtitle("Regression Plot") +
  stat_poly_eq(
    formula = my.formula, rr.digits = 2, coef.digits = 4,
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  )

d)

# Fit `ln(1+r)` on log(Total_Asset). Using `data = green` instead of `green$`
# inside the formula keeps the model tied to its own model frame, so it is not
# affected when `green` is reordered below and predict() can accept newdata.
# Fitted values are unchanged; only the coefficient label loses the `green$`
# prefix.
d.fit <- lm(`ln(1+r)` ~ log(`Total_Asset(10e9)`), data = green)
# Back-transform the fitted log returns to predicted returns r.hat and sort the
# stocks from the highest to the lowest prediction.
# Note: this reorders `green` in place; later code sees the new row order.
green <- green %>%
  mutate(r.hat = exp(fitted(d.fit)) - 1) %>%
  arrange(desc(r.hat))
# V2[m] = 1 + mean(r[1:m]) over the re-sorted rows: an equally weighted
# portfolio of the m stocks with the highest predicted return. Relies on the
# `r` column already being present in `green`.
# Vectorised in place of the fixed `for (i in 1:50)` loop so it is sized from
# the data; values agree with the loop up to floating-point rounding.
V2 <- 1 + cumsum(green$r) / seq_along(green$r)
vdata2 <- data.frame(m = seq_along(V2), V = V2)
# Overlay both portfolios: `vdata` (tan, computed earlier in the script) and
# `vdata2` (blue, ranked by r.hat).
ggplot(vdata, aes(x = m, y = V)) +
  geom_line(size = 1, color = "tan1") +
  geom_line(data = vdata2, size = 1, color = "steelblue2") +
  labs(title = "상위 m개의 종목에 분산투자한 포트폴리오의 수익률\n", x = "m", y = "수익률") +
  ylim(0, 4) +
  # NOTE(review): 1.54 is an unexplained reference level, presumably a
  # benchmark value for the same period; confirm and name the constant.
  geom_hline(yintercept = 1.54, linetype = "dashed", size = 1, color = "brown2")