Data
# NOTE(review): setwd() with an absolute path ties this script to one machine.
# It is kept because the workbook below is read relative to this directory;
# consider a project-relative path instead.
setwd("c:/R")

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at the first call into
# the package.
library(xlsx)
library(ggpmisc)
## Loading required package: ggplot2
## For news about 'ggpmisc', please, see https://www.r4photobiology.info/
## For on-line documentation see https://docs.r4photobiology.info/ggpmisc/

# Read the first sheet of the workbook as UTF-8 (the first column holds
# Korean stock names).
green <- read.xlsx(
  "2005-Greenblatt-data1.xlsx",
  sheetIndex = 1,
  encoding = "UTF-8"
)

# Replace the sheet's headers with script-friendly names. The last two names
# are non-syntactic and must be quoted with backticks whenever they are used.
colnames(green) <- c(
  "종목", "Stock", "ROA", "ROA_rank", "PER",
  "PER_rank", "Score", "Return_2005percent", "ln(1+r)", "Total_Asset(10e9)"
)
head(green)
## 종목 Stock ROA ROA_rank PER PER_rank Score Return_2005percent
## 1 방림 1 19.1 8 1.13 2 10 120.7
## 2 대한방직 2 18.5 10 0.99 1 11 108.3
## 3 CKF 3 34.5 1 2.97 13 14 322.5
## 4 현대제철 4 17.1 15 1.86 5 20 55.2
## 5 세아홀딩스 5 18.2 12 2.23 8 20 188.8
## 6 호남석유화학 6 20.0 5 3.11 17 22 3.4
## ln(1+r) Total_Asset(10e9)
## 1 0.79163 253
## 2 0.73381 198
## 3 1.44102 31
## 4 0.43954 5707
## 5 1.06056 530
## 6 0.03343 2318
## 종목 Stock ROA ROA_rank PER PER_rank Score Return_2005percent
## 1 방림 1 19.1 8 1.13 2 10 120.7
## 2 대한방직 2 18.5 10 0.99 1 11 108.3
## 3 CKF 3 34.5 1 2.97 13 14 322.5
## 4 현대제철 4 17.1 15 1.86 5 20 55.2
## 5 세아홀딩스 5 18.2 12 2.23 8 20 188.8
## 6 호남석유화학 6 20.0 5 3.11 17 22 3.4
## ln(1+r) Total_Asset(10e9)
## 1 0.79163 253
## 2 0.73381 198
## 3 1.44102 31
## 4 0.43954 5707
## 5 1.06056 530
## 6 0.03343 2318
a)
# Part a) Relationship between the combined rank Score and the log return.
library(ggplot2)

# Single formula shared by the fitted line and the printed equation so the
# two layers can never disagree.
my.formula <- y ~ x

# Scatter plot with an OLS line; stat_poly_eq() annotates the panel with the
# fitted equation and R^2.
# Columns are referenced by bare name inside aes(): writing green$... there
# bypasses the `data` argument and leaks "green$" into the axis label.
ggplot(green, aes(x = Score, y = `ln(1+r)`)) +
  geom_point() +
  geom_smooth(method = "lm", formula = my.formula) +
  ggtitle("Regression Plot") +
  stat_poly_eq(
    formula = my.formula, rr.digits = 2, coef.digits = 4,
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  )

# Pearson, Spearman and Kendall correlations between log return and Score.
cor(green$`ln(1+r)`, green$Score)
## [1] -0.1704437
cor(green$`ln(1+r)`, green$Score, method = "spearman")
## [1] -0.167223
cor(green$`ln(1+r)`, green$Score, method = "kendall")
## [1] -0.1106563
b)
# Part b) Value of an equally weighted portfolio of the first m stocks.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union

# Convert the percentage return into a decimal return. The column is named
# directly; mutate() already evaluates inside `green`.
green <- green %>% mutate(r = Return_2005percent / 100)

# V[m] = 1 + mean(r[1:m]): one plus the average return of the first m rows.
# A cumulative sum divided by the running count replaces the element-by-element
# loop and takes its length from the data instead of a hard-coded 50.
# NOTE(review): "first m" follows the current row order of `green`, which is
# presumably ascending Score as delivered in the workbook -- confirm.
V <- 1 + cumsum(green$r) / seq_along(green$r)
vdata <- data.frame(m = seq_along(V), V = V)

# Line-and-point plot of V against m. The title reads "Return of a portfolio
# diversified across the top m stocks"; the y label reads "Return".
ggplot(vdata, aes(x = m, y = V)) +
  geom_line(size = 1, color = "tan1") +
  geom_point(color = "tan1") +
  labs(title = "상위 m개의 종목에 분산투자한 포트폴리오의 수익률\n", x = "m", y = "수익률")

c)
# Part c) Three single-predictor OLS fits of the log return column `ln(1+r)`
# on the log of PER, ROA and total assets, one at a time.
# The formulas are spelled exactly as in the recorded run so that the echoed
# "Call:" lines and coefficient labels below remain accurate.

# Predictor: log(PER)
fit_log_per <- lm(green$`ln(1+r)` ~ log(green$PER))
summary(fit_log_per)
##
## Call:
## lm(formula = green$`ln(1+r)` ~ log(green$PER))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9864 -0.4258 -0.0330 0.4083 1.4383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.663951 0.231000 2.874 0.00602 **
## log(green$PER) 0.005803 0.162293 0.036 0.97162
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.55 on 48 degrees of freedom
## Multiple R-squared: 2.664e-05, Adjusted R-squared: -0.02081
## F-statistic: 0.001279 on 1 and 48 DF, p-value: 0.9716

# Predictor: log(ROA)
fit_log_roa <- lm(green$`ln(1+r)` ~ log(green$ROA))
summary(fit_log_roa)
##
## Call:
## lm(formula = green$`ln(1+r)` ~ log(green$ROA))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86055 -0.36032 -0.00892 0.36615 1.47348
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3241 0.5652 -0.573 0.5690
## log(green$ROA) 0.3909 0.2199 1.778 0.0818 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5328 on 48 degrees of freedom
## Multiple R-squared: 0.06178, Adjusted R-squared: 0.04223
## F-statistic: 3.161 on 1 and 48 DF, p-value: 0.08176

# Predictor: log(total assets) -- the only one of the three with a slope
# significant at the 5% level in the output below.
fit_log_assets <- lm(green$`ln(1+r)` ~ log(green$`Total_Asset(10e9)`))
summary(fit_log_assets)
##
## Call:
## lm(formula = green$`ln(1+r)` ~ log(green$`Total_Asset(10e9)`))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76545 -0.30841 -0.05787 0.27407 1.31477
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7595 0.2160 8.148 1.32e-10 ***
## log(green$`Total_Asset(10e9)`) -0.1851 0.0352 -5.258 3.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4381 on 48 degrees of freedom
## Multiple R-squared: 0.3655, Adjusted R-squared: 0.3523
## F-statistic: 27.65 on 1 and 48 DF, p-value: 3.329e-06
# Scatter plot of log return against log(total assets) with the OLS line and
# an equation / R^2 annotation from stat_poly_eq().
# One formula object is shared by the smoother and the annotation.
my.formula <- y ~ x

# Columns are referenced by bare (backtick-quoted) name inside aes(): writing
# green$... there bypasses the `data` argument and leaks "green$" into the
# axis labels.
ggplot(green, aes(x = log(`Total_Asset(10e9)`), y = `ln(1+r)`)) +
  geom_point() +
  geom_smooth(method = "lm", formula = my.formula) +
  ggtitle("Regression Plot") +
  stat_poly_eq(
    formula = my.formula, rr.digits = 2, coef.digits = 4,
    aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
    parse = TRUE
  )

d)
# Part d) Re-rank the stocks by the return predicted from log(total assets)
# and compare the resulting top-m portfolio curve with the one in `vdata`.

# Fit through `data = green` rather than on detached green$ vectors, so the
# model keeps a link to its data frame (needed, for example, by predict() with
# newdata). The fitted values are the same as for the green$-based formula.
d.fit <- lm(`ln(1+r)` ~ log(`Total_Asset(10e9)`), data = green)

# Back-transform the fitted log returns to plain returns (exp(.) - 1) and sort
# the table from highest to lowest predicted return.
# NOTE: this overwrites `green` with the re-sorted table; any "first m rows"
# computation run after this point uses the new order.
green <- green %>%
  mutate(r.hat = exp(fitted(d.fit)) - 1) %>%
  arrange(desc(r.hat))

# V2[m] = 1 + mean(r[1:m]) under the new ordering. The running mean is
# vectorised with cumsum() and takes its length from the data instead of a
# hard-coded 50.
V2 <- 1 + cumsum(green$r) / seq_along(green$r)
vdata2 <- data.frame(m = seq_along(V2), V = V2)

# Overlay both curves: `vdata` in tan, the size-model ranking in blue.
# ylim(0, 4) fixes the y scale to [0, 4].
# NOTE(review): the dashed line at 1.54 is presumably a benchmark gross return
# (e.g. the market index for the same year) -- confirm the source of this value.
ggplot(vdata, aes(x = m, y = V)) +
  geom_line(size = 1, color = "tan1") +
  geom_line(data = vdata2, size = 1, color = "steelblue2") +
  labs(title = "상위 m개의 종목에 분산투자한 포트폴리오의 수익률\n", x = "m", y = "수익률") +
  ylim(0, 4) +
  geom_hline(yintercept = 1.54, linetype = "dashed", size = 1, color = "brown2")
