library(robustHD)
## Loading required package: ggplot2
## Loading required package: perry
## Loading required package: parallel
## Loading required package: robustbase
library(readxl)
library(readxl)
# Load the raw data sheet (one row per observation, with the valuation
# multiples and the Label / Heatlabel grouping columns used below).
kandidata <- read_excel("~/Documents/kandi/kandidata.xlsx")
## Warning in strptime(x, format, tz = tz): unknown timezone 'zone/tz/2017c.
## 1.0/zoneinfo/Europe/Helsinki'

# Leader/Follower subset. The row filter is computed once and reused for every
# column; `%in%` never returns NA, so rows with a missing Label are dropped
# instead of turning into all-NA rows.
is_leader_or_follower <- kandidata$Label %in% c("Leader", "Follower")
lf <- data.frame(
  psales = kandidata$`P/Net Sales`[is_leader_or_follower],
  pebitda = kandidata$`P/EBITDA`[is_leader_or_follower],
  peps = kandidata$`P/EPS`[is_leader_or_follower],
  # Explicit factor: since R 4.0 data.frame() keeps strings as character, and
  # summary() would then stop reporting the per-group counts shown below.
  label = factor(kandidata$Label[is_leader_or_follower])
)

# Ranks of each multiple within the subset (rank() averages ties by default);
# these feed the t-tests on ranks at the end of the script.
lf$psalesrank <- rank(lf$psales)
lf$pebitdarank <- rank(lf$pebitda)
lf$pepsrank <- rank(lf$peps)
summary(lf)
## psales pebitda peps label
## Min. : 0.010 Min. : 0.010 Min. :-235.590 Follower:74
## 1st Qu.: 0.405 1st Qu.: 0.465 1st Qu.: 0.165 Leader :49
## Median : 1.440 Median : 1.110 Median : 0.930
## Mean : 2.799 Mean : 2.412 Mean : -1.173
## 3rd Qu.: 2.800 3rd Qu.: 1.920 3rd Qu.: 1.865
## Max. :39.770 Max. :26.700 Max. : 56.490
## psalesrank pebitdarank pepsrank
## Min. : 1.50 Min. : 1.00 Min. : 1.0
## 1st Qu.: 31.25 1st Qu.: 31.75 1st Qu.: 31.5
## Median : 61.50 Median : 62.00 Median : 62.0
## Mean : 62.00 Mean : 62.00 Mean : 62.0
## 3rd Qu.: 92.50 3rd Qu.: 92.50 3rd Qu.: 92.5
## Max. :123.00 Max. :123.00 Max. :123.0
# Shapiro-Wilk normality test on the winsorized P/EBITDA values of the
# Leader/Follower subset. winsorize() is presumably the robustHD function
# (robustHD is attached at the top of the script) -- confirm if other
# packages providing a winsorize() are ever loaded.
shapiro.test(winsorize(lf$pebitda))
##
## Shapiro-Wilk normality test
##
## data: winsorize(lf$pebitda)
## W = 0.88857, p-value = 3.962e-08
# Boxplots of P/Net Sales split by label: raw values first, then the
# winsorized values for comparison.
boxplot(lf$psales ~lf$label)
boxplot(winsorize(lf$psales) ~lf$label)
# Descriptive statistics of P/Net Sales per label, taken straight from the raw
# sheet. with() evaluates each expression inside `kandidata`, so the columns
# can be referred to by name.
print("P/Net Sales")
## [1] "P/Net Sales"
with(kandidata, summary(`P/Net Sales`[Label == "Leader"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.010 0.260 0.950 1.554 2.030 11.240
print("SD of leader")
## [1] "SD of leader"
with(kandidata, sd(`P/Net Sales`[Label == "Leader"]))
## [1] 1.957683
with(kandidata, summary(`P/Net Sales`[Label == "Follower"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0200 0.6075 1.6150 3.6228 3.4750 39.7700
print("SD of follower")
## [1] "SD of follower"
with(kandidata, sd(`P/Net Sales`[Label == "Follower"]))
## [1] 6.16832
# Same leader statistics after winsorizing the leader values on their own.
print("winsorized summary of leader")
## [1] "winsorized summary of leader"
with(kandidata, summary(winsorize(`P/Net Sales`[Label == "Leader"])))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.010 0.260 0.950 1.301 2.030 3.470
with(kandidata, sd(winsorize(`P/Net Sales`[Label == "Leader"])))
## [1] 1.148614
# Leader vs. Follower comparison of P/Net Sales.
# Arguments are spelled out in full (`alternative`, TRUE/FALSE) instead of
# relying on partial matching and the reassignable T/F shortcuts.

# Welch two-sample t-test on the raw values.
t.test(lf$psales ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: lf$psales by lf$label
## t = 2.6873, df = 93.605, p-value = 0.008524
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.5400824 3.5966136
## sample estimates:
## mean in group Follower mean in group Leader
## 3.622838 1.554490

# Same test after winsorizing the pooled Leader/Follower values.
t.test(winsorize(lf$psales) ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: winsorize(lf$psales) by lf$label
## t = 2.4582, df = 116.86, p-value = 0.01543
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1316888 1.2236179
## sample estimates:
## mean in group Follower mean in group Leader
## 2.069532 1.391879

# Non-parametric counterpart: Wilcoxon rank sum test with a confidence interval
# for the location shift. `paired` is deliberately not passed: unpaired is the
# default, and newer R releases (4.4.0 onwards) reject `paired` in the formula
# interface.
wilcox.test(lf$psales ~ lf$label, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: lf$psales by lf$label
## W = 2275.5, p-value = 0.01699
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 0.08002775 1.10002130
## sample estimates:
## difference in location
## 0.500079
# Descriptive statistics of P/EBITDA per label, followed by boxplots of the
# raw and winsorized values.
print("P/EBITDA")
## [1] "P/EBITDA"
summary(kandidata$`P/EBITDA`[kandidata$Label == "Leader"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.010 0.230 0.870 1.441 1.580 8.740
# Fixed: this line used to summarise `P/Net Sales` (copy-paste slip), so the
# follower figures shown in the P/EBITDA section were those of P/Net Sales.
# The output recorded from that run is therefore not reproduced here; re-run
# the script to regenerate it.
summary(kandidata$`P/EBITDA`[kandidata$Label == "Follower"])
print("SD of leader")
## [1] "SD of leader"
sd(kandidata$`P/EBITDA`[kandidata$Label == "Leader"])
## [1] 1.92205
print("SD of follower")
## [1] "SD of follower"
sd(kandidata$`P/EBITDA`[kandidata$Label == "Follower"])
## [1] 5.158308
boxplot(lf$pebitda ~ lf$label)
boxplot(winsorize(lf$pebitda) ~ lf$label)
# Leader vs. Follower comparison of P/EBITDA.
# Arguments are spelled out in full (`alternative`, TRUE/FALSE) instead of
# relying on partial matching and the reassignable T/F shortcuts.
print("P/EBITDA")
## [1] "P/EBITDA"

# Welch two-sample t-test on the winsorized (pooled) values.
t.test(winsorize(lf$pebitda) ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: winsorize(lf$pebitda) by lf$label
## t = 2.3113, df = 110.48, p-value = 0.02267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06072153 0.79081330
## sample estimates:
## mean in group Follower mean in group Leader
## 1.517230 1.091463

# Welch two-sample t-test on the raw values.
t.test(lf$pebitda ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: lf$pebitda by lf$label
## t = 2.4489, df = 100.13, p-value = 0.01607
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.3066207 2.9235062
## sample estimates:
## mean in group Follower mean in group Leader
## 3.055676 1.440612

# Wilcoxon rank sum test with a confidence interval for the location shift.
# `paired` is deliberately not passed: unpaired is the default, and newer R
# releases (4.4.0 onwards) reject `paired` in the formula interface.
wilcox.test(lf$pebitda ~ lf$label, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: lf$pebitda by lf$label
## W = 2259.5, p-value = 0.02121
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 0.06002758 0.80992448
## sample estimates:
## difference in location
## 0.3700936
# Descriptive statistics of the `P/EPS` column per label (printed under the
# heading "P/Net Income"), followed by boxplots of the raw and winsorized
# values. with() evaluates each expression inside `kandidata`.
print("P/Net Income")
## [1] "P/Net Income"
with(kandidata, summary(`P/EPS`[Label == "Leader"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -22.9600 0.0500 0.7100 0.6163 1.3700 10.0400
with(kandidata, summary(`P/EPS`[Label == "Follower"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -235.5900 0.1825 1.0800 -2.3578 2.4625 56.4900
print("SD of leader")
## [1] "SD of leader"
with(kandidata, sd(`P/EPS`[Label == "Leader"]))
## [1] 4.273974
print("SD of follower")
## [1] "SD of follower"
with(kandidata, sd(`P/EPS`[Label == "Follower"]))
## [1] 35.97767
print("P/EPS")
## [1] "P/EPS"
boxplot(lf$peps ~ lf$label)
boxplot(winsorize(lf$peps) ~ lf$label)
# Leader vs. Follower comparison of P/EPS.
# Arguments are spelled out in full (`alternative`, TRUE/FALSE) instead of
# relying on partial matching and the reassignable T/F shortcuts.

# Welch two-sample t-test on the raw values.
t.test(lf$peps ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: lf$peps by lf$label
## t = -0.70367, df = 76.092, p-value = 0.4838
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.392107 5.443778
## sample estimates:
## mean in group Follower mean in group Leader
## -2.3578378 0.6163265

# Same test after winsorizing the pooled Leader/Follower values.
t.test(winsorize(lf$peps) ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: winsorize(lf$peps) by lf$label
## t = 1.1409, df = 110.04, p-value = 0.2564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2187701 0.8123802
## sample estimates:
## mean in group Follower mean in group Leader
## 1.1411206 0.8443156

# Wilcoxon rank sum test with a confidence interval for the location shift.
# `paired` is deliberately not passed: unpaired is the default, and newer R
# releases (4.4.0 onwards) reject `paired` in the formula interface.
wilcox.test(lf$peps ~ lf$label, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: lf$peps by lf$label
## W = 2090.5, p-value = 0.1524
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.1500297 0.9800526
## sample estimates:
## difference in location
## 0.3599841
# Full sample, grouped by Heatlabel (Hot/Cold). Columns are named directly in
# the data.frame() call.
# NOTE: the name `all` masks base::all() as a variable. Calls such as all(x)
# still resolve to the base function, and the name is kept because the rest of
# the script refers to it.
all <- data.frame(
  psales = kandidata$`P/Net Sales`,
  pebitda = kandidata$`P/EBITDA`,
  peps = kandidata$`P/EPS`,
  # Explicit factor: since R 4.0 data.frame() keeps strings as character, and
  # summary() would then stop reporting the per-group counts shown below.
  label = factor(kandidata$Heatlabel)
)
summary(all)
## psales pebitda peps label
## Min. : 0.010 Min. : 0.0100 Min. :-235.59000 Cold:207
## 1st Qu.: 0.530 1st Qu.: 0.5625 1st Qu.: 0.16000 Hot :123
## Median : 1.450 Median : 1.1550 Median : 0.96000
## Mean : 3.880 Mean : 2.2783 Mean : -0.03673
## 3rd Qu.: 2.967 3rd Qu.: 2.2425 3rd Qu.: 2.23000
## Max. :361.360 Max. :30.4300 Max. : 99.47000

# P/Net Sales by Hot/Cold: raw values, then winsorized values.
boxplot(all$psales ~ all$label)
boxplot(winsorize(all$psales) ~ all$label)

# Standard deviation of each multiple over the full sample.
sd(all$psales)
## [1] 20.23776
sd(all$pebitda)
## [1] 3.705528
sd(all$peps)
## [1] 19.99903
# Normality diagnostics on the full sample. For each multiple: Shapiro-Wilk
# test and normal QQ plot on the raw values, then the same on the winsorized
# values. (An earlier, identical run of the three raw-value Shapiro-Wilk tests
# was removed; the results are the ones shown below.)

# P/Net Sales
shapiro.test(all$psales)
##
## Shapiro-Wilk normality test
##
## data: all$psales
## W = 0.10671, p-value < 2.2e-16
qqnorm(all$psales)
qqline(all$psales)
shapiro.test(winsorize(all$psales))
##
## Shapiro-Wilk normality test
##
## data: winsorize(all$psales)
## W = 0.87225, p-value = 6.736e-16
qqnorm(winsorize(all$psales))
qqline(winsorize(all$psales))

# P/EBITDA
shapiro.test(all$pebitda)
##
## Shapiro-Wilk normality test
##
## data: all$pebitda
## W = 0.52959, p-value < 2.2e-16
qqnorm(all$pebitda)
qqline(all$pebitda)
shapiro.test(winsorize(all$pebitda))
##
## Shapiro-Wilk normality test
##
## data: winsorize(all$pebitda)
## W = 0.88762, p-value = 7.47e-15
qqnorm(winsorize(all$pebitda))
qqline(winsorize(all$pebitda))

# P/EPS
shapiro.test(all$peps)
##
## Shapiro-Wilk normality test
##
## data: all$peps
## W = 0.27698, p-value < 2.2e-16
qqnorm(all$peps)
qqline(all$peps)
shapiro.test(winsorize(all$peps))
##
## Shapiro-Wilk normality test
##
## data: winsorize(all$peps)
## W = 0.93461, p-value = 7.221e-11
qqnorm(winsorize(all$peps))
qqline(winsorize(all$peps))
# Hot vs. Cold comparison of P/Net Sales on the full sample.
# Arguments are spelled out in full (`alternative`, TRUE/FALSE) instead of
# relying on partial matching and the reassignable T/F shortcuts. A stray
# Markdown code fence that followed this section was removed because it is not
# valid R.

# Welch two-sample t-test on the raw values.
t.test(all$psales ~ all$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: all$psales by all$label
## t = 0.9506, df = 232.67, p-value = 0.3428
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.848827 5.296224
## sample estimates:
## mean in group Cold mean in group Hot
## 4.522560 2.798862

# Same test after winsorizing the pooled values.
t.test(winsorize(all$psales) ~ all$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: winsorize(all$psales) by all$label
## t = 0.92357, df = 257.16, p-value = 0.3566
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1835206 0.5077064
## sample estimates:
## mean in group Cold mean in group Hot
## 1.931080 1.768987

# Wilcoxon rank sum test with a confidence interval for the location shift.
# `paired` is deliberately not passed: unpaired is the default, and newer R
# releases (4.4.0 onwards) reject `paired` in the formula interface.
wilcox.test(all$psales ~ all$label, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: all$psales by all$label
## W = 13674, p-value = 0.2602
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.1299745 0.4300182
## sample estimates:
## difference in location
## 0.160041
# Hot vs. Cold comparison of P/EBITDA on the full sample: boxplots of the raw
# and winsorized values, then the tests.
# Arguments are spelled out in full (`alternative`, TRUE/FALSE) instead of
# relying on partial matching and the reassignable T/F shortcuts.
boxplot(all$pebitda ~ all$label)
boxplot(winsorize(all$pebitda) ~ all$label)

# Welch two-sample t-test on the raw values.
t.test(all$pebitda ~ all$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: all$pebitda by all$label
## t = -0.47661, df = 212.09, p-value = 0.6341
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.0966855 0.6696205
## sample estimates:
## mean in group Cold mean in group Hot
## 2.198744 2.412276

# Same test after winsorizing the pooled values.
t.test(winsorize(all$pebitda) ~ all$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: winsorize(all$pebitda) by all$label
## t = 1.0268, df = 256.12, p-value = 0.3055
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.116073 0.368973
## sample estimates:
## mean in group Cold mean in group Hot
## 1.498258 1.371808

# Wilcoxon rank sum test with a confidence interval for the location shift.
# `paired` is deliberately not passed: unpaired is the default, and newer R
# releases (4.4.0 onwards) reject `paired` in the formula interface.
wilcox.test(all$pebitda ~ all$label, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: all$pebitda by all$label
## W = 13566, p-value = 0.3188
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.1199669 0.3500238
## sample estimates:
## difference in location
## 0.1100859
# Hot vs. Cold comparison of P/EPS on the full sample: boxplots of the raw and
# winsorized values, then the tests.
# Arguments are spelled out in full (`alternative`, TRUE/FALSE) instead of
# relying on partial matching and the reassignable T/F shortcuts.
boxplot(all$peps ~ all$label)
boxplot(winsorize(all$peps) ~ all$label)

# Welch two-sample t-test on the raw values.
t.test(all$peps ~ all$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: all$peps by all$label
## t = 0.67462, df = 154.59, p-value = 0.5009
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.492851 7.115776
## sample estimates:
## mean in group Cold mean in group Hot
## 0.6384541 -1.1730081

# Same test after winsorizing the pooled values.
t.test(winsorize(all$peps) ~ all$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: winsorize(all$peps) by all$label
## t = 0.689, df = 273.54, p-value = 0.4914
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2344882 0.4869961
## sample estimates:
## mean in group Cold mean in group Hot
## 1.163640 1.037386

# Wilcoxon rank sum test with a confidence interval for the location shift.
# `paired` is deliberately not passed: unpaired is the default, and newer R
# releases (4.4.0 onwards) reject `paired` in the formula interface.
wilcox.test(all$peps ~ all$label, mu = 0, alternative = "two.sided",
            conf.int = TRUE, conf.level = 0.95, exact = FALSE, correct = TRUE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: all$peps by all$label
## W = 13180, p-value = 0.5925
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.2699961 0.4800481
## sample estimates:
## difference in location
## 0.109983
# Welch two-sample t-tests on the within-subset ranks of each multiple
# (Leader vs. Follower). `alternative` is spelled out in full instead of
# relying on partial matching of `alt`.

# Ranks of P/Net Sales.
t.test(lf$psalesrank ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: lf$psalesrank by lf$label
## t = 2.4452, df = 104.06, p-value = 0.01616
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.965462 28.412089
## sample estimates:
## mean in group Follower mean in group Leader
## 68.25000 52.56122

# Ranks of P/EBITDA.
t.test(lf$pebitdarank ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: lf$pebitdarank by lf$label
## t = 2.3421, df = 101.88, p-value = 0.02112
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.318668 27.973389
## sample estimates:
## mean in group Follower mean in group Leader
## 68.03378 52.88776

# Ranks of P/EPS.
t.test(lf$pepsrank ~ lf$label, mu = 0, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: lf$pepsrank by lf$label
## t = 1.4644, df = 108.66, p-value = 0.146
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.327502 22.154032
## sample estimates:
## mean in group Follower mean in group Leader
## 65.75000 56.33673