library(robustHD)
## Loading required package: ggplot2
## Loading required package: perry
## Loading required package: parallel
## Loading required package: robustbase
library(readxl)
library(readxl)
kandidata <- read_excel("~/Documents/kandi/kandidata.xlsx")
## Warning in strptime(x, format, tz = tz): unknown timezone 'zone/tz/2017c.
## 1.0/zoneinfo/Europe/Helsinki'

# Build the Leader-vs-Follower analysis frame from the three valuation multiples.
# The row mask is computed once with %in%, which never yields NA, so rows with a
# missing Label are dropped rather than injected as all-NA rows (which is what
# indexing with `==` combined by `|` does when Label is NA).
is_lf <- kandidata$Label %in% c("Leader", "Follower")

lf <- data.frame(
  psales  = kandidata$`P/Net Sales`[is_lf],
  pebitda = kandidata$`P/EBITDA`[is_lf],
  peps    = kandidata$`P/EPS`[is_lf],
  # Explicit factor: data.frame() stopped converting strings to factors in
  # R 4.0, and the recorded summary(lf) output reports per-level counts.
  label   = factor(kandidata$Label[is_lf])
)

# Append one rank column per valuation multiple (ranks taken over all rows of
# lf, with rank()'s default tie handling).
lf[c("psalesrank", "pebitdarank", "pepsrank")] <-
  lapply(lf[c("psales", "pebitda", "peps")], rank)

summary(lf)
##      psales          pebitda            peps               label   
##  Min.   : 0.010   Min.   : 0.010   Min.   :-235.590   Follower:74  
##  1st Qu.: 0.405   1st Qu.: 0.465   1st Qu.:   0.165   Leader  :49  
##  Median : 1.440   Median : 1.110   Median :   0.930                
##  Mean   : 2.799   Mean   : 2.412   Mean   :  -1.173                
##  3rd Qu.: 2.800   3rd Qu.: 1.920   3rd Qu.:   1.865                
##  Max.   :39.770   Max.   :26.700   Max.   :  56.490                
##    psalesrank      pebitdarank        pepsrank    
##  Min.   :  1.50   Min.   :  1.00   Min.   :  1.0  
##  1st Qu.: 31.25   1st Qu.: 31.75   1st Qu.: 31.5  
##  Median : 61.50   Median : 62.00   Median : 62.0  
##  Mean   : 62.00   Mean   : 62.00   Mean   : 62.0  
##  3rd Qu.: 92.50   3rd Qu.: 92.50   3rd Qu.: 92.5  
##  Max.   :123.00   Max.   :123.00   Max.   :123.0
shapiro.test(winsorize(lf$pebitda))
## 
##  Shapiro-Wilk normality test
## 
## data:  winsorize(lf$pebitda)
## W = 0.88857, p-value = 3.962e-08
boxplot(lf$psales ~lf$label)

boxplot(winsorize(lf$psales) ~lf$label)

print("P/Net Sales")
## [1] "P/Net Sales"
summary(kandidata$`P/Net Sales`[kandidata$Label == "Leader"])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.010   0.260   0.950   1.554   2.030  11.240
print("SD of leader")
## [1] "SD of leader"
sd(kandidata$`P/Net Sales`[kandidata$Label == "Leader"])
## [1] 1.957683
summary(kandidata$`P/Net Sales`[kandidata$Label == "Follower"])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0200  0.6075  1.6150  3.6228  3.4750 39.7700
print("SD of follower")
## [1] "SD of follower"
sd(kandidata$`P/Net Sales`[kandidata$Label == "Follower"])
## [1] 6.16832
print("winsorized summary of leader")
## [1] "winsorized summary of leader"
summary(winsorize(kandidata$`P/Net Sales`[kandidata$Label == "Leader"]))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.010   0.260   0.950   1.301   2.030   3.470
sd(winsorize(kandidata$`P/Net Sales`[kandidata$Label == "Leader"]))
## [1] 1.148614
# Welch two-sample t-test of raw P/Net Sales, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(lf$psales ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lf$psales by lf$label
## t = 2.6873, df = 93.605, p-value = 0.008524
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.5400824 3.5966136
## sample estimates:
## mean in group Follower   mean in group Leader 
##               3.622838               1.554490
# Welch two-sample t-test of winsorized P/Net Sales, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(winsorize(lf$psales) ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  winsorize(lf$psales) by lf$label
## t = 2.4582, df = 116.86, p-value = 0.01543
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1316888 1.2236179
## sample estimates:
## mean in group Follower   mean in group Leader 
##               2.069532               1.391879
# Wilcoxon rank-sum test (continuity-corrected) of P/Net Sales by label.
# TRUE/FALSE replace the reassignable T/F, `alternative` is spelled out, and
# `paired` is omitted: unpaired is the default, and the formula method in
# R >= 4.4 raises an error when `paired` is passed at all.
wilcox.test(
  lf$psales ~ lf$label,
  mu = 0, alternative = "two.sided",
  conf.int = TRUE, conf.level = 0.95,
  exact = FALSE, correct = TRUE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  lf$psales by lf$label
## W = 2275.5, p-value = 0.01699
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  0.08002775 1.10002130
## sample estimates:
## difference in location 
##               0.500079
print("P/EBITDA")
## [1] "P/EBITDA"
summary(kandidata$`P/EBITDA`[kandidata$Label == "Leader"])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.010   0.230   0.870   1.441   1.580   8.740
# Follower summary for this section's own multiple, P/EBITDA. The previous call
# repeated the P/Net Sales follower summary here (copy-paste slip), so the
# printed output that followed it was dropped; re-knit to regenerate it.
summary(kandidata$`P/EBITDA`[kandidata$Label == "Follower"])
print("SD of leader")
## [1] "SD of leader"
sd(kandidata$`P/EBITDA`[kandidata$Label == "Leader"])
## [1] 1.92205
print("SD of follower")
## [1] "SD of follower"
sd(kandidata$`P/EBITDA`[kandidata$Label == "Follower"])
## [1] 5.158308
boxplot(lf$pebitda ~lf$label)

boxplot(winsorize(lf$pebitda) ~lf$label)

print("P/EBITDA")
## [1] "P/EBITDA"
# Welch two-sample t-test of winsorized P/EBITDA, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(winsorize(lf$pebitda) ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  winsorize(lf$pebitda) by lf$label
## t = 2.3113, df = 110.48, p-value = 0.02267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.06072153 0.79081330
## sample estimates:
## mean in group Follower   mean in group Leader 
##               1.517230               1.091463
# Welch two-sample t-test of raw P/EBITDA, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(lf$pebitda ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lf$pebitda by lf$label
## t = 2.4489, df = 100.13, p-value = 0.01607
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.3066207 2.9235062
## sample estimates:
## mean in group Follower   mean in group Leader 
##               3.055676               1.440612
# Wilcoxon rank-sum test (continuity-corrected) of P/EBITDA by label.
# TRUE/FALSE replace the reassignable T/F, `alternative` is spelled out, and
# `paired` is omitted: unpaired is the default, and the formula method in
# R >= 4.4 raises an error when `paired` is passed at all.
wilcox.test(
  lf$pebitda ~ lf$label,
  mu = 0, alternative = "two.sided",
  conf.int = TRUE, conf.level = 0.95,
  exact = FALSE, correct = TRUE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  lf$pebitda by lf$label
## W = 2259.5, p-value = 0.02121
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  0.06002758 0.80992448
## sample estimates:
## difference in location 
##              0.3700936
print("P/Net Income")
## [1] "P/Net Income"
summary(kandidata$`P/EPS`[kandidata$Label == "Leader"])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -22.9600   0.0500   0.7100   0.6163   1.3700  10.0400
summary(kandidata$`P/EPS`[kandidata$Label == "Follower"])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -235.5900    0.1825    1.0800   -2.3578    2.4625   56.4900
print("SD of leader")
## [1] "SD of leader"
sd(kandidata$`P/EPS`[kandidata$Label == "Leader"])
## [1] 4.273974
print("SD of follower")
## [1] "SD of follower"
sd(kandidata$`P/EPS`[kandidata$Label == "Follower"])
## [1] 35.97767
print("P/EPS")
## [1] "P/EPS"
boxplot(lf$peps ~lf$label)

boxplot(winsorize(lf$peps) ~lf$label)

# Welch two-sample t-test of raw P/EPS, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(lf$peps ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lf$peps by lf$label
## t = -0.70367, df = 76.092, p-value = 0.4838
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.392107   5.443778
## sample estimates:
## mean in group Follower   mean in group Leader 
##             -2.3578378              0.6163265
# Welch two-sample t-test of winsorized P/EPS, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(winsorize(lf$peps) ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  winsorize(lf$peps) by lf$label
## t = 1.1409, df = 110.04, p-value = 0.2564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2187701  0.8123802
## sample estimates:
## mean in group Follower   mean in group Leader 
##              1.1411206              0.8443156
# Wilcoxon rank-sum test (continuity-corrected) of P/EPS by label.
# TRUE/FALSE replace the reassignable T/F, `alternative` is spelled out, and
# `paired` is omitted: unpaired is the default, and the formula method in
# R >= 4.4 raises an error when `paired` is passed at all.
wilcox.test(
  lf$peps ~ lf$label,
  mu = 0, alternative = "two.sided",
  conf.int = TRUE, conf.level = 0.95,
  exact = FALSE, correct = TRUE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  lf$peps by lf$label
## W = 2090.5, p-value = 0.1524
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.1500297  0.9800526
## sample estimates:
## difference in location 
##              0.3599841
# Full-sample analysis frame: the three valuation multiples plus the heat label
# (Heatlabel) for every row of kandidata. Columns are named at construction
# instead of being renamed by position afterwards.
# NOTE: the variable name `all` masks base::all as a value; calls like all(x)
# still resolve to the function. The name is kept because later statements
# reference all$... .
all <- data.frame(
  psales  = kandidata$`P/Net Sales`,
  pebitda = kandidata$`P/EBITDA`,
  peps    = kandidata$`P/EPS`,
  # Explicit factor: data.frame() stopped converting strings to factors in
  # R 4.0, and the recorded summary(all) output reports per-level counts.
  label   = factor(kandidata$Heatlabel)
)

summary(all)
##      psales           pebitda             peps             label    
##  Min.   :  0.010   Min.   : 0.0100   Min.   :-235.59000   Cold:207  
##  1st Qu.:  0.530   1st Qu.: 0.5625   1st Qu.:   0.16000   Hot :123  
##  Median :  1.450   Median : 1.1550   Median :   0.96000             
##  Mean   :  3.880   Mean   : 2.2783   Mean   :  -0.03673             
##  3rd Qu.:  2.967   3rd Qu.: 2.2425   3rd Qu.:   2.23000             
##  Max.   :361.360   Max.   :30.4300   Max.   :  99.47000
boxplot(all$psales ~ all$label)

boxplot(winsorize(all$psales) ~ all$label)

sd(all$psales)
## [1] 20.23776
sd(all$pebitda)
## [1] 3.705528
sd(all$peps)
## [1] 19.99903

# Normality checks (Normalitet)

shapiro.test(all$psales)
## 
##  Shapiro-Wilk normality test
## 
## data:  all$psales
## W = 0.10671, p-value < 2.2e-16
shapiro.test(all$pebitda)
## 
##  Shapiro-Wilk normality test
## 
## data:  all$pebitda
## W = 0.52959, p-value < 2.2e-16
shapiro.test(all$peps)
## 
##  Shapiro-Wilk normality test
## 
## data:  all$peps
## W = 0.27698, p-value < 2.2e-16
shapiro.test(all$psales)
## 
##  Shapiro-Wilk normality test
## 
## data:  all$psales
## W = 0.10671, p-value < 2.2e-16
qqnorm(all$psales)
qqline(all$psales)

shapiro.test(winsorize(all$psales))
## 
##  Shapiro-Wilk normality test
## 
## data:  winsorize(all$psales)
## W = 0.87225, p-value = 6.736e-16
qqnorm(winsorize(all$psales))
qqline(winsorize(all$psales))

# P/EBITDA

shapiro.test(all$pebitda)
## 
##  Shapiro-Wilk normality test
## 
## data:  all$pebitda
## W = 0.52959, p-value < 2.2e-16
qqnorm(all$pebitda)
qqline(all$pebitda)

shapiro.test(winsorize(all$pebitda))
## 
##  Shapiro-Wilk normality test
## 
## data:  winsorize(all$pebitda)
## W = 0.88762, p-value = 7.47e-15
qqnorm(winsorize(all$pebitda))
qqline(winsorize(all$pebitda))

# P/Net Income

shapiro.test(all$peps)
## 
##  Shapiro-Wilk normality test
## 
## data:  all$peps
## W = 0.27698, p-value < 2.2e-16
qqnorm(all$peps)
qqline(all$peps)

shapiro.test(winsorize(all$peps))
## 
##  Shapiro-Wilk normality test
## 
## data:  winsorize(all$peps)
## W = 0.93461, p-value = 7.221e-11
qqnorm(winsorize(all$peps))
qqline(winsorize(all$peps))

# Welch two-sample t-test of raw P/Net Sales, Cold vs. Hot.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(all$psales ~ all$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  all$psales by all$label
## t = 0.9506, df = 232.67, p-value = 0.3428
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.848827  5.296224
## sample estimates:
## mean in group Cold  mean in group Hot 
##           4.522560           2.798862
# Welch two-sample t-test of winsorized P/Net Sales, Cold vs. Hot.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(winsorize(all$psales) ~ all$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  winsorize(all$psales) by all$label
## t = 0.92357, df = 257.16, p-value = 0.3566
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1835206  0.5077064
## sample estimates:
## mean in group Cold  mean in group Hot 
##           1.931080           1.768987
# Wilcoxon rank-sum test (continuity-corrected) of P/Net Sales, Cold vs. Hot.
# TRUE/FALSE replace the reassignable T/F, `alternative` is spelled out, and
# `paired` is omitted: unpaired is the default, and the formula method in
# R >= 4.4 raises an error when `paired` is passed at all.
wilcox.test(
  all$psales ~ all$label,
  mu = 0, alternative = "two.sided",
  conf.int = TRUE, conf.level = 0.95,
  exact = FALSE, correct = TRUE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  all$psales by all$label
## W = 13674, p-value = 0.2602
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.1299745  0.4300182
## sample estimates:
## difference in location 
##               0.160041

# P/EBITDA: Cold vs. Hot

boxplot(all$pebitda ~ all$label)

boxplot(winsorize(all$pebitda) ~ all$label)

# Welch two-sample t-test of raw P/EBITDA, Cold vs. Hot.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(all$pebitda ~ all$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  all$pebitda by all$label
## t = -0.47661, df = 212.09, p-value = 0.6341
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.0966855  0.6696205
## sample estimates:
## mean in group Cold  mean in group Hot 
##           2.198744           2.412276
# Welch two-sample t-test of winsorized P/EBITDA, Cold vs. Hot.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(winsorize(all$pebitda) ~ all$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  winsorize(all$pebitda) by all$label
## t = 1.0268, df = 256.12, p-value = 0.3055
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.116073  0.368973
## sample estimates:
## mean in group Cold  mean in group Hot 
##           1.498258           1.371808
# Wilcoxon rank-sum test (continuity-corrected) of P/EBITDA, Cold vs. Hot.
# TRUE/FALSE replace the reassignable T/F, `alternative` is spelled out, and
# `paired` is omitted: unpaired is the default, and the formula method in
# R >= 4.4 raises an error when `paired` is passed at all.
wilcox.test(
  all$pebitda ~ all$label,
  mu = 0, alternative = "two.sided",
  conf.int = TRUE, conf.level = 0.95,
  exact = FALSE, correct = TRUE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  all$pebitda by all$label
## W = 13566, p-value = 0.3188
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.1199669  0.3500238
## sample estimates:
## difference in location 
##              0.1100859
boxplot(all$peps ~ all$label)

boxplot(winsorize(all$peps) ~ all$label)

# Welch two-sample t-test of raw P/EPS, Cold vs. Hot.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(all$peps ~ all$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  all$peps by all$label
## t = 0.67462, df = 154.59, p-value = 0.5009
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.492851  7.115776
## sample estimates:
## mean in group Cold  mean in group Hot 
##          0.6384541         -1.1730081
# Welch two-sample t-test of winsorized P/EPS, Cold vs. Hot.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(winsorize(all$peps) ~ all$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  winsorize(all$peps) by all$label
## t = 0.689, df = 273.54, p-value = 0.4914
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2344882  0.4869961
## sample estimates:
## mean in group Cold  mean in group Hot 
##           1.163640           1.037386
# Wilcoxon rank-sum test (continuity-corrected) of P/EPS, Cold vs. Hot.
# TRUE/FALSE replace the reassignable T/F, `alternative` is spelled out, and
# `paired` is omitted: unpaired is the default, and the formula method in
# R >= 4.4 raises an error when `paired` is passed at all.
wilcox.test(
  all$peps ~ all$label,
  mu = 0, alternative = "two.sided",
  conf.int = TRUE, conf.level = 0.95,
  exact = FALSE, correct = TRUE
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  all$peps by all$label
## W = 13180, p-value = 0.5925
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.2699961  0.4800481
## sample estimates:
## difference in location 
##               0.109983
# Welch two-sample t-test on the ranks of P/Net Sales, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(lf$psalesrank ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lf$psalesrank by lf$label
## t = 2.4452, df = 104.06, p-value = 0.01616
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   2.965462 28.412089
## sample estimates:
## mean in group Follower   mean in group Leader 
##               68.25000               52.56122
# Welch two-sample t-test on the ranks of P/EBITDA, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(lf$pebitdarank ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lf$pebitdarank by lf$label
## t = 2.3421, df = 101.88, p-value = 0.02112
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   2.318668 27.973389
## sample estimates:
## mean in group Follower   mean in group Leader 
##               68.03378               52.88776
# Welch two-sample t-test on the ranks of P/EPS, Follower vs. Leader.
# `alternative` is spelled out instead of relying on partial matching of `alt`.
t.test(lf$pepsrank ~ lf$label, mu = 0, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lf$pepsrank by lf$label
## t = 1.4644, df = 108.66, p-value = 0.146
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.327502 22.154032
## sample estimates:
## mean in group Follower   mean in group Leader 
##               65.75000               56.33673