a. Tutorials
b. YouTube channels
a. Download the dataset
All files are available at this shared folder
For this Tutorial, download the file “FU902-dataset-01” to your local computer
Set a working directory using the command setwd(“C:/…/…/…/…”); where you have the downloaded file stored and where outputs will be exported (e.g., setwd(“C:/Users/MyPC/Documents/R/Paper01”)
Upload the download file from the working directory (for how to upload data in other formats, like txt, csv, please refer to the manuals listed above)
library(readxl)
dataset <- as.data.frame(read_excel("FU902-dataset-01.xlsx"))
attach(dataset)
head(dataset)
## Firm Year Industry Country Profit Revenue Orders ESG
## 1 1 2017 Services ESP 68761 435676 1.038 58
## 2 1 2018 Services ESP 27267 220696 0.976 39
## 3 1 2019 Services ESP 69729 255782 1.111 85
## 4 1 2020 Services ESP 87363 513659 1.055 63
## 5 1 2021 Services ESP 74637 615406 1.013 50
## 6 1 2022 Services ESP 139750 627756 1.112 85
b. Answer the questions
table(Firm)
## Firm
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
table(Year)
## Year
## 2017 2018 2019 2020 2021 2022
## 51 51 51 51 51 51
library(plm)
is.pbalanced(dataset)
## [1] TRUE
table(Country)/6
## Country
## CZE ESP NED SWE
## 10 12 18 11
table(Industry)/6
## Industry
## ICT Manufacturing Mining Services Utility
## 5 13 12 14 7
table(Country, Industry)/6
## Industry
## Country ICT Manufacturing Mining Services Utility
## CZE 1 3 3 1 2
## ESP 0 3 2 6 1
## NED 2 2 5 6 3
## SWE 2 5 2 1 1
min(Profit)
## [1] -41478
median(Profit)
## [1] 44216
max(Profit)
## [1] 195802
mean(Profit)
## [1] 53826.28
sd(Profit)
## [1] 43677.19
aggregate(Profit ~ Country, FUN = max)
## Country Profit
## 1 CZE 161901
## 2 ESP 195802
## 3 NED 177800
## 4 SWE 181660
aggregate(Profit ~ Country, FUN = min)
## Country Profit
## 1 CZE -20361
## 2 ESP -2351
## 3 NED -17488
## 4 SWE -41478
aggregate(Profit ~ Country, FUN = mean)
## Country Profit
## 1 CZE 46665.45
## 2 ESP 59288.17
## 3 NED 55115.19
## 4 SWE 52268.59
aggregate(Profit ~ Country, FUN = median)
## Country Profit
## 1 CZE 34165.5
## 2 ESP 50939.5
## 3 NED 44900.0
## 4 SWE 41557.5
aggregate(Profit ~ Country, FUN = sd)
## Country Profit
## 1 CZE 40407.52
## 2 ESP 41743.17
## 3 NED 44107.13
## 4 SWE 47728.59
aggregate(Profit ~ Industry, FUN = max)
## Industry Profit
## 1 ICT 159744
## 2 Manufacturing 181660
## 3 Mining 177800
## 4 Services 195802
## 5 Utility 179293
aggregate(Profit ~ Industry, FUN = min)
## Industry Profit
## 1 ICT -17488
## 2 Manufacturing -41478
## 3 Mining -20361
## 4 Services -14025
## 5 Utility -9245
aggregate(Profit ~ Industry, FUN = mean)
## Industry Profit
## 1 ICT 46424.43
## 2 Manufacturing 47364.54
## 3 Mining 59115.46
## 4 Services 57867.13
## 5 Utility 53964.83
aggregate(Profit ~ Industry, FUN = median)
## Industry Profit
## 1 ICT 32678.5
## 2 Manufacturing 36107.5
## 3 Mining 48511.0
## 4 Services 53417.5
## 5 Utility 43756.5
aggregate(Profit ~ Industry, FUN = sd)
## Industry Profit
## 1 ICT 42346.68
## 2 Manufacturing 42649.50
## 3 Mining 48335.25
## 4 Services 42439.02
## 5 Utility 40160.99
library(psych)
describeBy(dataset, Country)
##
## Descriptive statistics by group
## group: CZE
## vars n mean sd median trimmed mad min
## Firm 1 60 38.80 12.10 42.00 41.75 5.19 5.00
## Year 2 60 2019.50 1.72 2019.50 2019.50 2.22 2017.00
## Industry 3 60 3.00 1.28 3.00 3.00 1.48 1.00
## Country 4 60 1.00 0.00 1.00 1.00 0.00 1.00
## Profit 5 60 46665.45 40407.52 34165.50 42318.92 27075.24 -20361.00
## Revenue 6 60 341549.33 153517.89 316265.00 333537.83 177058.76 118035.00
## Orders 7 60 0.99 0.03 0.99 0.99 0.04 0.92
## ESG 8 60 48.42 16.42 43.50 47.77 12.60 15.00
## max range skew kurtosis se
## Firm 49.00 44.00 -2.06 3.18 1.56
## Year 2022.00 5.00 0.00 -1.33 0.22
## Industry 5.00 4.00 0.29 -1.04 0.16
## Country 1.00 0.00 NaN NaN 0.00
## Profit 161901.00 182262.00 1.02 0.71 5216.59
## Revenue 727096.00 609061.00 0.43 -0.79 19819.07
## Orders 1.07 0.15 -0.03 -0.34 0.00
## ESG 85.00 70.00 0.36 -0.34 2.12
## ------------------------------------------------------------
## group: ESP
## vars n mean sd median trimmed mad min
## Firm 1 72 25.67 13.65 29.0 26.28 14.83 1.00
## Year 2 72 2019.50 1.72 2019.5 2019.50 2.22 2017.00
## Industry 3 72 3.42 0.96 4.0 3.41 0.74 2.00
## Country 4 72 2.00 0.00 2.0 2.00 0.00 2.00
## Profit 5 72 59288.17 41743.17 50939.5 54950.24 39711.44 -2351.00
## Revenue 6 72 391353.92 171825.62 402012.5 387911.74 230438.29 111562.00
## Orders 7 72 1.01 0.05 1.0 1.00 0.05 0.92
## ESG 8 72 50.86 14.77 49.0 49.95 12.60 17.00
## max range skew kurtosis se
## Firm 45.00 44.00 -0.38 -1.08 1.61
## Year 2022.00 5.00 0.00 -1.32 0.20
## Industry 5.00 3.00 -0.33 -1.13 0.11
## Country 2.00 0.00 NaN NaN 0.00
## Profit 195802.00 198153.00 0.98 0.89 4919.48
## Revenue 717552.00 605990.00 0.10 -1.27 20249.84
## Orders 1.21 0.29 0.96 1.10 0.01
## ESG 90.00 73.00 0.51 0.09 1.74
## ------------------------------------------------------------
## group: NED
## vars n mean sd median trimmed mad min
## Firm 1 108 21.50 10.91 21.50 20.86 10.38 3.00
## Year 2 108 2019.50 1.72 2019.50 2019.50 2.22 2017.00
## Industry 3 108 3.33 1.21 3.50 3.41 0.74 1.00
## Country 4 108 3.00 0.00 3.00 3.00 0.00 3.00
## Profit 5 108 55115.19 44107.13 44900.00 51342.93 40479.43 -17488.00
## Revenue 6 108 381606.18 171534.37 365409.50 379468.38 217206.09 107043.00
## Orders 7 108 1.02 0.06 1.02 1.02 0.06 0.87
## ESG 8 108 48.66 15.90 47.00 47.86 13.34 16.00
## max range skew kurtosis se
## Firm 48.00 45.00 0.50 0.05 1.05
## Year 2022.00 5.00 0.00 -1.30 0.17
## Industry 5.00 4.00 -0.46 -0.65 0.12
## Country 3.00 0.00 NaN NaN 0.00
## Profit 177800.00 195288.00 0.78 -0.05 4244.21
## Revenue 741595.00 634552.00 0.12 -1.23 16505.90
## Orders 1.14 0.26 0.08 -0.57 0.01
## ESG 95.00 79.00 0.46 0.20 1.53
## ------------------------------------------------------------
## group: SWE
## vars n mean sd median trimmed mad min
## Firm 1 66 22.09 16.95 16.0 21.11 17.79 2.00
## Year 2 66 2019.50 1.72 2019.5 2019.50 2.22 2017.00
## Industry 3 66 2.45 1.17 2.0 2.33 1.48 1.00
## Country 4 66 4.00 0.00 4.0 4.00 0.00 4.00
## Profit 5 66 52268.59 47728.59 41557.5 47235.26 44079.92 -41478.00
## Revenue 6 66 377650.41 160171.43 374480.5 375495.63 198617.25 122046.00
## Orders 7 66 1.01 0.06 1.0 1.00 0.05 0.89
## ESG 8 66 48.65 16.92 46.0 47.59 11.86 16.00
## max range skew kurtosis se
## Firm 51.00 49.00 0.56 -1.13 2.09
## Year 2022.00 5.00 0.00 -1.32 0.21
## Industry 5.00 4.00 0.80 -0.19 0.14
## Country 4.00 0.00 NaN NaN 0.00
## Profit 181660.00 223138.00 0.91 0.44 5874.98
## Revenue 704345.00 582299.00 0.10 -1.27 19715.74
## Orders 1.16 0.27 0.79 0.14 0.01
## ESG 97.00 81.00 0.67 0.40 2.08
plot(as.factor(Country), Profit, main = "Boxplot for Profit", xlab="", ylab="")
library(dplyr)
CZE <- dataset %>% filter(Country == "CZE")
ESP <- dataset %>% filter(Country == "ESP")
mean(ESP$Profit) - mean(CZE$Profit)
## [1] 12622.72
A. Check whether data is normally distributed
shapiro.test(CZE$Profit)
##
## Shapiro-Wilk normality test
##
## data: CZE$Profit
## W = 0.91879, p-value = 0.0006855
shapiro.test(ESP$Profit)
##
## Shapiro-Wilk normality test
##
## data: ESP$Profit
## W = 0.93388, p-value = 0.0009448
B. Check the homogeneity of variance
ProfitCZE <- CZE$Profit
ProfitESP <- ESP$Profit
stacked <- stack(list(ProfitCZE = ProfitCZE, ProfitESP = ProfitESP))
library(car)
leveneTest(values~ind,data=stacked,center=mean)
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 0.1445 0.7045
## 130
fligner.test(values~ind, data=stacked)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: values by ind
## Fligner-Killeen:med chi-squared = 0.67492, df = 1, p-value = 0.4113
C. Run the t-test
t.test(ProfitCZE, ProfitESP, var.equal = TRUE)
##
## Two Sample t-test
##
## data: ProfitCZE and ProfitESP
## t = -1.7552, df = 130, p-value = 0.08159
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -26850.726 1605.293
## sample estimates:
## mean of x mean of y
## 46665.45 59288.17
D. Alternatively, a nonparametric Mann Whitney U Test can be applied
wilcox.test(ProfitCZE, ProfitESP, exact = FALSE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: ProfitCZE and ProfitESP
## W = 1747, p-value = 0.05941
## alternative hypothesis: true location shift is not equal to 0
please note that the Mann Whitney U test is a non-parametric test, not requiring the normality and variance tests (as it exploits directly the the original data)
please also note that the M-W U test is less efficient than the parametric t-test which may lead to a different conclusions (this is not the case for this dataset; however, note that p-value of the M-W U test is much closer to 0.05 than p-value of the t-test)
# Filter data first
CZEP2019 <- CZE %>% filter(Year == "2019")
CZEP2020 <- CZE %>% filter(Year == "2020")
# Calculate the difference (for illustrative purposes)
mean(CZEP2020$Profit) - mean(CZEP2019$Profit)
## [1] -21779.3
# Run the test (here only M-W U test for a paired sample)
wilcox.test(CZEP2019$Profit, CZEP2020$Profit, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: CZEP2019$Profit and CZEP2020$Profit
## V = 38, p-value = 0.3223
## alternative hypothesis: true location shift is not equal to 0
library(dplyr)
library(ggplot2)
# A graphical illustration using boxplots for each country and year
NED2019 <- ggplot(dataset %>% filter(Country == "NED", Year == 2019),
aes(Industry, Profit, fill = Industry)) +
geom_bar(stat = "summary", fun.y = "mean") +
labs(title = "NED2019", x = "") +
scale_fill_brewer(palette = "Dark2") +
ylim(0, 100000) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
NED2020 <- ggplot(dataset %>% filter(Country == "NED", Year == 2020),
aes(Industry, Profit, fill = Industry)) +
geom_bar(stat = "summary", fun.y = "mean") +
labs(title = "NED2020", x = "") +
scale_fill_brewer(palette = "Dark2") +
ylim(0, 100000) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
CZE2019 <- ggplot(dataset %>% filter(Country == "CZE", Year == 2019),
aes(Industry, Profit, fill = Industry)) +
geom_bar(stat = "summary", fun.y = "mean") +
labs(title = "CZE2019", x = "") +
scale_fill_brewer(palette = "Set2") +
ylim(0, 100000) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
CZE2020 <- ggplot(dataset %>% filter(Country == "CZE", Year == 2020),
aes(Industry, Profit, fill = Industry)) +
geom_bar(stat = "summary", fun.y = "mean") +
labs(title = "CZE2020", x = "") +
scale_fill_brewer(palette = "Set2") +
ylim(0, 100000) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
# Plotting all graphs together
library(patchwork)
(NED2019 + NED2020) / (CZE2019 + CZE2020)
Well, this is a good research question.
To answer it, a researcher should:
clearly identify the contribution of examining this question
review the relevant literature and develop a theory explaining the determinants of the heterogeneity of the impacts
formulate variables for the empirical model
to test the theory and compare the results with expectations and other relevant studies
The following section provides the answers to the questions, but does not provide the code. It is up to you to develop it. Please note that there may be more ways to find the answer.
## [1] 107043
## [1] 160171.4
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 108 48.66 15.9 47 47.86 13.34 16 95 79 0.46 0.2 1.53
## [1] 79
Visualise the development of ESG in SWE over time
graphically
What is the difference between 2020 and 2019 Profit in ESP?
## [1] 20768.58
##
## Wilcoxon signed rank exact test
##
## data: ESPP2019$Profit and ESPP2020$Profit
## V = 21, p-value = 0.1763
## alternative hypothesis: true location shift is not equal to 0
What is the relation between Orders and Profit (with
a cross-country and cross-industry breakdown)?
How strong is the association between Orders and
Profit (separately for each country and industry)?