V tomto cvičení pracujeme s pripravenými dátami z predchádzajúcej úlohy. Cieľom je vytvoriť základné grafy, tabuľky opisných štatistík, vykonať jednoduché testovanie hypotéz a odhadnúť lineárny regresný model.
# Load the raw data set. The file is semicolon-delimited and uses a decimal
# point, so read.csv() with an explicit separator is used (header = TRUE and
# dec = "." are its defaults). Empty fields and the literal "NA" are read as
# missing values; text columns stay character instead of becoming factors.
udaje1 <- read.csv(
  file = "cviko4/udaje/ChybnaDatabaza.csv",
  sep = ";",
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
)

# Preview the first rows to confirm the import worked.
head(udaje1)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Remember the original column names so they can be shown next to the
# shortened ones.
old_names <- names(udaje1)

# Shorten every column name with abbreviate() (default minimum length),
# then make sure no two shortened names collide.
udaje1 <- rename_with(udaje1, function(nm) abbreviate(nm, strict = FALSE))
udaje1 <- setNames(udaje1, make.unique(names(udaje1)))

# Side-by-side mapping from original to shortened column names.
comparison <- data.frame(
  Original_Name = old_names,
  Shortened_Name = names(udaje1)
)
print(comparison)
## Original_Name Shortened_Name
## 1 YEARS YEAR
## 2 COMPANIES COMP
## 3 EXCHANGE.SECTOR EXCH
## 4 PRIMARY.BUSINESS PRIM
## 5 TOBIN.Q TOBI
## 6 MARKET.CAPITALIZATION MARK
## 7 RETURN.ON.ASSETS RETU
## 8 DEBT.TO.ASSET DEBT
## 9 FIRM.SIZE FIRM
## 10 SOCIAL.DISCLOSURE.INDEX SOCI
## 11 ENVIRONMENTAL.DISCLOSURE.INDEX ENVI
## 12 GOVERNANCE.DISCLOSURE.INDEX GOVE
## 13 ESG.INDEX ESG.
# Cross-section for 2013: keep only that year's rows and the four
# variables used in the scatter plot and the regression below.
udaje.2013 <- select(
  filter(udaje1, YEAR == 2013),
  RETU, ESG., DEBT, FIRM
)

# Preview the resulting subset.
head(udaje.2013)
library(ggplot2)
# Scatter plot of the ESG index (y) against firm size (x) for the 2013 subset.
ggplot(data = udaje.2013, mapping = aes(x = FIRM, y = ESG.)) +
  geom_point() +
  labs(
    x = "Veľkosť firmy",
    y = "ESG index",
    title = "ESG index v závislosti od veľkosti firmy (2013)"
  ) +
  theme_minimal()
# Box plots of the ESG index, one box per year. Rows where either the year
# or the ESG index is missing are dropped before plotting; YEAR is converted
# to a factor so that each year gets its own box.
ggplot(
  data = filter(udaje1, !(is.na(YEAR) | is.na(ESG.))),
  mapping = aes(x = factor(YEAR), y = ESG.)
) +
  geom_boxplot(colour = "darkblue", fill = "lightblue") +
  labs(
    x = "Rok",
    y = "ESG index",
    title = "ESG index podľa rokov"
  ) +
  theme_minimal()
library(knitr)
# Descriptive statistics of the ESG index for each year 2013-2016.
# Note: `n` counts every row of the year, including rows where ESG. is
# missing, whereas all other statistics are computed with NA values removed.
esg.stats <- summarise(
  group_by(
    filter(udaje1, YEAR %in% 2013:2016),
    YEAR
  ),
  n = n(),
  mean = mean(ESG., na.rm = TRUE),
  sd = sd(ESG., na.rm = TRUE),
  min = min(ESG., na.rm = TRUE),
  q25 = quantile(ESG., probs = 0.25, na.rm = TRUE),
  median = median(ESG., na.rm = TRUE),
  q75 = quantile(ESG., probs = 0.75, na.rm = TRUE),
  max = max(ESG., na.rm = TRUE),
  .groups = "drop"
)

# Render the summary as a table rounded to two decimals.
kable(
  x = esg.stats,
  digits = 2,
  caption = "Základné štatistiky ESG indexu (2013–2016)"
)
| YEAR | n | mean | sd | min | q25 | median | q75 | max |
|---|---|---|---|---|---|---|---|---|
| 2013 | 76 | 0.23 | 0.10 | 0 | 0.15 | 0.22 | 0.29 | 0.58 |
| 2014 | 76 | 0.24 | 0.11 | 0 | 0.16 | 0.24 | 0.31 | 0.57 |
| 2015 | 76 | 0.25 | 0.12 | 0 | 0.17 | 0.25 | 0.31 | 0.65 |
| 2016 | 75 | 0.26 | 0.12 | 0 | 0.17 | 0.27 | 0.32 | 0.64 |
# Two-sample t-test comparing the mean ESG index in 2013 with that in 2015.
# t.test() is called with its defaults, i.e. a two-sided test that does not
# assume equal variances (Welch). The two year-subsets are treated as
# independent samples.
# NOTE(review): if the same companies appear in both years, a paired test may
# be more appropriate - confirm against the study design.
t.test.result <- t.test(
# ESG values of rows whose YEAR equals 2013 (first sample, "x")
udaje1$ESG.[udaje1$YEAR == 2013],
# ESG values of rows whose YEAR equals 2015 (second sample, "y")
udaje1$ESG.[udaje1$YEAR == 2015]
)
# Print the test result; the "data:" line echoes the argument expressions above.
t.test.result
##
## Welch Two Sample t-test
##
## data: udaje1$ESG.[udaje1$YEAR == 2013] and udaje1$ESG.[udaje1$YEAR == 2015]
## t = -1.3118, df = 146.15, p-value = 0.1916
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05974418 0.01207401
## sample estimates:
## mean of x mean of y
## 0.2276316 0.2514667
# One-way ANOVA: does the mean ESG index differ between years?
# YEAR is wrapped in factor() so it enters the model as a grouping variable
# rather than as a numeric trend; all years present in udaje1 are included.
anova.result <- aov(ESG. ~ factor(YEAR), data = udaje1)
# Print the ANOVA table (degrees of freedom, sums of squares, F test).
summary(anova.result)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(YEAR) 9 0.636 0.07064 3.381 0.000444 ***
## Residuals 748 15.626 0.02089
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
# Linear regression on the 2013 subset: ESG index (ESG.) explained by
# RETU, FIRM and DEBT (the shortened names of RETURN.ON.ASSETS, FIRM.SIZE
# and DEBT.TO.ASSET from the renaming step).
model <- lm(ESG. ~ RETU + FIRM + DEBT, data = udaje.2013)
# Print coefficient estimates, standard errors, t tests and fit statistics.
summary(model)
##
## Call:
## lm(formula = ESG. ~ RETU + FIRM + DEBT, data = udaje.2013)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.182294 -0.060003 0.001775 0.051708 0.246346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.000e-01 7.981e-02 -3.759 0.000347 ***
## RETU 1.661e-04 9.086e-05 1.828 0.071807 .
## FIRM 7.832e-02 1.083e-02 7.234 4.37e-10 ***
## DEBT -5.500e-04 4.211e-04 -1.306 0.195724
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0807 on 71 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4328, Adjusted R-squared: 0.4088
## F-statistic: 18.06 on 3 and 71 DF, p-value: 8.234e-09