Lab 2

Author

Vladyslava Bondarenko

# Read the 2021 happiness data set (the path is specific to the author's
# machine) and keep only the four columns analysed in this lab.
happiness <- read.csv('C:/Users/User/Desktop/study/2021.csv')
my_hp <- happiness[
  ,
  c(
    'Ladder.score',
    'Logged.GDP.per.capita',
    'Freedom.to.make.life.choices',
    'Generosity'
  ),
  drop = FALSE
]

Матрична діаграма розсіювання

# Scatterplot matrix of every pair of the selected columns.
plot(my_hp)

# Ladder score against logged GDP per capita, with the fitted linear
# regression line drawn on top.
plot(my_hp$Ladder.score ~ my_hp$Logged.GDP.per.capita, col='brown2')
abline(lm(my_hp$Ladder.score ~ my_hp$Logged.GDP.per.capita))

# Ladder score against freedom to make life choices.
plot(my_hp$Ladder.score ~ my_hp$Freedom.to.make.life.choices)


# Lower-triangle panel for ggpairs(): scatter plot with a fitted linear trend
# and its confidence band.
#
# data:    data frame supplied by ggpairs() for the panel.
# mapping: aesthetic mapping supplied by ggpairs() for the panel.
# Returns the ggplot object for the panel.
plotfuncLow <- function(data, mapping) {
  ggplot(data = data, mapping = mapping) +
    geom_point(shape = 21, color = "black", fill = "grey50") +
    # Spell out the formula geom_smooth() would otherwise choose on its own, so
    # the "`geom_smooth()` using formula 'y ~ x'" message is not printed once
    # per panel.
    geom_smooth(
      method = "lm", formula = y ~ x,
      color = "red4", fill = "red2", alpha = 0.3
    ) +
    theme_bw()
}

# Diagonal panel for ggpairs(): density curve of a single variable.
#
# data:    data frame supplied by ggpairs() for the panel.
# mapping: aesthetic mapping supplied by ggpairs() for the panel.
# Returns the ggplot object for the panel.
plotfuncmid <- function(data, mapping) {
  density_layer <- geom_density(alpha = 0.5, color = "black", fill = "red")
  ggplot(data = data, mapping = mapping) + density_layer + theme_bw()
}

library(GGally)

Warning: package 'GGally' was built under R version 4.2.1

Loading required package: ggplot2

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

# Pairs plot of the selected columns: the custom scatter + linear-fit panel
# below the diagonal and the custom density panel on the diagonal.
ggpairs(
  my_hp,
  lower = list(continuous = plotfuncLow),
  diag = list(continuous = plotfuncmid)
)

`geom_smooth()` using formula 'y ~ x'

`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'

Кореляційна матриця

# Print the matrix of pairwise correlation coefficients of the selected columns.
cor(my_hp)

                             Ladder.score Logged.GDP.per.capita
Ladder.score                   1.00000000             0.7897597
Logged.GDP.per.capita          0.78975970             1.0000000
Freedom.to.make.life.choices   0.60775307             0.4323235
Generosity                    -0.01779928            -0.1992864
                             Freedom.to.make.life.choices  Generosity
Ladder.score                                    0.6077531 -0.01779928
Logged.GDP.per.capita                           0.4323235 -0.19928640
Freedom.to.make.life.choices                    1.0000000  0.16943737
Generosity                                      0.1694374  1.00000000

# Store the correlation matrix so the plotting calls below can reuse it.
m_cor <- cor(my_hp)
library(corrplot)

Warning: package 'corrplot' was built under R version 4.2.1

corrplot 0.92 loaded

# Upper triangle only, variables ordered by hierarchical clustering, drawn in
# black and white on a light blue background.
corrplot(
  m_cor,
  type = "upper",
  order = "hclust",
  col = c("black", "white"),
  bg = "lightblue"
)

# Same ordering, with the coefficients printed as numbers.
corrplot(m_cor, method = "number", order = "hclust")

library(qgraph)

Warning: package 'qgraph' was built under R version 4.2.1

# Draw the correlation matrix with qgraph (presumably as a network of the
# variables; confirm against the rendered figure).
qgraph(m_cor)

library("ggpubr")

Warning: package 'ggpubr' was built under R version 4.2.1

# Q-Q plot of the ladder score as a visual normality check.
ggqqplot(my_hp$Ladder.score, ylab = "ladder score")

# Shapiro-Wilk normality test for the ladder score.
shapiro.test(my_hp$Ladder.score)


    Shapiro-Wilk normality test

data:  my_hp$Ladder.score
W = 0.99125, p-value = 0.4893

# Shapiro-Wilk normality test for logged GDP per capita.
shapiro.test(my_hp$Logged.GDP.per.capita)


    Shapiro-Wilk normality test

data:  my_hp$Logged.GDP.per.capita
W = 0.96503, p-value = 0.0007689

# Shapiro-Wilk normality test for freedom to make life choices.
shapiro.test(my_hp$Freedom.to.make.life.choices)


    Shapiro-Wilk normality test

data:  my_hp$Freedom.to.make.life.choices
W = 0.95454, p-value = 8.432e-05

# Shapiro-Wilk normality test for generosity.
shapiro.test(my_hp$Generosity)


    Shapiro-Wilk normality test

data:  my_hp$Generosity
W = 0.94303, p-value = 9.617e-06

#If the p-value is < 5%, then the correlation between x and y is significant.
#Pearson correlation (r) measures the linear dependence between two variables (x and y). It’s also known as a parametric correlation test because it depends on the distribution of the data. It can be used only when x and y come from a normal distribution. The plot of y = f(x) is called the linear regression curve.

#Kendall tau and Spearman rho, which are rank-based correlation coefficients (non-parametric)
# Pearson correlation test between ladder score and logged GDP per capita.
cor.test(my_hp$Ladder.score, my_hp$Logged.GDP.per.capita,  method="pearson")


    Pearson's product-moment correlation

data:  my_hp$Ladder.score and my_hp$Logged.GDP.per.capita
t = 15.61, df = 147, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7204524 0.8434475
sample estimates:
      cor 
0.7897597

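#The p-value of the Pearson test is < 2.2e-16, which is less than the significance level alpha = 0.05. We can conclude that Ladder.score and Logged.GDP.per.capita are significantly correlated, with a correlation coefficient of 0.79. Because Logged.GDP.per.capita did not pass the Shapiro-Wilk normality test (p-value = 0.0007689), the rank-based Spearman test is run as well.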
# Spearman rank correlation between ladder score and logged GDP per capita.
# The data contain ties, so an exact p-value cannot be computed; request the
# asymptotic approximation explicitly instead of triggering the
# "Cannot compute exact p-value with ties" warning.
cor.test(my_hp$Ladder.score, my_hp$Logged.GDP.per.capita,
         method = "spearman", exact = FALSE)

Warning in cor.test.default(my_hp$Ladder.score, my_hp$Logged.GDP.per.capita, :
Cannot compute exact p-value with ties


    Spearman's rank correlation rho

data:  my_hp$Ladder.score and my_hp$Logged.GDP.per.capita
S = 105074, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.8094072

# Spearman rank correlation between ladder score and freedom to make life
# choices. The data contain ties, so an exact p-value cannot be computed;
# request the asymptotic approximation explicitly instead of triggering the
# "Cannot compute exact p-value with ties" warning.
cor.test(my_hp$Ladder.score, my_hp$Freedom.to.make.life.choices,
         method = "spearman", exact = FALSE)

Warning in cor.test.default(my_hp$Ladder.score,
my_hp$Freedom.to.make.life.choices, : Cannot compute exact p-value with ties


    Spearman's rank correlation rho

data:  my_hp$Ladder.score and my_hp$Freedom.to.make.life.choices
S = 216613, p-value = 2.26e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
     rho 
0.607086

# Spearman rank correlation between ladder score and generosity. The data
# contain ties, so an exact p-value cannot be computed; request the asymptotic
# approximation explicitly instead of triggering the
# "Cannot compute exact p-value with ties" warning.
cor.test(my_hp$Ladder.score, my_hp$Generosity,
         method = "spearman", exact = FALSE)

Warning in cor.test.default(my_hp$Ladder.score, my_hp$Generosity, method =
"spearman"): Cannot compute exact p-value with ties


    Spearman's rank correlation rho

data:  my_hp$Ladder.score and my_hp$Generosity
S = 555514, p-value = 0.9263
alternative hypothesis: true rho is not equal to 0
sample estimates:
         rho 
-0.007643045