MATH1324 Assignment 3

World Happiness Report

XIAN JING WONG S3772149

Last updated: 23 October, 2019

Introduction

Introduction Cont.

Problem Statement

Data

# Load the 2017 World Happiness data set into the data frame used throughout.
X2017 <- read.csv(file = "world-happiness/2017.csv")

Descriptive Statistics and Visualisation

# Descriptive statistics for the wealth (GDP per capita) score: five-number
# summary, mean and standard deviation (all ignoring NAs), plus the total
# number of rows in X2017. The one-row result is rendered as a table.
Wealth_summary <- summarise(
  X2017,
  Min = min(Economy..GDP.per.Capita., na.rm = TRUE),
  Q1 = quantile(Economy..GDP.per.Capita., probs = 0.25, na.rm = TRUE),
  Median = median(Economy..GDP.per.Capita., na.rm = TRUE),
  Q3 = quantile(Economy..GDP.per.Capita., probs = 0.75, na.rm = TRUE),
  Max = max(Economy..GDP.per.Capita., na.rm = TRUE),
  Mean = mean(Economy..GDP.per.Capita., na.rm = TRUE),
  SD = sd(Economy..GDP.per.Capita., na.rm = TRUE),
  n = n()
)
knitr::kable(Wealth_summary)
Min Q1 Median Q3 Max Mean SD n
0 0.6633708 1.064578 1.318027 1.870766 0.9847182 0.4207927 155

Descriptive Statistics Cont.

# Descriptive statistics for the health (life expectancy) score: five-number
# summary, mean and standard deviation (all ignoring NAs), plus the total
# number of rows in X2017. The one-row result is rendered as a table.
Health_summary <- summarise(
  X2017,
  Min = min(Health..Life.Expectancy., na.rm = TRUE),
  Q1 = quantile(Health..Life.Expectancy., probs = 0.25, na.rm = TRUE),
  Median = median(Health..Life.Expectancy., na.rm = TRUE),
  Q3 = quantile(Health..Life.Expectancy., probs = 0.75, na.rm = TRUE),
  Max = max(Health..Life.Expectancy., na.rm = TRUE),
  Mean = mean(Health..Life.Expectancy., na.rm = TRUE),
  SD = sd(Health..Life.Expectancy., na.rm = TRUE),
  n = n()
)

knitr::kable(Health_summary)
Min Q1 Median Q3 Max Mean SD n
0 0.3698663 0.6060416 0.7230075 0.9494924 0.5513408 0.2370727 155

Descriptive Statistics Cont.

Boxplot

# Side-by-side boxplots of the health and wealth scores.
# The group labels are supplied through `names` so the x-axis is labelled
# exactly once: with unnamed vectors boxplot() already draws tick labels
# "1" and "2", and a separate axis() call would overprint them.
# The x-axis title names the two variables actually plotted (health, wealth).
boxplot(
  X2017$Health..Life.Expectancy.,
  X2017$Economy..GDP.per.Capita.,
  names = c("Health", "Wealth"),
  ylab = "Scores Ratings",
  xlab = "Health and Wealth"
)

Descriptive Statistics Cont. (Q-Q plot for Health)

# Normal Q-Q plot of the health score.
# qqPlot() is called directly rather than through %>%: when piped, the data
# argument reaches the function as ".", which is what the default y-axis label
# is built from. The label is set explicitly and the `distribution` argument
# is spelled out in full instead of relying on partial matching of `dist`.
qqPlot(
  X2017$Health..Life.Expectancy.,
  distribution = "norm",
  ylab = "Health (life expectancy) score"
)

## [1] 139 106

Q-Q plot for Wealth

# Normal Q-Q plot of the wealth score.
# qqPlot() is called directly rather than through %>%: when piped, the data
# argument reaches the function as ".", which is what the default y-axis label
# is built from. The label is set explicitly and the `distribution` argument
# is spelled out in full instead of relying on partial matching of `dist`.
qqPlot(
  X2017$Economy..GDP.per.Capita.,
  distribution = "norm",
  ylab = "Wealth (GDP per capita) score"
)

## [1] 155  93

Matplot

# Paired-profile plot: the scores are arranged as a 2 x n matrix (row 1 =
# health, row 2 = wealth), so matplot() draws one black line per row of X2017
# joining its health score (x = 1) to its wealth score (x = 2).
# The default x-axis is suppressed and replaced with descriptive tick labels.
matplot(
  y = rbind(
    X2017$Health..Life.Expectancy.,
    X2017$Economy..GDP.per.Capita.
  ),
  type = "b",
  lty = 1,
  pch = 19,
  col = 1,
  xaxt = "n",
  xlab = "Comparison",
  ylab = "Score rating"
)
axis(side = 1, at = c(1, 2), labels = c("Health Score", "Wealth Score"))

From the plot above, the health score tends to increase as the wealth score increases, which suggests a positive association between the two (the plot alone does not establish strict proportionality).

T-test

# Two-sided paired t-test of the health score against the wealth score;
# the two scores are paired row by row within X2017.
t.test(
  x = X2017$Health..Life.Expectancy.,
  y = X2017$Economy..GDP.per.Capita.,
  alternative = "two.sided",
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  X2017$Health..Life.Expectancy. and X2017$Economy..GDP.per.Capita.
## t = -21.153, df = 154, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4738510 -0.3929038
## sample estimates:
## mean of the differences 
##              -0.4333774

From the paired t-test above, the mean difference between the health and wealth scores is -0.43 (95% CI: -0.47 to -0.39). Because the p-value is far below 0.05, this difference is statistically significant.

Linear regression model

Derive a linear regression model to fit the available data as shown below:

# Hand-computed least-squares line for wealth (GDP per capita) regressed on
# health (life expectancy), drawn in red and checked against lm().
# Notation: a = wealth score (response), b = health score (predictor).

# Element-wise squares / products, kept for reference. Note the naming:
# b2 holds the squared wealth scores and a2 the squared health scores.
b2 <- X2017$Economy..GDP.per.Capita.^2
a2 <- X2017$Health..Life.Expectancy.^2
ab <- X2017$Economy..GDP.per.Capita. * X2017$Health..Life.Expectancy.

sum_a <- sum(X2017$Economy..GDP.per.Capita.)
sum_b <- sum(X2017$Health..Life.Expectancy.)
sum_a_sq <- sum(X2017$Economy..GDP.per.Capita.^2)
sum_b_sq <- sum(X2017$Health..Life.Expectancy.^2)
sum_ab <- sum(X2017$Economy..GDP.per.Capita. * X2017$Health..Life.Expectancy.)
n <- length(X2017$Economy..GDP.per.Capita.) # Sample size

# Corrected sums of squares and cross-products.
Laa <- sum_a_sq - (sum_a^2) / n
Lbb <- sum_b_sq - (sum_b^2) / n
Lab <- sum_ab - (sum_a * sum_b) / n

# y is the slope and x the intercept of the fitted line.
# The predictor is the health score (b), so the slope divides by Lbb;
# dividing by Laa would give the slope of health on wealth instead and
# would not match the intercept formula or the plot below.
y <- Lab / Lbb
x <- mean(X2017$Economy..GDP.per.Capita.) -
  y * mean(X2017$Health..Life.Expectancy.)

# Scatter plot with the predictor (health) on the x-axis.
plot(
  Economy..GDP.per.Capita. ~ Health..Life.Expectancy.,
  data = X2017,
  xlab = "Health Score",
  ylab = "Economy Score"
)

# abline() takes the intercept as `a` and the slope as `b`.
abline(a = x, b = y, col = "Red")
# Reference line from lm(); it should coincide with the red line.
abline(lm(Economy..GDP.per.Capita. ~ Health..Life.Expectancy., data = X2017))

# Linear regression model

# Fit the simple linear regression of the wealth score on the health score
# and print its summary (coefficients, residuals, R-squared, F statistic).
model <- lm(
  X2017$Economy..GDP.per.Capita. ~ X2017$Health..Life.Expectancy.,
  data = X2017
)
summary(model)
## 
## Call:
## lm(formula = X2017$Economy..GDP.per.Capita. ~ X2017$Health..Life.Expectancy., 
##     data = X2017)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43425 -0.13199 -0.01460  0.09913  0.64848 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.15968    0.04629   3.449 0.000726 ***
## X2017$Health..Life.Expectancy.  1.49642    0.07717  19.391  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.227 on 153 degrees of freedom
## Multiple R-squared:  0.7108, Adjusted R-squared:  0.7089 
## F-statistic:   376 on 1 and 153 DF,  p-value: < 2.2e-16

Hypothesis Testing

# Upper-tail probability of an F(1, 153) distribution at F = 376, i.e. the
# p-value of the overall regression F test.
# NOTE(review): 376 is the F statistic as rounded in the model summary, so
# this p-value differs slightly from the exact slope p-value reported there.
pf(376, df1 = 1, df2 = 153, lower.tail = FALSE)
## [1] 4.638967e-43

The p-value is far below 0.05, so H0 can be rejected: there is statistically significant evidence that the data fit the linear regression model.

# Extract the coefficient table (estimate, standard error, t value, p-value)
# from the model summary.
coef(summary(model))
##                                 Estimate Std. Error   t value     Pr(>|t|)
## (Intercept)                    0.1596804 0.04629111  3.449484 7.258877e-04
## X2017$Health..Life.Expectancy. 1.4964207 0.07717141 19.390869 4.635065e-43

Hypothesis Testing Cont.

# Draw the base-R diagnostic plots for the fitted lm object `model`.
plot(model)

\[H_0: \mu_1 = \mu_2 \]

\[H_A: \mu_1 \ne \mu_2\]

\[S = \sum^n_{i = 1}d^2_i\]

Discussion

References