#this is to set a working directory
setwd("C:\\Users\\melvo\\Documents\\CCMF\\Quantitive methods\\Coursework\\Meat and Happiness")

#this checks the working directory
getwd()
## [1] "C:/Users/melvo/Documents/CCMF/Quantitive methods/Coursework/Meat and Happiness"
#selecting your dataset
meathap <- read.csv("meathappy_plot2017.csv")

Relationship between the meat consumption of each country in 2017 against their happiness scores.

library(ggplot2)
ggplot(meathap, aes(x = meat, y = happiness)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("Meat Consumption kg/cap")+ylab("Happiness Score in Cantril Ladder") + ggtitle("Relationship between meat consumption and happiness 2017")
## `geom_smooth()` using formula 'y ~ x'

  reg1=lm(happiness~meat,meathap)
  coeftest(reg1,vcov=vcovHC)
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 4.7311081  0.1580845 29.9277 < 2.2e-16 ***
## meat        0.0150685  0.0029554  5.0987 1.107e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

GDP

Relationship between GDP per capita (gdppc) against the happiness scores

library(ggplot2)
ggplot(meathap, aes(x = gdppc, y = happiness)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("GDP/cap USD")+ylab("Happiness Score") + ggtitle("Relationship between GDP per capita and happiness 2017")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

reg2=lm(happiness~gdppc,meathap)
  coeftest(reg2,vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 5.1365e+00 1.0939e-01  46.954 < 2.2e-16 ***
## gdppc       2.2409e-05 5.8678e-06   3.819 0.0002023 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

However, due to decreasing marginal effect of the happiness outcome as the GDP per capita increases instead log(gdppc) will be used:

library(ggplot2)
ggplot(meathap, aes(x = log(gdppc), y = happiness)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("log(GDP/cap)(log(USD))")+ylab("Happiness Score") + ggtitle("Relationship between log(GDP per capita) and happiness 2017")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

  reg2a=lm(happiness~log(gdppc),meathap)
  coeftest(reg2a,vcov=vcovHC)
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 2.419899   0.572576  4.2263 4.311e-05 ***
## log(gdppc)  0.351304   0.066715  5.2658 5.275e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Now using log(gdppc) as a control variable.

  reg3=lm(happiness~meat+log(gdppc),meathap)
  coeftest(reg3,vcov=vcovHC)
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 2.7245866  0.7711490  3.5332 0.0005615 ***
## meat        0.0030575  0.0046892  0.6520 0.5154864    
## log(gdppc)  0.2990311  0.1091662  2.7392 0.0069843 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regions

6 regions: Europe, Africa, Asia & Pacific, South America, North America, Middle East

Used binary variables for each region to run the regression with all of them taken into account. So for example for eu the value is 1 for in europe and 0 for not in europe, and the same for all 6 of eu af apac me na sa

  reg4=lm(happiness~meat+eu+af+apac+me+na,meathap)
  coeftest(reg4,vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  5.2496568  0.3388963 15.4905 < 2.2e-16 ***
## meat         0.0102612  0.0037059  2.7689  0.006429 ** 
## eu          -0.2149728  0.2918793 -0.7365  0.462716    
## af          -0.7559756  0.3230240 -2.3403  0.020756 *  
## apac        -0.1506666  0.3306295 -0.4557  0.649352    
## me          -0.0237506  0.3819590 -0.0622  0.950512    
## na           0.1070271  0.3356181  0.3189  0.750306    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Adding log(gdppc) as a control too:

  reg4=lm(happiness~meat+log(gdppc)+eu+af+apac+me+na,meathap)
  coeftest(reg4,vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  3.4381852  1.0111228  3.4004 0.0008924 ***
## meat         0.0011048  0.0048216  0.2291 0.8191269    
## log(gdppc)   0.2741746  0.1301484  2.1066 0.0370568 *  
## eu          -0.4623548  0.3009354 -1.5364 0.1268540    
## af          -0.7348681  0.3436026 -2.1387 0.0343142 *  
## apac        -0.2524802  0.3584238 -0.7044 0.4824228    
## me          -0.1829090  0.3779904 -0.4839 0.6292658    
## na          -0.0151453  0.3307707 -0.0458 0.9635490    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

South america is the default category here.

Urbanisation

Urbanpercent is the percentage of the total population of a country who live in an urban setting.

library(ggplot2)
ggplot(meathap, aes(x = urbanpercent, y = happiness)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("Percentage of urbanisation of total population")+ylab("Happiness Score in Cantril Ladder") + ggtitle("Relationship between urbanisation and happiness 2017")
## `geom_smooth()` using formula 'y ~ x'

library(ggplot2)
ggplot(meathap, aes(x = urbanpercent, y = meat)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("Percentage of urbanisation of total population")+ylab("Meat Consumption kg/cap") + ggtitle("Relationship between urbanisation and meat consumption 2017")
## `geom_smooth()` using formula 'y ~ x'

regression of urbanisation’s impact on happiness

  reg5=lm(happiness~urbanpercent,meathap)
  coeftest(reg5,vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  4.3432815  0.2917059 14.8892 < 2.2e-16 ***
## urbanpercent 0.0184056  0.0045641  4.0327 9.082e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

regression of urbanisation’s impact on meat consumption

  reg6=lm(meat~urbanpercent,meathap)
  coeftest(reg6,vcov=vcovHC)
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -10.858103   4.430669 -2.4507  0.01551 *  
## urbanpercent   0.976342   0.079427 12.2923  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

using urbanisation as a control variable

  reg7=lm(happiness~meat + urbanpercent,meathap)
  coeftest(reg7,vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.4668139  0.2870274 15.5623  < 2e-16 ***
## meat         0.0113770  0.0036764  3.0946  0.00239 ** 
## urbanpercent 0.0072978  0.0056542  1.2907  0.19899    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

both log(gdppc) and urbanisation as controls

  reg8=lm(happiness~meat + log(gdppc) + urbanpercent,meathap)
  coeftest(reg8,vcov=vcovHC)
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   2.6815620  0.7863098  3.4103 0.0008562 ***
## meat          0.0031959  0.0047724  0.6697 0.5042183    
## log(gdppc)    0.3126308  0.1264809  2.4718 0.0146890 *  
## urbanpercent -0.0013470  0.0068961 -0.1953 0.8454287    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HDI

The human development index takes into account many factors for a country such as standard of living, knowledge, life expectancy

library(ggplot2)
ggplot(meathap, aes(x = HDI, y = happiness)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("HDI")+ylab("Happiness Score in Cantril Ladder") + ggtitle("Relationship between HDI and happiness 2017")
## `geom_smooth()` using formula 'y ~ x'

library(ggplot2)
ggplot(meathap, aes(x = HDI, y = meat)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_point()+theme_minimal()+
    xlab("HDI")+ylab("Meat Consumption kg/cap") + ggtitle("Relationship between HDI and meat consumption 2017")
## `geom_smooth()` using formula 'y ~ x'

regression of HDI and happiness

  reg9=lm(happiness~HDI,meathap)
  coeftest(reg9,vcov=vcovHC)
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  3.07902    0.41082  7.4949 7.242e-12 ***
## HDI          3.28897    0.57725  5.6977 7.048e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

regression of HDI and meat consumption

  reg10=lm(meat~HDI,meathap)
  coeftest(reg10,vcov=vcovHC)
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -65.1164     7.0269 -9.2667 3.436e-16 ***
## HDI         156.7815     9.8314 15.9470 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

using hdi as a control

  reg11=lm(happiness~meat + log(gdppc) + HDI,meathap)
  coeftest(reg11,vcov=vcovHC)
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 2.9139639  0.8254903  3.5300 0.0005689 ***
## meat        0.0024587  0.0047539  0.5172 0.6058598    
## log(gdppc)  0.1479905  0.2032424  0.7281 0.4677842    
## HDI         1.5858034  1.6505222  0.9608 0.3383760    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

log(gdppc) and urbanisation and hdi as controls

  reg12=lm(happiness~meat + log(gdppc)+ urbanpercent  + HDI,meathap)
  coeftest(reg12,vcov=vcovHC)
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   2.8729870  0.8474960  3.3900 0.0009186 ***
## meat          0.0025905  0.0048375  0.5355 0.5931871    
## log(gdppc)    0.1612179  0.2177280  0.7405 0.4603191    
## urbanpercent -0.0012650  0.0069927 -0.1809 0.8567152    
## HDI           1.5810209  1.6671631  0.9483 0.3446686    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

All controls and regions

Now looking at using log(gdppc) and urbanisation as controls together with the dummy variables for each region. HDI not included due to multicolinearity issues covered below in VIF test.

  reg_final=lm(happiness~meat+log(gdppc)+urbanpercent+eu+af+apac+me+na,meathap)
  coeftest(reg_final,vcov=vcovHC)
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.2906650  1.0126624  3.2495 0.001472 **
## meat          0.0015903  0.0048544  0.3276 0.743738   
## log(gdppc)    0.3315532  0.1501815  2.2077 0.029020 * 
## urbanpercent -0.0050643  0.0076356 -0.6632 0.508348   
## eu           -0.5529239  0.3193726 -1.7313 0.085774 . 
## af           -0.7891548  0.3613715 -2.1838 0.030773 * 
## apac         -0.3574374  0.3833506 -0.9324 0.352857   
## me           -0.2206237  0.3859988 -0.5716 0.568603   
## na           -0.0727212  0.3471850 -0.2095 0.834417   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Breusch-Pagan test

Determining whether it is necessary to robust standard errors via a coeftest() with vcov=vcovHC.

bptest(reg_final)
## 
##  studentized Breusch-Pagan test
## 
## data:  reg_final
## BP = 9.6104, df = 8, p-value = 0.2934

The H0 of the Breusch Pagan test is that no heteroscedasticity is present. If the pvalue <0.05 we can reject the null however here that is not the case and therefore we cannot reject the null hypothesis and heteroscedasticity is not present

Multicolinearity

Using vif to test for imperfect multicolinearity between x variables. First a full regression including HDI as a control variable is shown.

  reg_final_HDI=lm(happiness~meat+log(gdppc)+HDI+urbanpercent+eu+af+apac+me+na,meathap)
  coeftest(reg_final_HDI,vcov=vcovHC)
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.2783660  1.0272585  3.1914 0.001779 **
## meat          0.0014681  0.0049479  0.2967 0.767164   
## log(gdppc)    0.2686708  0.2250785  1.1937 0.234795   
## HDI           0.7914173  2.2002882  0.3597 0.719669   
## urbanpercent -0.0053223  0.0077316 -0.6884 0.492453   
## eu           -0.5759524  0.3319270 -1.7352 0.085098 . 
## af           -0.7422288  0.4044310 -1.8352 0.068773 . 
## apac         -0.3671534  0.3866063 -0.9497 0.344049   
## me           -0.2208000  0.3882839 -0.5687 0.570578   
## na           -0.0571791  0.3548496 -0.1611 0.872238   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Now demonstrating the multicolinearity problem between HDI and log(gdppc).

vif(reg_final_HDI)
##         meat   log(gdppc)          HDI urbanpercent           eu           af 
##     3.501166    14.498592    15.623224     3.542206     4.520735     4.897289 
##         apac           me           na 
##     2.872000     2.564835     2.129123
vif(reg_final)
##         meat   log(gdppc) urbanpercent           eu           af         apac 
##     3.487242     5.854780     3.510446     4.402332     4.445861     2.861018 
##           me           na 
##     2.564832     2.109459

As log(gdppc) and HDI are both >10 there is a multicolinearity problem between the two. Therefore we removed HDI as a control variable and all x variables have vif<10 now. Hence, why HDI is not included in our final regressions.

Panel Data

Using data from 2010 to 2017 for all variables. However, for certain countries happiness data was not available for every year, hence the need to remove NA values from the panel dataset.

panelmeathap <- read.csv("meathappy_plot.csv", as.is=TRUE, na.strings=c(NA, "NA", " NA"))

removing the NA values from the panel data

panelmeathap2 <- panelmeathap[complete.cases(panelmeathap[ ,c("meat", "happiness", "gdppc", "urbanpercent", "HDI", "wworkpercent")]), ]

removing countries with less than 3 years worth of data in order to have a time series was at least 3 years for all countries in the panel regression

panelmeathap3 = panelmeathap2[panelmeathap2$name != "Central African Republic" & panelmeathap2$name != "Jamaica" & panelmeathap2$name != "Lao People's Democratic Republic" & panelmeathap2$name != "Lesotho" & panelmeathap2$name != "Oman" & panelmeathap2$name != "Mozambique" & panelmeathap2$name != "Sri Lanka" & panelmeathap2$name != "Trinidad and Tobago"& panelmeathap2$name != "Gambia"& panelmeathap2$name != "Iceland"& panelmeathap2$name != "Morocco"& panelmeathap2$name != "Namibia"& panelmeathap2$name != "Nigeria", ]

Fixed effects panel regression

The ordinary least squares (ols) regression used to compare the fixed effects plm regression to in the pFtest.

#OLS regression
ols=lm(happiness~meat+log(gdppc)+urbanpercent+eu+af+apac+me+na,panelmeathap3)
summary(ols)
## 
## Call:
## lm(formula = happiness ~ meat + log(gdppc) + urbanpercent + eu + 
##     af + apac + me + na, data = panelmeathap3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.72734 -0.41828  0.01015  0.44564  3.06278 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.048315   0.232832   4.502 7.54e-06 ***
## meat          0.001991   0.001409   1.413  0.15806    
## log(gdppc)    0.597172   0.036789  16.232  < 2e-16 ***
## urbanpercent -0.005207   0.001817  -2.867  0.00424 ** 
## eu           -0.764378   0.092149  -8.295 3.63e-16 ***
## af           -0.856208   0.098869  -8.660  < 2e-16 ***
## apac         -0.528375   0.102489  -5.155 3.07e-07 ***
## me           -0.461010   0.100300  -4.596 4.87e-06 ***
## na            0.154802   0.108681   1.424  0.15466    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.685 on 964 degrees of freedom
## Multiple R-squared:  0.638,  Adjusted R-squared:  0.635 
## F-statistic: 212.4 on 8 and 964 DF,  p-value: < 2.2e-16
#renaming a column heading
colnames(panelmeathap3)[1] <- gsub('^...','',colnames(panelmeathap3)[1])

fixed effects plm:

fixed <- plm(happiness~meat+log(gdppc)+urbanpercent, data=panelmeathap3,
    index=c("location","year"),
    model="within", effect="twoways")

coeftest(fixed,vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)  
## meat         0.0108945  0.0043433  2.5083  0.01232 *
## log(gdppc)   0.2270784  0.2005704  1.1322  0.25789  
## urbanpercent 0.0037451  0.0036471  1.0269  0.30477  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

test to demonstrate that a fixed effects plm is more effective than OLS for panel data

pFtest(fixed, ols)
## 
##  F test for twoways effects
## 
## data:  happiness ~ meat + log(gdppc) + urbanpercent
## F = 6.1013, df1 = 128, df2 = 836, p-value < 2.2e-16
## alternative hypothesis: significant effects

pFtest is a test that shows a p values <0.05 showing that the fixed effects model is a much better choice.

Hausman Test

Using the Hausman test to determine whether to used a fixed or random effects plm.

random <- plm(happiness~meat+log(gdppc)+urbanpercent, data=panelmeathap3,
    index=c("location","year"),
    model="random")

coeftest(random, vcov=vcovHC)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  1.2364657  0.3879970  3.1868  0.001485 ** 
## meat         0.0066635  0.0030271  2.2013  0.027953 *  
## log(gdppc)   0.4220301  0.0638544  6.6093 6.367e-11 ***
## urbanpercent 0.0034853  0.0029692  1.1738  0.240757    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Hausman test
phtest(fixed, random)
## 
##  Hausman Test
## 
## data:  happiness ~ meat + log(gdppc) + urbanpercent
## chisq = 2.9657, df = 3, p-value = 0.3969
## alternative hypothesis: one model is inconsistent

As the p-value of the hausman test is <0.05 we can use the fixed effects model.

Dickey-Fuller Test

Using a Dickey-Fuller test to test for stationarity of the time series data.

panel.set <- pdata.frame(panelmeathap3, index = c("location", "year"))
adf.test(panel.set$happiness, k=2)
## Warning in adf.test(panel.set$happiness, k = 2): p-value smaller than printed p-
## value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  panel.set$happiness
## Dickey-Fuller = -9.324, Lag order = 2, p-value = 0.01
## alternative hypothesis: stationary

P-value is < 0.05 and therefore the data is stationary and it is not necessary to first difference the time series.