FIT 3152 Assignment 1

Student ID: 33368066

Set Working Directory

# Print the current working directory.
getwd()

## [1] "/Users/deslyn/Desktop/Monash/FIT 3152 /Assignment 1"

# NOTE(review): hard-coded, machine-specific path (note the space after
# "FIT 3152"). The relative path passed to read.csv() later resolves against
# this directory, so the script runs unchanged only where this folder exists.
setwd("~/Desktop/Monash/FIT 3152 /Assignment 1")

Read the Data File

# Start from an empty workspace and fix the RNG seed so that the row and
# column samples drawn below are reproducible.
rm(list = ls())
set.seed(33368066) # Your Student Number
VCData <- read.csv("WVSExtract.csv")

# Draw 50,000 distinct rows at random.
sampled_rows <- sample(seq_len(nrow(VCData)), 50000, replace = FALSE)
VC <- VCData[sampled_rows, ]

# Keep columns 1-6 and 47-53, plus 17 random columns from 7-46 and 10 random
# columns from 54-69. The two draws must stay in this order: each one consumes
# the seeded RNG stream, so swapping them would select different columns.
sampled_mid_cols <- sort(sample(7:46, 17, replace = FALSE))
sampled_end_cols <- sort(sample(54:69, 10, replace = FALSE))
VC <- VC[, c(1:6, sampled_mid_cols, 47:53, sampled_end_cols)]

Install and Load Libraries

#install.packages("dplyr")
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#install.packages("tidyr")
library(tidyr)

#install.packages("ggplot2")
library(ggplot2)


#install.packages("factoextra")
library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Describe Overall Data

Show attributes

# List the column names retained after the random column selection.
colnames(VC)

##  [1] "Country"        "TPeople"        "TFamily"        "TNeighbourhood"
##  [5] "TKnow"          "TMeet"          "VFamily"        "VFriends"      
##  [9] "VLeisure"       "VReligion"      "HSatFin"        "HMedicine"     
## [13] "EPrivate"       "ECompetition"   "EHardWork"      "PIAB"          
## [17] "STOpportunity"  "STFaith"        "STImportant"    "PNewspaper"    
## [21] "PMobile"        "PEmail"         "PSocial"        "PDemImp"       
## [25] "PDemCurrent"    "PSatisfied"     "MF"             "Age"           
## [29] "Edu"            "Employment"     "CReligious"     "CPress"        
## [33] "CTelevision"    "CUnions"        "CCourts"        "CPParties"     
## [37] "CParliament"    "CCivilService"  "CElections"     "CEnvOrg"

Show number of rows and columns

# Dimensions of the sample: number of rows, then number of columns.
dim(VC)

## [1] 50000    40

Show data types

# Compact structure: each column's storage type and its first few values.
str(VC)

## 'data.frame':    50000 obs. of  40 variables:
##  $ Country       : chr  "NLD" "TUN" "PHL" "SGP" ...
##  $ TPeople       : int  1 2 2 2 1 2 2 2 2 2 ...
##  $ TFamily       : int  1 3 1 1 1 1 1 1 1 1 ...
##  $ TNeighbourhood: int  1 4 2 2 2 2 2 1 3 2 ...
##  $ TKnow         : int  1 2 1 2 2 2 2 1 2 2 ...
##  $ TMeet         : int  2 4 3 3 3 2 3 4 4 3 ...
##  $ VFamily       : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ VFriends      : int  1 2 1 2 2 2 2 1 2 2 ...
##  $ VLeisure      : int  1 1 3 2 2 2 2 1 2 2 ...
##  $ VReligion     : int  4 2 2 2 1 2 3 1 3 1 ...
##  $ HSatFin       : int  10 10 9 10 7 10 4 7 6 7 ...
##  $ HMedicine     : int  4 4 3 4 4 4 3 4 4 3 ...
##  $ EPrivate      : int  7 3 6 6 7 1 5 10 1 2 ...
##  $ ECompetition  : int  8 1 7 5 8 1 -1 1 1 1 ...
##  $ EHardWork     : int  7 1 5 6 3 5 3 10 1 8 ...
##  $ PIAB          : int  4 4 4 2 4 2 3 1 4 4 ...
##  $ STOpportunity : int  5 8 7 10 2 10 5 5 10 9 ...
##  $ STFaith       : int  1 8 6 6 5 4 5 5 4 8 ...
##  $ STImportant   : int  1 1 4 6 6 2 4 5 3 1 ...
##  $ PNewspaper    : int  1 5 3 2 4 2 4 3 5 5 ...
##  $ PMobile       : int  1 5 1 1 1 1 1 3 1 1 ...
##  $ PEmail        : int  1 5 2 2 2 5 4 5 1 5 ...
##  $ PSocial       : int  3 1 1 1 1 1 1 1 1 5 ...
##  $ PDemImp       : int  10 10 7 10 8 10 9 10 8 10 ...
##  $ PDemCurrent   : int  7 4 7 10 8 9 5 10 5 3 ...
##  $ PSatisfied    : int  6 5 8 7 8 8 5 5 3 3 ...
##  $ MF            : int  2 2 2 2 2 2 2 1 2 2 ...
##  $ Age           : int  67 18 18 71 36 31 23 37 30 23 ...
##  $ Edu           : int  6 3 3 1 6 4 2 2 6 2 ...
##  $ Employment    : int  2 6 7 5 5 3 5 1 3 5 ...
##  $ CReligious    : int  4 4 1 2 1 2 3 3 4 3 ...
##  $ CPress        : int  2 4 1 2 2 2 -1 4 3 2 ...
##  $ CTelevision   : int  3 4 3 2 2 2 3 4 3 2 ...
##  $ CUnions       : int  1 4 2 2 2 2 3 3 3 2 ...
##  $ CCourts       : int  2 4 1 2 1 2 3 3 4 1 ...
##  $ CPParties     : int  3 4 2 2 2 2 3 3 4 2 ...
##  $ CParliament   : int  2 4 1 2 2 2 3 3 4 2 ...
##  $ CCivilService : int  2 4 1 2 2 2 3 3 3 1 ...
##  $ CElections    : int  2 4 1 2 2 2 2 2 2 3 ...
##  $ CEnvOrg       : int  1 4 1 2 2 2 2 2 -1 2 ...

Show head of data

# First six rows of the sample (row names are the original row indices).
head(VC)

##       Country TPeople TFamily TNeighbourhood TKnow TMeet VFamily VFriends
## 39129     NLD       1       1              1     1     2       1        1
## 17644     TUN       2       3              4     2     4       1        2
## 78258     PHL       2       1              2     1     3       1        1
## 56142     SGP       2       1              2     2     3       1        2
## 31553     HKG       1       1              2     2     3       1        2
## 4791      VNM       2       1              2     2     2       1        2
##       VLeisure VReligion HSatFin HMedicine EPrivate ECompetition EHardWork PIAB
## 39129        1         4      10         4        7            8         7    4
## 17644        1         2      10         4        3            1         1    4
## 78258        3         2       9         3        6            7         5    4
## 56142        2         2      10         4        6            5         6    2
## 31553        2         1       7         4        7            8         3    4
## 4791         2         2      10         4        1            1         5    2
##       STOpportunity STFaith STImportant PNewspaper PMobile PEmail PSocial
## 39129             5       1           1          1       1      1       3
## 17644             8       8           1          5       5      5       1
## 78258             7       6           4          3       1      2       1
## 56142            10       6           6          2       1      2       1
## 31553             2       5           6          4       1      2       1
## 4791             10       4           2          2       1      5       1
##       PDemImp PDemCurrent PSatisfied MF Age Edu Employment CReligious CPress
## 39129      10           7          6  2  67   6          2          4      2
## 17644      10           4          5  2  18   3          6          4      4
## 78258       7           7          8  2  18   3          7          1      1
## 56142      10          10          7  2  71   1          5          2      2
## 31553       8           8          8  2  36   6          5          1      2
## 4791       10           9          8  2  31   4          3          2      2
##       CTelevision CUnions CCourts CPParties CParliament CCivilService
## 39129           3       1       2         3           2             2
## 17644           4       4       4         4           4             4
## 78258           3       2       1         2           1             1
## 56142           2       2       2         2           2             2
## 31553           2       2       1         2           2             2
## 4791            2       2       2         2           2             2
##       CElections CEnvOrg
## 39129          2       1
## 17644          4       4
## 78258          1       1
## 56142          2       2
## 31553          2       2
## 4791           2       2

Treat Negative Values as NA

# The values -1 to -5 are treated as missing in this analysis: build one
# logical mask that is TRUE wherever a cell equals any of them, then blank
# those cells out.
negative_codes <- c(-1, -2, -3, -4, -5)
is_negative_code <- Reduce(`|`, lapply(negative_codes, function(code) VC == code))
VC[is_negative_code] <- NA
#View(VC)
# Sanity check: should print FALSE once no negative value remains.
any(VC < 0, na.rm = TRUE)

## [1] FALSE

# Number of NA values in each column.
colSums(is.na(VC))

##        Country        TPeople        TFamily TNeighbourhood          TKnow 
##              0            642            160            391            288 
##          TMeet        VFamily       VFriends       VLeisure      VReligion 
##            669             77            160            269            459 
##        HSatFin      HMedicine       EPrivate   ECompetition      EHardWork 
##            301            303           1741            809            701 
##           PIAB  STOpportunity        STFaith    STImportant     PNewspaper 
##           2515           1432           1934           1776            472 
##        PMobile         PEmail        PSocial        PDemImp    PDemCurrent 
##            497           1005           1888            926           1368 
##     PSatisfied             MF            Age            Edu     Employment 
##           1806             42            269            520            622 
##     CReligious         CPress    CTelevision        CUnions        CCourts 
##           1017           1117            739           3581           1769 
##      CPParties    CParliament  CCivilService     CElections        CEnvOrg 
##           1650           1739           1666           1959           3257

Show summary of data

# Per-column summary: quartiles, mean and NA count for numeric columns;
# length/class/mode for the character column.
summary(VC)

##    Country             TPeople         TFamily     TNeighbourhood 
##  Length:50000       Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  Class :character   1st Qu.:2.000   1st Qu.:1.00   1st Qu.:2.000  
##  Mode  :character   Median :2.000   Median :1.00   Median :2.000  
##                     Mean   :1.757   Mean   :1.27   Mean   :2.182  
##                     3rd Qu.:2.000   3rd Qu.:1.00   3rd Qu.:3.000  
##                     Max.   :2.000   Max.   :4.00   Max.   :4.000  
##                     NA's   :642     NA's   :160    NA's   :391    
##      TKnow           TMeet          VFamily         VFriends    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :3.000   Median :1.000   Median :2.000  
##  Mean   :2.066   Mean   :3.012   Mean   :1.114   Mean   :1.712  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :288     NA's   :669     NA's   :77      NA's   :160    
##     VLeisure       VReligion        HSatFin         HMedicine    
##  Min.   :1.000   Min.   :1.000   Min.   : 1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 5.000   1st Qu.:3.000  
##  Median :2.000   Median :2.000   Median : 6.000   Median :4.000  
##  Mean   :1.785   Mean   :1.984   Mean   : 6.208   Mean   :3.348  
##  3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.: 8.000   3rd Qu.:4.000  
##  Max.   :4.000   Max.   :4.000   Max.   :10.000   Max.   :4.000  
##  NA's   :269     NA's   :459     NA's   :301      NA's   :303    
##     EPrivate       ECompetition      EHardWork           PIAB      
##  Min.   : 1.000   Min.   : 1.000   Min.   : 1.000   Min.   :1.000  
##  1st Qu.: 4.000   1st Qu.: 1.000   1st Qu.: 2.000   1st Qu.:1.000  
##  Median : 5.000   Median : 4.000   Median : 4.000   Median :2.000  
##  Mean   : 5.658   Mean   : 4.069   Mean   : 4.455   Mean   :2.411  
##  3rd Qu.: 8.000   3rd Qu.: 6.000   3rd Qu.: 7.000   3rd Qu.:3.000  
##  Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :4.000  
##  NA's   :1741     NA's   :809      NA's   :701      NA's   :2515   
##  STOpportunity       STFaith        STImportant       PNewspaper   
##  Min.   : 1.000   Min.   : 1.000   Min.   : 1.000   Min.   :1.000  
##  1st Qu.: 6.000   1st Qu.: 3.000   1st Qu.: 2.000   1st Qu.:2.000  
##  Median : 8.000   Median : 5.000   Median : 5.000   Median :4.000  
##  Mean   : 7.556   Mean   : 5.531   Mean   : 4.605   Mean   :3.374  
##  3rd Qu.:10.000   3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.:5.000  
##  Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :5.000  
##  NA's   :1432     NA's   :1934     NA's   :1776     NA's   :472    
##     PMobile          PEmail         PSocial         PDemImp      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   : 1.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.: 7.000  
##  Median :1.000   Median :4.000   Median :2.000   Median : 9.000  
##  Mean   :2.421   Mean   :3.514   Mean   :2.672   Mean   : 8.367  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:10.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :10.000  
##  NA's   :497     NA's   :1005    NA's   :1888    NA's   :926     
##   PDemCurrent       PSatisfied           MF             Age       
##  Min.   : 1.000   Min.   : 1.000   Min.   :1.000   Min.   : 16.0  
##  1st Qu.: 5.000   1st Qu.: 3.000   1st Qu.:1.000   1st Qu.: 29.0  
##  Median : 6.000   Median : 5.000   Median :2.000   Median : 41.0  
##  Mean   : 6.169   Mean   : 5.342   Mean   :1.526   Mean   : 43.1  
##  3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.:2.000   3rd Qu.: 55.0  
##  Max.   :10.000   Max.   :10.000   Max.   :2.000   Max.   :100.0  
##  NA's   :1368     NA's   :1806     NA's   :42      NA's   :269    
##       Edu          Employment      CReligious        CPress     
##  Min.   :0.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :2.000   Median :3.000  
##  Mean   :3.547   Mean   :3.129   Mean   :2.192   Mean   :2.704  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :8.000   Max.   :8.000   Max.   :4.000   Max.   :4.000  
##  NA's   :520     NA's   :622     NA's   :1017    NA's   :1117   
##   CTelevision       CUnions         CCourts        CPParties    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :2.000   Median :3.000  
##  Mean   :2.628   Mean   :2.704   Mean   :2.413   Mean   :2.963  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:4.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :739     NA's   :3581    NA's   :1769    NA's   :1650   
##   CParliament    CCivilService    CElections       CEnvOrg    
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.00  
##  1st Qu.:2.000   1st Qu.:2.00   1st Qu.:2.000   1st Qu.:2.00  
##  Median :3.000   Median :3.00   Median :3.000   Median :2.00  
##  Mean   :2.792   Mean   :2.56   Mean   :2.594   Mean   :2.38  
##  3rd Qu.:4.000   3rd Qu.:3.00   3rd Qu.:3.000   3rd Qu.:3.00  
##  Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :4.00  
##  NA's   :1739    NA's   :1666   NA's   :1959    NA's   :3257

Q1 Charts

# Stack the five trust items into long form: one row per respondent-item pair.
trust_long <- pivot_longer(
  select(VC, TPeople, TFamily, TNeighbourhood, TKnow, TMeet),
  cols = everything(),
  names_to = "Trust_Variable",
  values_to = "Response"
)

# One bar chart per trust item; each facet gets its own y axis because the
# counts differ between items. NA responses appear as their own category.
ggplot(data = trust_long, mapping = aes(x = factor(Response))) +
  geom_bar(fill = "#66CCFF", colour = "black") +
  facet_wrap(vars(Trust_Variable), ncol = 3, scales = "free_y") +
  labs(
    title = "Distribution of Trust Variables",
    x = "Response",
    y = "Number of Respondents"
  ) +
  theme_minimal()

# Stack the four "importance in life" items into long form.
importance_long <- pivot_longer(
  select(VC, VFamily, VFriends, VLeisure, VReligion),
  cols = everything(),
  names_to = "Value_Type",
  values_to = "Response"
)

# 2 x 2 grid of bar charts on a shared y axis; extra vertical spacing keeps
# the facet rows visually separate.
ggplot(data = importance_long, mapping = aes(x = factor(Response))) +
  geom_bar(fill = "#66CCFF", colour = "black") +
  facet_wrap(vars(Value_Type), ncol = 2) +
  labs(
    title = "Distribution of Importance in Life",
    x = "Importance Rating",
    y = "Number of Respondents"
  ) +
  theme_minimal() +
  theme(panel.spacing.y = unit(2, "lines"))

# Histogram of financial satisfaction with one unit-wide bin per response.
# Rows where HSatFin is NA are dropped by stat_bin() with a warning.
ggplot(data = VC, mapping = aes(x = HSatFin)) +
  geom_histogram(binwidth = 1, fill = "#66CCFF", colour = "black") +
  labs(
    title = "Distribution of Financial Satisfaction",
    x = "Financial Satisfaction (1 = Not at all, 10 = Completely)",
    y = "Number of Respondents"
  ) +
  theme_minimal()

## Warning: Removed 301 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Bar chart of HMedicine responses; the value is converted to a factor so each
# response (and NA) gets its own discrete bar.
ggplot(data = VC, mapping = aes(x = factor(HMedicine))) +
  geom_bar(fill = "#66CCFF", colour = "black") +
  labs(
    title = "Access to Medicine in the Last 12 Months",
    x = "Response (1 = Often, 4 = Never)",
    y = "Number of Respondents"
  ) +
  theme_minimal()

# Long-form economic-value responses (all three items range from 1 to 10).
# NA responses are dropped up front so stat_bin() has nothing to discard.
econ_long <- VC %>%
  select(EPrivate, ECompetition, EHardWork) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response))

# One unit-wide bin per integer response, with bin edges on the half-integers
# (0.5-1.5, ..., 9.5-10.5) so every response value gets its own bar.
# Do not combine boundary = 1 with limits = c(1, 10) here: the breaks then run
# 1..10, which yields only nine bins and lumps responses 9 and 10 together in
# the final [9, 10] bin. Leaving the x scale unrestricted also keeps the outer
# bars from being censored.
ggplot(econ_long, aes(x = Response)) +
  geom_histogram(binwidth = 1, fill = "#66CCFF", color = "black", boundary = 0.5, closed = "left") +
  facet_wrap(~Variable, ncol = 1) +
  scale_x_continuous(breaks = 1:10) +
  xlab("Response Scale (1 = Collectivist View, 10 = Individualist View)") +
  ylab("Number of Respondents") +
  ggtitle("Distribution of Economic Values") +
  theme_minimal() +
  theme(panel.spacing.y = unit(2, "lines"))

## Warning: Removed 3251 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Long-form science-and-technology responses (all three items range from
# 1 to 10). NA responses are dropped up front so stat_bin() has nothing to
# discard.
st_long <- VC %>%
  select(STOpportunity, STFaith, STImportant) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response))

# One unit-wide bin per integer response, edges on the half-integers.
# Do not set limits = c(1, 10) on the x scale: the outermost bars span 0.5-1.5
# and 9.5-10.5, so they fall outside those limits and are removed, which hides
# every response of 1 and 10 (two bars in each of the three facets).
ggplot(st_long, aes(x = Response)) +
  geom_histogram(binwidth = 1, fill = "#66CCFF", color = "black", boundary = 0.5, closed = "left") +
  facet_wrap(~Variable, ncol = 1) +
  scale_x_continuous(breaks = 1:10) +
  xlab("Rating (1 = Not at all, 10 = Completely)") +
  ylab("Number of Respondents") +
  ggtitle("Distribution of Opinions on Science and Technology") +
  theme_minimal() +
  theme(panel.spacing.y = unit(2, "lines"))

## Warning: Removed 5142 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# Long-form responses for the three democracy / political-satisfaction items
# (all range from 1 to 10). NA responses are dropped up front so stat_bin()
# has nothing to discard.
pol_long <- VC %>%
  select(PDemImp, PDemCurrent, PSatisfied) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response))

# One unit-wide bin per integer response, edges on the half-integers.
# Do not set limits = c(1, 10) on the x scale: the outermost bars span 0.5-1.5
# and 9.5-10.5, so they fall outside those limits and are removed, which hides
# every response of 1 and 10 (two bars in each of the three facets).
ggplot(pol_long, aes(x = Response)) +
  geom_histogram(binwidth = 1, fill = "#66CCFF", color = "black", boundary = 0.5, closed = "left") +
  facet_wrap(~Variable, ncol = 1) +
  scale_x_continuous(breaks = 1:10) +
  xlab("Rating (1 = Not at all, 10 = Completely)") +
  ylab("Number of Respondents") +
  ggtitle("Distribution of Democratic Values and Satisfaction") +
  theme_minimal() +
  theme(panel.spacing.y = unit(2, "lines"))

## Warning: Removed 4100 rows containing non-finite outside the scale range (`stat_bin()`).
## Removed 6 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# Long-form confidence ratings for the ten organisations, without NA responses.
confidence_data <- VC %>%
  select(
    CReligious, CPress, CTelevision, CUnions, CCourts,
    CPParties, CParliament, CCivilService, CElections, CEnvOrg
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Organisation",
    values_to = "Response"
  ) %>%
  filter(!is.na(Response))

# One facet per organisation, two facets per row, showing how many respondents
# gave each rating.
ggplot(data = confidence_data, mapping = aes(x = factor(Response))) +
  geom_bar(fill = "#66CCFF", colour = "black") +
  facet_wrap(vars(Organisation), ncol = 2) +
  ggtitle("Confidence in Social Organisations") +
  xlab("Response (1 = A great deal, 4 = None at all)") +
  ylab("Number of Respondents") +
  theme_minimal()

Q2

Separate into two datasets for focus country vs other countries

# Split the sample on the focus country's code: rows with Country "MYS" go to
# MalaysiaData, every other row goes to OtherCountriesData.
MalaysiaData <- filter(VC, Country == "MYS")
OtherCountriesData <- filter(VC, Country != "MYS")

Combine both data into a single dataset

# Stack the two subsets again: Malaysian rows first, then all other rows.
CombinedData <- MalaysiaData %>%
  bind_rows(OtherCountriesData)

Show summary for Malaysia

# Per-column summary for the rows whose Country is "MYS".
summary(MalaysiaData)

##    Country             TPeople         TFamily      TNeighbourhood
##  Length:675         Min.   :1.000   Min.   :1.000   Min.   :1.0   
##  Class :character   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:2.0   
##  Mode  :character   Median :2.000   Median :1.000   Median :2.0   
##                     Mean   :1.812   Mean   :1.289   Mean   :2.2   
##                     3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:3.0   
##                     Max.   :2.000   Max.   :3.000   Max.   :4.0   
##                                                                   
##      TKnow           TMeet          VFamily        VFriends        VLeisure    
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:3.000   1st Qu.:1.00   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :3.000   Median :1.00   Median :2.000   Median :2.000  
##  Mean   :1.993   Mean   :3.139   Mean   :1.04   Mean   :1.726   Mean   :1.652  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:1.00   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :4.000  
##                                                                                
##    VReligion        HSatFin         HMedicine        EPrivate     
##  Min.   :1.000   Min.   : 1.000   Min.   :1.000   Min.   : 1.000  
##  1st Qu.:1.000   1st Qu.: 5.000   1st Qu.:3.000   1st Qu.: 4.000  
##  Median :1.000   Median : 6.000   Median :4.000   Median : 5.000  
##  Mean   :1.401   Mean   : 6.157   Mean   :3.387   Mean   : 5.348  
##  3rd Qu.:2.000   3rd Qu.: 8.000   3rd Qu.:4.000   3rd Qu.: 7.000  
##  Max.   :4.000   Max.   :10.000   Max.   :4.000   Max.   :10.000  
##                                                                   
##   ECompetition      EHardWork           PIAB       STOpportunity   
##  Min.   : 1.000   Min.   : 1.000   Min.   :1.000   Min.   : 1.000  
##  1st Qu.: 2.000   1st Qu.: 3.000   1st Qu.:1.000   1st Qu.: 6.000  
##  Median : 4.000   Median : 5.000   Median :2.000   Median : 8.000  
##  Mean   : 4.089   Mean   : 4.679   Mean   :2.247   Mean   : 7.477  
##  3rd Qu.: 6.000   3rd Qu.: 7.000   3rd Qu.:3.000   3rd Qu.: 9.000  
##  Max.   :10.000   Max.   :10.000   Max.   :4.000   Max.   :10.000  
##                                                                    
##     STFaith        STImportant       PNewspaper       PMobile    
##  Min.   : 1.000   Min.   : 1.000   Min.   :1.000   Min.   :1.00  
##  1st Qu.: 5.000   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:1.00  
##  Median : 6.000   Median : 4.000   Median :2.000   Median :1.00  
##  Mean   : 6.194   Mean   : 4.459   Mean   :2.415   Mean   :1.57  
##  3rd Qu.: 8.000   3rd Qu.: 6.000   3rd Qu.:4.000   3rd Qu.:2.00  
##  Max.   :10.000   Max.   :10.000   Max.   :5.000   Max.   :5.00  
##                                                                  
##      PEmail         PSocial         PDemImp        PDemCurrent    
##  Min.   :1.000   Min.   :1.000   Min.   : 1.000   Min.   : 1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 7.000   1st Qu.: 4.000  
##  Median :2.000   Median :1.000   Median : 9.000   Median : 6.000  
##  Mean   :2.535   Mean   :1.684   Mean   : 8.216   Mean   : 5.956  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:10.000   3rd Qu.: 8.000  
##  Max.   :5.000   Max.   :5.000   Max.   :10.000   Max.   :10.000  
##                                                                   
##    PSatisfied           MF             Age             Edu       
##  Min.   : 1.000   Min.   :1.000   Min.   :18.00   Min.   :0.000  
##  1st Qu.: 3.000   1st Qu.:1.000   1st Qu.:27.00   1st Qu.:2.000  
##  Median : 5.000   Median :1.000   Median :35.00   Median :2.000  
##  Mean   : 5.179   Mean   :1.483   Mean   :38.55   Mean   :3.366  
##  3rd Qu.: 7.000   3rd Qu.:2.000   3rd Qu.:50.00   3rd Qu.:5.000  
##  Max.   :10.000   Max.   :2.000   Max.   :79.00   Max.   :8.000  
##                                                                  
##    Employment      CReligious        CPress       CTelevision   
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :1.000   Median :2.000   Median :3.000   Median :3.000  
##  Mean   :2.443   Mean   :1.794   Mean   :2.667   Mean   :2.631  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :8.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##                  NA's   :1       NA's   :2       NA's   :1      
##     CUnions         CCourts       CPParties      CParliament    CCivilService  
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.00   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.00   Median :3.000   Median :3.000   Median :2.000  
##  Mean   :2.366   Mean   :2.24   Mean   :2.835   Mean   :2.657   Mean   :2.355  
##  3rd Qu.:3.000   3rd Qu.:3.00   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :1       NA's   :1      NA's   :1       NA's   :2       NA's   :2      
##    CElections       CEnvOrg     
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :2.000  
##  Mean   :2.654   Mean   :2.076  
##  3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000  
##  NA's   :1       NA's   :2

Show summary for Other Countries

# Per-column summary for all rows whose Country is not "MYS".
summary(OtherCountriesData)

##    Country             TPeople         TFamily     TNeighbourhood 
##  Length:49325       Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  Class :character   1st Qu.:2.000   1st Qu.:1.00   1st Qu.:2.000  
##  Mode  :character   Median :2.000   Median :1.00   Median :2.000  
##                     Mean   :1.756   Mean   :1.27   Mean   :2.182  
##                     3rd Qu.:2.000   3rd Qu.:1.00   3rd Qu.:3.000  
##                     Max.   :2.000   Max.   :4.00   Max.   :4.000  
##                     NA's   :642     NA's   :160    NA's   :391    
##      TKnow           TMeet         VFamily         VFriends        VLeisure    
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.00   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :3.00   Median :1.000   Median :2.000   Median :2.000  
##  Mean   :2.067   Mean   :3.01   Mean   :1.115   Mean   :1.712   Mean   :1.787  
##  3rd Qu.:2.000   3rd Qu.:4.00   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :288     NA's   :669    NA's   :77      NA's   :160     NA's   :269    
##    VReligion        HSatFin         HMedicine        EPrivate     
##  Min.   :1.000   Min.   : 1.000   Min.   :1.000   Min.   : 1.000  
##  1st Qu.:1.000   1st Qu.: 5.000   1st Qu.:3.000   1st Qu.: 4.000  
##  Median :2.000   Median : 6.000   Median :4.000   Median : 5.000  
##  Mean   :1.992   Mean   : 6.208   Mean   :3.347   Mean   : 5.662  
##  3rd Qu.:3.000   3rd Qu.: 8.000   3rd Qu.:4.000   3rd Qu.: 8.000  
##  Max.   :4.000   Max.   :10.000   Max.   :4.000   Max.   :10.000  
##  NA's   :459     NA's   :301      NA's   :303     NA's   :1741    
##   ECompetition      EHardWork           PIAB       STOpportunity   
##  Min.   : 1.000   Min.   : 1.000   Min.   :1.000   Min.   : 1.000  
##  1st Qu.: 1.000   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.: 6.000  
##  Median : 4.000   Median : 4.000   Median :2.000   Median : 8.000  
##  Mean   : 4.069   Mean   : 4.452   Mean   :2.413   Mean   : 7.557  
##  3rd Qu.: 6.000   3rd Qu.: 7.000   3rd Qu.:3.000   3rd Qu.:10.000  
##  Max.   :10.000   Max.   :10.000   Max.   :4.000   Max.   :10.000  
##  NA's   :809      NA's   :701      NA's   :2515    NA's   :1432    
##     STFaith        STImportant       PNewspaper       PMobile     
##  Min.   : 1.000   Min.   : 1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 3.000   1st Qu.: 2.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median : 5.000   Median : 5.000   Median :4.000   Median :1.000  
##  Mean   : 5.521   Mean   : 4.607   Mean   :3.387   Mean   :2.433  
##  3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :10.000   Max.   :10.000   Max.   :5.000   Max.   :5.000  
##  NA's   :1934     NA's   :1776     NA's   :472     NA's   :497    
##      PEmail         PSocial         PDemImp        PDemCurrent    
##  Min.   :1.000   Min.   :1.000   Min.   : 1.000   Min.   : 1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.: 7.000   1st Qu.: 5.000  
##  Median :4.000   Median :2.000   Median : 9.000   Median : 6.000  
##  Mean   :3.528   Mean   :2.686   Mean   : 8.369   Mean   : 6.172  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:10.000   3rd Qu.: 8.000  
##  Max.   :5.000   Max.   :5.000   Max.   :10.000   Max.   :10.000  
##  NA's   :1005    NA's   :1888    NA's   :926      NA's   :1368    
##    PSatisfied           MF             Age              Edu      
##  Min.   : 1.000   Min.   :1.000   Min.   : 16.00   Min.   :0.00  
##  1st Qu.: 3.000   1st Qu.:1.000   1st Qu.: 29.00   1st Qu.:2.00  
##  Median : 5.000   Median :2.000   Median : 41.00   Median :3.00  
##  Mean   : 5.344   Mean   :1.527   Mean   : 43.16   Mean   :3.55  
##  3rd Qu.: 7.000   3rd Qu.:2.000   3rd Qu.: 55.00   3rd Qu.:6.00  
##  Max.   :10.000   Max.   :2.000   Max.   :100.00   Max.   :8.00  
##  NA's   :1806     NA's   :42      NA's   :269      NA's   :520   
##    Employment      CReligious        CPress       CTelevision   
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :2.000   Median :3.000   Median :3.000  
##  Mean   :3.139   Mean   :2.197   Mean   :2.704   Mean   :2.628  
##  3rd Qu.:5.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :8.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :622     NA's   :1016    NA's   :1115    NA's   :738    
##     CUnions         CCourts        CPParties      CParliament   
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :2.000   Median :3.000   Median :3.000  
##  Mean   :2.708   Mean   :2.416   Mean   :2.965   Mean   :2.794  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :3580    NA's   :1768    NA's   :1649    NA's   :1737   
##  CCivilService     CElections       CEnvOrg     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.000   Median :3.000   Median :2.000  
##  Mean   :2.563   Mean   :2.593   Mean   :2.385  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :1664    NA's   :1958    NA's   :3255

Add ‘Group’ Column

# Label every row as "Malaysia" or "Others" so plots can facet and colour by
# group.
CombinedData$Group <- ifelse(CombinedData$Country == "MYS", "Malaysia", "Others")

Graphs

# Long-form trust responses tagged with the Malaysia/Others group; NA
# responses are removed so they do not show up as a bar.
trust_long <- CombinedData %>%
  select(TPeople, TFamily, TNeighbourhood, TKnow, TMeet, Group) %>%
  pivot_longer(
    cols = !Group,
    names_to = "Variable",
    values_to = "Response"
  ) %>%
  filter(!is.na(Response))

# Rows of the grid are the two groups, columns are the trust items. The y axis
# is freed per row because the two groups differ greatly in size.
ggplot(data = trust_long, mapping = aes(x = factor(Response), fill = Group)) +
  geom_bar() +
  facet_grid(rows = vars(Group), cols = vars(Variable), scales = "free_y") +
  ggtitle("Distribution of Trustworthiness") +
  xlab("Response") +
  ylab("Frequency") +
  scale_fill_manual(values = c(Malaysia = "#FF6666", Others = "#66CCFF")) +
  theme_minimal()

# Long-form "importance in life" responses tagged with the Malaysia/Others
# group. VFamily is included so this comparison covers the same four items as
# the overall importance plot; NA responses are removed so they do not show up
# as a bar.
importance_long <- CombinedData %>%
  select(VFamily, VFriends, VLeisure, VReligion, Group) %>%
  pivot_longer(cols = -Group, names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response))

# Rows of the grid are the two groups, columns are the items. The y axis is
# freed per row because the two groups differ greatly in size.
ggplot(importance_long, aes(x = factor(Response), fill = Group)) +
  geom_bar() +
  facet_grid(Group ~ Variable, scales = "free_y") +
  labs(
    title = "Distribution of Importance in Life",
    x = "Response",
    y = "Frequency"
  ) + scale_fill_manual(values = c("Malaysia" = "#FF6666", "Others" = "#66CCFF")) +
  theme_minimal()

# Long-form happiness/wellbeing responses tagged with the Malaysia/Others
# group; NA responses are removed so they do not show up as a bar.
hw_long <- CombinedData %>%
  select(HSatFin, HMedicine, Group) %>%
  pivot_longer(cols = -Group, names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response))

# Rows of the grid are the two groups, columns are the two items.
# Both axes are freed: HSatFin takes values 1-10 but HMedicine only 1-4, so a
# shared x axis would leave six empty categories in every HMedicine panel. The
# y axis is freed per row because the two groups differ greatly in size.
ggplot(hw_long, aes(x = factor(Response), fill = Group)) +
  geom_bar() +
  facet_grid(Group ~ Variable, scales = "free") +
  labs(
    title = "Distribution of Happiness and Wellbeing",
    x = "Response",
    y = "Frequency"
  ) + scale_fill_manual(values = c("Malaysia" = "#FF6666", "Others" = "#66CCFF")) +
  theme_minimal()

# Long-form economic-value responses tagged with the Malaysia/Others group;
# NA responses are removed so they do not show up as a bar.
e_long <- CombinedData %>%
  select(EPrivate, ECompetition, EHardWork, Group) %>%
  pivot_longer(
    cols = !Group,
    names_to = "Variable",
    values_to = "Response"
  ) %>%
  filter(!is.na(Response))

# Rows of the grid are the two groups, columns are the items; the y axis is
# freed per row because the two groups differ greatly in size.
ggplot(data = e_long, mapping = aes(x = factor(Response), fill = Group)) +
  geom_bar() +
  facet_grid(rows = vars(Group), cols = vars(Variable), scales = "free_y") +
  ggtitle("Distribution of Economic Values") +
  xlab("Response") +
  ylab("Frequency") +
  scale_fill_manual(values = c(Malaysia = "#FF6666", Others = "#66CCFF")) +
  theme_minimal()

# Long-format copy of the three ST* items, with the Group label retained and
# missing responses removed.
st_long <- CombinedData %>%
  select(STOpportunity, STFaith, STImportant, Group) %>%
  pivot_longer(cols = !Group, names_to = "Variable", values_to = "Response") %>%
  drop_na(Response)

# Response counts per item (panel columns) and group (panel rows); the y-axis
# range is allowed to differ between panel rows.
ggplot(st_long) +
  geom_bar(aes(x = factor(Response), fill = Group)) +
  facet_grid(rows = vars(Group), cols = vars(Variable), scales = "free_y") +
  scale_fill_manual(values = c("Malaysia" = "#FF6666", "Others" = "#66CCFF")) +
  labs(
    x = "Response",
    y = "Frequency",
    title = "Distribution of Opinion on Science and Technology "
  ) +
  theme_minimal()

# Long-format copy of the four media-usage items (PNewspaper, PMobile, PEmail,
# PSocial), with the Group label retained and missing responses removed.
p1_long <- CombinedData %>%
  select(PNewspaper, PMobile, PEmail, PSocial, Group) %>%
  pivot_longer(cols = -Group, names_to = "Variable", values_to = "Response") %>%
  filter(!is.na(Response))

# Response counts per item (panel columns) and group (panel rows); the y-axis
# range is allowed to differ between panel rows.
# The title previously read "... for Politic"; corrected to "Politics".
ggplot(p1_long, aes(x = factor(Response), fill = Group)) +
  geom_bar() +
  facet_grid(Group ~ Variable, scales = "free_y") +
  labs(
    title = "Distribution of Media Usage for Politics",
    x = "Response",
    y = "Frequency"
  ) +
  scale_fill_manual(values = c("Malaysia" = "#FF6666", "Others" = "#66CCFF")) +
  theme_minimal()

# Long-format copy of PDemImp, PDemCurrent and PSatisfied, with the Group
# label retained and missing responses removed.
p2_long <- CombinedData %>%
  select(PDemImp, PDemCurrent, PSatisfied, Group) %>%
  pivot_longer(cols = !Group, names_to = "Variable", values_to = "Response") %>%
  drop_na(Response)

# Response counts per item (panel columns) and group (panel rows); the y-axis
# range is allowed to differ between panel rows.
ggplot(p2_long) +
  geom_bar(aes(x = factor(Response), fill = Group)) +
  facet_grid(rows = vars(Group), cols = vars(Variable), scales = "free_y") +
  scale_fill_manual(values = c("Malaysia" = "#FF6666", "Others" = "#66CCFF")) +
  labs(x = "Response", y = "Frequency", title = "Distribution of Political Opinions") +
  theme_minimal()

# Long-format copy of the ten C* confidence items, with the Group label
# retained and missing responses removed.
c_long <- CombinedData %>%
  select(
    CReligious, CPress, CTelevision, CUnions, CCourts,
    CPParties, CParliament, CCivilService, CElections, CEnvOrg,
    Group
  ) %>%
  pivot_longer(cols = !Group, names_to = "Variable", values_to = "Response") %>%
  drop_na(Response)

# Response counts per item (panel columns) and group (panel rows); the y-axis
# range is allowed to differ between panel rows.
ggplot(c_long) +
  geom_bar(aes(x = factor(Response), fill = Group)) +
  facet_grid(rows = vars(Group), cols = vars(Variable), scales = "free_y") +
  scale_fill_manual(values = c("Malaysia" = "#FF6666", "Others" = "#66CCFF")) +
  labs(
    x = "Response",
    y = "Frequency",
    title = "Distribution of Confidence in Social Organisations"
  ) +
  theme_minimal()

Hypothesis Testing

# One-sided Welch t-test; H1: mean TKnow is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$TKnow,
  y = OtherCountriesData$TKnow,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$TKnow and OtherCountriesData$TKnow
## t = -3.2307, df = 708.28, p-value = 0.000646
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.03643982
## sample estimates:
## mean of x mean of y 
##  1.992593  2.066929

# One-sided Welch t-test; H1: mean VReligion is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$VReligion,
  y = OtherCountriesData$VReligion,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$VReligion and OtherCountriesData$VReligion
## t = -21.416, df = 719.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.5446887
## sample estimates:
## mean of x mean of y 
##  1.401481  1.991548

# One-sided Welch t-test within MalaysiaData; H1: mean VLeisure is lower than
# mean VFriends.
# NOTE(review): both vectors are columns of the same data frame, so the two
# samples are not independent; a paired test (paired = TRUE) may be more
# appropriate -- confirm before changing, as the reported output would differ.
t.test(
  x = MalaysiaData$VLeisure,
  y = MalaysiaData$VFriends,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$VLeisure and MalaysiaData$VFriends
## t = -2.2723, df = 1330.8, p-value = 0.01161
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.02041567
## sample estimates:
## mean of x mean of y 
##  1.651852  1.725926

# One-sided Welch t-test within OtherCountriesData; H1: mean VFriends is lower
# than mean VLeisure.
# NOTE(review): both vectors are columns of the same data frame, so the two
# samples are not independent; a paired test (paired = TRUE) may be more
# appropriate -- confirm before changing, as the reported output would differ.
t.test(
  x = OtherCountriesData$VFriends,
  y = OtherCountriesData$VLeisure,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  OtherCountriesData$VFriends and OtherCountriesData$VLeisure
## t = -15.449, df = 97895, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.06764058
## sample estimates:
## mean of x mean of y 
##  1.711522  1.787223

# One-sided Welch t-test; H1: mean HMedicine is higher in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$HMedicine,
  y = OtherCountriesData$HMedicine,
  alternative = "greater"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$HMedicine and OtherCountriesData$HMedicine
## t = 1.2405, df = 698.5, p-value = 0.1076
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.01292976         Inf
## sample estimates:
## mean of x mean of y 
##  3.386667  3.347211

# One-sided Welch t-test; H1: mean HSatFin is higher in OtherCountriesData
# than in MalaysiaData (note the argument order: other countries first).
t.test(
  x = OtherCountriesData$HSatFin,
  y = MalaysiaData$HSatFin,
  alternative = "greater"
)

## 
##  Welch Two Sample t-test
## 
## data:  OtherCountriesData$HSatFin and MalaysiaData$HSatFin
## t = 0.66318, df = 701.88, p-value = 0.2537
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.07627198         Inf
## sample estimates:
## mean of x mean of y 
##  6.208449  6.157037

# One-sided Welch t-test; H1: mean EPrivate is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$EPrivate,
  y = OtherCountriesData$EPrivate,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$EPrivate and OtherCountriesData$EPrivate
## t = -3.4295, df = 701.98, p-value = 0.00032
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.1631174
## sample estimates:
## mean of x mean of y 
##  5.348148  5.661987

# One-sided Welch t-test; H1: mean EHardWork is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$EHardWork,
  y = OtherCountriesData$EHardWork,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$EHardWork and OtherCountriesData$EHardWork
## t = 2.2875, df = 698.95, p-value = 0.9888
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.3900777
## sample estimates:
## mean of x mean of y 
##  4.678519  4.451732

# One-sided Welch t-test; H1: mean ECompetition is higher in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$ECompetition,
  y = OtherCountriesData$ECompetition,
  alternative = "greater"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$ECompetition and OtherCountriesData$ECompetition
## t = 0.22175, df = 699.82, p-value = 0.4123
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.1299001        Inf
## sample estimates:
## mean of x mean of y 
##  4.088889  4.068678

# One-sided Welch t-test; H1: mean STOpportunity is lower in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$STOpportunity,
  y = OtherCountriesData$STOpportunity,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$STOpportunity and OtherCountriesData$STOpportunity
## t = -1.1959, df = 709.81, p-value = 0.1161
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.03031784
## sample estimates:
## mean of x mean of y 
##  7.477037  7.557409

# One-sided Welch t-test; H1: mean STImportant is lower in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$STImportant,
  y = OtherCountriesData$STImportant,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$STImportant and OtherCountriesData$STImportant
## t = -1.518, df = 700.51, p-value = 0.06473
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.01257329
## sample estimates:
## mean of x mean of y 
##  4.459259  4.607184

# One-sided Welch t-test; H1: mean STFaith is higher in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$STFaith,
  y = OtherCountriesData$STFaith,
  alternative = "greater"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$STFaith and OtherCountriesData$STFaith
## t = 8.0566, df = 710.94, p-value = 1.656e-15
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.5351212       Inf
## sample estimates:
## mean of x mean of y 
##  6.194074  5.521449

# One-sided Welch t-test; H1: mean PNewspaper is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$PNewspaper,
  y = OtherCountriesData$PNewspaper,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PNewspaper and OtherCountriesData$PNewspaper
## t = -17.855, df = 698.38, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.8828032
## sample estimates:
## mean of x mean of y 
##  2.414815  3.387325

# One-sided Welch t-test; H1: mean PMobile is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$PMobile,
  y = OtherCountriesData$PMobile,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PMobile and OtherCountriesData$PMobile
## t = -19.687, df = 718.77, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.7902458
## sample estimates:
## mean of x mean of y 
##  1.570370  2.432764

# One-sided Welch t-test; H1: mean PEmail is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$PEmail,
  y = OtherCountriesData$PEmail,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PEmail and OtherCountriesData$PEmail
## t = -16.232, df = 695.15, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.8921135
## sample estimates:
## mean of x mean of y 
##  2.534815  3.527670

# One-sided Welch t-test; H1: mean PSocial is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$PSocial,
  y = OtherCountriesData$PSocial,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PSocial and OtherCountriesData$PSocial
## t = -20.903, df = 715.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.9226826
## sample estimates:
## mean of x mean of y 
##  1.684444  2.686047

# One-sided Welch t-test; H1: mean PDemImp is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$PDemImp,
  y = OtherCountriesData$PDemImp,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PDemImp and OtherCountriesData$PDemImp
## t = -2.069, df = 698, p-value = 0.01946
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.03111437
## sample estimates:
## mean of x mean of y 
##  8.216296  8.368851

# One-sided Welch t-test; H1: mean PDemCurrent is lower in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$PDemCurrent,
  y = OtherCountriesData$PDemCurrent,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PDemCurrent and OtherCountriesData$PDemCurrent
## t = -2.2401, df = 695.18, p-value = 0.0127
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.0573577
## sample estimates:
## mean of x mean of y 
##  5.955556  6.172217

# One-sided Welch t-test; H1: mean PSatisfied is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$PSatisfied,
  y = OtherCountriesData$PSatisfied,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$PSatisfied and OtherCountriesData$PSatisfied
## t = -1.6334, df = 695.3, p-value = 0.05142
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf 0.001377133
## sample estimates:
## mean of x mean of y 
##  5.179259  5.344052

# One-sided Welch t-test; H1: mean CReligious is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$CReligious,
  y = OtherCountriesData$CReligious,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CReligious and OtherCountriesData$CReligious
## t = -14.694, df = 711.68, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.3581093
## sample estimates:
## mean of x mean of y 
##  1.793769  2.197085

# One-sided Welch t-test; H1: mean CPress is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$CPress,
  y = OtherCountriesData$CPress,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CPress and OtherCountriesData$CPress
## t = -1.3631, df = 701.34, p-value = 0.08664
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf 0.007768984
## sample estimates:
## mean of x mean of y 
##  2.667162  2.704460

# One-sided Welch t-test; H1: mean CTelevision is lower in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$CTelevision,
  y = OtherCountriesData$CTelevision,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CTelevision and OtherCountriesData$CTelevision
## t = 0.085813, df = 697.66, p-value = 0.5342
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.05120508
## sample estimates:
## mean of x mean of y 
##  2.630564  2.628028

# One-sided Welch t-test; H1: mean CUnions is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$CUnions,
  y = OtherCountriesData$CUnions,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CUnions and OtherCountriesData$CUnions
## t = -13.473, df = 710.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf -0.300194
## sample estimates:
## mean of x mean of y 
##  2.366469  2.708471

# One-sided Welch t-test; H1: mean CCourts is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$CCourts,
  y = OtherCountriesData$CCourts,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CCourts and OtherCountriesData$CCourts
## t = -5.8823, df = 702.02, p-value = 3.13e-09
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.1263172
## sample estimates:
## mean of x mean of y 
##  2.240356  2.415796

# One-sided Welch t-test; H1: mean CPParties is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$CPParties,
  y = OtherCountriesData$CPParties,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CPParties and OtherCountriesData$CPParties
## t = -4.3517, df = 699.4, p-value = 7.762e-06
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.08035167
## sample estimates:
## mean of x mean of y 
##  2.835312  2.964594

# One-sided Welch t-test; H1: mean CParliament is lower in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$CParliament,
  y = OtherCountriesData$CParliament,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CParliament and OtherCountriesData$CParliament
## t = -4.3814, df = 698.71, p-value = 6.801e-06
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##         -Inf -0.08550639
## sample estimates:
## mean of x mean of y 
##  2.656761  2.793772

# One-sided Welch t-test; H1: mean CCivilService is lower in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$CCivilService,
  y = OtherCountriesData$CCivilService,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CCivilService and OtherCountriesData$CCivilService
## t = -7.3608, df = 700.79, p-value = 2.569e-13
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.1611138
## sample estimates:
## mean of x mean of y 
##  2.355126  2.562682

# One-sided Welch t-test; H1: mean CElections is higher in MalaysiaData than
# in OtherCountriesData.
t.test(
  x = MalaysiaData$CElections,
  y = OtherCountriesData$CElections,
  alternative = "greater"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CElections and OtherCountriesData$CElections
## t = 1.8801, df = 698.28, p-value = 0.03026
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.007558222         Inf
## sample estimates:
## mean of x mean of y 
##  2.654303  2.593324

# One-sided Welch t-test; H1: mean CEnvOrg is lower in MalaysiaData than in
# OtherCountriesData.
t.test(
  x = MalaysiaData$CEnvOrg,
  y = OtherCountriesData$CEnvOrg,
  alternative = "less"
)

## 
##  Welch Two Sample t-test
## 
## data:  MalaysiaData$CEnvOrg and OtherCountriesData$CEnvOrg
## t = -11.988, df = 707.38, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.2666831
## sample estimates:
## mean of x mean of y 
##  2.075780  2.384936

Q2(b) Finding the best predictors for MALAYSIA

Remove the Group and Country columns

# Drop the Country and Group columns from MalaysiaData in place.
MalaysiaData[["Country"]] <- NULL
MalaysiaData[["Group"]] <- NULL
# Predictor names are the first 29 remaining column names (by position).
predictors <- names(MalaysiaData)[seq_len(29)]

CReligious Regression Model

# Linear model of CReligious on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CReligious ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
religious_fit <-lm(paste("CReligious ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(religious_fit)

## 
## Call:
## lm(formula = paste("CReligious ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.84757 -0.47371  0.00959  0.39773  1.86372 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.831869   0.339104   2.453  0.01443 *  
## TPeople         0.044924   0.066224   0.678  0.49778    
## TFamily         0.165844   0.057020   2.909  0.00376 ** 
## TNeighbourhood  0.146716   0.048634   3.017  0.00266 ** 
## TKnow           0.112644   0.046700   2.412  0.01614 *  
## TMeet          -0.053520   0.040880  -1.309  0.19094    
## VFamily        -0.095667   0.118709  -0.806  0.42060    
## VFriends        0.039848   0.045199   0.882  0.37831    
## VLeisure       -0.006356   0.040041  -0.159  0.87393    
## VReligion       0.305298   0.038381   7.954 8.11e-15 ***
## HSatFin        -0.002863   0.013369  -0.214  0.83049    
## HMedicine       0.032129   0.031548   1.018  0.30887    
## EPrivate       -0.028767   0.011143  -2.582  0.01005 *  
## ECompetition   -0.009855   0.012476  -0.790  0.42988    
## EHardWork       0.022334   0.010601   2.107  0.03553 *  
## PIAB           -0.005779   0.025506  -0.227  0.82083    
## STOpportunity  -0.037552   0.015417  -2.436  0.01513 *  
## STFaith        -0.002095   0.011725  -0.179  0.85825    
## STImportant    -0.010899   0.010841  -1.005  0.31513    
## PNewspaper      0.030630   0.018406   1.664  0.09657 .  
## PMobile         0.024394   0.026384   0.925  0.35553    
## PEmail         -0.045189   0.018125  -2.493  0.01291 *  
## PSocial        -0.012469   0.024456  -0.510  0.61033    
## PDemImp         0.006245   0.014378   0.434  0.66420    
## PDemCurrent     0.008531   0.015519   0.550  0.58271    
## PSatisfied     -0.016111   0.015049  -1.071  0.28477    
## MF              0.088179   0.050073   1.761  0.07871 .  
## Age             0.003223   0.002017   1.598  0.11061    
## Edu             0.003657   0.014969   0.244  0.80708    
## Employment     -0.003227   0.012628  -0.256  0.79839    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6115 on 644 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2755, Adjusted R-squared:  0.2428 
## F-statistic: 8.442 on 29 and 644 DF,  p-value: < 2.2e-16

CPress Regression Model

# Linear model of CPress on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CPress ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
press_fit <-lm(paste("CPress ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(press_fit)

## 
## Call:
## lm(formula = paste("CPress ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.20475 -0.41530  0.04822  0.38929  1.98155 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.5113657  0.3486950   7.202 1.66e-12 ***
## TPeople         0.0773325  0.0681901   1.134 0.257187    
## TFamily        -0.0014967  0.0586136  -0.026 0.979636    
## TNeighbourhood  0.1182055  0.0499919   2.364 0.018351 *  
## TKnow           0.0158757  0.0480303   0.331 0.741103    
## TMeet           0.0525150  0.0420325   1.249 0.211977    
## VFamily         0.0302391  0.1220230   0.248 0.804357    
## VFriends        0.0311839  0.0464838   0.671 0.502554    
## VLeisure        0.0858844  0.0411712   2.086 0.037369 *  
## VReligion      -0.0592176  0.0394676  -1.500 0.133998    
## HSatFin        -0.0120396  0.0137425  -0.876 0.381311    
## HMedicine       0.0051642  0.0324288   0.159 0.873524    
## EPrivate       -0.0206135  0.0114539  -1.800 0.072377 .  
## ECompetition   -0.0155269  0.0128296  -1.210 0.226631    
## EHardWork       0.0056361  0.0108972   0.517 0.605187    
## PIAB           -0.0513440  0.0262271  -1.958 0.050701 .  
## STOpportunity   0.0103102  0.0158473   0.651 0.515540    
## STFaith        -0.0090734  0.0120546  -0.753 0.451913    
## STImportant    -0.0026860  0.0111711  -0.240 0.810066    
## PNewspaper      0.0653459  0.0189245   3.453 0.000591 ***
## PMobile        -0.0260262  0.0272197  -0.956 0.339354    
## PEmail          0.0230649  0.0186337   1.238 0.216240    
## PSocial        -0.0102433  0.0251530  -0.407 0.683968    
## PDemImp        -0.0009136  0.0147842  -0.062 0.950743    
## PDemCurrent    -0.0131028  0.0159520  -0.821 0.411732    
## PSatisfied     -0.0789830  0.0154752  -5.104 4.39e-07 ***
## MF             -0.0405846  0.0515042  -0.788 0.430995    
## Age             0.0037938  0.0020746   1.829 0.067913 .  
## Edu             0.0045820  0.0153872   0.298 0.765966    
## Employment     -0.0156706  0.0129834  -1.207 0.227888    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6285 on 643 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.2336, Adjusted R-squared:  0.199 
## F-statistic: 6.758 on 29 and 643 DF,  p-value: < 2.2e-16

CTelevision Regression Model

# Linear model of CTelevision on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CTelevision ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
tv_fit <-lm(paste("CTelevision ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(tv_fit)

## 
## Call:
## lm(formula = paste("CTelevision ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.17463 -0.42129  0.05246  0.43590  2.10916 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.4549835  0.3794208   6.470 1.94e-10 ***
## TPeople         0.0638471  0.0740971   0.862  0.38919    
## TFamily         0.0844186  0.0637990   1.323  0.18624    
## TNeighbourhood  0.0889933  0.0544167   1.635  0.10245    
## TKnow           0.0467594  0.0522520   0.895  0.37118    
## TMeet           0.0818148  0.0457405   1.789  0.07414 .  
## VFamily         0.0406716  0.1328229   0.306  0.75954    
## VFriends        0.0350059  0.0505723   0.692  0.48906    
## VLeisure        0.0717992  0.0448012   1.603  0.10951    
## VReligion      -0.0733203  0.0429439  -1.707  0.08824 .  
## HSatFin        -0.0242254  0.0149589  -1.619  0.10584    
## HMedicine       0.0496394  0.0352989   1.406  0.16013    
## EPrivate       -0.0231641  0.0124679  -1.858  0.06364 .  
## ECompetition   -0.0208704  0.0139595  -1.495  0.13539    
## EHardWork       0.0029452  0.0118619   0.248  0.80399    
## PIAB           -0.0331349  0.0285382  -1.161  0.24604    
## STOpportunity  -0.0038249  0.0172498  -0.222  0.82459    
## STFaith         0.0044021  0.0131190   0.336  0.73732    
## STImportant    -0.0073010  0.0121300  -0.602  0.54746    
## PNewspaper      0.0599172  0.0205943   2.909  0.00375 ** 
## PMobile        -0.0333278  0.0295213  -1.129  0.25934    
## PEmail          0.0021241  0.0202801   0.105  0.91661    
## PSocial        -0.0126438  0.0273634  -0.462  0.64419    
## PDemImp        -0.0038512  0.0160869  -0.239  0.81087    
## PDemCurrent     0.0023979  0.0173643   0.138  0.89021    
## PSatisfied     -0.0894599  0.0168381  -5.313 1.49e-07 ***
## MF             -0.0841934  0.0560259  -1.503  0.13339    
## Age             0.0019397  0.0022571   0.859  0.39047    
## Edu             0.0081554  0.0167483   0.487  0.62647    
## Employment     -0.0004848  0.0141288  -0.034  0.97264    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6842 on 644 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2251, Adjusted R-squared:  0.1902 
## F-statistic:  6.45 on 29 and 644 DF,  p-value: < 2.2e-16

CUnions Regression Model

# Linear model of CUnions on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CUnions ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
unions_fit <-lm(paste("CUnions ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(unions_fit)

## 
## Call:
## lm(formula = paste("CUnions ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.69803 -0.38142 -0.05185  0.42261  1.95271 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.962174   0.324898   6.039 2.62e-09 ***
## TPeople        -0.032196   0.063449  -0.507  0.61203    
## TFamily         0.035216   0.054631   0.645  0.51941    
## TNeighbourhood  0.132247   0.046597   2.838  0.00468 ** 
## TKnow           0.061483   0.044743   1.374  0.16988    
## TMeet           0.068378   0.039168   1.746  0.08133 .  
## VFamily         0.198849   0.113736   1.748  0.08088 .  
## VFriends        0.057264   0.043305   1.322  0.18653    
## VLeisure       -0.054497   0.038363  -1.421  0.15593    
## VReligion       0.019007   0.036773   0.517  0.60542    
## HSatFin        -0.014927   0.012809  -1.165  0.24433    
## HMedicine       0.022133   0.030227   0.732  0.46430    
## EPrivate       -0.050609   0.010676  -4.740 2.63e-06 ***
## ECompetition    0.004307   0.011954   0.360  0.71873    
## EHardWork       0.002247   0.010157   0.221  0.82502    
## PIAB           -0.033048   0.024437  -1.352  0.17674    
## STOpportunity  -0.005896   0.014771  -0.399  0.68991    
## STFaith         0.005450   0.011234   0.485  0.62776    
## STImportant     0.003884   0.010387   0.374  0.70861    
## PNewspaper      0.047255   0.017635   2.680  0.00756 ** 
## PMobile        -0.028632   0.025279  -1.133  0.25779    
## PEmail         -0.022293   0.017366  -1.284  0.19970    
## PSocial         0.024589   0.023431   1.049  0.29439    
## PDemImp        -0.011765   0.013775  -0.854  0.39338    
## PDemCurrent     0.006212   0.014869   0.418  0.67623    
## PSatisfied     -0.058762   0.014418  -4.075 5.17e-05 ***
## MF             -0.050695   0.047975  -1.057  0.29105    
## Age             0.005601   0.001933   2.898  0.00389 ** 
## Edu             0.009800   0.014342   0.683  0.49463    
## Employment      0.004092   0.012099   0.338  0.73529    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5859 on 644 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.223,  Adjusted R-squared:  0.188 
## F-statistic: 6.374 on 29 and 644 DF,  p-value: < 2.2e-16

CCourts Regression Model

# Linear model of CCourts on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CCourts ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
courts_fit <-lm(paste("CCourts ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(courts_fit)

## 
## Call:
## lm(formula = paste("CCourts ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.91448 -0.41149 -0.01823  0.40831  2.14388 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.097440   0.368601   5.690 1.93e-08 ***
## TPeople         0.038188   0.071984   0.531 0.595944    
## TFamily         0.110649   0.061980   1.785 0.074691 .  
## TNeighbourhood  0.119815   0.052865   2.266 0.023755 *  
## TKnow          -0.008814   0.050762  -0.174 0.862210    
## TMeet           0.046552   0.044436   1.048 0.295207    
## VFamily         0.087456   0.129035   0.678 0.498163    
## VFriends        0.034297   0.049130   0.698 0.485383    
## VLeisure       -0.019010   0.043524  -0.437 0.662429    
## VReligion       0.084604   0.041719   2.028 0.042977 *  
## HSatFin        -0.001990   0.014532  -0.137 0.891140    
## HMedicine      -0.045513   0.034292  -1.327 0.184910    
## EPrivate       -0.027752   0.012112  -2.291 0.022273 *  
## ECompetition   -0.002480   0.013561  -0.183 0.854982    
## EHardWork      -0.005767   0.011524  -0.500 0.616927    
## PIAB           -0.074923   0.027724  -2.702 0.007065 ** 
## STOpportunity   0.007374   0.016758   0.440 0.660044    
## STFaith         0.005924   0.012745   0.465 0.642228    
## STImportant     0.036502   0.011784   3.098 0.002036 ** 
## PNewspaper      0.043813   0.020007   2.190 0.028893 *  
## PMobile        -0.028157   0.028679  -0.982 0.326582    
## PEmail         -0.014407   0.019702  -0.731 0.464890    
## PSocial         0.039070   0.026583   1.470 0.142126    
## PDemImp        -0.001584   0.015628  -0.101 0.919283    
## PDemCurrent    -0.032207   0.016869  -1.909 0.056675 .  
## PSatisfied     -0.072538   0.016358  -4.434 1.09e-05 ***
## MF             -0.115110   0.054428  -2.115 0.034822 *  
## Age             0.007598   0.002193   3.465 0.000566 ***
## Edu            -0.003847   0.016271  -0.236 0.813175    
## Employment     -0.021353   0.013726  -1.556 0.120286    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6647 on 644 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2798, Adjusted R-squared:  0.2474 
## F-statistic: 8.629 on 29 and 644 DF,  p-value: < 2.2e-16

CPParties Regression Model

# Linear model of CPParties on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CPParties ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
pparties_fit <-lm(paste("CPParties ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(pparties_fit)

## 
## Call:
## lm(formula = paste("CPParties ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.49944 -0.45367  0.02017  0.40673  2.20180 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.7685707  0.3693735   7.495 2.20e-13 ***
## TPeople         0.0050290  0.0721349   0.070  0.94444    
## TFamily         0.1242695  0.0621096   2.001  0.04583 *  
## TNeighbourhood  0.0892726  0.0529757   1.685  0.09244 .  
## TKnow           0.0269689  0.0508683   0.530  0.59618    
## TMeet           0.1211178  0.0445293   2.720  0.00671 ** 
## VFamily        -0.3755175  0.1293057  -2.904  0.00381 ** 
## VFriends        0.0546851  0.0492331   1.111  0.26710    
## VLeisure        0.1010592  0.0436148   2.317  0.02081 *  
## VReligion       0.0534902  0.0418067   1.279  0.20119    
## HSatFin        -0.0246700  0.0145628  -1.694  0.09074 .  
## HMedicine      -0.0267239  0.0343642  -0.778  0.43705    
## EPrivate       -0.0231530  0.0121377  -1.908  0.05690 .  
## ECompetition   -0.0195661  0.0135899  -1.440  0.15042    
## EHardWork      -0.0080502  0.0115478  -0.697  0.48598    
## PIAB           -0.0358835  0.0277825  -1.292  0.19696    
## STOpportunity   0.0336396  0.0167930   2.003  0.04558 *  
## STFaith         0.0037755  0.0127716   0.296  0.76762    
## STImportant     0.0105486  0.0118088   0.893  0.37204    
## PNewspaper      0.0799062  0.0200490   3.986 7.50e-05 ***
## PMobile        -0.0611096  0.0287395  -2.126  0.03386 *  
## PEmail         -0.0205007  0.0197430  -1.038  0.29948    
## PSocial         0.0008734  0.0266388   0.033  0.97386    
## PDemImp        -0.0182233  0.0156609  -1.164  0.24501    
## PDemCurrent    -0.0082236  0.0169045  -0.486  0.62680    
## PSatisfied     -0.0843937  0.0163922  -5.148 3.49e-07 ***
## MF              0.0065304  0.0545423   0.120  0.90473    
## Age             0.0037714  0.0021974   1.716  0.08659 .  
## Edu             0.0032411  0.0163048   0.199  0.84250    
## Employment     -0.0050819  0.0137547  -0.369  0.71190    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6661 on 644 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2725, Adjusted R-squared:  0.2398 
## F-statistic: 8.319 on 29 and 644 DF,  p-value: < 2.2e-16

CParliament Regression Model

# Linear model of CParliament on every variable named in `predictors`, fitted on
# MalaysiaData. The formula is assembled as a string ("CParliament ~ a + b + ...")
# and passed to lm(); summary() prints the coefficient table and fit statistics.
parliament_fit <-lm(paste("CParliament ~", paste(predictors, collapse = " + ")), data = MalaysiaData)
summary(parliament_fit)

## 
## Call:
## lm(formula = paste("CParliament ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.27151 -0.46743 -0.00139  0.43898  2.07331 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.9614505  0.3755208   5.223 2.38e-07 ***
## TPeople         0.0736642  0.0734763   1.003  0.31645    
## TFamily         0.0471001  0.0631304   0.746  0.45589    
## TNeighbourhood  0.1419993  0.0539156   2.634  0.00865 ** 
## TKnow           0.0617029  0.0517310   1.193  0.23340    
## TMeet           0.0815982  0.0452868   1.802  0.07204 .  
## VFamily         0.1198413  0.1314430   0.912  0.36225    
## VFriends        0.0813080  0.0500671   1.624  0.10487    
## VLeisure        0.0940626  0.0443530   2.121  0.03432 *  
## VReligion      -0.0198660  0.0425030  -0.467  0.64037    
## HSatFin        -0.0310949  0.0148044  -2.100  0.03608 *  
## HMedicine      -0.0082341  0.0349372  -0.236  0.81375    
## EPrivate       -0.0242420  0.0123440  -1.964  0.04998 *  
## ECompetition    0.0008545  0.0138251   0.062  0.95074    
## EHardWork      -0.0057952  0.0117403  -0.494  0.62174    
## PIAB           -0.0641828  0.0282429  -2.273  0.02338 *  
## STOpportunity   0.0464865  0.0170759   2.722  0.00666 ** 
## STFaith        -0.0112668  0.0129821  -0.868  0.38579    
## STImportant     0.0257292  0.0120198   2.141  0.03268 *  
## PNewspaper      0.0962606  0.0203851   4.722 2.87e-06 ***
## PMobile        -0.0739562  0.0292248  -2.531  0.01162 *  
## PEmail          0.0086139  0.0200679   0.429  0.66789    
## PSocial         0.0195411  0.0270921   0.721  0.47100    
## PDemImp        -0.0303471  0.0159187  -1.906  0.05705 .  
## PDemCurrent    -0.0132684  0.0171824  -0.772  0.44027    
## PSatisfied     -0.1037145  0.0166621  -6.225 8.71e-10 ***
## MF             -0.0437682  0.0554872  -0.789  0.43052    
## Age             0.0060115  0.0022366   2.688  0.00738 ** 
## Edu             0.0213352  0.0165755   1.287  0.19851    
## Employment     -0.0014379  0.0140258  -0.103  0.91838    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.677 on 643 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3205, Adjusted R-squared:  0.2899 
## F-statistic: 10.46 on 29 and 643 DF,  p-value: < 2.2e-16

CCivilService Regression Model

# Fit CCivilService against every column named in `predictors` (joined with
# " + " into a formula string) on MalaysiaData. Rows with missing values are
# dropped by lm(), as the summary reports.
civilService_fit <- lm(
  paste("CCivilService ~", paste(predictors, collapse = " + ")),
  data = MalaysiaData
)
summary(civilService_fit)

## 
## Call:
## lm(formula = paste("CCivilService ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.92472 -0.41276 -0.01511  0.41946  2.16233 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.760126   0.338538   5.199 2.69e-07 ***
## TPeople         0.049450   0.066240   0.747  0.45562    
## TFamily         0.007559   0.056913   0.133  0.89438    
## TNeighbourhood  0.133846   0.048606   2.754  0.00606 ** 
## TKnow           0.078264   0.046636   1.678  0.09380 .  
## TMeet           0.054172   0.040827   1.327  0.18502    
## VFamily         0.085243   0.118498   0.719  0.47218    
## VFriends       -0.002189   0.045136  -0.048  0.96133    
## VLeisure        0.050749   0.039985   1.269  0.20483    
## VReligion       0.093911   0.038317   2.451  0.01452 *  
## HSatFin        -0.011527   0.013346  -0.864  0.38807    
## HMedicine      -0.012175   0.031496  -0.387  0.69922    
## EPrivate       -0.050061   0.011128  -4.499 8.12e-06 ***
## ECompetition    0.004043   0.012464   0.324  0.74575    
## EHardWork       0.007423   0.010584   0.701  0.48335    
## PIAB           -0.008320   0.025461  -0.327  0.74396    
## STOpportunity   0.001812   0.015394   0.118  0.90631    
## STFaith        -0.002818   0.011704  -0.241  0.80978    
## STImportant     0.029202   0.010836   2.695  0.00722 ** 
## PNewspaper      0.039726   0.018378   2.162  0.03101 *  
## PMobile        -0.053341   0.026347  -2.025  0.04332 *  
## PEmail         -0.009670   0.018092  -0.534  0.59318    
## PSocial         0.056244   0.024424   2.303  0.02161 *  
## PDemImp         0.001886   0.014351   0.131  0.89549    
## PDemCurrent    -0.032658   0.015490  -2.108  0.03539 *  
## PSatisfied     -0.072825   0.015021  -4.848 1.56e-06 ***
## MF              0.026377   0.050023   0.527  0.59817    
## Age             0.004514   0.002016   2.239  0.02551 *  
## Edu             0.019639   0.014943   1.314  0.18924    
## Employment     -0.004138   0.012645  -0.327  0.74357    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6103 on 643 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3198, Adjusted R-squared:  0.2892 
## F-statistic: 10.43 on 29 and 643 DF,  p-value: < 2.2e-16

CElections Regression Model

# Fit CElections against every column named in `predictors` (joined with
# " + " into a formula string) on MalaysiaData. Rows with missing values are
# dropped by lm(), as the summary reports.
elections_fit <- lm(
  paste("CElections ~", paste(predictors, collapse = " + ")),
  data = MalaysiaData
)
summary(elections_fit)

## 
## Call:
## lm(formula = paste("CElections ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.32021 -0.45047  0.01843  0.44908  2.04721 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.607318   0.390625   6.675 5.35e-11 ***
## TPeople        -0.031518   0.076285  -0.413  0.67963    
## TFamily         0.025295   0.065683   0.385  0.70028    
## TNeighbourhood  0.041372   0.056024   0.738  0.46050    
## TKnow           0.045190   0.053795   0.840  0.40119    
## TMeet           0.145437   0.047091   3.088  0.00210 ** 
## VFamily        -0.211551   0.136745  -1.547  0.12234    
## VFriends       -0.001922   0.052066  -0.037  0.97057    
## VLeisure        0.039531   0.046124   0.857  0.39173    
## VReligion       0.114499   0.044212   2.590  0.00982 ** 
## HSatFin        -0.025282   0.015401  -1.642  0.10116    
## HMedicine       0.008783   0.036341   0.242  0.80909    
## EPrivate       -0.021529   0.012836  -1.677  0.09398 .  
## ECompetition   -0.003139   0.014372  -0.218  0.82719    
## EHardWork      -0.003868   0.012212  -0.317  0.75156    
## PIAB           -0.078768   0.029381  -2.681  0.00753 ** 
## STOpportunity   0.023687   0.017759   1.334  0.18275    
## STFaith         0.027909   0.013506   2.066  0.03919 *  
## STImportant     0.008692   0.012488   0.696  0.48666    
## PNewspaper      0.053467   0.021202   2.522  0.01192 *  
## PMobile        -0.051030   0.030393  -1.679  0.09364 .  
## PEmail          0.038076   0.020879   1.824  0.06867 .  
## PSocial        -0.000962   0.028171  -0.034  0.97277    
## PDemImp        -0.024536   0.016562  -1.481  0.13897    
## PDemCurrent    -0.030937   0.017877  -1.731  0.08402 .  
## PSatisfied     -0.112008   0.017335  -6.461 2.05e-10 ***
## MF              0.003437   0.057680   0.060  0.95250    
## Age             0.004820   0.002324   2.074  0.03845 *  
## Edu             0.038872   0.017243   2.254  0.02451 *  
## Employment     -0.003195   0.014546  -0.220  0.82619    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7044 on 644 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3179, Adjusted R-squared:  0.2872 
## F-statistic: 10.35 on 29 and 644 DF,  p-value: < 2.2e-16

CEnvOrg Regression Model

# Fit CEnvOrg against every column named in `predictors` (joined with " + "
# into a formula string) on MalaysiaData. Rows with missing values are
# dropped by lm(), as the summary reports.
envOrg_fit <- lm(
  paste("CEnvOrg ~", paste(predictors, collapse = " + ")),
  data = MalaysiaData
)
summary(envOrg_fit)

## 
## Call:
## lm(formula = paste("CEnvOrg ~", paste(predictors, collapse = " + ")), 
##     data = MalaysiaData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66566 -0.32269 -0.04139  0.34109  2.07727 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.365984   0.331492   4.121 4.27e-05 ***
## TPeople         0.084878   0.064717   1.312 0.190151    
## TFamily         0.059408   0.055867   1.063 0.288013    
## TNeighbourhood  0.122552   0.047650   2.572 0.010337 *  
## TKnow           0.018990   0.045694   0.416 0.677845    
## TMeet           0.011756   0.039959   0.294 0.768703    
## VFamily         0.301206   0.116153   2.593 0.009726 ** 
## VFriends        0.088966   0.044304   2.008 0.045051 *  
## VLeisure        0.056206   0.039248   1.432 0.152610    
## VReligion       0.041347   0.037522   1.102 0.270896    
## HSatFin        -0.005520   0.013065  -0.423 0.672772    
## HMedicine       0.042734   0.030830   1.386 0.166194    
## EPrivate       -0.042245   0.010958  -3.855 0.000127 ***
## ECompetition    0.004173   0.012220   0.341 0.732846    
## EHardWork       0.005809   0.010367   0.560 0.575422    
## PIAB           -0.038453   0.024926  -1.543 0.123397    
## STOpportunity  -0.018621   0.015114  -1.232 0.218387    
## STFaith         0.003381   0.011468   0.295 0.768230    
## STImportant     0.024992   0.010595   2.359 0.018635 *  
## PNewspaper      0.025014   0.018039   1.387 0.166026    
## PMobile        -0.053250   0.025936  -2.053 0.040461 *  
## PEmail          0.005943   0.017712   0.336 0.737334    
## PSocial         0.038486   0.024141   1.594 0.111386    
## PDemImp        -0.033050   0.014093  -2.345 0.019320 *  
## PDemCurrent    -0.026035   0.015168  -1.716 0.086558 .  
## PSatisfied     -0.020288   0.014710  -1.379 0.168303    
## MF             -0.035830   0.049008  -0.731 0.464981    
## Age             0.005134   0.001972   2.604 0.009433 ** 
## Edu             0.015872   0.014629   1.085 0.278332    
## Employment     -0.016833   0.012344  -1.364 0.173151    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5975 on 643 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.2168, Adjusted R-squared:  0.1815 
## F-statistic: 6.138 on 29 and 643 DF,  p-value: < 2.2e-16

Q2(c) Finding the best predictors for OTHER COUNTRIES

CReligious Regression Model

# Fit CReligious against every column named in `predictors` (joined with
# " + " into a formula string) on OtherCountriesData. Rows with missing
# values are dropped by lm(), as the summary reports.
religious_fit1 <- lm(
  paste("CReligious ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(religious_fit1)

## 
## Call:
## lm(formula = paste("CReligious ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.71911 -0.62611 -0.08408  0.51104  2.92032 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     8.583e-01  5.006e-02  17.146  < 2e-16 ***
## TPeople         3.055e-02  1.095e-02   2.791  0.00526 ** 
## TFamily         1.255e-01  8.180e-03  15.338  < 2e-16 ***
## TNeighbourhood  1.046e-01  6.459e-03  16.202  < 2e-16 ***
## TKnow           2.523e-04  6.419e-03   0.039  0.96865    
## TMeet           3.031e-02  6.093e-03   4.974 6.59e-07 ***
## VFamily         3.992e-02  1.228e-02   3.251  0.00115 ** 
## VFriends       -7.604e-04  6.226e-03  -0.122  0.90279    
## VLeisure       -6.806e-02  5.624e-03 -12.102  < 2e-16 ***
## VReligion       4.632e-01  4.264e-03 108.634  < 2e-16 ***
## HSatFin        -1.424e-02  1.876e-03  -7.591 3.24e-14 ***
## HMedicine       3.662e-02  4.858e-03   7.537 4.90e-14 ***
## EPrivate       -2.577e-03  1.529e-03  -1.686  0.09185 .  
## ECompetition   -1.205e-05  1.651e-03  -0.007  0.99418    
## EHardWork       1.273e-02  1.522e-03   8.359  < 2e-16 ***
## PIAB           -7.680e-03  3.929e-03  -1.955  0.05062 .  
## STOpportunity  -8.090e-03  1.887e-03  -4.288 1.81e-05 ***
## STFaith        -1.562e-03  1.493e-03  -1.046  0.29551    
## STImportant    -6.334e-03  1.508e-03  -4.200 2.68e-05 ***
## PNewspaper      1.979e-02  2.854e-03   6.932 4.23e-12 ***
## PMobile         9.622e-03  3.097e-03   3.107  0.00189 ** 
## PEmail         -7.345e-03  3.155e-03  -2.328  0.01989 *  
## PSocial        -3.375e-02  3.136e-03 -10.762  < 2e-16 ***
## PDemImp         6.847e-03  2.096e-03   3.267  0.00109 ** 
## PDemCurrent    -1.467e-02  2.089e-03  -7.022 2.22e-12 ***
## PSatisfied     -2.011e-02  1.994e-03 -10.087  < 2e-16 ***
## MF             -2.745e-02  8.603e-03  -3.191  0.00142 ** 
## Age             3.362e-03  2.936e-04  11.452  < 2e-16 ***
## Edu             2.337e-02  2.358e-03   9.910  < 2e-16 ***
## Employment     -4.663e-03  2.121e-03  -2.199  0.02789 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8166 on 37663 degrees of freedom
##   (11632 observations deleted due to missingness)
## Multiple R-squared:  0.3272, Adjusted R-squared:  0.3266 
## F-statistic: 631.5 on 29 and 37663 DF,  p-value: < 2.2e-16

CPress Regression Model

# Fit CPress against every column named in `predictors` (joined with " + "
# into a formula string) on OtherCountriesData. Rows with missing values are
# dropped by lm(), as the summary reports.
press_fit1 <- lm(
  paste("CPress ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(press_fit1)

## 
## Call:
## lm(formula = paste("CPress ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.52120 -0.57174  0.05916  0.54732  2.24470 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.9091704  0.0501795  38.047  < 2e-16 ***
## TPeople         0.1003115  0.0109518   9.159  < 2e-16 ***
## TFamily         0.0579296  0.0082026   7.062 1.67e-12 ***
## TNeighbourhood  0.1220080  0.0064704  18.856  < 2e-16 ***
## TKnow           0.0251641  0.0064396   3.908 9.33e-05 ***
## TMeet           0.0817680  0.0061124  13.377  < 2e-16 ***
## VFamily         0.0030697  0.0122629   0.250 0.802340    
## VFriends        0.0104522  0.0062433   1.674 0.094114 .  
## VLeisure       -0.0251203  0.0056401  -4.454 8.46e-06 ***
## VReligion       0.0415268  0.0042513   9.768  < 2e-16 ***
## HSatFin        -0.0017157  0.0018816  -0.912 0.361861    
## HMedicine       0.0096338  0.0048686   1.979 0.047848 *  
## EPrivate       -0.0038450  0.0015336  -2.507 0.012177 *  
## ECompetition   -0.0059695  0.0016560  -3.605 0.000313 ***
## EHardWork       0.0049086  0.0015257   3.217 0.001296 ** 
## PIAB           -0.0173750  0.0039338  -4.417 1.00e-05 ***
## STOpportunity  -0.0150298  0.0018900  -7.952 1.88e-15 ***
## STFaith         0.0063083  0.0014963   4.216 2.50e-05 ***
## STImportant     0.0017947  0.0015114   1.188 0.235036    
## PNewspaper      0.0579735  0.0028530  20.320  < 2e-16 ***
## PMobile         0.0213940  0.0031014   6.898 5.35e-12 ***
## PEmail         -0.0041654  0.0031538  -1.321 0.186584    
## PSocial        -0.0210196  0.0031370  -6.701 2.11e-11 ***
## PDemImp         0.0077374  0.0020996   3.685 0.000229 ***
## PDemCurrent    -0.0228283  0.0020946 -10.898  < 2e-16 ***
## PSatisfied     -0.0503727  0.0019982 -25.209  < 2e-16 ***
## MF              0.0169808  0.0086136   1.971 0.048686 *  
## Age             0.0015929  0.0002943   5.413 6.23e-08 ***
## Edu             0.0285736  0.0023621  12.097  < 2e-16 ***
## Employment     -0.0065466  0.0021236  -3.083 0.002052 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8177 on 37666 degrees of freedom
##   (11629 observations deleted due to missingness)
## Multiple R-squared:  0.1239, Adjusted R-squared:  0.1232 
## F-statistic: 183.6 on 29 and 37666 DF,  p-value: < 2.2e-16

CTelevision Regression Model

# Fit CTelevision against every column named in `predictors` (joined with
# " + " into a formula string) on OtherCountriesData. Rows with missing
# values are dropped by lm(), as the summary reports.
tv_fit1 <- lm(
  paste("CTelevision ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(tv_fit1)

## 
## Call:
## lm(formula = paste("CTelevision ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.69178 -0.55195  0.04084  0.54491  2.28116 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.0113050  0.0497427  40.434  < 2e-16 ***
## TPeople         0.0489470  0.0108698   4.503 6.72e-06 ***
## TFamily         0.0764083  0.0081367   9.391  < 2e-16 ***
## TNeighbourhood  0.1229000  0.0064129  19.165  < 2e-16 ***
## TKnow           0.0298230  0.0063771   4.677 2.93e-06 ***
## TMeet           0.0649741  0.0060596  10.723  < 2e-16 ***
## VFamily         0.0206484  0.0121786   1.695 0.089994 .  
## VFriends       -0.0037921  0.0061883  -0.613 0.540026    
## VLeisure       -0.0242735  0.0055870  -4.345 1.40e-05 ***
## VReligion       0.0548746  0.0042214  12.999  < 2e-16 ***
## HSatFin         0.0002839  0.0018660   0.152 0.879068    
## HMedicine       0.0107026  0.0048266   2.217 0.026600 *  
## EPrivate       -0.0057524  0.0015201  -3.784 0.000154 ***
## ECompetition   -0.0033084  0.0016416  -2.015 0.043869 *  
## EHardWork       0.0095399  0.0015129   6.306 2.90e-10 ***
## PIAB           -0.0113266  0.0039034  -2.902 0.003713 ** 
## STOpportunity  -0.0174042  0.0018742  -9.286  < 2e-16 ***
## STFaith         0.0044723  0.0014842   3.013 0.002586 ** 
## STImportant    -0.0013409  0.0014988  -0.895 0.370983    
## PNewspaper      0.0404341  0.0028337  14.269  < 2e-16 ***
## PMobile         0.0244197  0.0030754   7.940 2.07e-15 ***
## PEmail         -0.0190288  0.0031339  -6.072 1.28e-09 ***
## PSocial        -0.0168399  0.0031136  -5.408 6.39e-08 ***
## PDemImp         0.0160958  0.0020812   7.734 1.07e-14 ***
## PDemCurrent    -0.0256792  0.0020762 -12.368  < 2e-16 ***
## PSatisfied     -0.0539080  0.0019804 -27.221  < 2e-16 ***
## MF             -0.0339314  0.0085460  -3.970 7.19e-05 ***
## Age             0.0015468  0.0002918   5.301 1.16e-07 ***
## Edu             0.0390766  0.0023429  16.679  < 2e-16 ***
## Employment     -0.0074566  0.0021073  -3.539 0.000403 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8133 on 37857 degrees of freedom
##   (11438 observations deleted due to missingness)
## Multiple R-squared:  0.1275, Adjusted R-squared:  0.1269 
## F-statistic: 190.8 on 29 and 37857 DF,  p-value: < 2.2e-16

CUnions Regression Model

# Fit CUnions against every column named in `predictors` (joined with " + "
# into a formula string) on OtherCountriesData. Rows with missing values are
# dropped by lm(), as the summary reports.
unions_fit1 <- lm(
  paste("CUnions ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(unions_fit1)

## 
## Call:
## lm(formula = paste("CUnions ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.73303 -0.58858  0.06318  0.55349  2.39630 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.8303954  0.0517112  35.397  < 2e-16 ***
## TPeople         0.0805995  0.0112958   7.135 9.83e-13 ***
## TFamily         0.0536544  0.0083956   6.391 1.67e-10 ***
## TNeighbourhood  0.1150891  0.0066787  17.232  < 2e-16 ***
## TKnow           0.0638419  0.0066366   9.620  < 2e-16 ***
## TMeet           0.1052323  0.0063149  16.664  < 2e-16 ***
## VFamily         0.0160011  0.0125990   1.270 0.204082    
## VFriends        0.0271758  0.0064470   4.215 2.50e-05 ***
## VLeisure       -0.0309708  0.0058322  -5.310 1.10e-07 ***
## VReligion       0.0233110  0.0043950   5.304 1.14e-07 ***
## HSatFin        -0.0008336  0.0019418  -0.429 0.667715    
## HMedicine      -0.0002425  0.0050224  -0.048 0.961490    
## EPrivate       -0.0118732  0.0015854  -7.489 7.10e-14 ***
## ECompetition   -0.0057822  0.0017114  -3.379 0.000729 ***
## EHardWork       0.0074787  0.0015788   4.737 2.18e-06 ***
## PIAB           -0.0015799  0.0040670  -0.388 0.697673    
## STOpportunity  -0.0154456  0.0019484  -7.927 2.30e-15 ***
## STFaith         0.0119306  0.0015463   7.716 1.23e-14 ***
## STImportant     0.0079120  0.0015656   5.054 4.35e-07 ***
## PNewspaper      0.0276107  0.0029455   9.374  < 2e-16 ***
## PMobile         0.0264199  0.0032103   8.230  < 2e-16 ***
## PEmail         -0.0147523  0.0032520  -4.536 5.74e-06 ***
## PSocial        -0.0068863  0.0032457  -2.122 0.033872 *  
## PDemImp         0.0004242  0.0021649   0.196 0.844669    
## PDemCurrent    -0.0196348  0.0021556  -9.109  < 2e-16 ***
## PSatisfied     -0.0430130  0.0020562 -20.919  < 2e-16 ***
## MF             -0.0244913  0.0088869  -2.756 0.005856 ** 
## Age             0.0032592  0.0003048  10.692  < 2e-16 ***
## Edu             0.0290188  0.0024367  11.909  < 2e-16 ***
## Employment      0.0049510  0.0021924   2.258 0.023938 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8292 on 36318 degrees of freedom
##   (12977 observations deleted due to missingness)
## Multiple R-squared:  0.1164, Adjusted R-squared:  0.1157 
## F-statistic: 164.9 on 29 and 36318 DF,  p-value: < 2.2e-16

CCourts Regression Model

# Fit CCourts against every column named in `predictors` (joined with " + "
# into a formula string) on OtherCountriesData. Rows with missing values are
# dropped by lm(), as the summary reports.
courts_fit1 <- lm(
  paste("CCourts ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(courts_fit1)

## 
## Call:
## lm(formula = paste("CCourts ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7712 -0.5773 -0.0483  0.6045  2.7906 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.7582225  0.0524567  33.518  < 2e-16 ***
## TPeople         0.1306002  0.0114452  11.411  < 2e-16 ***
## TFamily         0.1029577  0.0085681  12.016  < 2e-16 ***
## TNeighbourhood  0.1189688  0.0067644  17.587  < 2e-16 ***
## TKnow           0.0859663  0.0067231  12.787  < 2e-16 ***
## TMeet           0.0399095  0.0063848   6.251 4.13e-10 ***
## VFamily         0.0358750  0.0128445   2.793 0.005224 ** 
## VFriends        0.0426352  0.0065282   6.531 6.62e-11 ***
## VLeisure       -0.0451438  0.0058876  -7.668 1.79e-14 ***
## VReligion       0.0292720  0.0044504   6.577 4.85e-11 ***
## HSatFin        -0.0078607  0.0019661  -3.998 6.40e-05 ***
## HMedicine      -0.0299411  0.0050857  -5.887 3.96e-09 ***
## EPrivate       -0.0003851  0.0016027  -0.240 0.810114    
## ECompetition    0.0047792  0.0017305   2.762 0.005753 ** 
## EHardWork       0.0085654  0.0015955   5.368 7.99e-08 ***
## PIAB            0.0074118  0.0041139   1.802 0.071610 .  
## STOpportunity  -0.0118556  0.0019749  -6.003 1.95e-09 ***
## STFaith         0.0216174  0.0015633  13.828  < 2e-16 ***
## STImportant     0.0103402  0.0015805   6.542 6.13e-11 ***
## PNewspaper      0.0078457  0.0029862   2.627 0.008609 ** 
## PMobile         0.0152345  0.0032428   4.698 2.64e-06 ***
## PEmail          0.0045003  0.0032992   1.364 0.172557    
## PSocial        -0.0103212  0.0032813  -3.145 0.001659 ** 
## PDemImp         0.0062224  0.0021944   2.836 0.004576 ** 
## PDemCurrent    -0.0393886  0.0021874 -18.007  < 2e-16 ***
## PSatisfied     -0.0615576  0.0020873 -29.491  < 2e-16 ***
## MF             -0.0308125  0.0090037  -3.422 0.000622 ***
## Age             0.0014461  0.0003079   4.697 2.65e-06 ***
## Edu             0.0249583  0.0024681  10.112  < 2e-16 ***
## Employment     -0.0014405  0.0022201  -0.649 0.516450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8543 on 37627 degrees of freedom
##   (11668 observations deleted due to missingness)
## Multiple R-squared:  0.1653, Adjusted R-squared:  0.1647 
## F-statistic:   257 on 29 and 37627 DF,  p-value: < 2.2e-16

CPParties Regression Model

# Fit CPParties against every column named in `predictors` (joined with
# " + " into a formula string) on OtherCountriesData. Rows with missing
# values are dropped by lm(), as the summary reports.
pparties_fit1 <- lm(
  paste("CPParties ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(pparties_fit1)

## 
## Call:
## lm(formula = paste("CPParties ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0593 -0.5426  0.0812  0.5667  2.2098 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.5479291  0.0495333  51.439  < 2e-16 ***
## TPeople         0.1549180  0.0108195  14.318  < 2e-16 ***
## TFamily         0.0528648  0.0080785   6.544 6.07e-11 ***
## TNeighbourhood  0.1115485  0.0063911  17.454  < 2e-16 ***
## TKnow          -0.0036830  0.0063590  -0.579 0.562478    
## TMeet           0.0997853  0.0060364  16.531  < 2e-16 ***
## VFamily         0.0039000  0.0121003   0.322 0.747221    
## VFriends        0.0258051  0.0061643   4.186 2.84e-05 ***
## VLeisure       -0.0511128  0.0055679  -9.180  < 2e-16 ***
## VReligion      -0.0026734  0.0042026  -0.636 0.524699    
## HSatFin        -0.0071512  0.0018571  -3.851 0.000118 ***
## HMedicine      -0.0179458  0.0048055  -3.734 0.000188 ***
## EPrivate       -0.0071900  0.0015143  -4.748 2.06e-06 ***
## ECompetition   -0.0046951  0.0016360  -2.870 0.004110 ** 
## EHardWork       0.0061514  0.0015081   4.079 4.53e-05 ***
## PIAB            0.0064109  0.0038892   1.648 0.099284 .  
## STOpportunity  -0.0107299  0.0018646  -5.755 8.75e-09 ***
## STFaith         0.0006943  0.0014781   0.470 0.638545    
## STImportant     0.0088916  0.0014934   5.954 2.64e-09 ***
## PNewspaper      0.0243826  0.0028200   8.646  < 2e-16 ***
## PMobile         0.0214074  0.0030653   6.984 2.92e-12 ***
## PEmail         -0.0194924  0.0031182  -6.251 4.12e-10 ***
## PSocial        -0.0049811  0.0031017  -1.606 0.108303    
## PDemImp         0.0148856  0.0020737   7.178 7.20e-13 ***
## PDemCurrent    -0.0218543  0.0020666 -10.575  < 2e-16 ***
## PSatisfied     -0.0971143  0.0019719 -49.249  < 2e-16 ***
## MF             -0.0247179  0.0085068  -2.906 0.003667 ** 
## Age             0.0023192  0.0002908   7.974 1.58e-15 ***
## Edu             0.0351360  0.0023335  15.057  < 2e-16 ***
## Employment      0.0017558  0.0020976   0.837 0.402573    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8053 on 37425 degrees of freedom
##   (11870 observations deleted due to missingness)
## Multiple R-squared:  0.2017, Adjusted R-squared:  0.2011 
## F-statistic: 326.1 on 29 and 37425 DF,  p-value: < 2.2e-16

CParliament Regression Model

# Fit CParliament against every column named in `predictors` (joined with
# " + " into a formula string) on OtherCountriesData. Rows with missing
# values are dropped by lm(), as the summary reports.
parliament_fit1 <- lm(
  paste("CParliament ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(parliament_fit1)

## 
## Call:
## lm(formula = paste("CParliament ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0699 -0.5570  0.0258  0.5944  2.6969 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.4605742  0.0511954  48.062  < 2e-16 ***
## TPeople         0.1372536  0.0111821  12.274  < 2e-16 ***
## TFamily         0.0761690  0.0083695   9.101  < 2e-16 ***
## TNeighbourhood  0.1188440  0.0066034  17.997  < 2e-16 ***
## TKnow           0.0217232  0.0065757   3.304 0.000956 ***
## TMeet           0.0726331  0.0062473  11.626  < 2e-16 ***
## VFamily         0.0256773  0.0125195   2.051 0.040276 *  
## VFriends        0.0264293  0.0063749   4.146 3.39e-05 ***
## VLeisure       -0.0733440  0.0057527 -12.750  < 2e-16 ***
## VReligion       0.0169512  0.0043452   3.901 9.59e-05 ***
## HSatFin        -0.0047803  0.0019207  -2.489 0.012819 *  
## HMedicine      -0.0066144  0.0049704  -1.331 0.183273    
## EPrivate       -0.0071121  0.0015656  -4.543 5.57e-06 ***
## ECompetition   -0.0025141  0.0016908  -1.487 0.137059    
## EHardWork       0.0107800  0.0015584   6.918 4.67e-12 ***
## PIAB           -0.0011882  0.0040210  -0.296 0.767613    
## STOpportunity  -0.0136740  0.0019285  -7.091 1.36e-12 ***
## STFaith         0.0085475  0.0015268   5.598 2.18e-08 ***
## STImportant     0.0123821  0.0015425   8.027 1.03e-15 ***
## PNewspaper      0.0305232  0.0029140  10.475  < 2e-16 ***
## PMobile         0.0247284  0.0031659   7.811 5.82e-15 ***
## PEmail         -0.0182465  0.0032199  -5.667 1.46e-08 ***
## PSocial        -0.0169188  0.0032039  -5.281 1.29e-07 ***
## PDemImp         0.0068491  0.0021442   3.194 0.001403 ** 
## PDemCurrent    -0.0357470  0.0021376 -16.723  < 2e-16 ***
## PSatisfied     -0.1075365  0.0020387 -52.747  < 2e-16 ***
## MF             -0.0124910  0.0087918  -1.421 0.155397    
## Age             0.0031255  0.0003006  10.397  < 2e-16 ***
## Edu             0.0279792  0.0024125  11.598  < 2e-16 ***
## Employment     -0.0028275  0.0021679  -1.304 0.192157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8324 on 37443 degrees of freedom
##   (11852 observations deleted due to missingness)
## Multiple R-squared:  0.2331, Adjusted R-squared:  0.2325 
## F-statistic: 392.5 on 29 and 37443 DF,  p-value: < 2.2e-16

CCivilService Regression Model

# Fit CCivilService against every column named in `predictors` (joined with
# " + " into a formula string) on OtherCountriesData. Rows with missing
# values are dropped by lm(), as the summary reports.
civilService_fit1 <- lm(
  paste("CCivilService ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(civilService_fit1)

## 
## Call:
## lm(formula = paste("CCivilService ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68310 -0.53522 -0.04386  0.56213  2.70629 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.0646250  0.0496034  41.623  < 2e-16 ***
## TPeople         0.1065775  0.0108187   9.851  < 2e-16 ***
## TFamily         0.0814748  0.0081009  10.057  < 2e-16 ***
## TNeighbourhood  0.1251903  0.0063978  19.568  < 2e-16 ***
## TKnow           0.0710298  0.0063698  11.151  < 2e-16 ***
## TMeet           0.0624669  0.0060448  10.334  < 2e-16 ***
## VFamily         0.0296303  0.0120893   2.451 0.014253 *  
## VFriends        0.0377155  0.0061758   6.107 1.03e-09 ***
## VLeisure       -0.0583793  0.0055811 -10.460  < 2e-16 ***
## VReligion       0.0295541  0.0042056   7.027 2.14e-12 ***
## HSatFin        -0.0017552  0.0018595  -0.944 0.345212    
## HMedicine      -0.0281608  0.0048151  -5.848 5.00e-09 ***
## EPrivate       -0.0074817  0.0015169  -4.932 8.16e-07 ***
## ECompetition   -0.0003192  0.0016363  -0.195 0.845347    
## EHardWork       0.0073731  0.0015081   4.889 1.02e-06 ***
## PIAB            0.0086458  0.0038940   2.220 0.026404 *  
## STOpportunity  -0.0202937  0.0018682 -10.863  < 2e-16 ***
## STFaith         0.0158413  0.0014799  10.705  < 2e-16 ***
## STImportant     0.0123390  0.0014947   8.255  < 2e-16 ***
## PNewspaper      0.0095516  0.0028212   3.386 0.000711 ***
## PMobile         0.0247785  0.0030684   8.075 6.93e-16 ***
## PEmail         -0.0135800  0.0031190  -4.354 1.34e-05 ***
## PSocial        -0.0063287  0.0031068  -2.037 0.041649 *  
## PDemImp         0.0027000  0.0020753   1.301 0.193253    
## PDemCurrent    -0.0311706  0.0020700 -15.058  < 2e-16 ***
## PSatisfied     -0.0631067  0.0019734 -31.979  < 2e-16 ***
## MF             -0.0219841  0.0085137  -2.582 0.009821 ** 
## Age             0.0012846  0.0002912   4.411 1.03e-05 ***
## Edu             0.0164163  0.0023356   7.029 2.12e-12 ***
## Employment      0.0057480  0.0020995   2.738 0.006188 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.806 on 37428 degrees of freedom
##   (11867 observations deleted due to missingness)
## Multiple R-squared:  0.1702, Adjusted R-squared:  0.1696 
## F-statistic: 264.8 on 29 and 37428 DF,  p-value: < 2.2e-16

CElections Regression Model

# Fit CElections against every column named in `predictors` (joined with
# " + " into a formula string) on OtherCountriesData. Rows with missing
# values are dropped by lm(), as the summary reports.
elections_fit1 <- lm(
  paste("CElections ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(elections_fit1)

## 
## Call:
## lm(formula = paste("CElections ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.74178 -0.57198 -0.00728  0.61101  2.65978 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.537e+00  5.214e-02  48.653  < 2e-16 ***
## TPeople         9.740e-02  1.139e-02   8.552  < 2e-16 ***
## TFamily         5.210e-02  8.518e-03   6.116 9.68e-10 ***
## TNeighbourhood  9.678e-02  6.723e-03  14.396  < 2e-16 ***
## TKnow           5.872e-02  6.693e-03   8.773  < 2e-16 ***
## TMeet           7.764e-02  6.358e-03  12.212  < 2e-16 ***
## VFamily         6.358e-03  1.273e-02   0.500 0.617412    
## VFriends        6.994e-03  6.484e-03   1.079 0.280754    
## VLeisure       -2.812e-02  5.863e-03  -4.796 1.63e-06 ***
## VReligion       2.662e-02  4.428e-03   6.013 1.84e-09 ***
## HSatFin        -1.223e-02  1.954e-03  -6.257 3.97e-10 ***
## HMedicine      -2.376e-02  5.059e-03  -4.697 2.65e-06 ***
## EPrivate       -5.834e-03  1.593e-03  -3.662 0.000250 ***
## ECompetition    1.922e-03  1.720e-03   1.118 0.263780    
## EHardWork       5.267e-03  1.585e-03   3.322 0.000893 ***
## PIAB           -4.998e-03  4.093e-03  -1.221 0.222039    
## STOpportunity  -1.019e-02  1.963e-03  -5.191 2.10e-07 ***
## STFaith         1.777e-02  1.555e-03  11.430  < 2e-16 ***
## STImportant     7.326e-03  1.571e-03   4.662 3.14e-06 ***
## PNewspaper      3.482e-02  2.970e-03  11.723  < 2e-16 ***
## PMobile         2.084e-02  3.225e-03   6.463 1.04e-10 ***
## PEmail         -4.279e-03  3.282e-03  -1.304 0.192316    
## PSocial        -3.612e-02  3.264e-03 -11.065  < 2e-16 ***
## PDemImp        -4.191e-03  2.184e-03  -1.919 0.054945 .  
## PDemCurrent    -5.185e-02  2.174e-03 -23.854  < 2e-16 ***
## PSatisfied     -7.920e-02  2.074e-03 -38.186  < 2e-16 ***
## MF              2.738e-02  8.956e-03   3.057 0.002236 ** 
## Age            -2.105e-05  3.062e-04  -0.069 0.945192    
## Edu             7.523e-03  2.456e-03   3.063 0.002191 ** 
## Employment     -2.445e-03  2.208e-03  -1.107 0.268107    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8489 on 37539 degrees of freedom
##   (11756 observations deleted due to missingness)
## Multiple R-squared:  0.2023, Adjusted R-squared:  0.2017 
## F-statistic: 328.4 on 29 and 37539 DF,  p-value: < 2.2e-16

CEnvOrg Regression Model

# Fit CEnvOrg against every column named in `predictors` (joined with " + "
# into a formula string) on OtherCountriesData. Rows with missing values are
# dropped by lm(), as the summary reports.
envOrg_fit1 <- lm(
  paste("CEnvOrg ~", paste(predictors, collapse = " + ")),
  data = OtherCountriesData
)
summary(envOrg_fit1)

## 
## Call:
## lm(formula = paste("CEnvOrg ~", paste(predictors, collapse = " + ")), 
##     data = OtherCountriesData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3753 -0.4935 -0.1447  0.5987  2.4425 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.9259603  0.0522292  36.875  < 2e-16 ***
## TPeople         0.0362593  0.0114021   3.180 0.001474 ** 
## TFamily         0.0427891  0.0085068   5.030 4.93e-07 ***
## TNeighbourhood  0.0461439  0.0067344   6.852 7.40e-12 ***
## TKnow           0.0921628  0.0067015  13.753  < 2e-16 ***
## TMeet           0.0644677  0.0063716  10.118  < 2e-16 ***
## VFamily         0.0123119  0.0126803   0.971 0.331580    
## VFriends        0.0070642  0.0065141   1.084 0.278174    
## VLeisure        0.0147078  0.0058912   2.497 0.012545 *  
## VReligion       0.0226768  0.0044291   5.120 3.07e-07 ***
## HSatFin        -0.0122703  0.0019607  -6.258 3.94e-10 ***
## HMedicine       0.0126856  0.0050699   2.502 0.012350 *  
## EPrivate       -0.0122745  0.0016019  -7.662 1.87e-14 ***
## ECompetition   -0.0030567  0.0017278  -1.769 0.076878 .  
## EHardWork       0.0127929  0.0015929   8.031 9.94e-16 ***
## PIAB           -0.0044143  0.0040993  -1.077 0.281559    
## STOpportunity  -0.0160351  0.0019678  -8.149 3.79e-16 ***
## STFaith         0.0076186  0.0015595   4.885 1.04e-06 ***
## STImportant     0.0118278  0.0015770   7.500 6.53e-14 ***
## PNewspaper      0.0242427  0.0029757   8.147 3.85e-16 ***
## PMobile         0.0232832  0.0032422   7.181 7.04e-13 ***
## PEmail         -0.0119354  0.0032805  -3.638 0.000275 ***
## PSocial        -0.0026019  0.0032726  -0.795 0.426583    
## PDemImp        -0.0206731  0.0021909  -9.436  < 2e-16 ***
## PDemCurrent    -0.0221262  0.0021796 -10.151  < 2e-16 ***
## PSatisfied     -0.0258636  0.0020777 -12.448  < 2e-16 ***
## MF             -0.0417786  0.0089737  -4.656 3.24e-06 ***
## Age             0.0043046  0.0003066  14.039  < 2e-16 ***
## Edu             0.0235489  0.0024571   9.584  < 2e-16 ***
## Employment     -0.0060407  0.0022090  -2.735 0.006250 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.84 on 36573 degrees of freedom
##   (12722 observations deleted due to missingness)
## Multiple R-squared:  0.07992,    Adjusted R-squared:  0.07919 
## F-statistic: 109.5 on 29 and 36573 DF,  p-value: < 2.2e-16

Q3 Clustering

Q3 (a)

# Seed the RNG so the random k-means starts further down are reproducible.
set.seed(9999)

# Read the external data set; ED keeps the values exactly as loaded.
ED <- read.csv("q3_external.csv", header = TRUE)

# EDS is a copy of ED with columns 2 to 6 standardised by scale() (centred and
# divided by the standard deviation, its defaults); other columns stay as read.
EDS <- ED
EDS[, 2:6] <- scale(EDS[, 2:6])

library(cluster)

Create a function to calculate the average silhouette score

# Average silhouette width of a k-means partition.
#
# Arguments:
#   k    Number of clusters to fit. silhouette() is only defined for
#        2 <= k <= n - 1, where n is the number of rows in `data`.
#   data Numeric matrix or data frame to cluster. Defaults to the scaled
#        columns EDS[, 2:6], so existing one-argument calls behave as before.
#
# Returns:
#   A single number: the mean of the `sil_width` column returned by
#   cluster::silhouette() for the fitted clustering.
#
# Note: kmeans() uses random starts (nstart = 20), so call set.seed() first
# for reproducible scores.
i_silhouette_score <- function(k, data = EDS[, 2:6]) {
  # Fail early with a clear message: outside this range silhouette() returns
  # NA and the column lookup below would raise an opaque subscript error.
  stopifnot(
    is.numeric(k), length(k) == 1L,
    k >= 2, k <= nrow(data) - 1
  )

  km <- kmeans(data, centers = k, nstart = 20)
  ss <- silhouette(km$cluster, dist(data))

  # Column "sil_width" is the third column of the silhouette matrix.
  mean(ss[, "sil_width"])
}

Determine the optimal number of clusters

# Candidate numbers of clusters to evaluate.
k <- 2:10

# Average silhouette score for each candidate k. vapply() enforces exactly one
# numeric score per k, whereas sapply() would silently change its return shape.
avg_sil <- vapply(k, i_silhouette_score, numeric(1))

# Line-and-point plot of the score against k, with x and y given first so the
# call reads in the conventional order.
plot(k, avg_sil, type = "b",
     xlab = "Number of clusters", ylab = "Average Silhouette Scores")

Perform k-means clustering

# Partition the scaled columns 2 to 6 of EDS into two clusters, keeping the
# best of 20 random starts. (Presumably 2 is the k favoured by the silhouette
# plot -- confirm against that figure.)
EDSkfit <- kmeans(EDS[, 2:6], centers = 2, nstart = 20)
# EDSkfit

# Cross-tabulate each country against the cluster it was assigned to.
table(actual = EDS$Country, fitted = EDSkfit$cluster)

##                           fitted
## actual                     1 2
##   Afghanistan              1 0
##   Albania                  1 0
##   Algeria                  1 0
##   Angola                   1 0
##   Antigua and Barbuda      1 0
##   Argentina                0 1
##   Armenia                  0 1
##   Australia                0 1
##   Austria                  0 1
##   Azerbaijan               1 0
##   Bahamas                  0 1
##   Bahrain                  0 1
##   Bangladesh               1 0
##   Barbados                 0 1
##   Belarus                  1 0
##   Belgium                  0 1
##   Belize                   1 0
##   Benin                    1 0
##   Bhutan                   1 0
##   Bolivia                  1 0
##   Bosnia and Herzegovina   1 0
##   Botswana                 1 0
##   Brazil                   0 1
##   Brunei Darussalam        0 1
##   Bulgaria                 0 1
##   Burkina Faso             1 0
##   Burundi                  1 0
##   Cabo Verde               1 0
##   Cambodia                 1 0
##   Cameroon                 1 0
##   Canada                   0 1
##   Central African Republic 1 0
##   Chad                     1 0
##   Chile                    0 1
##   China                    0 1
##   Colombia                 0 1
##   Comoros                  1 0
##   Congo                    1 0
##   Costa Rica               0 1
##   Côte d'Ivoire            1 0
##   Croatia                  0 1
##   Cuba                     1 0
##   Cyprus                   0 1
##   Czechia                  0 1
##   Denmark                  0 1
##   Djibouti                 1 0
##   Dominican Republic       1 0
##   Ecuador                  0 1
##   Egypt                    1 0
##   El Salvador              1 0
##   Equatorial Guinea        1 0
##   Eritrea                  1 0
##   Estonia                  0 1
##   Eswatini                 1 0
##   Ethiopia                 1 0
##   Fiji                     1 0
##   Finland                  0 1
##   France                   0 1
##   Gabon                    1 0
##   Gambia                   1 0
##   Georgia                  0 1
##   Germany                  0 1
##   Ghana                    1 0
##   Greece                   0 1
##   Grenada                  1 0
##   Guatemala                1 0
##   Guinea                   1 0
##   Guinea-Bissau            1 0
##   Guyana                   1 0
##   Haiti                    1 0
##   Honduras                 1 0
##   Hungary                  0 1
##   Iceland                  0 1
##   India                    1 0
##   Indonesia                1 0
##   Iran                     1 0
##   Iraq                     1 0
##   Ireland                  0 1
##   Israel                   0 1
##   Italy                    0 1
##   Jamaica                  1 0
##   Japan                    0 1
##   Jordan                   1 0
##   Kazakhstan               0 1
##   Kenya                    1 0
##   Kiribati                 1 0
##   Kuwait                   0 1
##   Kyrgyzstan               1 0
##   Laos                     1 0
##   Latvia                   0 1
##   Lebanon                  1 0
##   Lesotho                  1 0
##   Liberia                  1 0
##   Libya                    1 0
##   Lithuania                0 1
##   Luxembourg               0 1
##   Madagascar               1 0
##   Malawi                   1 0
##   Malaysia                 0 1
##   Maldives                 1 0
##   Mali                     1 0
##   Malta                    0 1
##   Mauritania               1 0
##   Mauritius                1 0
##   Mexico                   0 1
##   Moldova                  1 0
##   Mongolia                 1 0
##   Montenegro               1 0
##   Morocco                  1 0
##   Mozambique               1 0
##   Myanmar                  1 0
##   Namibia                  1 0
##   Nepal                    1 0
##   Netherlands              0 1
##   New Zealand              0 1
##   Nicaragua                1 0
##   Niger                    1 0
##   Nigeria                  1 0
##   North Korea              1 0
##   North Macedonia          1 0
##   Norway                   0 1
##   Oman                     1 0
##   Pakistan                 1 0
##   Panama                   0 1
##   Papua New Guinea         1 0
##   Paraguay                 1 0
##   Peru                     0 1
##   Philippines              1 0
##   Poland                   0 1
##   Portugal                 0 1
##   Qatar                    0 1
##   Romania                  0 1
##   Russia                   0 1
##   Rwanda                   1 0
##   Samoa                    1 0
##   Sao Tome and Principe    1 0
##   Saudi Arabia             0 1
##   Senegal                  1 0
##   Serbia                   1 0
##   Seychelles               0 1
##   Sierra Leone             1 0
##   Singapore                0 1
##   Slovakia                 0 1
##   Slovenia                 0 1
##   Solomon Islands          1 0
##   Somalia                  1 0
##   South Africa             1 0
##   South Korea              0 1
##   South Sudan              1 0
##   Spain                    0 1
##   Sri Lanka                1 0
##   Sudan                    1 0
##   Suriname                 1 0
##   Sweden                   0 1
##   Switzerland              0 1
##   Syrian Arab Republic     1 0
##   Tajikistan               1 0
##   Tanzania                 1 0
##   Thailand                 0 1
##   Timor-Leste              1 0
##   Togo                     1 0
##   Tonga                    1 0
##   Trinidad and Tobago      0 1
##   Tunisia                  1 0
##   Turkey                   0 1
##   Turkmenistan             1 0
##   Uganda                   1 0
##   Ukraine                  1 0
##   United Arab Emirates     0 1
##   United Kingdom           0 1
##   United States of America 0 1
##   Uruguay                  0 1
##   Uzbekistan               1 0
##   Vanuatu                  1 0
##   Venezuela                1 0
##   Viet Nam                 0 1
##   Yemen                    1 0
##   Zambia                   1 0
##   Zimbabwe                 1 0

# Store each row's fitted cluster label alongside the unscaled data.
ED[["cluster"]] <- EDSkfit[["cluster"]]
#View(ED)

countries similar to Malaysia

# Cluster label assigned to Malaysia.
malaysia_cluster <- ED$cluster[ED$Country == "Malaysia"]

# All rows of ED that carry the same cluster label (Malaysia included).
in_malaysia_cluster <- which(ED$cluster == malaysia_cluster)
similar_countries_cluster <- ED[in_malaysia_cluster, ]

print(similar_countries_cluster$Country)

##  [1] "Argentina"                "Armenia"                 
##  [3] "Australia"                "Austria"                 
##  [5] "Bahamas"                  "Bahrain"                 
##  [7] "Barbados"                 "Belgium"                 
##  [9] "Brazil"                   "Brunei Darussalam"       
## [11] "Bulgaria"                 "Canada"                  
## [13] "Chile"                    "China"                   
## [15] "Colombia"                 "Costa Rica"              
## [17] "Croatia"                  "Cyprus"                  
## [19] "Czechia"                  "Denmark"                 
## [21] "Ecuador"                  "Estonia"                 
## [23] "Finland"                  "France"                  
## [25] "Georgia"                  "Germany"                 
## [27] "Greece"                   "Hungary"                 
## [29] "Iceland"                  "Ireland"                 
## [31] "Israel"                   "Italy"                   
## [33] "Japan"                    "Kazakhstan"              
## [35] "Kuwait"                   "Latvia"                  
## [37] "Lithuania"                "Luxembourg"              
## [39] "Malaysia"                 "Malta"                   
## [41] "Mexico"                   "Netherlands"             
## [43] "New Zealand"              "Norway"                  
## [45] "Panama"                   "Peru"                    
## [47] "Poland"                   "Portugal"                
## [49] "Qatar"                    "South Korea"             
## [51] "Romania"                  "Russia"                  
## [53] "Saudi Arabia"             "Seychelles"              
## [55] "Singapore"                "Slovakia"                
## [57] "Slovenia"                 "Spain"                   
## [59] "Sweden"                   "Switzerland"             
## [61] "Thailand"                 "Trinidad and Tobago"     
## [63] "Turkey"                   "United Arab Emirates"    
## [65] "United Kingdom"           "United States of America"
## [67] "Uruguay"                  "Viet Nam"

# Create the cluster plot.

# Use the country names as row names of the scaled data so they are available
# to the plot as point labels.
rownames(EDS) <- ED$Country

# Plot the two fitted clusters in red and blue.
# TRUE is spelled out rather than T: T and F are ordinary variables that can
# be reassigned, TRUE and FALSE cannot.
fviz_cluster(
  EDSkfit,
  data = EDS[, 2:6],
  palette = c("red", "blue"),
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme()
)

find unique countries in VCData

# Distinct country codes present in the full survey extract.
unique(VCData[["Country"]])

##  [1] "CAN" "MEX" "PAK" "NGA" "BGD" "RUS" "IDN" "USA" "PHL" "DEU" "AND" "BOL"
## [13] "NLD" "MAC" "MNG" "CHN" "ARM" "ZWE" "UKR" "AUS" "IND" "LBY" "NZL" "KAZ"
## [25] "TUR" "SVK" "CYP" "TJK" "COL" "IRN" "VNM" "UZB" "TUN" "BRA" "GBR" "CZE"
## [37] "MMR" "SRB" "GRC" "LBN" "GTM" "IRQ" "PER" "KGZ" "KEN" "THA" "KOR" "PRI"
## [49] "SGP" "HKG" "MYS" "EGY" "CHL" "MAR" "ECU" "VEN" "ROU" "JPN" "MDV" "NIC"
## [61] "ARG" "TWN" "JOR" "URY" "ETH"

Q3 (b)

filter dataset to include only cluster countries

# Three-letter codes, as used in the Country column, for the countries kept
# in the cluster subset. Malaysia's own code (MYS) is not in this list --
# presumably because Malaysia is analysed separately; confirm against the
# earlier sections.
ClusterCountries <- c(
  "ARG", "ARM", "AUS", "BRA", "CAN", "CHL", "CHN", "COL", "CYP", "CZE",
  "ECU", "DEU", "GRC", "JPN", "KAZ", "MEX", "NLD", "NZL", "PER",
  "ROU", "RUS", "SGP", "SVK", "KOR", "THA", "TUR", "GBR", "USA", "URY", "VNM"
)

# Keep only the rows of CombinedData whose Country is in that list, then
# remove the Group column from the result.
ClusterCountriesData <- filter(CombinedData, Country %in% ClusterCountries)
ClusterCountriesData[["Group"]] <- NULL
#View(ClusterCountriesData)

Finding the best predictors for CLUSTER COUNTRIES

CReligious Regression Model

# Linear model of CReligious on every variable named in `predictors`, fitted
# to ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
religious_fit2 <-lm(paste("CReligious ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(religious_fit2)

## 
## Call:
## lm(formula = paste("CReligious ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54583 -0.58496 -0.01565  0.51228  2.69236 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.1761934  0.0712525  16.507  < 2e-16 ***
## TPeople         0.0322257  0.0139695   2.307  0.02107 *  
## TFamily         0.1061138  0.0110980   9.562  < 2e-16 ***
## TNeighbourhood  0.1104760  0.0092462  11.948  < 2e-16 ***
## TKnow          -0.0029553  0.0093824  -0.315  0.75278    
## TMeet           0.0396751  0.0087941   4.512 6.47e-06 ***
## VFamily         0.0392182  0.0151658   2.586  0.00972 ** 
## VFriends       -0.0380177  0.0087226  -4.359 1.32e-05 ***
## VLeisure       -0.0530996  0.0082885  -6.406 1.52e-10 ***
## VReligion       0.4397484  0.0055477  79.267  < 2e-16 ***
## HSatFin        -0.0086220  0.0027008  -3.192  0.00141 ** 
## HMedicine       0.0019499  0.0073291   0.266  0.79021    
## EPrivate        0.0007071  0.0022520   0.314  0.75354    
## ECompetition   -0.0031231  0.0024707  -1.264  0.20623    
## EHardWork       0.0149439  0.0021854   6.838 8.27e-12 ***
## PIAB           -0.0127858  0.0052819  -2.421  0.01550 *  
## STOpportunity  -0.0038602  0.0026150  -1.476  0.13991    
## STFaith        -0.0122099  0.0021145  -5.775 7.84e-09 ***
## STImportant    -0.0174129  0.0021213  -8.208 2.38e-16 ***
## PNewspaper      0.0147102  0.0037264   3.948 7.92e-05 ***
## PMobile        -0.0052281  0.0043053  -1.214  0.22463    
## PEmail         -0.0092228  0.0040669  -2.268  0.02335 *  
## PSocial        -0.0138346  0.0042588  -3.249  0.00116 ** 
## PDemImp         0.0161737  0.0030831   5.246 1.57e-07 ***
## PDemCurrent    -0.0038406  0.0030188  -1.272  0.20331    
## PSatisfied     -0.0236360  0.0028072  -8.420  < 2e-16 ***
## MF             -0.0486136  0.0115735  -4.200 2.68e-05 ***
## Age             0.0019484  0.0004007   4.863 1.17e-06 ***
## Edu             0.0081634  0.0032163   2.538  0.01115 *  
## Employment     -0.0070537  0.0028934  -2.438  0.01478 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7784 on 19060 degrees of freedom
##   (6386 observations deleted due to missingness)
## Multiple R-squared:  0.3223, Adjusted R-squared:  0.3212 
## F-statistic: 312.5 on 29 and 19060 DF,  p-value: < 2.2e-16

CPress Regression Model

# Linear model of CPress on every variable named in `predictors`, fitted to
# ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
press_fit2 <-lm(paste("CPress ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(press_fit2)

## 
## Call:
## lm(formula = paste("CPress ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.55568 -0.53787  0.03502  0.52172  2.16818 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.2691731  0.0700846  32.378  < 2e-16 ***
## TPeople         0.1322409  0.0136975   9.654  < 2e-16 ***
## TFamily         0.0595585  0.0109096   5.459 4.84e-08 ***
## TNeighbourhood  0.1234149  0.0090860  13.583  < 2e-16 ***
## TKnow          -0.0216410  0.0092254  -2.346 0.018995 *  
## TMeet           0.0771256  0.0086438   8.923  < 2e-16 ***
## VFamily         0.0025041  0.0148950   0.168 0.866491    
## VFriends       -0.0050908  0.0085796  -0.593 0.552948    
## VLeisure       -0.0290420  0.0081426  -3.567 0.000362 ***
## VReligion      -0.0017159  0.0054408  -0.315 0.752481    
## HSatFin        -0.0069011  0.0026554  -2.599 0.009360 ** 
## HMedicine      -0.0003295  0.0072115  -0.046 0.963561    
## EPrivate       -0.0046898  0.0022147  -2.118 0.034227 *  
## ECompetition   -0.0073477  0.0024278  -3.026 0.002477 ** 
## EHardWork       0.0075460  0.0021481   3.513 0.000444 ***
## PIAB           -0.0114822  0.0051839  -2.215 0.026775 *  
## STOpportunity  -0.0213710  0.0025671  -8.325  < 2e-16 ***
## STFaith         0.0061760  0.0020748   2.977 0.002917 ** 
## STImportant     0.0051852  0.0020837   2.488 0.012838 *  
## PNewspaper      0.0565323  0.0036531  15.475  < 2e-16 ***
## PMobile         0.0116193  0.0042232   2.751 0.005942 ** 
## PEmail          0.0017292  0.0039885   0.434 0.664629    
## PSocial        -0.0127841  0.0041766  -3.061 0.002210 ** 
## PDemImp         0.0184911  0.0030302   6.102 1.07e-09 ***
## PDemCurrent    -0.0162619  0.0029707  -5.474 4.45e-08 ***
## PSatisfied     -0.0616288  0.0027611 -22.320  < 2e-16 ***
## MF             -0.0146571  0.0113582  -1.290 0.196914    
## Age            -0.0000148  0.0003934  -0.038 0.969990    
## Edu             0.0232867  0.0031572   7.376 1.70e-13 ***
## Employment     -0.0015516  0.0028422  -0.546 0.585131    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7656 on 19136 degrees of freedom
##   (6310 observations deleted due to missingness)
## Multiple R-squared:  0.1492, Adjusted R-squared:  0.1479 
## F-statistic: 115.7 on 29 and 19136 DF,  p-value: < 2.2e-16

CTelevision Regression Model

# Linear model of CTelevision on every variable named in `predictors`, fitted
# to ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
television_fit2 <-lm(paste("CTelevision ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(television_fit2)

## 
## Call:
## lm(formula = paste("CTelevision ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.50890 -0.51909  0.02971  0.50489  2.11937 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.3047779  0.0680719  33.858  < 2e-16 ***
## TPeople         0.0843542  0.0133156   6.335 2.43e-10 ***
## TFamily         0.0749011  0.0106133   7.057 1.76e-12 ***
## TNeighbourhood  0.1197881  0.0088248  13.574  < 2e-16 ***
## TKnow          -0.0035554  0.0089611  -0.397 0.691551    
## TMeet           0.0553652  0.0083960   6.594 4.39e-11 ***
## VFamily         0.0108970  0.0145030   0.751 0.452444    
## VFriends        0.0004292  0.0083305   0.052 0.958910    
## VLeisure       -0.0272047  0.0079064  -3.441 0.000581 ***
## VReligion      -0.0015982  0.0052905  -0.302 0.762584    
## HSatFin        -0.0061466  0.0025807  -2.382 0.017239 *  
## HMedicine      -0.0012646  0.0070090  -0.180 0.856821    
## EPrivate       -0.0055837  0.0021516  -2.595 0.009462 ** 
## ECompetition   -0.0054399  0.0023591  -2.306 0.021128 *  
## EHardWork       0.0105431  0.0020872   5.051 4.43e-07 ***
## PIAB           -0.0086883  0.0050401  -1.724 0.084753 .  
## STOpportunity  -0.0180053  0.0024944  -7.218 5.46e-13 ***
## STFaith         0.0030921  0.0020181   1.532 0.125503    
## STImportant    -0.0015464  0.0020241  -0.764 0.444868    
## PNewspaper      0.0430018  0.0035542  12.099  < 2e-16 ***
## PMobile         0.0192633  0.0041050   4.693 2.72e-06 ***
## PEmail         -0.0095552  0.0038851  -2.459 0.013924 *  
## PSocial        -0.0062877  0.0040628  -1.548 0.121731    
## PDemImp         0.0270263  0.0029417   9.187  < 2e-16 ***
## PDemCurrent    -0.0150920  0.0028863  -5.229 1.72e-07 ***
## PSatisfied     -0.0667465  0.0026823 -24.884  < 2e-16 ***
## MF             -0.0602095  0.0110424  -5.453 5.03e-08 ***
## Age            -0.0005919  0.0003823  -1.548 0.121581    
## Edu             0.0425865  0.0030676  13.883  < 2e-16 ***
## Employment      0.0017832  0.0027627   0.645 0.518639    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7455 on 19196 degrees of freedom
##   (6250 observations deleted due to missingness)
## Multiple R-squared:  0.1519, Adjusted R-squared:  0.1506 
## F-statistic: 118.5 on 29 and 19196 DF,  p-value: < 2.2e-16

CUnions Regression Model

# Linear model of CUnions on every variable named in `predictors`, fitted to
# ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
unions_fit2 <-lm(paste("CUnions ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(unions_fit2)

## 
## Call:
## lm(formula = paste("CUnions ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68680 -0.55524  0.04165  0.53219  2.12814 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.0494521  0.0726543  28.208  < 2e-16 ***
## TPeople         0.1274032  0.0142086   8.967  < 2e-16 ***
## TFamily         0.0656061  0.0112728   5.820 5.99e-09 ***
## TNeighbourhood  0.1025118  0.0094460  10.852  < 2e-16 ***
## TKnow           0.0540421  0.0095769   5.643 1.70e-08 ***
## TMeet           0.1012732  0.0089984  11.255  < 2e-16 ***
## VFamily        -0.0073202  0.0154233  -0.475 0.635065    
## VFriends        0.0400002  0.0089124   4.488 7.23e-06 ***
## VLeisure       -0.0310266  0.0084889  -3.655 0.000258 ***
## VReligion      -0.0267813  0.0056657  -4.727 2.30e-06 ***
## HSatFin         0.0020994  0.0027658   0.759 0.447816    
## HMedicine      -0.0133589  0.0074778  -1.786 0.074038 .  
## EPrivate       -0.0193189  0.0023099  -8.364  < 2e-16 ***
## ECompetition   -0.0130539  0.0025313  -5.157 2.53e-07 ***
## EHardWork       0.0066405  0.0022392   2.966 0.003025 ** 
## PIAB            0.0004356  0.0053946   0.081 0.935638    
## STOpportunity  -0.0141016  0.0026652  -5.291 1.23e-07 ***
## STFaith         0.0068331  0.0021611   3.162 0.001570 ** 
## STImportant     0.0067258  0.0021753   3.092 0.001992 ** 
## PNewspaper      0.0328075  0.0038033   8.626  < 2e-16 ***
## PMobile         0.0223323  0.0044077   5.067 4.09e-07 ***
## PEmail         -0.0153554  0.0041433  -3.706 0.000211 ***
## PSocial         0.0106467  0.0043512   2.447 0.014422 *  
## PDemImp         0.0068868  0.0031468   2.189 0.028645 *  
## PDemCurrent    -0.0071974  0.0030806  -2.336 0.019484 *  
## PSatisfied     -0.0461116  0.0028630 -16.106  < 2e-16 ***
## MF             -0.0643364  0.0118028  -5.451 5.07e-08 ***
## Age             0.0019973  0.0004100   4.872 1.11e-06 ***
## Edu             0.0179176  0.0032801   5.463 4.75e-08 ***
## Employment      0.0069688  0.0029519   2.361 0.018249 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7815 on 18443 degrees of freedom
##   (7003 observations deleted due to missingness)
## Multiple R-squared:  0.1345, Adjusted R-squared:  0.1332 
## F-statistic: 98.86 on 29 and 18443 DF,  p-value: < 2.2e-16

CCourts Regression Model

# Linear model of CCourts on every variable named in `predictors`, fitted to
# ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
courts_fit2 <-lm(paste("CCourts ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(courts_fit2)

## 
## Call:
## lm(formula = paste("CCourts ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.73393 -0.52403 -0.04758  0.55818  2.51880 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.9959774  0.0732911  27.234  < 2e-16 ***
## TPeople         0.1630897  0.0143083  11.398  < 2e-16 ***
## TFamily         0.0873433  0.0114072   7.657 2.00e-14 ***
## TNeighbourhood  0.1158712  0.0094974  12.200  < 2e-16 ***
## TKnow           0.1040286  0.0096409  10.790  < 2e-16 ***
## TMeet           0.0319911  0.0090369   3.540 0.000401 ***
## VFamily         0.0104735  0.0155894   0.672 0.501700    
## VFriends        0.0574147  0.0089739   6.398 1.61e-10 ***
## VLeisure       -0.0579654  0.0085106  -6.811 9.98e-12 ***
## VReligion       0.0092364  0.0056917   1.623 0.104655    
## HSatFin        -0.0088992  0.0027762  -3.206 0.001350 ** 
## HMedicine      -0.0745899  0.0075407  -9.892  < 2e-16 ***
## EPrivate        0.0055652  0.0023182   2.401 0.016378 *  
## ECompetition   -0.0015328  0.0025424  -0.603 0.546594    
## EHardWork       0.0096956  0.0022497   4.310 1.64e-05 ***
## PIAB            0.0092143  0.0054208   1.700 0.089182 .  
## STOpportunity  -0.0161504  0.0026855  -6.014 1.84e-09 ***
## STFaith         0.0185850  0.0021709   8.561  < 2e-16 ***
## STImportant     0.0101866  0.0021813   4.670 3.03e-06 ***
## PNewspaper      0.0103560  0.0038213   2.710 0.006733 ** 
## PMobile         0.0233043  0.0044195   5.273 1.36e-07 ***
## PEmail         -0.0032531  0.0041687  -0.780 0.435196    
## PSocial        -0.0033385  0.0043653  -0.765 0.444416    
## PDemImp         0.0091748  0.0031661   2.898 0.003762 ** 
## PDemCurrent    -0.0264925  0.0031057  -8.530  < 2e-16 ***
## PSatisfied     -0.0657661  0.0028870 -22.780  < 2e-16 ***
## MF             -0.0181360  0.0118716  -1.528 0.126610    
## Age            -0.0001053  0.0004117  -0.256 0.798054    
## Edu             0.0134099  0.0032974   4.067 4.78e-05 ***
## Employment      0.0027528  0.0029695   0.927 0.353927    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7984 on 19048 degrees of freedom
##   (6398 observations deleted due to missingness)
## Multiple R-squared:  0.2048, Adjusted R-squared:  0.2036 
## F-statistic: 169.1 on 29 and 19048 DF,  p-value: < 2.2e-16

CPParties Regression Model

# Linear model of CPParties on every variable named in `predictors`, fitted
# to ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
pparties_fit2 <-lm(paste("CPParties ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(pparties_fit2)

## 
## Call:
## lm(formula = paste("CPParties ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.14174 -0.49995  0.06452  0.51573  2.26558 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.3915197  0.0689638  34.678  < 2e-16 ***
## TPeople         0.2026132  0.0134871  15.023  < 2e-16 ***
## TFamily         0.0730319  0.0107332   6.804 1.05e-11 ***
## TNeighbourhood  0.1214502  0.0089349  13.593  < 2e-16 ***
## TKnow           0.0139288  0.0090757   1.535 0.124866    
## TMeet           0.0681373  0.0085038   8.013 1.19e-15 ***
## VFamily         0.0102197  0.0146426   0.698 0.485221    
## VFriends        0.0320656  0.0084315   3.803 0.000143 ***
## VLeisure       -0.0775121  0.0079998  -9.689  < 2e-16 ***
## VReligion      -0.0053848  0.0053578  -1.005 0.314895    
## HSatFin        -0.0072237  0.0026105  -2.767 0.005661 ** 
## HMedicine      -0.0111225  0.0070904  -1.569 0.116739    
## EPrivate       -0.0085828  0.0021790  -3.939 8.21e-05 ***
## ECompetition   -0.0086100  0.0023918  -3.600 0.000319 ***
## EHardWork       0.0123578  0.0021152   5.842 5.23e-09 ***
## PIAB            0.0118645  0.0051064   2.323 0.020164 *  
## STOpportunity  -0.0199979  0.0025251  -7.920 2.51e-15 ***
## STFaith        -0.0069655  0.0020440  -3.408 0.000656 ***
## STImportant     0.0193494  0.0020503   9.437  < 2e-16 ***
## PNewspaper      0.0163469  0.0035994   4.542 5.62e-06 ***
## PMobile         0.0258871  0.0041548   6.231 4.74e-10 ***
## PEmail         -0.0275519  0.0039278  -7.015 2.38e-12 ***
## PSocial         0.0191849  0.0041095   4.668 3.06e-06 ***
## PDemImp         0.0263261  0.0029809   8.832  < 2e-16 ***
## PDemCurrent    -0.0067538  0.0029187  -2.314 0.020678 *  
## PSatisfied     -0.1111140  0.0027136 -40.948  < 2e-16 ***
## MF             -0.0244805  0.0111809  -2.189 0.028573 *  
## Age             0.0018662  0.0003873   4.818 1.46e-06 ***
## Edu             0.0409648  0.0031080  13.180  < 2e-16 ***
## Employment     -0.0008380  0.0027960  -0.300 0.764402    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7521 on 19062 degrees of freedom
##   (6384 observations deleted due to missingness)
## Multiple R-squared:  0.2568, Adjusted R-squared:  0.2556 
## F-statistic: 227.1 on 29 and 19062 DF,  p-value: < 2.2e-16

CParliament Regression Model

# Linear model of CParliament on every variable named in `predictors`, fitted
# to ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
parliament_fit2 <-lm(paste("CParliament ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(parliament_fit2)

## 
## Call:
## lm(formula = paste("CParliament ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.98234 -0.50346  0.02038  0.53195  2.57863 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.4695345  0.0702432  35.157  < 2e-16 ***
## TPeople         0.1865382  0.0137336  13.583  < 2e-16 ***
## TFamily         0.0750346  0.0109339   6.863 6.97e-12 ***
## TNeighbourhood  0.1167913  0.0091044  12.828  < 2e-16 ***
## TKnow           0.0406956  0.0092477   4.401 1.09e-05 ***
## TMeet           0.0609645  0.0086657   7.035 2.06e-12 ***
## VFamily         0.0231418  0.0149359   1.549  0.12130    
## VFriends        0.0266446  0.0085905   3.102  0.00193 ** 
## VLeisure       -0.0820433  0.0081511 -10.065  < 2e-16 ***
## VReligion       0.0061257  0.0054572   1.123  0.26166    
## HSatFin        -0.0034520  0.0026616  -1.297  0.19465    
## HMedicine      -0.0198125  0.0072179  -2.745  0.00606 ** 
## EPrivate       -0.0094841  0.0022190  -4.274 1.93e-05 ***
## ECompetition   -0.0048226  0.0024377  -1.978  0.04791 *  
## EHardWork       0.0121235  0.0021554   5.625 1.88e-08 ***
## PIAB            0.0086338  0.0052015   1.660  0.09696 .  
## STOpportunity  -0.0209786  0.0025726  -8.155 3.71e-16 ***
## STFaith         0.0039165  0.0020805   1.882  0.05978 .  
## STImportant     0.0199664  0.0020865   9.570  < 2e-16 ***
## PNewspaper      0.0172890  0.0036650   4.717 2.41e-06 ***
## PMobile         0.0174258  0.0042304   4.119 3.82e-05 ***
## PEmail         -0.0220417  0.0039985  -5.513 3.58e-08 ***
## PSocial         0.0229316  0.0041852   5.479 4.33e-08 ***
## PDemImp         0.0201903  0.0030414   6.638 3.26e-11 ***
## PDemCurrent    -0.0204379  0.0029761  -6.867 6.74e-12 ***
## PSatisfied     -0.1264330  0.0027663 -45.704  < 2e-16 ***
## MF             -0.0222685  0.0113844  -1.956  0.05047 .  
## Age             0.0012231  0.0003943   3.102  0.00193 ** 
## Edu             0.0269764  0.0031647   8.524  < 2e-16 ***
## Employment      0.0003252  0.0028485   0.114  0.90912    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7652 on 19026 degrees of freedom
##   (6420 observations deleted due to missingness)
## Multiple R-squared:  0.2955, Adjusted R-squared:  0.2944 
## F-statistic: 275.2 on 29 and 19026 DF,  p-value: < 2.2e-16

CCivilService Regression Model

# Linear model of CCivilService on every variable named in `predictors`,
# fitted to ClusterCountriesData. The formula is assembled as a string with
# paste(); this exact expression is what summary() echoes under "Call:".
civilService_fit2 <-lm(paste("CCivilService ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(civilService_fit2)

## 
## Call:
## lm(formula = paste("CCivilService ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.76264 -0.48833 -0.05746  0.54176  2.34033 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.1524923  0.0689769  31.206  < 2e-16 ***
## TPeople         0.1408549  0.0134687  10.458  < 2e-16 ***
## TFamily         0.0749386  0.0107415   6.977 3.12e-12 ***
## TNeighbourhood  0.1101538  0.0089369  12.326  < 2e-16 ***
## TKnow           0.1159068  0.0090688  12.781  < 2e-16 ***
## TMeet           0.0550542  0.0084990   6.478 9.54e-11 ***
## VFamily         0.0180942  0.0146186   1.238 0.215822    
## VFriends        0.0535308  0.0084351   6.346 2.26e-10 ***
## VLeisure       -0.0690031  0.0080019  -8.623  < 2e-16 ***
## VReligion       0.0097332  0.0053553   1.817 0.069159 .  
## HSatFin         0.0041785  0.0026117   1.600 0.109639    
## HMedicine      -0.0647370  0.0070879  -9.133  < 2e-16 ***
## EPrivate       -0.0081832  0.0021783  -3.757 0.000173 ***
## ECompetition   -0.0032871  0.0023884  -1.376 0.168760    
## EHardWork       0.0038368  0.0021129   1.816 0.069410 .  
## PIAB            0.0236871  0.0051073   4.638 3.54e-06 ***
## STOpportunity  -0.0235750  0.0025247  -9.338  < 2e-16 ***
## STFaith         0.0115581  0.0020418   5.661 1.53e-08 ***
## STImportant     0.0130458  0.0020483   6.369 1.95e-10 ***
## PNewspaper     -0.0043020  0.0035972  -1.196 0.231746    
## PMobile         0.0163953  0.0041569   3.944 8.04e-05 ***
## PEmail         -0.0146409  0.0039301  -3.725 0.000196 ***
## PSocial         0.0187948  0.0041149   4.568 4.97e-06 ***
## PDemImp         0.0135974  0.0029798   4.563 5.07e-06 ***
## PDemCurrent    -0.0089293  0.0029204  -3.058 0.002235 ** 
## PSatisfied     -0.0751402  0.0027128 -27.698  < 2e-16 ***
## MF             -0.0310702  0.0111731  -2.781 0.005428 ** 
## Age            -0.0004610  0.0003872  -1.191 0.233802    
## Edu             0.0075626  0.0031028   2.437 0.014806 *  
## Employment      0.0057487  0.0027943   2.057 0.039669 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7511 on 19031 degrees of freedom
##   (6415 observations deleted due to missingness)
## Multiple R-squared:  0.2113, Adjusted R-squared:  0.2101 
## F-statistic: 175.8 on 29 and 19031 DF,  p-value: < 2.2e-16

CElections Regression Model

# Linear model of CElections on every variable named in `predictors`, fitted
# to ClusterCountriesData. The formula is assembled as a string with paste();
# this exact expression is what summary() echoes under "Call:".
elections_fit2 <-lm(paste("CElections ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
summary(elections_fit2)

## 
## Call:
## lm(formula = paste("CElections ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.63741 -0.52204 -0.01926  0.56436  2.51336 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.5457439  0.0725289  35.100  < 2e-16 ***
## TPeople         0.1207126  0.0141720   8.518  < 2e-16 ***
## TFamily         0.0601023  0.0112828   5.327 1.01e-07 ***
## TNeighbourhood  0.1064910  0.0093917  11.339  < 2e-16 ***
## TKnow           0.0994317  0.0095428  10.420  < 2e-16 ***
## TMeet           0.0766460  0.0089421   8.571  < 2e-16 ***
## VFamily         0.0056561  0.0153934   0.367 0.713296    
## VFriends        0.0065462  0.0088611   0.739 0.460064    
## VLeisure       -0.0153524  0.0084161  -1.824 0.068141 .  
## VReligion       0.0269983  0.0056317   4.794 1.65e-06 ***
## HSatFin        -0.0041321  0.0027410  -1.508 0.131694    
## HMedicine      -0.0645674  0.0074588  -8.657  < 2e-16 ***
## EPrivate       -0.0064070  0.0022910  -2.797 0.005170 ** 
## ECompetition    0.0012537  0.0025106   0.499 0.617524    
## EHardWork       0.0027040  0.0022204   1.218 0.223315    
## PIAB            0.0038169  0.0053654   0.711 0.476855    
## STOpportunity  -0.0087485  0.0026538  -3.297 0.000981 ***
## STFaith         0.0119297  0.0021460   5.559 2.75e-08 ***
## STImportant     0.0065785  0.0021535   3.055 0.002255 ** 
## PNewspaper      0.0306070  0.0037823   8.092 6.21e-16 ***
## PMobile         0.0208850  0.0043675   4.782 1.75e-06 ***
## PEmail         -0.0058227  0.0041312  -1.409 0.158726    
## PSocial        -0.0118278  0.0043226  -2.736 0.006220 ** 
## PDemImp        -0.0070557  0.0031319  -2.253 0.024280 *  
## PDemCurrent    -0.0489983  0.0030681 -15.970  < 2e-16 ***
## PSatisfied     -0.0772403  0.0028521 -27.082  < 2e-16 ***
## MF              0.0370588  0.0117471   3.155 0.001609 ** 
## Age            -0.0018971  0.0004074  -4.656 3.25e-06 ***
## Edu            -0.0067826  0.0032638  -2.078 0.037711 *  
## Employment     -0.0050986  0.0029383  -1.735 0.082723 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.79 on 19041 degrees of freedom
##   (6405 observations deleted due to missingness)
## Multiple R-squared:  0.2442, Adjusted R-squared:  0.2431 
## F-statistic: 212.2 on 29 and 19041 DF,  p-value: < 2.2e-16

CEnvOrg Regression Model

# Fit a linear regression with CEnvOrg as the response variable.
# The right-hand side is assembled by joining every name in `predictors`
# with " + "; the resulting string is passed to lm() as the formula, which
# is why the "Call:" line in the summary shows the paste() expression.
# NOTE(review): `predictors` is defined outside this section - presumably the
# same predictor set used for the other confidence models; confirm.
envOrg_fit2 <-lm(paste("CEnvOrg ~", paste(predictors, collapse = " + ")), data = ClusterCountriesData)
# Print the residual quantiles, coefficient table, residual standard error,
# R-squared values and overall F-statistic. Rows with missing values are
# excluded from the fit (reported as "deleted due to missingness").
summary(envOrg_fit2)

## 
## Call:
## lm(formula = paste("CEnvOrg ~", paste(predictors, collapse = " + ")), 
##     data = ClusterCountriesData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4322 -0.4628 -0.1382  0.5652  2.3017 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.092e+00  7.342e-02  28.501  < 2e-16 ***
## TPeople         7.596e-02  1.436e-02   5.289 1.25e-07 ***
## TFamily         4.584e-02  1.140e-02   4.020 5.84e-05 ***
## TNeighbourhood  4.550e-02  9.525e-03   4.777 1.79e-06 ***
## TKnow           1.134e-01  9.687e-03  11.707  < 2e-16 ***
## TMeet           5.786e-02  9.082e-03   6.371 1.92e-10 ***
## VFamily         3.004e-03  1.552e-02   0.194 0.846486    
## VFriends        9.053e-03  9.002e-03   1.006 0.314578    
## VLeisure       -1.142e-02  8.546e-03  -1.336 0.181435    
## VReligion       2.716e-03  5.720e-03   0.475 0.634885    
## HSatFin        -1.231e-02  2.788e-03  -4.415 1.02e-05 ***
## HMedicine       1.066e-02  7.563e-03   1.410 0.158636    
## EPrivate       -1.836e-02  2.328e-03  -7.887 3.27e-15 ***
## ECompetition   -6.230e-03  2.555e-03  -2.438 0.014779 *  
## EHardWork       6.002e-03  2.258e-03   2.658 0.007860 ** 
## PIAB            7.522e-05  5.438e-03   0.014 0.988963    
## STOpportunity  -2.696e-02  2.692e-03 -10.013  < 2e-16 ***
## STFaith         7.560e-03  2.181e-03   3.466 0.000529 ***
## STImportant     1.371e-02  2.189e-03   6.262 3.88e-10 ***
## PNewspaper      1.600e-02  3.846e-03   4.160 3.20e-05 ***
## PMobile         2.044e-02  4.447e-03   4.597 4.32e-06 ***
## PEmail         -1.158e-02  4.181e-03  -2.770 0.005615 ** 
## PSocial         7.625e-03  4.382e-03   1.740 0.081868 .  
## PDemImp        -1.553e-02  3.184e-03  -4.878 1.08e-06 ***
## PDemCurrent    -1.203e-02  3.114e-03  -3.864 0.000112 ***
## PSatisfied     -2.646e-02  2.891e-03  -9.153  < 2e-16 ***
## MF             -6.715e-02  1.192e-02  -5.632 1.81e-08 ***
## Age             3.884e-03  4.130e-04   9.405  < 2e-16 ***
## Edu             1.448e-02  3.308e-03   4.376 1.21e-05 ***
## Employment     -6.403e-03  2.975e-03  -2.153 0.031362 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7924 on 18586 degrees of freedom
##   (6860 observations deleted due to missingness)
## Multiple R-squared:  0.09385,    Adjusted R-squared:  0.09244 
## F-statistic: 66.38 on 29 and 18586 DF,  p-value: < 2.2e-16