This file illustrates, step by step, how I conducted the data pre-processing.
library(haven)
library(dplyr)
library(stargazer)
library(rlang)
# Load the survey file (Stata .dta) and the national-level table (CSV).
df_05 <- read_dta("v1_20230401_W5_merge_17.dta")
df_nation <- read.csv("national_level.csv")
# Count the rows per country code and display the counts.
country_counts <- table(df_05[["country"]])
print(country_counts)
##
## 1 2 3 4 5 6 7 8 9 10 11 13 14 15 18 19
## 1045 1200 1268 4941 1284 1200 1259 1200 1540 1002 1200 1237 1627 1630 5318 2516
## 20
## 2500
# Keep only the columns used by the rest of the script.
df <- subset(
  df_05,
  select = c(
    # identifiers
    "country", "Year", "IDnumber",
    # survey items
    "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
    "q9", "q10", "q11", "q12", "q13", "q14", "q15", "q16",
    "q22", "q46", "q47", "q58", "q60", "q62", "q64", "q65", "q67",
    "q82", "q82a", "q83", "q83a", "q84", "q84a", "q85", "q85a",
    "q98", "q132", "q133", "q136", "q166",
    "q168", "q169", "q170", "q171", "q172", "q173",
    # SE* fields
    "SE2", "SE3", "SE5", "SE5A", "SE6", "SE9",
    # remaining columns
    "q134", "w"
  )
)
# Convert labelled columns to plain numeric vectors so that the comparisons
# and arithmetic below operate on the underlying codes. Every other column is
# passed through unchanged.
# `inherits()` is used rather than looking only at `class(x)[1]`, so a column
# whose first class is a subclass (e.g. "haven_labelled_spss") is converted too.
# The list returned by lapply() is turned back into a plain data.frame.
df <- as.data.frame(lapply(df, function(column) {
  if (inherits(column, c("haven_labelled", "vctrs_vctr"))) {
    as.numeric(column)
  } else {
    column
  }
}))
Overview of the data
# Show the first six rows of the selected columns.
head(df, n = 6L)
## country Year IDnumber q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13 q14 q15 q16
## 1 19 2021 1001 2 2 3 2 2 1 3 4 4 6 4 6 4 6 4 6
## 2 19 2021 1002 3 2 3 3 2 2 3 4 4 6 4 6 3 6 3 6
## 3 19 2021 1003 1 1 1 2 1 1 1 3 1 3 3 4 1 3 1 4
## 4 19 2021 1004 3 3 2 3 3 3 6 3 4 4 6 4 1 4 6 6
## 5 19 2021 1005 2 1 1 1 1 1 1 3 4 4 3 4 1 4 3 3
## 6 19 2021 1006 2 2 2 3 2 3 1 3 4 4 3 4 3 4 3 4
## q22 q46 q47 q58 q60 q62 q64 q65 q67 q82 q82a q83 q83a q84 q84a q85 q85a q98
## 1 2 2 2 2 2 2 3 4 2 1 1 1 1 1 2 2 1 2
## 2 2 2 2 2 3 3 3 2 1 1 1 2 2 7 0 1 2 2
## 3 2 3 2 1 3 1 2 1 1 2 1 1 1 2 2 1 1 4
## 4 2 3 2 2 3 1 2 1 3 1 1 1 2 2 1 1 1 2
## 5 2 1 1 1 2 3 2 1 3 1 1 1 1 2 2 1 1 1
## 6 2 4 5 2 2 1 2 3 2 2 2 2 1 7 0 1 2 1
## q132 q133 q136 q166 q168 q169 q170 q171 q172 q173 SE2 SE3 SE5 SE5A SE6 SE9
## 1 1 1 1 1 4 3 4 3 4 2 1 1970 3 3 1902 1
## 2 1 1 2 1 4 4 3 3 3 2 1 1988 3 3 1902 1
## 3 3 1 1 1 4 3 4 4 3 2 1 1968 7 5 50 1
## 4 1 8 1 1 4 2 3 3 2 3 1 1956 5 5 1902 1
## 5 1 1 1 1 3 3 4 3 3 2 1 1989 10 9 50 2
## 6 8 1 2 2 3 4 3 4 2 1 2 1969 3 3 50 2
## q134 w
## 1 4 0.7497833
## 2 2 0.7497833
## 3 5 0.4763111
## 4 5 1.2464163
## 5 2 0.5145350
## 6 1 1.3147015
We re-encode the data so that smaller numbers indicate a stronger preference for economic development and larger numbers indicate a stronger preference for democracy.
# Split the data into three country groups. China (country == 4) and
# Singapore (country == 10) are cleaned with their own column lists because
# some items there must keep their original codes.
# The trailing numbers are the row counts of each group.
df_china <- filter(df, country == 4) # 4941
df_singapore <- filter(df, country == 10) # 1002
df_not_china <- filter(df, !(country == 4 | country == 10)) # 26024

# Set a code to NA when it is >= 7 or negative; keep every other value.
recode_out_of_range <- function(x) ifelse(x >= 7 | x < 0, NA, x)

# Columns that are never recoded: the SE* fields and the identifiers.
# NOTE(review): `w` is not excluded, so it goes through the same filter as the
# survey items and any `w` value >= 7 would become NA - confirm this is intended.
exclude_cols <- c("SE2", "SE3", "SE5", "SE5A", "SE6", "SE9", "country", "Year", "IDnumber") # excluded columns
check_cols <- setdiff(names(df_not_china), exclude_cols) # columns to be checked
df_not_china[check_cols] <- lapply(df_not_china[check_cols], recode_out_of_range)

# China: q7 and q16 are additionally left untouched.
exclude_cols_china <- c("q7", "q16", exclude_cols)
check_cols_china <- setdiff(names(df_china), exclude_cols_china)
df_china[check_cols_china] <- lapply(df_china[check_cols_china], recode_out_of_range)

# Singapore: q16 is additionally left untouched.
exclude_cols_singapore <- c("q16", exclude_cols)
check_cols_singapore <- setdiff(names(df_singapore), exclude_cols_singapore)
df_singapore[check_cols_singapore] <- lapply(df_singapore[check_cols_singapore], recode_out_of_range)

# Stack the groups again (China first, Singapore last) and drop the country
# codes that are not analysed.
df <- rbind(df_china, df_not_china, df_singapore)
countries_to_drop <- c(2, 12, 15, 18, 19, 20)
df <- df[!(df$country %in% countries_to_drop), , drop = FALSE] # 18803
summary(df)
## country Year IDnumber q1
## Min. : 1.000 Min. :2018 Min. : 1 Min. :1.000
## 1st Qu.: 4.000 1st Qu.:2018 1st Qu.: 552 1st Qu.:2.000
## Median : 6.000 Median :2019 Median : 1074 Median :3.000
## Mean : 7.051 Mean :2019 Mean : 277891 Mean :2.873
## 3rd Qu.:10.000 3rd Qu.:2019 3rd Qu.: 3254 3rd Qu.:4.000
## Max. :14.000 Max. :2020 Max. :6169801 Max. :5.000
## NA's :454
## q2 q3 q4 q5 q6
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.00 Median :2.000 Median :3.000 Median :3.000 Median :2.000
## Mean :2.63 Mean :2.408 Mean :2.869 Mean :2.674 Mean :2.351
## 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :377 NA's :1821 NA's :76 NA's :124 NA's :1616
## q7 q8 q9 q10
## Min. :-1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:-1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median : 2.000 Median :3.000 Median :3.000 Median :3.000
## Mean : 1.802 Mean :2.816 Mean :2.712 Mean :2.944
## 3rd Qu.: 3.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. : 6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :616 NA's :1375 NA's :1022 NA's :1139
## q11 q12 q13 q14 q15
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.00
## Median :3.000 Median :3.000 Median :2.000 Median :3.000 Median :3.00
## Mean :2.876 Mean :2.807 Mean :2.294 Mean :2.643 Mean :2.81
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.00
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.00
## NA's :1372 NA's :1012 NA's :949 NA's :738 NA's :817
## q16 q22 q46 q47
## Min. :-1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:-1.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median : 2.000 Median :2.000 Median :3.000 Median :2.000
## Mean : 1.642 Mean :1.688 Mean :2.677 Mean :2.603
## 3rd Qu.: 3.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. : 6.000 Max. :3.000 Max. :4.000 Max. :5.000
## NA's :1027 NA's :391 NA's :216 NA's :178
## q58 q60 q62 q64
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :3.000 Median :2.000
## Mean :1.819 Mean :2.141 Mean :2.567 Mean :2.447
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## NA's :578 NA's :936 NA's :641 NA's :986
## q65 q67 q82 q82a
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.921 Mean :2.147 Mean :1.578 Mean :1.514
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000 Max. :2.000 Max. :2.000
## NA's :651 NA's :1029 NA's :1859 NA's :558
## q83 q83a q84 q84a
## Min. :1.000 Min. :0.000 Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.375 Mean :1.516 Mean :1.528 Mean :1.442
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## NA's :2384 NA's :756 NA's :2454 NA's :599
## q85 q85a q98 q132
## Min. :1.000 Min. :0.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :3.000 Median :1.000
## Mean :1.213 Mean :1.472 Mean :2.512 Mean :1.551
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :4.000 Max. :3.000
## NA's :1727 NA's :509 NA's :1675 NA's :2699
## q133 q136 q166 q168 q169
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:2.000
## Median :1.000 Median :2.000 Median :1.000 Median :3.00 Median :3.000
## Mean :1.218 Mean :1.938 Mean :1.477 Mean :2.61 Mean :2.576
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.00 3rd Qu.:3.000
## Max. :2.000 Max. :4.000 Max. :4.000 Max. :4.00 Max. :4.000
## NA's :2700 NA's :2022 NA's :235 NA's :2844 NA's :3118
## q170 q171 q172 q173
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.697 Mean :2.644 Mean :2.805 Mean :2.604
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## NA's :3037 NA's :3138 NA's :3076 NA's :2723
## SE2 SE3 SE5 SE5A
## Min. :-1.000 Min. : -1 Min. :-1.000 Min. :-1.00
## 1st Qu.: 1.000 1st Qu.:1959 1st Qu.: 4.000 1st Qu.: 7.00
## Median : 2.000 Median :1973 Median : 7.000 Median :11.00
## Mean : 1.522 Mean :1969 Mean : 6.153 Mean :12.25
## 3rd Qu.: 2.000 3rd Qu.:1986 3rd Qu.: 8.000 3rd Qu.:14.00
## Max. : 2.000 Max. :2002 Max. :99.000 Max. :99.00
##
## SE6 SE9 q134 w
## Min. : -1 Min. :-1.000 Min. :1.000 Min. :0.3665
## 1st Qu.: 40 1st Qu.:-1.000 1st Qu.:1.000 1st Qu.:0.7811
## Median : 60 Median : 1.000 Median :2.000 Median :0.9996
## Mean :4157 Mean : 0.735 Mean :2.381 Mean :0.9981
## 3rd Qu.:9990 3rd Qu.: 2.000 3rd Qu.:3.000 3rd Qu.:1.1032
## Max. :9999 Max. : 9.000 Max. :5.000 Max. :5.7556
## NA's :1516 NA's :4
# Regime type: a 0/1 indicator assigned per country code.
df$regime_type <- dplyr::recode(
  df$country,
  "1" = 1, "3" = 1, "5" = 1, "7" = 1, "9" = 1, "10" = 1,
  "4" = 0, "6" = 0, "8" = 0, "11" = 0, "13" = 0, "14" = 0
)
# Items q7 to q16, whose codes are reversed further below.
reverse_columns <- paste0("q", 7:16)
# Reverse the coding of a numeric-coded item so that the lowest code becomes
# the highest and vice versa.
#
# Arguments:
#   x            A vector (or factor) of numeric codes; NA values stay NA.
#   scale_levels Optional vector with the complete set of codes of the scale,
#                in ascending order (e.g. 1:6). When NULL (the default) the
#                levels are taken from `x` itself, i.e. the values that
#                actually occur (or the existing levels if `x` is a factor).
#
# Returns:
#   A numeric vector of the same length as `x` holding the reversed codes.
#
# Caveat: with the default, the reversal is relative to the OBSERVED codes.
# If a scale point never occurs in `x` (e.g. only 2, 3 and 5 are present on a
# 1-6 scale) the mapping is not a true scale reversal. Pass `scale_levels`
# to pin the full scale in that case.
reverse_factor_levels <- function(x, scale_levels = NULL) {
  x_factor <- if (is.null(scale_levels)) {
    as.factor(x)
  } else {
    factor(x, levels = scale_levels)
  }
  # Relabel the levels in reverse order: the first level gets the last label, etc.
  levels(x_factor) <- rev(levels(x_factor))
  # Go through character so the level LABELS, not the internal integer
  # codes, become the numeric result.
  as.numeric(as.character(x_factor))
}
# Split by country group once more: China (4) and Singapore (10) are
# processed with their own item lists below.
df_china <- df[which(df$country == 4), , drop = FALSE]
df_singapore <- filter(df, country == 10)
df_not_china <- df[which(df$country != 4 & df$country != 10), , drop = FALSE]
summary(df_singapore)
## country Year IDnumber q1 q2
## Min. :10 Min. :2020 Min. : 60012 Min. :1.000 Min. :1.000
## 1st Qu.:10 1st Qu.:2020 1st Qu.:6020176 1st Qu.:2.000 1st Qu.:2.000
## Median :10 Median :2020 Median :6070851 Median :3.000 Median :2.000
## Mean :10 Mean :2020 Mean :5105947 Mean :2.957 Mean :2.612
## 3rd Qu.:10 3rd Qu.:2020 3rd Qu.:6116251 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :10 Max. :2020 Max. :6169801 Max. :5.000 Max. :5.000
## NA's :3
## q3 q4 q5 q6
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.902 Mean :2.827 Mean :2.784 Mean :2.739
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :23 NA's :2 NA's :5 NA's :24
## q7 q8 q9 q10 q11
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :2.00 Median :3.000 Median :3.000
## Mean :2.541 Mean :2.534 Mean :2.53 Mean :2.851 Mean :2.657
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :6.000 Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000
## NA's :7 NA's :21 NA's :8 NA's :21 NA's :15
## q12 q13 q14 q15 q16
## Min. :1.000 Min. :1.0 Min. :1.000 Min. :1.000 Min. :-1
## 1st Qu.:2.000 1st Qu.:2.0 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:-1
## Median :3.000 Median :2.0 Median :2.000 Median :3.000 Median :-1
## Mean :2.504 Mean :2.3 Mean :2.242 Mean :2.628 Mean :-1
## 3rd Qu.:3.000 3rd Qu.:3.0 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:-1
## Max. :6.000 Max. :6.0 Max. :6.000 Max. :6.000 Max. :-1
## NA's :6 NA's :16 NA's :5 NA's :5
## q22 q46 q47 q58
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :3.000 Median :2.000 Median :2.000
## Mean :1.746 Mean :2.635 Mean :2.469 Mean :1.855
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :2.000 Max. :4.000 Max. :5.000 Max. :4.000
## NA's :6 NA's :4 NA's :5
## q60 q62 q64 q65 q67
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.00
## Median :2.000 Median :3.000 Median :3.000 Median :2.000 Median :2.00
## Mean :2.133 Mean :2.609 Mean :2.753 Mean :2.041 Mean :2.26
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.00
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.00
## NA's :23 NA's :11 NA's :12 NA's :6 NA's :7
## q82 q82a q83 q83a
## Min. :1.000 Min. :0.000 Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :1.000 Median :2.000
## Mean :1.601 Mean :1.538 Mean :1.446 Mean :1.541
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## NA's :63 NA's :2 NA's :57
## q84 q84a q85 q85a
## Min. :1.000 Min. :0.000 Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :1.000 Median :1.000
## Mean :1.463 Mean :1.503 Mean :1.171 Mean :1.465
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## NA's :43 NA's :1 NA's :30 NA's :3
## q98 q132 q133 q136
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000
## Median :3.000 Median :2.000 Median :1.000 Median :2.000
## Mean :2.714 Mean :1.732 Mean :1.309 Mean :1.893
## 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :3.000 Max. :2.000 Max. :4.000
## NA's :18 NA's :39 NA's :93 NA's :36
## q166 q168 q169 q170
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :1.000 Median :3.000 Median :3.000 Median :3.000
## Mean :1.492 Mean :2.814 Mean :2.834 Mean :2.889
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## NA's :6 NA's :73 NA's :61 NA's :52
## q171 q172 q173 SE2 SE3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1925
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1960
## Median :3.000 Median :3.000 Median :3.000 Median :2.000 Median :1974
## Mean :2.897 Mean :2.894 Mean :2.554 Mean :1.524 Mean :1973
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1987
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :2.000 Max. :1999
## NA's :54 NA's :59 NA's :36
## SE5 SE5A SE6 SE9 q134
## Min. : 1.000 Min. : 1.0 Min. : 10 Min. :1.000 Min. :1.000
## 1st Qu.: 5.000 1st Qu.:10.0 1st Qu.: 40 1st Qu.:1.000 1st Qu.:1.000
## Median : 7.000 Median :13.0 Median : 50 Median :1.000 Median :2.000
## Mean : 6.717 Mean :13.1 Mean :1553 Mean :1.353 Mean :2.126
## 3rd Qu.: 9.000 3rd Qu.:16.0 3rd Qu.: 60 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :10.000 Max. :99.0 Max. :9990 Max. :2.000 Max. :5.000
## NA's :26
## w regime_type
## Min. :0.4440 Min. :1
## 1st Qu.:0.9072 1st Qu.:1
## Median :1.0443 Median :1
## Mean :1.0000 Mean :1
## 3rd Qu.:1.1886 3rd Qu.:1
## Max. :1.7350 Max. :1
##
# Reverse-code the items listed in `reverse_columns` into new `<item>_n`
# columns, one country group at a time, then average them into `trust` and
# `trust_all` (row means that ignore missing items).

# All countries except China and Singapore: every item in `reverse_columns`.
for (col in reverse_columns) {
  new_col_name <- paste0(col, "_n")
  df_not_china[[new_col_name]] <- reverse_factor_levels(df_not_china[[col]])
}

# China: every item except q7 and q16.
reverse_columns_china <- setdiff(reverse_columns, c("q7", "q16"))
for (col in reverse_columns_china) {
  new_col_name <- paste0(col, "_n")
  item <- df_china[[col]]
  if (col == "q13") {
    # q13 is reversed on the fixed levels 1..6. The factor is only a temporary
    # copy: storing it back into df_china$q13 would turn the raw q13 column
    # into character once the groups are combined with rbind() again.
    item <- factor(item, levels = 1:6)
  }
  df_china[[new_col_name]] <- reverse_factor_levels(item)
}

# Singapore: every item except q16.
reverse_columns_singapore <- setdiff(reverse_columns, c("q16"))
for (col in reverse_columns_singapore) {
  new_col_name <- paste0(col, "_n")
  df_singapore[[new_col_name]] <- reverse_factor_levels(df_singapore[[col]])
}

# trust / trust_all for all countries except China and Singapore.
df_not_china <- mutate(
  df_not_china,
  trust = rowMeans(df_not_china[c("q7_n", "q9_n", "q10_n", "q11_n", "q12_n")], na.rm = TRUE)
)
df_not_china <- mutate(
  df_not_china,
  trust_all = rowMeans(
    df_not_china[c("q7_n", "q8_n", "q9_n", "q10_n", "q11_n", "q12_n",
                   "q13_n", "q14_n", "q15_n", "q16_n")],
    na.rm = TRUE
  )
)

# trust / trust_all for China: q7 is left out of both, q16 out of trust_all.
df_china <- mutate(
  df_china,
  trust = rowMeans(df_china[c("q9_n", "q10_n", "q11_n", "q12_n")], na.rm = TRUE)
)
df_china <- mutate(
  df_china,
  trust_all = rowMeans(
    df_china[c("q8_n", "q9_n", "q10_n", "q11_n", "q12_n",
               "q13_n", "q14_n", "q15_n")],
    na.rm = TRUE
  )
)

# trust / trust_all for Singapore: q16 is left out of trust_all.
df_singapore <- mutate(
  df_singapore,
  trust = rowMeans(df_singapore[c("q7_n", "q9_n", "q10_n", "q11_n", "q12_n")], na.rm = TRUE)
)
df_singapore <- mutate(
  df_singapore,
  trust_all = rowMeans(
    df_singapore[c("q7_n", "q8_n", "q9_n", "q10_n", "q11_n", "q12_n",
                   "q13_n", "q14_n", "q15_n")],
    na.rm = TRUE
  )
)
# Union of the column names of the three group data frames, in order of
# first appearance (China, then Singapore, then the remaining countries).
all_columns <- Reduce(
  union,
  list(names(df_china), names(df_singapore), names(df_not_china))
)
# Pad a data frame so that it contains every column named in `all_columns`.
#
# Arguments:
#   df          The data frame to pad.
#   all_columns Character vector of column names that must be present.
#
# Returns:
#   `df` with one extra all-NA column appended for each name in `all_columns`
#   that it did not already have; existing columns are left as they are.
add_missing_columns <- function(df, all_columns) {
  absent <- unique(all_columns[!(all_columns %in% names(df))])
  for (i in seq_along(absent)) {
    df[[absent[i]]] <- NA
  }
  df
}
# Give every group the same set of columns, then stack them again
# (remaining countries first, then China, then Singapore).
df_china <- add_missing_columns(df = df_china, all_columns = all_columns)
df_singapore <- add_missing_columns(df = df_singapore, all_columns = all_columns)
df_not_china <- add_missing_columns(df = df_not_china, all_columns = all_columns)
df <- do.call(rbind, list(df_not_china, df_china, df_singapore))
# q132: remap the codes 1 -> 3, 2 -> 1, 3 -> 2.
df$q132_n <- as.numeric(dplyr::recode(df$q132, `1` = 3, `2` = 1, `3` = 2))
# q133: 3 - code, i.e. codes 1 and 2 swap places.
df$q133_n <- 3 - df$q133
# q136: codes 3 and 4 become 1, any other non-missing code becomes 2.
df$q136_n <- ifelse(df$q136 %in% c(3, 4), 1, ifelse(is.na(df$q136), NA, 2))

# q168-q173: collapse codes 1/2 into 1 and codes 3/4 into 2 in new
# `<item>_n` columns; other values are carried over unchanged.
target_columns <- paste0("q", 168:173)
for (col in target_columns) {
  new_col <- paste0(col, "_n")
  codes <- df[[col]]
  codes[codes %in% c(1, 2)] <- 1
  codes[codes %in% c(3, 4)] <- 2
  df[[new_col]] <- as.numeric(as.character(codes))
}

# Row means over the available (non-missing) items.
# NOTE(review): both composites average the RAW q168-q173 columns, not the
# collapsed `_n` versions created just above - confirm this is intended.
df <- mutate(
  df,
  effectiveness_all = rowMeans(
    df[c("q132_n", "q133_n", "q136_n", "q168", "q169", "q170", "q171", "q172", "q173")],
    na.rm = TRUE
  )
)
df <- mutate(
  df,
  effectiveness = rowMeans(
    df[c("q168", "q169", "q170", "q171", "q172", "q173")],
    na.rm = TRUE
  )
)
# Reverse-code q1-q6 into `<item>_n` columns and average them into
# `eco_assess`. Note that `reverse_columns` is reassigned here and no longer
# holds the q7-q16 list used earlier.
reverse_columns <- paste0("q", 1:6)
for (col in reverse_columns) {
  new_col_name <- paste0(col, "_n")
  df[[new_col_name]] <- reverse_factor_levels(df[[col]])
}
df <- mutate(
  df,
  eco_assess = rowMeans(
    df[c("q1_n", "q2_n", "q3_n", "q4_n", "q5_n", "q6_n")],
    na.rm = TRUE
  )
)
# q82-q85: turn each item into a 0/1 indicator - code 2 becomes 0, any other
# non-missing code becomes 1 (missing stays missing).
for (item in c("q82", "q83", "q84", "q85")) {
  df[[paste0(item, "_n")]] <- ifelse(df[[item]] == 2, 0, 1)
}
# Combine a 0/1 indicator `x` with a follow-up code `y` (1 or 2) into one
# score from 0 to 3:
#   x = 0, y = 1 -> 0      x = 0, y = 2 -> 1
#   x = 1, y = 2 -> 2      x = 1, y = 1 -> 3
# Any other combination (including missing values) yields NA.
assign_values <- function(x, y) {
  case_when(
    x == 1 & y == 1 ~ 3,
    x == 1 & y == 2 ~ 2,
    x == 0 & y == 2 ~ 1,
    x == 0 & y == 1 ~ 0
  )
}
# Pair each 0/1 indicator with its follow-up item and store the combined
# 0-3 score in `<indicator>_combine`.
columns <- lapply(
  c("q82", "q83", "q84", "q85"),
  function(item) c(paste0(item, "_n"), paste0(item, "a"))
)
for (col in columns) {
  df[[paste0(col[1], "_combine")]] <- assign_values(df[[col[1]]], df[[col[2]]])
}

# regime_pre: row mean of the four 0/1 indicators.
df <- mutate(
  df,
  regime_pre = rowMeans(df[c("q82_n", "q83_n", "q84_n", "q85_n")], na.rm = TRUE)
)
# regime_preference: row mean of the four combined 0-3 scores.
df <- mutate(
  df,
  regime_preference = rowMeans(
    df[c("q82_n_combine", "q83_n_combine", "q84_n_combine", "q85_n_combine")],
    na.rm = TRUE
  )
)
# Country-level covariates.
#
# FH_PR / FH_CL are reversed on the fixed 1-7 scale (8 - rating).
# NOTE(review): this assumes FH_PR and FH_CL hold the standard Freedom House
# ratings, which run from 1 to 7 - confirm against national_level.csv.
# Reversing on the levels that happen to occur in the table is not a true
# reversal when a rating is absent: the rendered summary shows FH_PR with
# median 4 mapped to FH_PR_n with median 3, and FH_CL_n topping out at 6.
# FH_rating is the mean of the two reversed ratings; the GDP columns are
# log-transformed.
df_nation <- df_nation %>%
  mutate(
    FH_PR_n = 8 - FH_PR,
    FH_CL_n = 8 - FH_CL,
    FH_rating = (FH_PR_n + FH_CL_n) / 2,
    GDP_per_capita_current_log = log(GDP_per_capita_current),
    GDP_per_capita_constant_log = log(GDP_per_capita_constant)
  )
# Dependent variable, "without" variant: q134 codes 1-2 -> 0, codes 3-4 -> 1;
# every other value (including code 5 and missing) -> NA.
df$q134_without <- ifelse(df$q134 %in% c(3, 4), 1,
                          ifelse(df$q134 %in% c(1, 2), 0, NA))
# Dependent variable, "with" variant: same as above, but code 5 also -> 1.
df$q134_with <- ifelse(df$q134 %in% c(3, 4, 5), 1,
                       ifelse(df$q134 %in% c(1, 2), 0, NA))
## Reorder the columns: each raw item is followed by its recoded version.
## NOTE(review): q22_n, q46_n, q47_n, q166_n, SE2_n, SE9_n, social_value and
## age are not created anywhere in the code shown in this document; they must
## come from a step that is not displayed here - confirm before running.
df <- select(
  df,
  country, Year, IDnumber,
  q1, q1_n, q2, q2_n, q3, q3_n, q4, q4_n, q5, q5_n, q6, q6_n,
  q7, q7_n, q8, q8_n, q9, q9_n, q10, q10_n, q11, q11_n, q12, q12_n,
  q13, q13_n, q14, q14_n, q15, q15_n, q16, q16_n,
  q22, q22_n, q46, q46_n, q47, q47_n,
  q58, q60, q62, q64, q65, q67,
  q82, q82_n, q82a, q82_n_combine,
  q83, q83_n, q83a, q83_n_combine,
  q84, q84_n, q84a, q84_n_combine,
  q85, q85_n, q85a, q85_n_combine,
  q98, q132, q132_n, q133, q133_n, q136, q136_n, q166, q166_n,
  q168, q168_n, q169, q169_n, q170, q170_n,
  q171, q171_n, q172, q172_n, q173, q173_n,
  SE2, SE2_n, SE3, SE5, SE5A, SE6, SE9, SE9_n,
  effectiveness_all, effectiveness, eco_assess, social_value,
  trust, trust_all, regime_pre, regime_preference, age,
  q134, q134_with, q134_without, regime_type, w
)
# Attach the country-level covariates to every respondent row (matched on
# `country`), drop the two descriptive columns, and summarise the result.
df <- merge(x = df, y = df_nation, by = "country")
df <- select(df, -c(country_en, Survey.year))
summary(df)
## country Year IDnumber q1
## Min. : 1.000 Min. :2018 Min. : 1 Min. :1.000
## 1st Qu.: 4.000 1st Qu.:2018 1st Qu.: 552 1st Qu.:2.000
## Median : 6.000 Median :2019 Median : 1074 Median :3.000
## Mean : 7.051 Mean :2019 Mean : 277891 Mean :2.873
## 3rd Qu.:10.000 3rd Qu.:2019 3rd Qu.: 3254 3rd Qu.:4.000
## Max. :14.000 Max. :2020 Max. :6169801 Max. :5.000
## NA's :454
## q1_n q2 q2_n q3 q3_n
## Min. :1.000 Min. :1.00 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.00 1st Qu.:3.00 1st Qu.:2.000 1st Qu.:3.000
## Median :3.000 Median :2.00 Median :4.00 Median :2.000 Median :4.000
## Mean :3.127 Mean :2.63 Mean :3.37 Mean :2.408 Mean :3.592
## 3rd Qu.:4.000 3rd Qu.:3.00 3rd Qu.:4.00 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.00 Max. :5.00 Max. :5.000 Max. :5.000
## NA's :454 NA's :377 NA's :377 NA's :1821 NA's :1821
## q4 q4_n q5 q5_n
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.869 Mean :3.131 Mean :2.674 Mean :3.326
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## NA's :76 NA's :76 NA's :124 NA's :124
## q6 q6_n q7 q7_n
## Min. :1.000 Min. :1.000 Min. :-1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:-1.000 1st Qu.:3.000
## Median :2.000 Median :4.000 Median : 2.000 Median :4.000
## Mean :2.351 Mean :3.649 Mean : 1.802 Mean :4.153
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.: 3.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. : 6.000 Max. :6.000
## NA's :1616 NA's :1616 NA's :616 NA's :5557
## q8 q8_n q9 q9_n
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000
## Median :3.000 Median :4.000 Median :3.000 Median :4.000
## Mean :2.816 Mean :4.184 Mean :2.712 Mean :4.288
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :1375 NA's :1375 NA's :1022 NA's :1022
## q10 q10_n q11 q11_n
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000
## Median :3.000 Median :4.000 Median :3.000 Median :4.000
## Mean :2.944 Mean :4.056 Mean :2.876 Mean :4.124
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :1139 NA's :1139 NA's :1372 NA's :1372
## q12 q12_n q13 q13_n
## Min. :1.000 Min. :1.000 Length:18803 Min. :1.000
## 1st Qu.:2.000 1st Qu.:4.000 Class :character 1st Qu.:4.000
## Median :3.000 Median :4.000 Mode :character Median :5.000
## Mean :2.807 Mean :4.193 Mean :4.706
## 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:6.000
## Max. :6.000 Max. :6.000 Max. :6.000
## NA's :1012 NA's :1012 NA's :949
## q14 q14_n q15 q15_n q16
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.00 Min. :-1.000
## 1st Qu.:2.000 1st Qu.:4.000 1st Qu.:2.00 1st Qu.:4.00 1st Qu.:-1.000
## Median :3.000 Median :4.000 Median :3.00 Median :4.00 Median : 2.000
## Mean :2.643 Mean :4.357 Mean :2.81 Mean :4.19 Mean : 1.642
## 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:3.00 3rd Qu.:5.00 3rd Qu.: 3.000
## Max. :6.000 Max. :6.000 Max. :6.00 Max. :6.00 Max. : 6.000
## NA's :738 NA's :738 NA's :817 NA's :817 NA's :1027
## q16_n q22 q22_n q46
## Min. :1.00 Min. :1.000 Min. :0.0000 Min. :1.000
## 1st Qu.:3.00 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:2.000
## Median :4.00 Median :2.000 Median :0.0000 Median :3.000
## Mean :4.03 Mean :1.688 Mean :0.3412 Mean :2.677
## 3rd Qu.:5.00 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:3.000
## Max. :6.00 Max. :3.000 Max. :1.0000 Max. :4.000
## NA's :6970 NA's :391 NA's :391 NA's :216
## q46_n q47 q47_n q58
## Min. :1.000 Min. :1.000 Min. :0.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :1.000 Median :2.000
## Mean :2.323 Mean :2.603 Mean :1.603 Mean :1.819
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :5.000 Max. :4.000 Max. :4.000
## NA's :216 NA's :178 NA's :178 NA's :578
## q60 q62 q64 q65
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :3.000 Median :2.000 Median :2.000
## Mean :2.141 Mean :2.567 Mean :2.447 Mean :1.921
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## NA's :936 NA's :641 NA's :986 NA's :651
## q67 q82 q82_n q82a
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :0.0000 Median :2.000
## Mean :2.147 Mean :1.578 Mean :0.4218 Mean :1.514
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :4.000 Max. :2.000 Max. :1.0000 Max. :2.000
## NA's :1029 NA's :1859 NA's :1859 NA's :558
## q82_n_combine q83 q83_n q83a
## Min. :0.00 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :1.00 Median :1.000 Median :1.0000 Median :2.000
## Mean :1.43 Mean :1.375 Mean :0.6249 Mean :1.516
## 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.00 Max. :2.000 Max. :1.0000 Max. :2.000
## NA's :1984 NA's :2384 NA's :2384 NA's :756
## q83_n_combine q84 q84_n q84a
## Min. :0.000 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :0.0000 Median :2.000
## Mean :1.752 Mean :1.528 Mean :0.4721 Mean :1.442
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :1.0000 Max. :2.000
## NA's :2480 NA's :2454 NA's :2454 NA's :599
## q84_n_combine q85 q85_n q85a
## Min. :0.000 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.0000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :1.0000 Median :2.000
## Mean :1.515 Mean :1.213 Mean :0.7868 Mean :1.472
## 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :1.0000 Max. :2.000
## NA's :2544 NA's :1727 NA's :1727 NA's :509
## q85_n_combine q98 q132 q132_n q133
## Min. :0.00 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.00 Median :3.000 Median :1.000 Median :3.000 Median :1.000
## Mean :2.09 Mean :2.512 Mean :1.551 Mean :2.438 Mean :1.218
## 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:1.000
## Max. :3.00 Max. :4.000 Max. :3.000 Max. :3.000 Max. :2.000
## NA's :1837 NA's :1675 NA's :2699 NA's :2699 NA's :2700
## q133_n q136 q136_n q166
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :2.000 Median :1.000
## Mean :1.782 Mean :1.938 Mean :1.888 Mean :1.477
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :4.000 Max. :2.000 Max. :4.000
## NA's :2700 NA's :2022 NA's :2022 NA's :235
## q166_n q168 q168_n q169 q169_n
## Min. :1.000 Min. :1.00 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.00 1st Qu.:1.00 1st Qu.:2.000 1st Qu.:1.000
## Median :4.000 Median :3.00 Median :2.00 Median :3.000 Median :2.000
## Mean :3.523 Mean :2.61 Mean :1.57 Mean :2.576 Mean :1.542
## 3rd Qu.:4.000 3rd Qu.:3.00 3rd Qu.:2.00 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.00 Max. :2.00 Max. :4.000 Max. :2.000
## NA's :235 NA's :2844 NA's :2844 NA's :3118 NA's :3118
## q170 q170_n q171 q171_n
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :3.000 Median :2.000 Median :3.000 Median :2.000
## Mean :2.697 Mean :1.636 Mean :2.644 Mean :1.598
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :2.000 Max. :4.000 Max. :2.000
## NA's :3037 NA's :3037 NA's :3138 NA's :3138
## q172 q172_n q173 q173_n
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:2.000 1st Qu.:1.000
## Median :3.000 Median :2.00 Median :3.000 Median :2.000
## Mean :2.805 Mean :1.69 Mean :2.604 Mean :1.578
## 3rd Qu.:3.000 3rd Qu.:2.00 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :2.00 Max. :4.000 Max. :2.000
## NA's :3076 NA's :3076 NA's :2723 NA's :2723
## SE2 SE2_n SE3 SE5
## Min. :-1.000 Min. :0.0000 Min. : -1 7 :5039
## 1st Qu.: 1.000 1st Qu.:0.0000 1st Qu.:1959 9 :3276
## Median : 2.000 Median :0.0000 Median :1973 5 :2408
## Mean : 1.522 Mean :0.4764 Mean :1969 3 :2329
## 3rd Qu.: 2.000 3rd Qu.:1.0000 3rd Qu.:1986 6 :1337
## Max. : 2.000 Max. :1.0000 Max. :2002 2 :1292
## (Other):3122
## SE5A SE6 SE9 SE9_n
## Min. :-1.00 9990 :6676 Min. :-1.000 Min. :0.0000
## 1st Qu.: 7.00 60 :5061 1st Qu.:-1.000 1st Qu.:0.0000
## Median :11.00 40 :2525 Median : 1.000 Median :1.0000
## Mean :12.25 10 :1291 Mean : 0.735 Mean :0.7463
## 3rd Qu.:14.00 9999 :1089 3rd Qu.: 2.000 3rd Qu.:1.0000
## Max. :99.00 20 : 782 Max. : 9.000 Max. :1.0000
## (Other):1379
## effectiveness_all effectiveness eco_assess social_value
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.111 1st Qu.:2.333 1st Qu.:2.833 1st Qu.:1.833
## Median :2.444 Median :2.667 Median :3.400 Median :2.167
## Mean :2.427 Mean :2.651 Mean :3.350 Mean :2.174
## 3rd Qu.:2.750 3rd Qu.:3.000 3rd Qu.:3.833 3rd Qu.:2.500
## Max. :4.000 Max. :4.000 Max. :5.000 Max. :4.000
## NA's :938 NA's :1828 NA's :14 NA's :147
## trust trust_all regime_pre regime_preference
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.:3.400 1st Qu.:3.667 1st Qu.:0.5000 1st Qu.:1.333
## Median :4.333 Median :4.375 Median :0.5000 Median :1.667
## Mean :4.212 Mean :4.276 Mean :0.5783 Mean :1.696
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:0.7500 3rd Qu.:2.000
## Max. :6.000 Max. :6.000 Max. :1.0000 Max. :3.000
## NA's :358 NA's :262 NA's :754 NA's :813
## age q134 q134_with q134_without
## Min. : 18.00 Min. :1.000 Min. :0.0000 Min. :0.000
## 1st Qu.: 33.00 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.000
## Median : 47.00 Median :2.000 Median :0.0000 Median :0.000
## Mean : 47.43 Mean :2.381 Mean :0.3608 Mean :0.255
## 3rd Qu.: 60.00 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1021.00 Max. :5.000 Max. :1.0000 Max. :1.000
## NA's :33 NA's :1516 NA's :1516 NA's :3971
## regime_type w FH_PR FH_CL
## Min. :0.0000 Min. :0.3665 Min. :1.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.7811 1st Qu.:2.000 1st Qu.:2.000
## Median :0.0000 Median :0.9996 Median :4.000 Median :4.000
## Mean :0.3934 Mean :0.9981 Mean :4.323 Mean :4.037
## 3rd Qu.:1.0000 3rd Qu.:1.1032 3rd Qu.:7.000 3rd Qu.:6.000
## Max. :1.0000 Max. :5.7556 Max. :7.000 Max. :6.000
## NA's :4
## confusion_society inflation unemployment inequality
## Min. :0.0000 Min. :0.00600 Min. :0.00800 Min. :30.70
## 1st Qu.:0.0000 1st Qu.:0.01200 1st Qu.:0.02400 1st Qu.:32.80
## Median :1.0000 Median :0.01600 Median :0.03800 Median :38.40
## Mean :0.5699 Mean :0.02407 Mean :0.03319 Mean :36.81
## 3rd Qu.:1.0000 3rd Qu.:0.03200 3rd Qu.:0.03800 3rd Qu.:38.50
## Max. :1.0000 Max. :0.06900 Max. :0.07800 Max. :45.90
##
## eco_growth_rate GDP_per_capita_constant GDP_per_capita_current
## Min. :0.01100 Min. : 1488 Min. : 1275
## 1st Qu.:0.04200 1st Qu.: 3741 1st Qu.: 3708
## Median :0.06000 Median : 9619 Median : 9905
## Mean :0.05396 Mean :13766 Mean :14507
## 3rd Qu.:0.06700 3rd Qu.:10778 3rd Qu.:11074
## Max. :0.06900 Max. :61340 Max. :65831
##
## FH_PR_n FH_CL_n FH_rating GDP_per_capita_current_log
## Min. :1.000 Min. :1.000 Min. :1.000 Min. : 7.151
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 8.218
## Median :3.000 Median :3.000 Median :3.000 Median : 9.201
## Mean :3.258 Mean :2.963 Mean :3.111 Mean : 9.038
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.: 9.312
## Max. :7.000 Max. :6.000 Max. :6.500 Max. :11.095
##
## GDP_per_capita_constant_log
## Min. : 7.306
## 1st Qu.: 8.227
## Median : 9.172
## Mean : 9.023
## 3rd Qu.: 9.285
## Max. :11.024
##
# write.csv(df, "w5_all.csv", row.names = FALSE)
# save(df, file = "w5_all.rda")
### 1. The original model
# Two binomial (logit) GLMs with q134_with as the outcome.
#
# logit.fit1: `trust` plus the individual-level covariates,
#   regime_preference, and the country-level covariates (inflation,
#   unemployment, inequality, eco_growth_rate,
#   GDP_per_capita_constant_log, FH_rating).
# logit.fit2: `trust_all` plus the individual-level covariates only.
#
# The order of terms is kept exactly as before so that the coefficient
# order in downstream tables does not change. The stray duplicated `+`
# that used to precede `inflation` was a typo (R parsed it as a unary
# plus); it has been removed without changing the model.
#
# NOTE(review): the printed table lists dummy terms for SE5 and SE6
# (e.g. SE599, SE69999), so both appear to enter as factors, and several
# of those levels have very large standard errors. Check for sparse
# categories or missing-value codes that were not recoded to NA.
logit.fit1 <- glm(
  q134_with ~ trust + effectiveness + eco_assess + social_value +
    regime_preference + inflation + unemployment + inequality +
    eco_growth_rate + q46_n + q47_n + q166_n + q22_n + SE9_n + SE5 +
    SE6 + SE2_n + SE3 + GDP_per_capita_constant_log + FH_rating,
  data = df, family = binomial()
)
logit.fit2 <- glm(
  q134_with ~ trust_all + effectiveness + eco_assess + social_value +
    q46_n + q47_n + q166_n + q22_n + SE9_n + SE5 + SE6 + SE2_n + SE3,
  data = df, family = binomial()
)
### 2. Adjusted model on trust
# Both fits below use q134_without as the outcome and share the same
# individual-level covariates; they differ only in the trust measure
# (`trust` for logit.fit3, `trust_all` for logit.fit4).
logit.fit3 <- glm(
  formula = q134_without ~
    trust + effectiveness + eco_assess + social_value +
    q46_n + q47_n + q166_n + q22_n +
    SE9_n + SE5 + SE6 + SE2_n + SE3,
  data = df,
  family = binomial()
)
logit.fit4 <- glm(
  formula = q134_without ~
    trust_all + effectiveness + eco_assess + social_value +
    q46_n + q47_n + q166_n + q22_n +
    SE9_n + SE5 + SE6 + SE2_n + SE3,
  data = df,
  family = binomial()
)
# Print the four pooled logit models side by side as a plain-text table;
# columns (1)-(4) follow the order of the list below.
stargazer(
  list(logit.fit1, logit.fit2, logit.fit3, logit.fit4),
  type = "text",
  title = "Logistic Regression Results"
)
##
## Logistic Regression Results
## =======================================================================
## Dependent variable:
## -------------------------------------------
## q134_with q134_without
## (1) (2) (3) (4)
## -----------------------------------------------------------------------
## trust 0.088*** 0.073***
## (0.025) (0.026)
##
## trust_all 0.120*** 0.050*
## (0.025) (0.029)
##
## effectiveness 0.644*** 0.646*** 0.569*** 0.572***
## (0.036) (0.033) (0.039) (0.039)
##
## eco_assess 0.092*** 0.114*** 0.146*** 0.167***
## (0.035) (0.032) (0.038) (0.037)
##
## social_value 0.072* 0.115*** 0.084* 0.077
## (0.043) (0.041) (0.047) (0.047)
##
## regime_preference 0.218***
## (0.033)
##
## inflation 11.451***
## (2.579)
##
## unemployment -11.177***
## (1.566)
##
## inequality -0.031***
## (0.009)
##
## eco_growth_rate 22.639***
## (2.021)
##
## q46_n 0.126*** 0.159*** 0.240*** 0.240***
## (0.024) (0.024) (0.028) (0.028)
##
## q47_n 0.009 0.011 0.003 0.004
## (0.015) (0.014) (0.017) (0.017)
##
## q166_n 0.029 0.042 0.050 0.058
## (0.032) (0.031) (0.036) (0.036)
##
## q22_n 0.147*** 0.172*** 0.165*** 0.169***
## (0.040) (0.038) (0.045) (0.045)
##
## SE9_n -0.103** -0.014 -0.079 -0.074
## (0.046) (0.044) (0.050) (0.050)
##
## SE52 0.118 -0.011 0.229 0.219
## (0.127) (0.124) (0.154) (0.154)
##
## SE53 0.065 -0.113 0.140 0.125
## (0.116) (0.112) (0.142) (0.141)
##
## SE54 0.003 -0.157 -0.070 -0.087
## (0.141) (0.138) (0.175) (0.175)
##
## SE55 0.017 -0.205* -0.023 -0.031
## (0.114) (0.111) (0.141) (0.141)
##
## SE56 0.065 0.021 0.216 0.204
## (0.123) (0.120) (0.150) (0.149)
##
## SE57 0.055 -0.169 0.098 0.077
## (0.110) (0.106) (0.136) (0.135)
##
## SE58 0.201 -0.081 0.239 0.210
## (0.127) (0.123) (0.152) (0.152)
##
## SE59 0.092 -0.212* 0.092 0.067
## (0.113) (0.109) (0.139) (0.138)
##
## SE510 0.472*** 0.053 0.541*** 0.514***
## (0.163) (0.157) (0.182) (0.182)
##
## SE511 -11.808 -11.514 -11.865 -11.866
## (324.744) (324.744) (535.411) (535.411)
##
## SE599 0.729 0.332 0.411 0.406
## (0.543) (0.519) (0.594) (0.594)
##
## SE60 2.225** 1.757* 0.743 0.740
## (1.079) (1.060) (1.463) (1.461)
##
## SE61 0.676 -0.237 0.084 0.082
## (0.756) (0.752) (0.858) (0.858)
##
## SE610 0.864 0.097 0.351 0.354
## (0.750) (0.744) (0.850) (0.851)
##
## SE620 1.095 0.413 0.408 0.431
## (0.750) (0.745) (0.852) (0.852)
##
## SE628 13.283 12.979 14.686 14.698
## (132.370) (131.519) (304.251) (304.329)
##
## SE630 -10.805 -11.077 -11.701 -11.695
## (188.852) (207.432) (346.870) (345.765)
##
## SE640 0.796 -0.228 -0.093 -0.093
## (0.749) (0.743) (0.849) (0.850)
##
## SE641 1.503 0.627 0.997 0.990
## (1.602) (1.598) (1.649) (1.650)
##
## SE642 0.699 0.020 0.549 0.553
## (0.849) (0.841) (0.936) (0.936)
##
## SE650 0.723 -0.238 0.032 0.037
## (0.768) (0.763) (0.870) (0.870)
##
## SE660 1.018 0.485 0.552 0.555
## (0.746) (0.742) (0.848) (0.848)
##
## SE661 1.657* 0.930 1.341 1.342
## (1.004) (0.990) (1.097) (1.096)
##
## SE663 -11.472 -11.901 -12.236 -12.196
## (324.745) (324.745) (535.412) (535.412)
##
## SE670 0.595 -0.398 -0.315 -0.300
## (0.991) (0.988) (1.148) (1.148)
##
## SE671 0.145 -0.964 -12.857 -12.847
## (1.358) (1.360) (253.545) (253.610)
##
## SE672 0.774 -0.030 0.259 0.265
## (0.841) (0.834) (0.937) (0.937)
##
## SE673 0.672 -0.095 0.301 0.308
## (0.995) (0.989) (1.071) (1.071)
##
## SE674 -0.149 -0.931 -0.722 -0.716
## (0.851) (0.845) (0.952) (0.952)
##
## SE675 0.777 0.597 0.887 0.882
## (0.814) (0.810) (0.912) (0.913)
##
## SE676 0.564 -0.392 -0.218 -0.217
## (0.765) (0.761) (0.870) (0.870)
##
## SE677 0.147 -0.752 -0.983 -0.989
## (1.066) (1.064) (1.343) (1.343)
##
## SE680 1.304* 0.757 0.873 0.869
## (0.780) (0.773) (0.881) (0.881)
##
## SE6201 2.266 1.867 1.757 1.735
## (1.387) (1.392) (1.499) (1.500)
##
## SE6202 2.453** 2.071* 2.277* 2.274*
## (1.119) (1.115) (1.205) (1.205)
##
## SE69990 1.047 0.635 0.559 0.575
## (0.745) (0.741) (0.847) (0.848)
##
## SE69998 -10.672 -10.965 -11.310 -11.279
## (324.745) (324.745) (535.412) (535.412)
##
## SE69999 1.358* 1.094 0.908 0.914
## (0.749) (0.744) (0.851) (0.851)
##
## SE2_n 0.068* 0.073** 0.109*** 0.109***
## (0.037) (0.036) (0.042) (0.041)
##
## SE3 0.0002 0.0002 0.001 0.001
## (0.0004) (0.0003) (0.001) (0.001)
##
## GDP_per_capita_constant_log 0.440***
## (0.057)
##
## FH_rating -0.082***
## (0.020)
##
## Constant -9.164*** -4.757*** -7.153*** -7.211***
## (1.277) (0.987) (1.646) (1.665)
##
## -----------------------------------------------------------------------
## Observations 15,624 15,838 13,617 13,653
## Log Likelihood -9,333.263 -9,654.294 -7,404.680 -7,426.395
## Akaike Inf. Crit. 18,780.530 19,408.590 14,909.360 14,952.790
## =======================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Single-country model: keep only respondents with country code 7
# (Taiwan, going by the object name) and fit the individual-level
# specification with `trust_all` and q134_with as the outcome.
df_taiwan <- filter(df, country == 7)
logit.taiwan <- glm(
  formula = q134_with ~
    trust_all + effectiveness + eco_assess + social_value +
    q46_n + q47_n + q166_n + q22_n +
    SE9_n + SE5 + SE6 + SE2_n + SE3,
  data = df_taiwan,
  family = binomial()
)
# Single-country model: keep only respondents with country code 1
# (Japan, going by the object name) and fit the individual-level
# specification with `trust_all` and q134_with as the outcome.
df_japan <- filter(df, country == 1)
logit.japan <- glm(
  formula = q134_with ~
    trust_all + effectiveness + eco_assess + social_value +
    q46_n + q47_n + q166_n + q22_n +
    SE9_n + SE5 + SE6 + SE2_n + SE3,
  data = df_japan,
  family = binomial()
)
# Print the two single-country models as a plain-text table. The columns
# follow the list order: (1) is logit.taiwan, (2) is logit.japan.
stargazer(
  list(logit.taiwan, logit.japan),
  type = "text",
  title = "Logistic Regression Results"
)
##
## Logistic Regression Results
## ==============================================
## Dependent variable:
## ----------------------------
## q134_with
## (1) (2)
## ----------------------------------------------
## trust_all 0.316** -0.258**
## (0.147) (0.130)
##
## effectiveness 1.495*** 1.358***
## (0.229) (0.198)
##
## eco_assess 0.381** 0.042
## (0.167) (0.161)
##
## social_value 0.649*** -0.070
## (0.250) (0.199)
##
## q46_n 0.234** 0.232**
## (0.119) (0.110)
##
## q47_n 0.065 -0.067
## (0.067) (0.076)
##
## q166_n 0.068 -0.254**
## (0.145) (0.119)
##
## q22_n 0.061 0.133
## (0.175) (0.153)
##
## SE9_n -0.160 0.115
## (0.187) (0.178)
##
## SE52 0.930
## (0.828)
##
## SE53 -0.221 -14.171
## (0.678) (478.514)
##
## SE54 -0.837
## (1.240)
##
## SE55 -0.712
## (0.700)
##
## SE56 -0.060 -13.666
## (0.836) (478.514)
##
## SE57 -0.360 -14.223
## (0.647) (478.514)
##
## SE58 -0.113 -13.642
## (0.650) (478.514)
##
## SE59 -0.114 -13.846
## (0.653) (478.514)
##
## SE510 0.021 -13.408
## (0.692) (478.515)
##
## SE599 -13.278
## (882.744)
##
## SE511 -27.878
## (1,004.097)
##
## SE610 1.094* 0.583
## (0.649) (1.155)
##
## SE620 0.155 0.755
## (0.336) (1.501)
##
## SE630 -10.613 -14.395
## (882.744) (882.744)
##
## SE640 -14.067
## (882.744)
##
## SE660 -0.223 1.338*
## (0.235) (0.778)
##
## SE661 1.701
## (1.066)
##
## SE676 -0.202
## (0.282)
##
## SE677 -1.173
## (1.111)
##
## SE675 0.667
## (0.863)
##
## SE680 0.792 0.895
## (0.731) (0.879)
##
## SE69990 -0.078 1.127
## (0.241) (0.774)
##
## SE69999 -9.196
## (882.744)
##
## SE2_n 0.333* -0.078
## (0.177) (0.159)
##
## SE3 0.0004 0.001
## (0.001) (0.006)
##
## Constant -10.752*** 7.960
## (1.620) (478.647)
##
## ----------------------------------------------
## Observations 1,133 896
## Log Likelihood -463.025 -557.918
## Akaike Inf. Crit. 990.050 1,169.837
## ==============================================
## Note: *p<0.1; **p<0.05; ***p<0.01