This file illustrates, step by step, how I conducted the data pre-processing.
library(haven)
library(dplyr)
library(stargazer)
# Read the raw merged survey file (Stata format).
df_05 <- read_dta('v1_20230401_W5_merge_17.dta')

# Keep only the columns used below and turn labelled columns into plain
# numeric vectors; every other column is passed through untouched. The result
# is a base data.frame.
df <- as.data.frame(lapply(
  df_05[c(
    "country", "Year", "IDnumber",
    "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
    "q9", "q10", "q11", "q12", "q13", "q14", "q15", "q16",
    "q22", "q46", "q47", "q58", "q60", "q62", "q64", "q65", "q67",
    "q98", "q132", "q133", "q136", "q166", "q168",
    "q169", "q170", "q171", "q172", "q173",
    "SE2", "SE3", "SE5", "SE5A", "SE6", "SE9", "q134", "w"
  )],
  function(column) {
    # Only the first class entry is inspected, as in the original check.
    is_labelled <- class(column)[1] %in% c("haven_labelled", "vctrs_vctr")
    if (is_labelled) as.numeric(column) else column
  }
))
Overview of the data
# Print the first six rows as a quick sanity check of the selected columns.
head(df, n = 6L)
## country Year IDnumber q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13 q14 q15 q16
## 1 19 2021 1001 2 2 3 2 2 1 3 4 4 6 4 6 4 6 4 6
## 2 19 2021 1002 3 2 3 3 2 2 3 4 4 6 4 6 3 6 3 6
## 3 19 2021 1003 1 1 1 2 1 1 1 3 1 3 3 4 1 3 1 4
## 4 19 2021 1004 3 3 2 3 3 3 6 3 4 4 6 4 1 4 6 6
## 5 19 2021 1005 2 1 1 1 1 1 1 3 4 4 3 4 1 4 3 3
## 6 19 2021 1006 2 2 2 3 2 3 1 3 4 4 3 4 3 4 3 4
## q22 q46 q47 q58 q60 q62 q64 q65 q67 q98 q132 q133 q136 q166 q168 q169 q170
## 1 2 2 2 2 2 2 3 4 2 2 1 1 1 1 4 3 4
## 2 2 2 2 2 3 3 3 2 1 2 1 1 2 1 4 4 3
## 3 2 3 2 1 3 1 2 1 1 4 3 1 1 1 4 3 4
## 4 2 3 2 2 3 1 2 1 3 2 1 8 1 1 4 2 3
## 5 2 1 1 1 2 3 2 1 3 1 1 1 1 1 3 3 4
## 6 2 4 5 2 2 1 2 3 2 1 8 1 2 2 3 4 3
## q171 q172 q173 SE2 SE3 SE5 SE5A SE6 SE9 q134 w
## 1 3 4 2 1 1970 3 3 1902 1 4 0.7497833
## 2 3 3 2 1 1988 3 3 1902 1 2 0.7497833
## 3 4 3 2 1 1968 7 5 50 1 5 0.4763111
## 4 3 2 3 1 1956 5 5 1902 1 5 1.2464163
## 5 3 3 2 1 1989 10 9 50 2 2 0.5145350
## 6 4 2 1 2 1969 3 3 50 2 1 1.3147015
We re-encode the data so that smaller numbers indicate a stronger preference for economic development and larger numbers indicate a stronger preference for democracy.
# Keep only the rows of `data` whose values in `check_cols` are all valid.
#
# A value counts as invalid when it is >= 7 or negative (presumably
# missing/refusal codes -- confirm against the codebook). Rows whose validity
# cannot be decided because of an NA are dropped as well, which is what the
# previous subset()/rowSums() filter did.
#
# data:       data frame to filter.
# check_cols: character vector of column names to inspect.
# Returns `data` restricted to the valid rows; row names are preserved.
drop_invalid_rows <- function(data, check_cols) {
  if (length(check_cols) == 0) {
    return(data)
  }
  # One logical vector per column, OR-ed together. Unlike sapply() + rowSums()
  # this also works when `data` has zero rows or exactly one row.
  flags <- lapply(data[check_cols], function(x) x >= 7 | x < 0)
  any_invalid <- Reduce(`|`, flags)
  is_valid <- !is.na(any_invalid) & !any_invalid
  data[is_valid, , drop = FALSE]
}

# Split the data by country code: China (4) and Singapore (10) are filtered
# separately because some items are exempt from the validity check there.
df_china <- df %>% filter(country == 4) # 4941
df_singapore <- df %>% filter(country == 10) # 1002
df_not_china <- df %>% filter(country != 4 & country != 10) # 26024

# All other countries: columns that are exempt from the validity check
exclude_cols <- c("SE2", "SE3", "SE5", "SE6", "SE5A", "SE9", "IDnumber", 'country', 'Year')
check_cols <- setdiff(names(df_not_china), exclude_cols) # columns that are checked
df_not_china <- drop_invalid_rows(df_not_china, check_cols) # 12364

# China: q7 and q16 are additionally exempt from the check
exclude_cols_china <- c("SE2", "SE3", "SE5", "SE5A", "SE6", "SE9", "IDnumber", 'country', 'q7', 'q16', 'Year')
check_cols_china <- setdiff(names(df_china), exclude_cols_china)
df_china <- drop_invalid_rows(df_china, check_cols_china) # 1896

# Singapore: q16 is additionally exempt from the check
exclude_cols_singapore <- c("SE2", "SE3", "SE5", "SE5A", "SE6", "SE9", "IDnumber", 'country', 'q16', 'Year')
check_cols_singapore <- setdiff(names(df_singapore), exclude_cols_singapore)
df_singapore <- drop_invalid_rows(df_singapore, check_cols_singapore) # 739

# Stack the three filtered parts back together.
df <- rbind(df_china, df_not_china, df_singapore) # 14999

# Drop the listed country codes from the pooled data.
countries_to_drop <- c(2, 12, 15, 18, 19, 20)
df <- subset(df, !(country %in% countries_to_drop))
summary(df)
## country Year IDnumber q1
## Min. : 1.000 Min. :2018 Min. : 1 Min. :1.000
## 1st Qu.: 4.000 1st Qu.:2018 1st Qu.: 518 1st Qu.:2.000
## Median : 6.000 Median :2019 Median : 1035 Median :3.000
## Mean : 7.084 Mean :2019 Mean : 375181 Mean :2.905
## 3rd Qu.:10.000 3rd Qu.:2019 3rd Qu.: 3642 3rd Qu.:4.000
## Max. :14.000 Max. :2020 Max. :6169101 Max. :5.000
## q2 q3 q4 q5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :2.000 Median :3.000 Median :3.000
## Mean :2.675 Mean :2.456 Mean :2.795 Mean :2.653
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## q6 q7 q8 q9
## Min. :1.000 Min. :-1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.: 1.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median : 2.000 Median :3.000 Median :3.000
## Mean :2.338 Mean : 2.128 Mean :2.885 Mean :2.831
## 3rd Qu.:3.000 3rd Qu.: 3.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. : 6.000 Max. :6.000 Max. :6.000
## q10 q11 q12 q13
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :2.000
## Mean :3.117 Mean :3.015 Mean :2.823 Mean :2.381
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## q14 q15 q16 q22
## Min. :1.000 Min. :1.000 Min. :-1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:-1.000 1st Qu.:1.000
## Median :3.000 Median :3.000 Median : 2.000 Median :2.000
## Mean :2.693 Mean :2.845 Mean : 1.951 Mean :1.684
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.: 3.000 3rd Qu.:2.000
## Max. :6.000 Max. :6.000 Max. : 6.000 Max. :3.000
## q46 q47 q58 q60
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :2.547 Mean :2.419 Mean :1.829 Mean :2.159
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :4.000 Max. :5.000 Max. :4.000 Max. :4.000
## q62 q64 q65 q67
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :2.000 Median :2.000
## Mean :2.574 Mean :2.484 Mean :1.942 Mean :2.164
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## q98 q132 q133 q136
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000
## Median :3.000 Median :1.000 Median :1.000 Median :2.000
## Mean :2.504 Mean :1.528 Mean :1.219 Mean :1.915
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :4.000 Max. :3.000 Max. :2.000 Max. :4.000
## q166 q168 q169 q170 q171
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :1.000 Median :3.00 Median :3.000 Median :3.000 Median :3.000
## Mean :1.502 Mean :2.59 Mean :2.597 Mean :2.696 Mean :2.662
## 3rd Qu.:2.000 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.00 Max. :4.000 Max. :4.000 Max. :4.000
## q172 q173 SE2 SE3
## Min. :1.000 Min. :1.000 Min. :-1.000 Min. : -1
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.000 1st Qu.:1963
## Median :3.000 Median :3.000 Median : 1.000 Median :1977
## Mean :2.793 Mean :2.622 Mean : 1.495 Mean :1974
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.: 2.000 3rd Qu.:1989
## Max. :4.000 Max. :4.000 Max. : 2.000 Max. :2002
## SE5 SE5A SE6 SE9
## Min. : 1.000 Min. :-1.00 Min. : -1 Min. :-1.0000
## 1st Qu.: 5.000 1st Qu.: 9.00 1st Qu.: 40 1st Qu.: 1.0000
## Median : 7.000 Median :12.00 Median : 60 Median : 1.0000
## Mean : 6.579 Mean :13.07 Mean :3921 Mean : 0.8939
## 3rd Qu.: 8.000 3rd Qu.:14.00 3rd Qu.:9990 3rd Qu.: 2.0000
## Max. :99.000 Max. :99.00 Max. :9999 Max. : 9.0000
## q134 w
## Min. :1.000 Min. :0.3665
## 1st Qu.:1.000 1st Qu.:0.7927
## Median :2.000 Median :1.0000
## Mean :2.297 Mean :1.0099
## 3rd Qu.:3.000 3rd Qu.:1.1279
## Max. :5.000 Max. :4.7394
reverse_columns <- c('q7', 'q8', 'q9', 'q10', 'q11', 'q12', 'q13', 'q14', 'q15', 'q16')

# Reverse-code a numeric rating scale so the lowest value becomes the highest
# and vice versa (e.g. on a 1-6 scale: 1 -> 6, 2 -> 5, ..., 6 -> 1).
#
# The reversal is computed as (min + max) - x. Reversing the *observed*
# factor levels instead gives wrong results whenever a scale point does not
# occur in the data (with 1, 2, 4, 6 observed, 2 would map to 4, not 5).
#
# x:           numeric vector, or a factor whose levels are numeric codes.
#              For a factor the declared levels define the scale, so
#              factor(x, levels = 1:6) still pins the scale to 1-6.
# scale_range: optional c(min, max) of the full scale. When NULL (default)
#              the range is taken from the factor levels or, for non-factor
#              input, from the observed values. Supply it when the extreme
#              scale points might be absent from the data.
# Returns a plain numeric vector of the same length as `x`; NA stays NA.
reverse_factor_levels <- function(x, scale_range = NULL) {
  if (is.factor(x)) {
    scale_points <- as.numeric(levels(x))
    values <- as.numeric(as.character(x))
  } else {
    values <- as.numeric(x)
    scale_points <- values
  }
  if (is.null(scale_range)) {
    scale_points <- scale_points[!is.na(scale_points)]
    if (length(scale_points) == 0) {
      # Empty or all-NA input: nothing to reverse.
      return(values)
    }
    scale_range <- range(scale_points)
  }
  stopifnot(
    is.numeric(scale_range),
    length(scale_range) == 2,
    !anyNA(scale_range)
  )
  sum(scale_range) - values
}
# Here we have to consider China & Singapore data again
# Split the filtered data by country again: China (4) and Singapore (10) need
# their own reversal rules, every other country shares one rule.
df_china <- subset(df, subset = country == 4)
df_singapore <- filter(df, country == 10)
df_not_china <- subset(df, subset = !(country == 4 | country == 10)) # 7592
summary(df_singapore)
## country Year IDnumber q1 q2
## Min. :10 Min. :2020 Min. : 60023 Min. :1.000 Min. :1.000
## 1st Qu.:10 1st Qu.:2020 1st Qu.:6020851 1st Qu.:2.000 1st Qu.:2.000
## Median :10 Median :2020 Median :6071001 Median :3.000 Median :2.000
## Mean :10 Mean :2020 Mean :5099176 Mean :2.926 Mean :2.614
## 3rd Qu.:10 3rd Qu.:2020 3rd Qu.:6115051 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :10 Max. :2020 Max. :6169101 Max. :5.000 Max. :5.000
## q3 q4 q5 q6
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.917 Mean :2.804 Mean :2.756 Mean :2.693
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## q7 q8 q9 q10
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.562 Mean :2.533 Mean :2.566 Mean :2.859
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## q11 q12 q13 q14
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :2.000 Median :2.000
## Mean :2.686 Mean :2.512 Mean :2.329 Mean :2.271
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## q15 q16 q22 q46 q47
## Min. :1.000 Min. :-1 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:2.000 1st Qu.:-1 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.00
## Median :3.000 Median :-1 Median :2.000 Median :2.000 Median :2.00
## Mean :2.644 Mean :-1 Mean :1.727 Mean :2.516 Mean :2.41
## 3rd Qu.:3.000 3rd Qu.:-1 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:3.00
## Max. :6.000 Max. :-1 Max. :2.000 Max. :4.000 Max. :5.00
## q58 q60 q62 q64
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :3.000 Median :3.000
## Mean :1.876 Mean :2.133 Mean :2.645 Mean :2.804
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## q65 q67 q98 q132
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :3.000 Median :1.000
## Mean :2.054 Mean :2.271 Mean :2.725 Mean :1.654
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :3.000
## q133 q136 q166 q168
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000
## Median :1.000 Median :2.000 Median :1.000 Median :3.000
## Mean :1.291 Mean :1.881 Mean :1.482 Mean :2.804
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :2.000 Max. :4.000 Max. :4.000 Max. :4.000
## q169 q170 q171 q172
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.855 Mean :2.894 Mean :2.894 Mean :2.908
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
## q173 SE2 SE3 SE5
## Min. :1.000 Min. :1.000 Min. :1930 Min. : 1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1963 1st Qu.: 5.000
## Median :3.000 Median :1.000 Median :1976 Median : 7.000
## Mean :2.609 Mean :1.498 Mean :1975 Mean : 7.084
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1988 3rd Qu.: 9.000
## Max. :4.000 Max. :2.000 Max. :1999 Max. :10.000
## SE5A SE6 SE9 q134
## Min. : 2.00 Min. : 10 Min. :1.000 Min. :1.000
## 1st Qu.:10.00 1st Qu.: 40 1st Qu.:1.000 1st Qu.:1.000
## Median :14.00 Median : 50 Median :1.000 Median :2.000
## Mean :13.57 Mean :1672 Mean :1.321 Mean :2.069
## 3rd Qu.:16.00 3rd Qu.: 60 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :99.00 Max. :9990 Max. :2.000 Max. :5.000
## w
## Min. :0.4440
## 1st Qu.:0.9072
## Median :1.0443
## Mean :0.9999
## 3rd Qu.:1.1886
## Max. :1.7350
# Countries other than China and Singapore: reverse every trust item.
df_not_china[reverse_columns] <- lapply(
  df_not_china[reverse_columns],
  reverse_factor_levels
)

# China: reverse every trust item except q7 and q16.
reverse_columns_china <- setdiff(reverse_columns, c('q7', 'q16'))
df_china$q13 <- factor(df_china$q13, levels = 1:6) # make sure q13 has levels 1 to 6
df_china[reverse_columns_china] <- lapply(
  df_china[reverse_columns_china],
  reverse_factor_levels
)

# Singapore: reverse every trust item except q16.
# This must write to df_singapore. Applying it to df_china (as before) flipped
# the China items a second time, back to their original coding, and left the
# Singapore items unreversed.
reverse_columns_singapore <- setdiff(reverse_columns, c('q16'))
df_singapore[reverse_columns_singapore] <- lapply(
  df_singapore[reverse_columns_singapore],
  reverse_factor_levels
)

# Countries other than China and Singapore: trust (5 items) and trust_all
# (all 10 items).
df_not_china <- df_not_china %>% mutate(trust = ((q7 + q9 + q10 + q11 + q12) / 5))
df_not_china <- df_not_china %>%
  mutate(trust_all = (q7 + q8 + q9 + q10 + q11 + q12 + q13 + q14 + q15 + q16) / 10)

# China: q7 and q16 are left out, so the denominators shrink accordingly.
df_china <- df_china %>% mutate(trust = ((q9 + q10 + q11 + q12) / 4)) # without q7
df_china <- df_china %>%
  mutate(trust_all = (q8 + q9 + q10 + q11 + q12 + q13 + q14 + q15) / 8) # without q7 and q16

# Singapore: q16 is left out of trust_all.
df_singapore <- df_singapore %>% mutate(trust = ((q7 + q9 + q10 + q11 + q12) / 5))
df_singapore <- df_singapore %>%
  mutate(trust_all = (q7 + q8 + q9 + q10 + q11 + q12 + q13 + q14 + q15) / 9) # without q16

# Stack the three parts back together.
df <- rbind(df_china, df_not_china, df_singapore) # 10227
# q132: remap 1 -> 3, 2 -> 1, 3 -> 2; any other value is left unchanged.
df$q132 <- as.numeric(
  ifelse(df$q132 == 1, 3,
    ifelse(df$q132 == 2, 1,
      ifelse(df$q132 == 3, 2, df$q132)
    )
  )
)

# q133: flip the two-point coding (1 <-> 2).
df$q133 <- 3 - df$q133

# q136: answers 3 and 4 become 1, everything else becomes 2.
df$q136 <- 2 - (df$q136 %in% c(3, 4))

# q168-q173: collapse the four-point answers into two groups
# (1 or 2 -> 1, 3 or 4 -> 2); other values pass through unchanged.
target_columns <- c('q168', 'q169', 'q170', 'q171', 'q172', 'q173')
df[target_columns] <- lapply(df[target_columns], function(item) {
  collapsed <- ifelse(
    item %in% c(1, 2), 1,
    ifelse(item %in% c(3, 4), 2, item)
  )
  as.numeric(as.character(collapsed))
})

# effectiveness: mean of the nine recoded items.
df <- mutate(
  df,
  effectiveness = (q132 + q133 + q136 + q168 + q169 + q170 +
    q171 + q172 + q173) / 9
)
# Economic assessment: reverse-code q1-q6, then average them.
# Note that `reverse_columns` is re-assigned here and no longer holds the
# trust items.
reverse_columns <- c("q1", "q2", "q3", "q4", "q5", "q6")
df[reverse_columns] <- lapply(X = df[reverse_columns], FUN = reverse_factor_levels)
df <- mutate(df, eco_assess = (q1 + q2 + q3 + q4 + q5 + q6) / 6)
# Dependent variable, version 1: answers 1 and 2 -> 0, answer 5 -> NA,
# every other answer -> 1.
df$q134_without <- replace(
  as.numeric(!(df$q134 %in% c(1, 2))),
  df$q134 %in% c(5),
  NA
)
# Dependent variable, version 2: answers 1 and 2 -> 0, every other answer
# (including 5) -> 1.
df$q134_with <- as.numeric(!(df$q134 %in% c(1, 2)))
summary(df)
## country Year IDnumber q1
## Min. : 1.000 Min. :2018 Min. : 1 Min. :1.000
## 1st Qu.: 4.000 1st Qu.:2018 1st Qu.: 518 1st Qu.:2.000
## Median : 6.000 Median :2019 Median : 1035 Median :3.000
## Mean : 7.084 Mean :2019 Mean : 375181 Mean :3.095
## 3rd Qu.:10.000 3rd Qu.:2019 3rd Qu.: 3642 3rd Qu.:4.000
## Max. :14.000 Max. :2020 Max. :6169101 Max. :5.000
##
## q2 q3 q4 q5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :4.000 Median :3.000 Median :3.000
## Mean :3.325 Mean :3.544 Mean :3.205 Mean :3.347
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
##
## q6 q7 q8 q9
## Min. :1.000 Min. :-1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.: 2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :4.000 Median : 4.000 Median :3.000 Median :3.000
## Mean :3.662 Mean : 3.068 Mean :3.416 Mean :3.335
## 3rd Qu.:4.000 3rd Qu.: 5.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. : 6.000 Max. :6.000 Max. :6.000
##
## q10 q11 q12 q13
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :4.000 Median :4.000
## Mean :3.064 Mean :3.184 Mean :3.694 Mean :3.927
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
##
## q14 q15 q16 q22
## Min. :1.000 Min. :1.000 Min. :-1.00 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:-1.00 1st Qu.:0.0000
## Median :4.000 Median :4.000 Median : 4.00 Median :0.0000
## Mean :3.595 Mean :3.669 Mean : 2.73 Mean :0.3248
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.: 5.00 3rd Qu.:1.0000
## Max. :6.000 Max. :6.000 Max. : 6.00 Max. :1.0000
##
## q46 q47 q58 q60
## Min. :0.000 Min. :0.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:2.000
## Median :2.000 Median :1.000 Median :2.000 Median :2.000
## Mean :1.453 Mean :1.419 Mean :1.829 Mean :2.159
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :3.000 Max. :4.000 Max. :4.000 Max. :4.000
##
## q62 q64 q65 q67
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :2.000 Median :2.000
## Mean :2.574 Mean :2.484 Mean :1.942 Mean :2.164
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000 Max. :4.000 Max. :4.000
##
## q98 q132 q133 q136
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :3.000 Median :2.000 Median :2.000
## Mean :2.504 Mean :2.419 Mean :1.781 Mean :1.892
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :3.000 Max. :2.000 Max. :2.000
##
## q166 q168 q169 q170 q171
## Min. :0.000 Min. :1.00 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.00 1st Qu.:1.000
## Median :3.000 Median :2.00 Median :2.000 Median :2.00 Median :2.000
## Mean :2.498 Mean :1.55 Mean :1.553 Mean :1.63 Mean :1.603
## 3rd Qu.:3.000 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.:2.000
## Max. :3.000 Max. :2.00 Max. :2.000 Max. :2.00 Max. :2.000
##
## q172 q173 SE2 SE3 SE5
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. : -1 7 :3211
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1963 9 :2287
## Median :2.000 Median :2.000 Median :1.0000 Median :1977 5 :1213
## Mean :1.678 Mean :1.587 Mean :0.5041 Mean :1974 3 : 982
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:1989 8 : 772
## Max. :2.000 Max. :2.000 Max. :1.0000 Max. :2002 6 : 682
## (Other):1080
## SE5A SE6 SE9 q134
## Min. :-1.00 9990 :3280 Min. :0.000 Min. :1.000
## 1st Qu.: 9.00 60 :2483 1st Qu.:0.000 1st Qu.:1.000
## Median :12.00 40 :1476 Median :1.000 Median :2.000
## Mean :13.07 10 : 985 Mean :0.739 Mean :2.297
## 3rd Qu.:14.00 9999 : 707 3rd Qu.:1.000 3rd Qu.:3.000
## Max. :99.00 20 : 497 Max. :1.000 Max. :5.000
## (Other): 799
## w trust trust_all effectiveness
## Min. :0.3665 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:0.7927 1st Qu.:2.400 1st Qu.:2.600 1st Qu.:1.556
## Median :1.0000 Median :3.400 Median :3.600 Median :1.778
## Mean :1.0099 Mean :3.375 Mean :3.508 Mean :1.744
## 3rd Qu.:1.1279 3rd Qu.:4.400 3rd Qu.:4.400 3rd Qu.:2.000
## Max. :4.7394 Max. :6.000 Max. :6.000 Max. :2.111
##
## eco_assess social_value age q134_without
## Min. :1.000 Min. :1.000 Min. : 18.00 Min. :0.0000
## 1st Qu.:2.833 1st Qu.:1.833 1st Qu.: 31.00 1st Qu.:0.0000
## Median :3.500 Median :2.167 Median : 43.00 Median :0.0000
## Mean :3.363 Mean :2.192 Mean : 45.73 Mean :0.2724
## 3rd Qu.:3.833 3rd Qu.:2.500 3rd Qu.: 57.00 3rd Qu.:1.0000
## Max. :5.000 Max. :4.000 Max. :2020.00 Max. :1.0000
## NA's :1072
## q134_with
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.3487
## 3rd Qu.:1.0000
## Max. :1.0000
##
# write.csv(df, "w5.csv", row.names = FALSE)
# save(df, file = "w5.rda")
# Baseline model: logistic regression of q134_with (answer 5 coded as 1) on
# the five-item trust index, effectiveness, eco_assess, social_value and the
# control variables, fitted on the pooled data.
# NOTE(review): social_value is not created in the visible part of this
# script -- confirm it is defined before this point.
# NOTE(review): the printed results show SE5 and SE6 entering as one dummy
# per level, so they must be factors here; that conversion is not shown in
# the visible part of this script either.
logit.fit1 <- glm(q134_with ~ trust + effectiveness + eco_assess + social_value
+ q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 + SE2 + SE3,
data = df, family = binomial())
# Print the coefficient table as plain text.
stargazer(logit.fit1, type = 'text',title = "Logistic Regression Results",
notes = 'Baseline model', style = 'default')
##
## Logistic Regression Results
## =============================================
## Dependent variable:
## ---------------------------
## q134_with
## ---------------------------------------------
## trust 0.045**
## (0.023)
##
## effectiveness 1.768***
## (0.087)
##
## eco_assess 0.205***
## (0.036)
##
## social_value 0.164***
## (0.052)
##
## q46 0.150***
## (0.031)
##
## q47 0.013
## (0.019)
##
## q166 0.040
## (0.039)
##
## q22 0.125**
## (0.049)
##
## SE9 -0.040
## (0.055)
##
## SE52 -0.029
## (0.198)
##
## SE53 -0.240
## (0.177)
##
## SE54 -0.368*
## (0.216)
##
## SE55 -0.245
## (0.174)
##
## SE56 0.0004
## (0.184)
##
## SE57 -0.281*
## (0.168)
##
## SE58 -0.183
## (0.183)
##
## SE59 -0.302*
## (0.170)
##
## SE510 0.083
## (0.214)
##
## SE599 -0.449
## (0.943)
##
## SE60 3.503**
## (1.677)
##
## SE61 0.640
## (1.137)
##
## SE610 1.099
## (1.129)
##
## SE620 1.097
## (1.131)
##
## SE628 13.693
## (187.038)
##
## SE630 -10.246
## (208.979)
##
## SE640 0.611
## (1.128)
##
## SE641 1.216
## (1.809)
##
## SE642 2.365*
## (1.372)
##
## SE650 0.874
## (1.147)
##
## SE660 1.392
## (1.127)
##
## SE661 2.244
## (1.497)
##
## SE670 -0.093
## (1.567)
##
## SE671 0.126
## (1.638)
##
## SE672 0.731
## (1.217)
##
## SE673 1.034
## (1.405)
##
## SE674 -0.214
## (1.244)
##
## SE675 1.599
## (1.189)
##
## SE676 0.361
## (1.149)
##
## SE677 -0.389
## (1.548)
##
## SE680 1.499
## (1.160)
##
## SE6201 1.628
## (1.839)
##
## SE6202 3.253**
## (1.596)
##
## SE69990 1.474
## (1.127)
##
## SE69999 1.976*
## (1.130)
##
## SE2 0.107**
## (0.045)
##
## SE3 0.001
## (0.001)
##
## Constant -7.993***
## (1.630)
##
## ---------------------------------------------
## Observations 10,227
## Log Likelihood -6,062.432
## Akaike Inf. Crit. 12,218.860
## =============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Baseline model
# "All trust" model: same specification as the baseline model, but with
# trust_all (the index built from all available trust items) in place of
# trust.
logit.fit2 <- glm(q134_with ~ trust_all + effectiveness + eco_assess + social_value
+ q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 + SE2 + SE3,
data = df, family = binomial())
# Print the coefficient table as plain text.
stargazer(logit.fit2, type = "text", title = "Logistic Regression Results",
notes = 'All trust model')
##
## Logistic Regression Results
## =============================================
## Dependent variable:
## ---------------------------
## q134_with
## ---------------------------------------------
## trust_all 0.018
## (0.024)
##
## effectiveness 1.766***
## (0.087)
##
## eco_assess 0.204***
## (0.036)
##
## social_value 0.154***
## (0.052)
##
## q46 0.152***
## (0.031)
##
## q47 0.014
## (0.019)
##
## q166 0.040
## (0.039)
##
## q22 0.122**
## (0.049)
##
## SE9 -0.046
## (0.055)
##
## SE52 -0.027
## (0.198)
##
## SE53 -0.232
## (0.177)
##
## SE54 -0.372*
## (0.216)
##
## SE55 -0.247
## (0.175)
##
## SE56 0.013
## (0.184)
##
## SE57 -0.280*
## (0.168)
##
## SE58 -0.178
## (0.183)
##
## SE59 -0.302*
## (0.170)
##
## SE510 0.080
## (0.214)
##
## SE599 -0.457
## (0.943)
##
## SE60 3.465**
## (1.672)
##
## SE61 0.613
## (1.137)
##
## SE610 1.089
## (1.129)
##
## SE620 1.068
## (1.131)
##
## SE628 13.629
## (187.086)
##
## SE630 -10.289
## (209.567)
##
## SE640 0.599
## (1.128)
##
## SE641 1.203
## (1.809)
##
## SE642 2.329*
## (1.373)
##
## SE650 0.851
## (1.147)
##
## SE660 1.374
## (1.127)
##
## SE661 2.187
## (1.497)
##
## SE670 -0.085
## (1.565)
##
## SE671 0.095
## (1.637)
##
## SE672 0.722
## (1.217)
##
## SE673 1.025
## (1.404)
##
## SE674 -0.215
## (1.244)
##
## SE675 1.568
## (1.189)
##
## SE676 0.323
## (1.149)
##
## SE677 -0.416
## (1.546)
##
## SE680 1.469
## (1.160)
##
## SE6201 1.623
## (1.841)
##
## SE6202 3.243**
## (1.596)
##
## SE69990 1.424
## (1.127)
##
## SE69999 1.979*
## (1.129)
##
## SE2 0.109**
## (0.045)
##
## SE3 0.001
## (0.001)
##
## Constant -7.891***
## (1.639)
##
## ---------------------------------------------
## Observations 10,227
## Log Likelihood -6,064.110
## Akaike Inf. Crit. 12,222.220
## =============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## All trust model
# Same specification as the "all trust" model, but with q134_without as the
# dependent variable. q134_without is NA where q134 == 5, and the printed
# summary reports those rows as deleted due to missingness.
logit.fit3 <- glm(q134_without ~ trust_all + effectiveness + eco_assess + social_value
+ q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 + SE2 + SE3,
data = df, family = binomial())
summary(logit.fit3)
##
## Call:
## glm(formula = q134_without ~ trust_all + effectiveness + eco_assess +
## social_value + q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 +
## SE2 + SE3, family = binomial(), data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.662264 3.594182 -3.801 0.000144 ***
## trust_all 0.012160 0.026825 0.453 0.650342
## effectiveness 1.475472 0.095776 15.406 < 2e-16 ***
## eco_assess 0.218218 0.041352 5.277 1.31e-07 ***
## social_value 0.148015 0.057776 2.562 0.010411 *
## q46 0.236304 0.034483 6.853 7.25e-12 ***
## q47 0.012848 0.021588 0.595 0.551743
## q166 0.041720 0.042881 0.973 0.330594
## q22 0.143231 0.054138 2.646 0.008153 **
## SE9 -0.110830 0.060503 -1.832 0.066980 .
## SE52 0.285426 0.231272 1.234 0.217145
## SE53 0.050138 0.211996 0.237 0.813041
## SE54 -0.285038 0.259083 -1.100 0.271254
## SE55 -0.054988 0.211513 -0.260 0.794884
## SE56 0.048442 0.222560 0.218 0.827695
## SE57 -0.070516 0.205468 -0.343 0.731452
## SE58 0.071693 0.220774 0.325 0.745383
## SE59 -0.077400 0.208735 -0.371 0.710781
## SE510 0.448229 0.247548 1.811 0.070192 .
## SE599 -0.441058 1.180535 -0.374 0.708696
## SE60 3.062748 1.821625 1.681 0.092699 .
## SE61 0.514317 1.137266 0.452 0.651096
## SE610 0.813305 1.129399 0.720 0.471450
## SE620 0.741331 1.131445 0.655 0.512335
## SE628 13.694479 229.552074 0.060 0.952429
## SE630 -10.390412 210.713374 -0.049 0.960672
## SE640 0.313394 1.128782 0.278 0.781289
## SE641 1.155824 1.809588 0.639 0.523003
## SE642 2.282241 1.374861 1.660 0.096919 .
## SE650 0.658163 1.149417 0.573 0.566912
## SE660 1.038468 1.127155 0.921 0.356885
## SE661 2.224971 1.492449 1.491 0.136009
## SE670 -0.169023 1.571605 -0.108 0.914354
## SE671 -11.735924 181.686054 -0.065 0.948497
## SE672 0.602499 1.215956 0.495 0.620251
## SE673 0.949015 1.401863 0.677 0.498427
## SE674 -0.362842 1.243951 -0.292 0.770527
## SE675 1.518661 1.188123 1.278 0.201178
## SE676 0.132123 1.152167 0.115 0.908704
## SE677 -0.434975 1.544738 -0.282 0.778262
## SE680 1.069311 1.167952 0.916 0.359906
## SE6201 1.676391 1.813473 0.924 0.355274
## SE6202 2.973331 1.630159 1.824 0.068159 .
## SE69990 0.972345 1.127308 0.863 0.388392
## SE69999 1.352663 1.130335 1.197 0.231426
## SE2 0.135221 0.050359 2.685 0.007249 **
## SE3 0.003857 0.001765 2.185 0.028860 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10723 on 9154 degrees of freedom
## Residual deviance: 10049 on 9108 degrees of freedom
## (1072 observations deleted due to missingness)
## AIC: 10143
##
## Number of Fisher Scoring iterations: 11
# Single-country model: restrict the data to country code 7 (Taiwan, going by
# the variable name) and refit the baseline specification.
df_taiwan <- df %>% filter(country == 7)
logit.taiwan <- glm(q134_with ~ trust + effectiveness + eco_assess + social_value
+ q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 + SE2 + SE3,
data = df_taiwan, family = binomial())
# NOTE(review): a few SE6 levels have very large standard errors in the
# printed output -- check for sparse categories in this subsample.
summary(logit.taiwan)
##
## Call:
## glm(formula = q134_with ~ trust + effectiveness + eco_assess +
## social_value + q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 +
## SE2 + SE3, family = binomial(), data = df_taiwan)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -31.798454 15.976243 -1.990 0.0466 *
## trust 0.370776 0.158467 2.340 0.0193 *
## effectiveness 2.120331 0.395044 5.367 7.99e-08 ***
## eco_assess 0.293851 0.212906 1.380 0.1675
## social_value 0.770880 0.305622 2.522 0.0117 *
## q46 0.232900 0.147235 1.582 0.1137
## q47 -0.034521 0.084494 -0.409 0.6829
## q166 0.262436 0.177919 1.475 0.1402
## q22 -0.067578 0.209341 -0.323 0.7468
## SE9 -0.313529 0.228573 -1.372 0.1702
## SE52 -0.958445 1.287507 -0.744 0.4566
## SE53 -2.795773 1.117503 -2.502 0.0124 *
## SE54 -1.449779 1.563995 -0.927 0.3539
## SE55 -2.288218 1.058910 -2.161 0.0307 *
## SE56 -1.768343 1.155480 -1.530 0.1259
## SE57 -2.244534 1.008175 -2.226 0.0260 *
## SE58 -2.191096 1.021913 -2.144 0.0320 *
## SE59 -1.980338 1.015477 -1.950 0.0512 .
## SE510 -1.491070 1.037001 -1.438 0.1505
## SE610 0.812492 0.894323 0.908 0.3636
## SE620 0.456626 0.383760 1.190 0.2341
## SE630 -11.262063 882.743596 -0.013 0.9898
## SE660 0.017816 0.287265 0.062 0.9505
## SE661 3.120275 1.415847 2.204 0.0275 *
## SE676 -0.113988 0.343637 -0.332 0.7401
## SE677 -0.882852 1.191394 -0.741 0.4587
## SE680 0.341008 0.972133 0.351 0.7258
## SE69990 0.185350 0.286278 0.647 0.5173
## SE69999 14.992481 882.896185 0.017 0.9865
## SE2 0.298034 0.211084 1.412 0.1580
## SE3 0.012165 0.008307 1.464 0.1431
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 782.12 on 802 degrees of freedom
## Residual deviance: 647.62 on 772 degrees of freedom
## AIC: 709.62
##
## Number of Fisher Scoring iterations: 13
# Single-country model: restrict the data to country code 1 (Japan, going by
# the variable name) and refit the baseline specification.
df_japan <- df %>% filter(country == 1)
logit.japan <- glm(q134_with ~ trust + effectiveness + eco_assess + social_value
+ q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 + SE2 + SE3,
data = df_japan, family = binomial())
# NOTE(review): the intercept and all SE5 dummies have standard errors near
# 972 in the printed output -- check for sparse categories in this subsample.
summary(logit.japan)
##
## Call:
## glm(formula = q134_with ~ trust + effectiveness + eco_assess +
## social_value + q46 + q47 + q166 + q22 + SE9 + SE5 + SE6 +
## SE2 + SE3, family = binomial(), data = df_japan)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.501e+00 9.724e+02 0.004 0.9971
## trust -2.481e-01 1.416e-01 -1.752 0.0798 .
## effectiveness 2.854e+00 4.689e-01 6.088 1.14e-09 ***
## eco_assess -1.179e-01 2.081e-01 -0.566 0.5712
## social_value -8.265e-02 2.651e-01 -0.312 0.7552
## q46 3.210e-01 1.520e-01 2.112 0.0347 *
## q47 3.366e-02 1.069e-01 0.315 0.7528
## q166 -3.069e-01 1.574e-01 -1.950 0.0512 .
## q22 3.153e-01 1.960e-01 1.608 0.1077
## SE9 1.569e-01 2.329e-01 0.674 0.5005
## SE53 -1.473e+01 9.723e+02 -0.015 0.9879
## SE56 -1.472e+01 9.723e+02 -0.015 0.9879
## SE57 -1.496e+01 9.723e+02 -0.015 0.9877
## SE58 -1.448e+01 9.723e+02 -0.015 0.9881
## SE59 -1.452e+01 9.723e+02 -0.015 0.9881
## SE510 -1.444e+01 9.723e+02 -0.015 0.9881
## SE610 1.948e+00 1.509e+00 1.291 0.1966
## SE620 9.587e-01 2.189e+00 0.438 0.6614
## SE630 -1.452e+01 1.455e+03 -0.010 0.9920
## SE640 -1.440e+01 1.455e+03 -0.010 0.9921
## SE660 1.773e+00 1.156e+00 1.534 0.1250
## SE675 1.467e+00 1.247e+00 1.176 0.2395
## SE680 1.021e+00 1.283e+00 0.796 0.4261
## SE69990 1.724e+00 1.153e+00 1.495 0.1349
## SE2 -1.413e-02 2.049e-01 -0.069 0.9450
## SE3 2.893e-03 7.940e-03 0.364 0.7156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 768.70 on 557 degrees of freedom
## Residual deviance: 676.66 on 532 degrees of freedom
## AIC: 728.66
##
## Number of Fisher Scoring iterations: 14