library(tidyverse)
library(jsonlite)
library(corrplot)
# Query the GitHub contents API for the repository's `results` path, read
# every listed download URL as a CSV, and stack the pieces into one table.
j <- fromJSON(
  "https://api.github.com/repos/g0v/referendum_report/contents/results"
)
dfs <- lapply(j$download_url, read_csv)
df <- bind_rows(dfs)
# Keep only rows that carry a township/district value, rename the Chinese
# column headers to short English names, and convert the case label to a
# number by removing the first "第" and the first "案" from it.
clean_df <- df %>%
  filter(!is.na(鄉鎮市區)) %>%
  select(
    case = 案件,
    city = 縣市,
    dist = 鄉鎮市區,
    agree = 同意票數,
    votes = 投票數
  ) %>%
  mutate(case = as.numeric(str_remove(str_remove(case, "第"), "案")))
# Spot-check the cleaned data by printing the rows of a single district.
filter(clean_df, dist == "南竿鄉")
## # A tibble: 10 x 5
## case city dist agree votes
## <dbl> <chr> <chr> <dbl> <dbl>
## 1 10 連江縣 南竿鄉 2485 3277
## 2 11 連江縣 南竿鄉 2385 3285
## 3 12 連江縣 南竿鄉 2094 3283
## 4 13 連江縣 南竿鄉 658 3288
## 5 14 連江縣 南竿鄉 790 3288
## 6 15 連江縣 南竿鄉 849 3289
## 7 16 連江縣 南竿鄉 2201 3277
## 8 7 連江縣 南竿鄉 2773 3273
## 9 8 連江縣 南竿鄉 2708 3272
## 10 9 連江縣 南竿鄉 2746 3274
# Build a wide table with one row per (city, dist) and one column per case,
# each cell holding that district's agree rate (agree / votes).
#
# pivot_wider() replaces the superseded group_by() + do(spread()) idiom: the
# per-group spread and the bind_rows() on a single data frame were redundant.
# names_sort = TRUE keeps the case columns in ascending numeric order
# regardless of the order in which the input files were read.
# Rows are sorted by city, then dist, for a deterministic order.
# NOTE(review): the exact collation of the Chinese names may differ from the
# ordering group_by() produced; this does not affect cor() or princomp().
agree_df <-
  clean_df %>%
  mutate(agree_rate = agree / votes) %>%
  select(-agree, -votes) %>%
  pivot_wider(
    names_from = case,
    values_from = agree_rate,
    names_sort = TRUE
  ) %>%
  arrange(city, dist)
agree_df
## # A tibble: 368 x 12
## city dist `7` `8` `9` `10` `11` `12` `13` `14` `15` `16`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 高雄市… 阿蓮區… 0.730 0.690 0.704 0.723 0.660 0.580 0.484 0.297 0.328 0.527
## 2 高雄市… 大寮區… 0.738 0.701 0.714 0.729 0.672 0.616 0.479 0.299 0.299 0.540
## 3 高雄市… 大社區… 0.694 0.671 0.689 0.703 0.632 0.578 0.499 0.319 0.332 0.528
## 4 高雄市… 大樹區… 0.734 0.691 0.699 0.736 0.667 0.589 0.509 0.285 0.299 0.545
## 5 高雄市… 鳳山區… 0.743 0.713 0.735 0.716 0.671 0.617 0.444 0.309 0.312 0.564
## 6 高雄市… 岡山區… 0.752 0.718 0.731 0.731 0.679 0.621 0.443 0.291 0.294 0.573
## 7 高雄市… 鼓山區… 0.739 0.716 0.728 0.696 0.668 0.615 0.450 0.323 0.326 0.564
## 8 高雄市… 湖內區… 0.729 0.686 0.682 0.726 0.660 0.594 0.497 0.298 0.298 0.538
## 9 高雄市… 茄萣區… 0.682 0.656 0.662 0.722 0.665 0.596 0.501 0.288 0.318 0.501
## 10 高雄市… 甲仙區… 0.729 0.688 0.680 0.720 0.634 0.573 0.416 0.254 0.263 0.509
## # ... with 358 more rows
# Pairwise correlations between the per-case agree-rate columns (the two
# identifier columns are dropped first), drawn with corrplot.mixed().
corrplot.mixed(cor(select(agree_df, -city, -dist)))

# Principal component analysis of the per-case agree rates, computed from the
# correlation matrix (cor = TRUE) instead of the covariance matrix.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the analysis.
pca <- agree_df %>%
  select(-city, -dist) %>%
  princomp(cor = TRUE)
# Scree plot of the component variances.
plot(pca)

# Standard deviation and proportion of variance explained by each component.
summary(pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 2.1835831 1.8414376 0.92652600 0.62470596
## Proportion of Variance 0.4768035 0.3390892 0.08584504 0.03902575
## Cumulative Proportion 0.4768035 0.8158927 0.90173777 0.94076352
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.49677522 0.33405497 0.27511725 0.254920205
## Proportion of Variance 0.02467856 0.01115927 0.00756895 0.006498431
## Cumulative Proportion 0.96544208 0.97660136 0.98417031 0.990668738
## Comp.9 Comp.10
## Standard deviation 0.226688490 0.204755837
## Proportion of Variance 0.005138767 0.004192495
## Cumulative Proportion 0.995807505 1.000000000
# Print the loadings matrix: the weight of each case column on each component.
loadings(pca)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 7 0.328 0.334 0.119 0.281 0.276 0.744 0.182 0.137
## 8 0.350 0.326 0.237 -0.201 -0.297 -0.392 -0.651
## 9 0.268 0.416 0.147 0.254 -0.338 -0.545 0.194 0.463
## 10 0.300 -0.371 0.381 0.118 0.411 -0.415 -0.294 0.425
## 11 0.383 -0.231 -0.288 0.137 0.225 -0.216 0.710 -0.302
## 12 0.274 -0.108 -0.814 -0.224 -0.255 0.183 -0.305
## 13 -0.376 -0.264 0.811 -0.196 0.125 -0.130 -0.238
## 14 -0.251 0.402 -0.306 0.742 -0.116 0.294 -0.113
## 15 -0.290 0.382 -0.260 -0.748 0.329 0.129
## 16 0.313 0.313 0.150 -0.879
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
## Proportion Var 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
## Cumulative Var 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8
## Comp.9 Comp.10
## SS loadings 1.0 1.0
## Proportion Var 0.1 0.1
## Cumulative Var 0.9 1.0
# Biplot of the observations and case loadings on the first two components.
biplot(pca, choices = c(1L, 2L))
