library(tidyverse)
library(jsonlite)
library(corrplot)
# Ask the GitHub contents API for the listing of the repository's `results`
# directory; the parsed listing exposes a `download_url` for each entry.
j <- fromJSON("https://api.github.com/repos/g0v/referendum_report/contents/results")
# Read every listed file with read_csv(), giving a list of data frames.
dfs <- lapply(j$download_url, read_csv)
# Stack the per-file data frames into a single table.
df <- bind_rows(dfs)
# Reduce the raw table to the five columns used below, under English names,
# and drop rows that have no district (鄉鎮市區) value.
# NOTE(review): rows without a district are presumably county-level totals --
# confirm against the source data.
clean_df <-
  df %>%
  select(
    case = 案件,
    city = 縣市,
    dist = 鄉鎮市區,
    agree = 同意票數,
    votes = 投票數
  ) %>%
  filter(!is.na(dist)) %>%
  # Strip the "第" and "案" characters around the case label and convert what
  # remains to a number.
  mutate(case = as.numeric(str_remove(str_remove(case, "第"), "案")))

# Spot-check the cleaned data by printing the rows of a single district.
filter(clean_df, dist == "南竿鄉")
## # A tibble: 10 x 5
##     case city   dist   agree votes
##    <dbl> <chr>  <chr>  <dbl> <dbl>
##  1    10 連江縣 南竿鄉  2485  3277
##  2    11 連江縣 南竿鄉  2385  3285
##  3    12 連江縣 南竿鄉  2094  3283
##  4    13 連江縣 南竿鄉   658  3288
##  5    14 連江縣 南竿鄉   790  3288
##  6    15 連江縣 南竿鄉   849  3289
##  7    16 連江縣 南竿鄉  2201  3277
##  8     7 連江縣 南竿鄉  2773  3273
##  9     8 連江縣 南竿鄉  2708  3272
## 10     9 連江縣 南竿鄉  2746  3274
# Build a wide table of agree rates: one row per (city, dist) pair and one
# column per referendum case number, each cell holding agree / votes.
agree_df <-
  clean_df %>%
  mutate(agree_rate = agree / votes) %>%
  select(-agree, -votes) %>%
  # pivot_wider() replaces the superseded group_by() + do(spread()) +
  # bind_rows() idiom. city and dist are the remaining id columns, so
  # districts sharing a name in different cities stay separate.
  # names_sort = TRUE keeps the case columns in ascending numeric order, as
  # spread() produced; the default would use order of first appearance.
  pivot_wider(
    names_from = case,
    values_from = agree_rate,
    names_sort = TRUE
  ) %>%
  # Keep rows ordered by city then district, as the grouped version did.
  arrange(city, dist)
agree_df
## # A tibble: 368 x 12
##    city  dist    `7`   `8`   `9`  `10`  `11`  `12`  `13`  `14`  `15`  `16`
##    <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 高雄市… 阿蓮區… 0.730 0.690 0.704 0.723 0.660 0.580 0.484 0.297 0.328 0.527
##  2 高雄市… 大寮區… 0.738 0.701 0.714 0.729 0.672 0.616 0.479 0.299 0.299 0.540
##  3 高雄市… 大社區… 0.694 0.671 0.689 0.703 0.632 0.578 0.499 0.319 0.332 0.528
##  4 高雄市… 大樹區… 0.734 0.691 0.699 0.736 0.667 0.589 0.509 0.285 0.299 0.545
##  5 高雄市… 鳳山區… 0.743 0.713 0.735 0.716 0.671 0.617 0.444 0.309 0.312 0.564
##  6 高雄市… 岡山區… 0.752 0.718 0.731 0.731 0.679 0.621 0.443 0.291 0.294 0.573
##  7 高雄市… 鼓山區… 0.739 0.716 0.728 0.696 0.668 0.615 0.450 0.323 0.326 0.564
##  8 高雄市… 湖內區… 0.729 0.686 0.682 0.726 0.660 0.594 0.497 0.298 0.298 0.538
##  9 高雄市… 茄萣區… 0.682 0.656 0.662 0.722 0.665 0.596 0.501 0.288 0.318 0.501
## 10 高雄市… 甲仙區… 0.729 0.688 0.680 0.720 0.634 0.573 0.416 0.254 0.263 0.509
## # ... with 358 more rows
# Correlation matrix of the per-case agree rates (identifier columns removed),
# drawn with corrplot's mixed layout.
agree_df %>%
  select(-city, -dist) %>%
  cor() %>%
  corrplot.mixed()

# Principal component analysis of the per-case agree rates. cor = TRUE bases
# the decomposition on the correlation matrix rather than the covariance
# matrix. TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
pca <- agree_df %>%
  select(-city, -dist) %>%
  princomp(cor = TRUE)

# Plot the variance of each component.
plot(pca)

# Print standard deviation and (cumulative) proportion of variance per component.
summary(pca)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4
## Standard deviation     2.1835831 1.8414376 0.92652600 0.62470596
## Proportion of Variance 0.4768035 0.3390892 0.08584504 0.03902575
## Cumulative Proportion  0.4768035 0.8158927 0.90173777 0.94076352
##                            Comp.5     Comp.6     Comp.7      Comp.8
## Standard deviation     0.49677522 0.33405497 0.27511725 0.254920205
## Proportion of Variance 0.02467856 0.01115927 0.00756895 0.006498431
## Cumulative Proportion  0.96544208 0.97660136 0.98417031 0.990668738
##                             Comp.9     Comp.10
## Standard deviation     0.226688490 0.204755837
## Proportion of Variance 0.005138767 0.004192495
## Cumulative Proportion  0.995807505 1.000000000
# Print the loadings matrix: the weight of each case on each component.
loadings(pca)
## 
## Loadings:
##    Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 7   0.328  0.334  0.119  0.281  0.276         0.744  0.182  0.137        
## 8   0.350  0.326                0.237        -0.201 -0.297 -0.392 -0.651 
## 9   0.268  0.416         0.147  0.254 -0.338 -0.545  0.194         0.463 
## 10  0.300 -0.371         0.381  0.118  0.411        -0.415 -0.294  0.425 
## 11  0.383 -0.231 -0.288  0.137         0.225 -0.216         0.710 -0.302 
## 12  0.274 -0.108 -0.814 -0.224        -0.255  0.183        -0.305        
## 13 -0.376        -0.264  0.811        -0.196         0.125 -0.130 -0.238 
## 14 -0.251  0.402 -0.306                0.742 -0.116  0.294 -0.113        
## 15 -0.290  0.382 -0.260                             -0.748  0.329  0.129 
## 16  0.313  0.313         0.150 -0.879                                    
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings       1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0
## Proportion Var    0.1    0.1    0.1    0.1    0.1    0.1    0.1    0.1
## Cumulative Var    0.1    0.2    0.3    0.4    0.5    0.6    0.7    0.8
##                Comp.9 Comp.10
## SS loadings       1.0     1.0
## Proportion Var    0.1     0.1
## Cumulative Var    0.9     1.0
# Biplot of observations and variables on the first two components.
biplot(pca, choices = c(1L, 2L))