library(slidify)
create_deck(“Topic03”, git = TRUE)


R 統計軟體初階課程


Topic 3: 相關性檢定

  1. 相關係數分析
  2. 交叉表分析
  3. 變數編碼
  4. 製作量尺或指標
  5. 把結果做成HTML

範例檔:


1. 相關係數分析

用於當X與Y都是連續型變數
load("wgcoll.rda")
cor(wgc$aa, wgc$pe)
## [1] 0.7931

2. 交叉表分析(CrossTabulate Analysis)

用於當X與Y都是類別型變數
install.packages("gmodels")
我們要使用這個套件中的CrossTable()指令
library(gmodels)

3. 描述資料檔和變數重新編碼

load("teds2006_kao.rda")
attach(kao06)

變數編碼與清理無效值

library(car)
## Loading required package: MASS
## Loading required package: nnet
回應變數:投票參與
kao06$turnout <- NA
kao06$turnout[H01 == 1] <- 1  #1有;2沒有
kao06$turnout[H01 == 2] <- 0
# 也可以寫為:kao06$campID <- recode(H01, '2=0; c(91, 95)=NA',
# as.factor.result=TRUE)

table(H01)
## H01
##    1    2   91   95 
## 1062  194    2    4
table(kao06$turnout)
## 
##    0    1 
##  194 1062

解釋變數:藍綠陣營傾向
table(L01)
## L01
##   1   2   3   4   5  95  96  97  98 
##  69 231 554 238  77  19  19  14  41

# 綠1;藍2;中間3
kao06$campID <- recode(L01, "2=1; 4=2; 5=2; 95:98=NA", as.factor.result = TRUE)
table(kao06$campID)
## 
##   1   2   3 
## 300 315 554

描述變數的次數分配

CrossTable(campID)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1169 
## 
##  
##           |         1 |         2 |         3 | 
##           |-----------|-----------|-----------|
##           |       300 |       315 |       554 | 
##           |     0.257 |     0.269 |     0.474 | 
##           |-----------|-----------|-----------|

CrossTable(turnout)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1256 
## 
##  
##           |         0 |         1 | 
##           |-----------|-----------|
##           |       194 |      1062 | 
##           |     0.154 |     0.846 | 
##           |-----------|-----------|

分析兩個變數之間是否存在統計上的關聯

研究問題:那一陣營的支持者比較積極參與投票?

- H1 (alternativehypothesis):政黨傾向與投票參與兩者有關

- 請上機實際操作:

CrossTable(campID, turnout)
CrossTable(campID, turnout, prop.r = TRUE, prop.t = FALSE, prop.c = TRUE, 
    prop.chisq = FALSE, chisq = TRUE)

那麼,性別呢?

CrossTable(gender,turnout,prop.r=TRUE,prop.t=FALSE,prop.c=TRUE,prop.chisq=FALSE,chisq=TRUE)

年齡呢?

CrossTable(AGE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1262 
## 
##  
##           |         1 |         2 |         3 |         4 |         5 | 
##           |-----------|-----------|-----------|-----------|-----------|
##           |       229 |       194 |       294 |       296 |       249 | 
##           |     0.181 |     0.154 |     0.233 |     0.235 |     0.197 | 
##           |-----------|-----------|-----------|-----------|-----------|
CrossTable(AGE, turnout, prop.r = TRUE, prop.t = FALSE, prop.c = TRUE, 
    prop.chisq = FALSE, chisq = TRUE)
detach(kao06)
save(kao06, file = "teds2006_kao.rda", compress = TRUE)  # 重新儲存資料檔,把新增的變數打包帶走

回家作業:

請問以上的發現是否適用於台北?


4. 製作量尺(scale)或指標(index)


量尺的製作方式

對媒體新聞的注意力指標
load("teds2006_kao.rda")
attach(kao06)
## The following object(s) are masked from 'kao06 (position 6)':
## 
##     A01, A02, A03, A04, A05, A06, A07, AGE, B01, B02, B03, B06_1,
##     B06_10, B06_2, B06_3, B06_4, B06_5, B06_6, B06_7, B06_8,
##     B06_9, B07, B08, B1A_1, B1A_2, B1A_3, B1A_4, B1A_5, B1A_6,
##     B1A_7, B1A_8, B1B, B1C, B2A, B2B, B2C, B4A, B4B_1, B4B_2,
##     B4B_3, B4B_4, B4B_5, B4B_6, B4B_7, B4B_8, B4B_9, B4C, B4D_1,
##     B4D_2, B4D_3, B4D_4, B4D_5, B4D_6, B4D_7, B4D_8, B4D_9, B5A,
##     B5B_1, B5B_2, B5B_3, B5C_1, B5C_2, B5C_3, B5C_4, B5C_5, B5C_6,
##     B5C_7, B5C_8, B5C_9, C01, C02, C03, campID, CITY, D1A, D1B,
##     D1C, D1D, D1E, D2A, D2B, D2C, D2D, D2E, DISTRICT, E1A, E1B,
##     EDU, F01, F02, F03, F04, G01, G02, G03, G04, H01, H02, H03,
##     H04, H05, H06, H07, H08, H1A, H1B, H1C, H1D, H1E, H1F, H1G,
##     H1H, H1I, H3A, H3B, H3C, H3D, H7A, H7B, H8A, I01, I02, I03,
##     I04, I05, I06, I07, I08, I09, I10_1, I10_2, I10_3, I10_4,
##     I10_5, I10_6, I1A, ID, IN_NUM, IN_SEX, J01, J02, J03, J1A,
##     K01, L01, L02, L05, L06, L08, L09, L2A, L2B, L2C, L3A, L3B,
##     L3C, L3D, L3E, L4A, L4B, L4C, L4D, L4E, L7A, L7B, L7C, L7D,
##     L7E, M01, M02, M03, M04, M05, M06, M07, M08, mediaAtt, N01,
##     N02, N03, N04, N05, N06, N07, N08, N09, N10, N11, N12, N12A,
##     N13, N14, N15, N16, N17, N18, N7A, N9A, N9B, P01_1, P01_2,
##     P01_3, P01_4, P01_5, P01_6, P01_7, P01_8, P01_9, P03, P04,
##     P05, P06, P07, P08, P09_1, P09_2, P09_3, P09_4, P09_5, P2A,
##     P2B, P2C, P2D, P2E, P2F, P2G, P9A, radio2, REGION, SEX,
##     SU_NUM, TAU, turnout, V01, V02, V03, V04, V05, V06, V07, V08,
##     V09, V159_1, V159_2, V159_3, V159_4, V159_5, V159_6, V159_7,
##     V159_8, V159_9, V161_1, V161_2, V161_3, V161_4, V161_5,
##     V161_6, V161_7, V161_8, V161_9, V163_1, V163_2, V163_3,
##     V163_4, V163_5, V163_6, V163_7, V163_8, V163_9, VILLAGE, W

tv <- A01
tv[tv == 7] <- 0
tv[tv > 7] <- NA

radio <- NA
radio[A02 == 1] <- 1
radio[A02 == 2] <- 2
radio[A02 == 3] <- 3
radio[A02 == 4] <- 4
radio[A02 == 5] <- 5
radio[A02 == 6] <- 6
radio[A02 == 7] <- 0

internet <- A03
internet[internet == 7] <- 0
internet[internet > 7] <- NA

newspaper <- A04
newspaper[newspaper == 7] <- 0
newspaper[newspaper > 7] <- NA

tmp <- cbind(tv, radio, internet, newspaper)  # column bind

kao06$mediaAtt <- apply(tmp, 1, sum)  #新變數掛上資料檔成為新的一欄

檢查指標所作的加總是否正確

tail(cbind(tv, radio, internet, newspaper, mediaAtt))
##         tv radio internet newspaper mediaAtt
## [1257,]  4     4        0         0        8
## [1258,]  1     1        5         1        8
## [1259,]  1     0        1         1        3
## [1260,]  1     0        1         1        3
## [1261,]  1     0        0         1        2
## [1262,]  0     6        0         6       12

[進階] 使用條件式指令ifelse()

ifelse(A,B,C)

attach(kao06)
## The following object(s) are masked from 'kao06 (position 3)':
## 
##     A01, A02, A03, A04, A05, A06, A07, AGE, B01, B02, B03, B06_1,
##     B06_10, B06_2, B06_3, B06_4, B06_5, B06_6, B06_7, B06_8,
##     B06_9, B07, B08, B1A_1, B1A_2, B1A_3, B1A_4, B1A_5, B1A_6,
##     B1A_7, B1A_8, B1B, B1C, B2A, B2B, B2C, B4A, B4B_1, B4B_2,
##     B4B_3, B4B_4, B4B_5, B4B_6, B4B_7, B4B_8, B4B_9, B4C, B4D_1,
##     B4D_2, B4D_3, B4D_4, B4D_5, B4D_6, B4D_7, B4D_8, B4D_9, B5A,
##     B5B_1, B5B_2, B5B_3, B5C_1, B5C_2, B5C_3, B5C_4, B5C_5, B5C_6,
##     B5C_7, B5C_8, B5C_9, C01, C02, C03, campID, CITY, D1A, D1B,
##     D1C, D1D, D1E, D2A, D2B, D2C, D2D, D2E, DISTRICT, E1A, E1B,
##     EDU, F01, F02, F03, F04, G01, G02, G03, G04, H01, H02, H03,
##     H04, H05, H06, H07, H08, H1A, H1B, H1C, H1D, H1E, H1F, H1G,
##     H1H, H1I, H3A, H3B, H3C, H3D, H7A, H7B, H8A, I01, I02, I03,
##     I04, I05, I06, I07, I08, I09, I10_1, I10_2, I10_3, I10_4,
##     I10_5, I10_6, I1A, ID, IN_NUM, IN_SEX, J01, J02, J03, J1A,
##     K01, L01, L02, L05, L06, L08, L09, L2A, L2B, L2C, L3A, L3B,
##     L3C, L3D, L3E, L4A, L4B, L4C, L4D, L4E, L7A, L7B, L7C, L7D,
##     L7E, M01, M02, M03, M04, M05, M06, M07, M08, mediaAtt, N01,
##     N02, N03, N04, N05, N06, N07, N08, N09, N10, N11, N12, N12A,
##     N13, N14, N15, N16, N17, N18, N7A, N9A, N9B, P01_1, P01_2,
##     P01_3, P01_4, P01_5, P01_6, P01_7, P01_8, P01_9, P03, P04,
##     P05, P06, P07, P08, P09_1, P09_2, P09_3, P09_4, P09_5, P2A,
##     P2B, P2C, P2D, P2E, P2F, P2G, P9A, radio2, REGION, SEX,
##     SU_NUM, TAU, turnout, V01, V02, V03, V04, V05, V06, V07, V08,
##     V09, V159_1, V159_2, V159_3, V159_4, V159_5, V159_6, V159_7,
##     V159_8, V159_9, V161_1, V161_2, V161_3, V161_4, V161_5,
##     V161_6, V161_7, V161_8, V161_9, V163_1, V163_2, V163_3,
##     V163_4, V163_5, V163_6, V163_7, V163_8, V163_9, VILLAGE, W
## The following object(s) are masked from 'kao06 (position 7)':
## 
##     A01, A02, A03, A04, A05, A06, A07, AGE, B01, B02, B03, B06_1,
##     B06_10, B06_2, B06_3, B06_4, B06_5, B06_6, B06_7, B06_8,
##     B06_9, B07, B08, B1A_1, B1A_2, B1A_3, B1A_4, B1A_5, B1A_6,
##     B1A_7, B1A_8, B1B, B1C, B2A, B2B, B2C, B4A, B4B_1, B4B_2,
##     B4B_3, B4B_4, B4B_5, B4B_6, B4B_7, B4B_8, B4B_9, B4C, B4D_1,
##     B4D_2, B4D_3, B4D_4, B4D_5, B4D_6, B4D_7, B4D_8, B4D_9, B5A,
##     B5B_1, B5B_2, B5B_3, B5C_1, B5C_2, B5C_3, B5C_4, B5C_5, B5C_6,
##     B5C_7, B5C_8, B5C_9, C01, C02, C03, campID, CITY, D1A, D1B,
##     D1C, D1D, D1E, D2A, D2B, D2C, D2D, D2E, DISTRICT, E1A, E1B,
##     EDU, F01, F02, F03, F04, G01, G02, G03, G04, H01, H02, H03,
##     H04, H05, H06, H07, H08, H1A, H1B, H1C, H1D, H1E, H1F, H1G,
##     H1H, H1I, H3A, H3B, H3C, H3D, H7A, H7B, H8A, I01, I02, I03,
##     I04, I05, I06, I07, I08, I09, I10_1, I10_2, I10_3, I10_4,
##     I10_5, I10_6, I1A, ID, IN_NUM, IN_SEX, J01, J02, J03, J1A,
##     K01, L01, L02, L05, L06, L08, L09, L2A, L2B, L2C, L3A, L3B,
##     L3C, L3D, L3E, L4A, L4B, L4C, L4D, L4E, L7A, L7B, L7C, L7D,
##     L7E, M01, M02, M03, M04, M05, M06, M07, M08, mediaAtt, N01,
##     N02, N03, N04, N05, N06, N07, N08, N09, N10, N11, N12, N12A,
##     N13, N14, N15, N16, N17, N18, N7A, N9A, N9B, P01_1, P01_2,
##     P01_3, P01_4, P01_5, P01_6, P01_7, P01_8, P01_9, P03, P04,
##     P05, P06, P07, P08, P09_1, P09_2, P09_3, P09_4, P09_5, P2A,
##     P2B, P2C, P2D, P2E, P2F, P2G, P9A, radio2, REGION, SEX,
##     SU_NUM, TAU, turnout, V01, V02, V03, V04, V05, V06, V07, V08,
##     V09, V159_1, V159_2, V159_3, V159_4, V159_5, V159_6, V159_7,
##     V159_8, V159_9, V161_1, V161_2, V161_3, V161_4, V161_5,
##     V161_6, V161_7, V161_8, V161_9, V163_1, V163_2, V163_3,
##     V163_4, V163_5, V163_6, V163_7, V163_8, V163_9, VILLAGE, W

radio2 <- A02
radio2 <- ifelse(radio2 > 7, NA, ifelse(radio2 == 7, 0, radio2))
detach(kao06)

kao06$radio2 <- radio2  #看得懂嗎?(打包帶走)
save(kao06, file = "teds2006_kao.rda", compress = TRUE)  #檔案把包帶走

5. 把結果轉成html檔

方法一:使用套件

library(R2HTML)
HTMLStart(outdir="路徑",filename="檔名",echo=FALSE)

table(teds08l$eth,exclude=NULL) # 把你的程式碼放在`HTMLStart()`和`HTMLStop()`兩行中間

HTMLStop()

方法二:在Rstudio中使用Rmd檔