ggstatsplot::ggbarstats(
data = ISLR::Default,
x = student,
y = default,
#sampling.plan = "jointMulti",
title = "Student and Default",
xlab = "Student",
legend.title = "Default",
#ggtheme = hrbrthemes::theme_ipsum_pub(),
ggplot.component = list(scale_x_discrete(
guide = guide_axis(n.dodge = 2))),
palette = "Set2",
messages = FALSE
)
Figure 1.1: 學生身份與信用卡違約的相關
\[PRE=\frac{E_{1}-E{2}}{E_{1}}\]
rawd0<-matrix(c("","","新聞台","","",
"","TVBS","民視","壹電視","sum",
"frequency","38","32","25","95"),
ncol=3, nrow=5)
kable(rawd0, booktabs=T, caption="觀看新聞台的次數分配") %>%
kable_styling(full_width=F, bootstrap_options = "striped", font_size=14)
frequency | ||
TVBS | 38 | |
新聞台 | 民視 | 32 |
壹電視 | 25 | |
sum | 95 |
rawd<-matrix(c("","","新聞台","","",
"","TVBS","民視","壹電視","sum",
"軍公教","10","5","15","30",
"私部門職員","15","10","5","30",
"勞工","5","15","5","25",
"農林漁牧","8","2","0","10",
"sum","38","32","25","95"),
ncol=7, nrow=5)
kable(rawd, booktabs=T, caption="職業與新聞台之交叉表") %>%
kable_styling(full_width=F, bootstrap_options = "striped", font_size=14) %>%
add_header_above(c(""," ", "職業" = 5))
軍公教 | 私部門職員 | 勞工 | 農林漁牧 | sum | ||
TVBS | 10 | 15 | 5 | 8 | 38 | |
新聞台 | 民視 | 5 | 10 | 15 | 2 | 32 |
壹電視 | 15 | 5 | 5 | 0 | 25 | |
sum | 30 | 30 | 25 | 10 | 95 |
Ld<-here::here('data','lambda.txt')
dt<-read.table(Ld, sep=';', header=T)
kable(dt, booktabs=T, caption="四種職業看新聞台之正確與錯誤") %>%
kable_styling(full_width=F, bootstrap_options = "striped", font_size=14)
Rating.of..Department | Modal.Category | Right.Guesses | Wrong.Guesses |
---|---|---|---|
軍公教人員 (N=30) | Good | 15 | 15 |
私部門職員 (N=30) | Poor | 15 | 15 |
勞工 (N=25) | Satisfactory | 15 | 10 |
農林漁牧 (N=10) | Poor | 8 | 2 |
Total(N=95) | 53 | 42 |
\[E_{1}=N_{\text{total}}-N_{\text{mode}\hspace{.4em}\text{of}\hspace{.4em}\text{dependent}\hspace{.4em}\text{variable}}\]
\[E_{2}=\sum(N_{\text{category}}-N_{\text{mode}\hspace{.4em} \text{of}\hspace{.4em}\text{category}})\]
\[\begin{align*} \frac{E_{1}-E_{2}}{E_{1}} & = \frac{57-42}{57} \nonumber \\ & = \frac{11}{53} \nonumber \\ & = 0.263 \end{align*}\]
\[\lambda_{symmetric}=\frac{\sum Y_{x, modal}+\sum X_{y, modal}}{2N-(Y_{modal}+X_{modal})}\]
lambdatab=cbind(c(10,5,15),c(15,10,5),c(5,15,5),
c(8,2,0))
lambdatab
## [,1] [,2] [,3] [,4]
## [1,] 10 15 5 8
## [2,] 5 10 15 2
## [3,] 15 5 5 0
DescTools::Lambda(lambdatab, direction = "row")
## [1] 0.2632
M <- as.table(cbind(c(1768,946,115), c(807,1387,438), c(189,746,288), c(47,53,16)))
dimnames(M) <- list(paste("A", 1:3), paste("B", 1:4))
M
## B 1 B 2 B 3 B 4
## A 1 1768 807 189 47
## A 2 946 1387 746 53
## A 3 115 438 288 16
# direction default is "symmetric"
DescTools::Lambda(M)
## [1] 0.2076
DescTools::Lambda(M, conf.level=0.95)
## lambda lwr.ci upr.ci
## 0.2076 0.1872 0.2281
DescTools::Lambda(M, direction="row")
## [1] 0.2241
DescTools::Lambda(M, direction="column")
## [1] 0.1924
\[\Gamma: (N_{s}-N_{d})/(N_{s}+N_{d})\]
TT<-matrix(c(""," ","Y"," ",
"","1","2","3","1","f11","f21","f31",
"2","f12","f22","f32",
"3","f13","f23","f33"),
ncol=5, nrow=4)
knitr::kable(TT, 'html',booktabs=T, caption="3X3表格") %>%
kable_styling(c("striped", "bordered")) %>%
add_header_above(c(""," ", "X" = 3))
1 | 2 | 3 | ||
1 | f11 | f12 | f13 | |
Y | 2 | f21 | f22 | f23 |
3 | f31 | f32 | f33 |
TT<-matrix(c("","","教育程度","", "",
"","國中及以下", "高中五專","大學以上","總和",
"變差", "35", "76", "22", "133",
"差不多"," 73", "134", "56", "263",
"變好","30","54","30","114",
"總和","138", "264","108","150"),
ncol=6, nrow=5)
knitr::kable(TT, 'html', caption='學歷與對經濟的看法') %>%
kable_styling(c("striped", "bordered")) %>%
add_header_above(c(""," ", "經濟變好或是不好" = 3,""))
變差 | 差不多 | 變好 | 總和 | ||
國中及以下 | 35 | 73 | 30 | 138 | |
教育程度 | 高中五專 | 76 | 134 | 54 | 264 |
大學以上 | 22 | 56 | 30 | 108 | |
總和 | 133 | 263 | 114 | 150 |
ns<-35*(134+56+54+30)+73*(54+30)+76*(56+30)+134*30
nd<-30*(76+22+134+56)+54*(22+56)+73*(76+22)+134*22
(ns-nd)/(ns+nd)
## [1] 0.06752
R
計算Goodman-Kruskal’s \(\Gamma\):Gtable<-cbind(c(35,76,22),c(73,134,56),
c(30,54,30))
DescTools::GoodmanKruskalGamma(Gtable, direction="column")
## [1] 0.06752
\[\tau-b=\frac{N_{s}-N_{d}}{\sqrt{N_{s}+N_{d}+T_{x}}\cdot\sqrt{N_{s}+N_{d}+T_{y}}}\]
DescTools::GoodmanKruskalTau(Gtable)
## [1] 0.003948
tx<-35*(76+22)+76*22+73*(134+56)+134*56+30*(54+30)+54*30
ty<-35*(73+30)+73*30+76*(134+54)+134*54+22*(56+30)+56*30
taub<-(ns-nd)/sqrt((ns+nd+tx)*(ns+nd+ty))
taub
## [1] 0.04156
DescTools::KendallTauB(Gtable, direction="column")
## [1] 0.04156
Kendall’s \(\tau-b\) 在樣本數大於10的時候,抽樣分佈會形成標準常態分配,也就是可以計算Z值,並檢驗假設。
Kendall’s \(\tau-b\) 與 Spearman rank coefficient一樣, 可以用來計算兩個順序變數之間的相關程度,以-1到+1表示相關程度。
X<-c(rep(1,10), rep(2, 10), rep(3,15), rep(4,5))
Y1<-c(rep(1,10), rep(3, 10), rep(4,15), rep(6,5))
D1<-DescTools::SomersDelta(X,Y1)
Y2<-c(rep(6,10), rep(4, 10), rep(3,15), rep(1,5))
D2<-DescTools::SomersDelta(X,Y2)
\[\rm{Delta}=(N_{s}-N_{d})/(N_{s}+N_{d}+T_{y})\]
★試計算以下的表格2.6的Somers’ D:
tab <- as.table(rbind(c(26,26,23,18,9),c(6,7,9,14,23)))
knitr::kable(tab,format = 'simple',
booktabs=T,caption="交叉列表範例")
A | B | C | D | E | |
---|---|---|---|---|---|
A | 26 | 26 | 23 | 18 | 9 |
B | 6 | 7 | 9 | 14 | 23 |
# Somers' D C|R
DescTools::SomersDelta(tab, direction="column", conf.level=0.95)
## somers lwr.ci upr.ci
## 0.4427 0.2786 0.6068
# Somers' D R|C
DescTools::SomersDelta(tab, direction="row", conf.level=0.95)
## somers lwr.ci upr.ci
## 0.2569 0.1593 0.3546
etable<-cbind(c(35,76,22),c(73,134,56),
c(30,54,30))
DescTools::SomersDelta(etable, direction="column")
## [1] 0.04163
with(mtcars, stats::cor.test(cyl, gear,method='spearman'))
##
## Spearman's rank correlation rho
##
## data: cyl and gear
## S = 8535, p-value = 8e-04
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5643
with(mtcars, stats::cor.test(cyl, gear,method='kendall'))
##
## Kendall's rank correlation tau
##
## data: cyl and gear
## z = -3.2, p-value = 0.002
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## -0.5125
卡方檢定可分為適合度檢定(goodness of fit)以及獨立性檢定(test of independence)。前者檢定我們感興趣的變數是否來自特定的機率分佈的假設,後者檢定兩個類別變數是否獨立的假設。
v1<-here::here('Fig','chi2.jpg')
knitr::include_graphics(v1)
Figure 3.1: 4種自由度的卡方分佈機率密度圖
期待值計算方式:
\[\hat{n_{ij}}=(RT\times CT)/ N\]
tbyt<-matrix(c("","","Y","",
"","1","2","Total",
"1","f11","f21","C1",
"2","f12","f22","C2",
"Total","R1","R2","N"),
ncol=5, nrow=4)
knitr::kable(tbyt, 'html', caption='2X2表格') %>%
kable_styling(c("striped", "bordered")) %>%
add_header_above(c(""," ", "X" = 3))
1 | 2 | Total | ||
1 | f11 | f12 | R1 | |
Y | 2 | f21 | f22 | R2 |
Total | C1 | C2 | N |
tbythy<-matrix(c("","","Y","",
"","1","2","Total",
"1","25","25","50",
"2","25","25","50",
"Total","50","50","100"),
ncol=5, nrow=4)
knitr::kable(tbythy, 'html', caption='2X2表格假想1') %>%
kable_styling(c("striped", "bordered")) %>%
add_header_above(c(""," ", "X" = 3))
1 | 2 | Total | ||
1 | 25 | 25 | 50 | |
Y | 2 | 25 | 25 | 50 |
Total | 50 | 50 | 100 |
tbythy<-matrix(c("","","Y","",
"","1","2","Total",
"1","50","0","50",
"2","0","50","50",
"Total","50","50","100"),
ncol=5, nrow=4)
knitr::kable(tbythy, 'html', caption='2X2表格假想2') %>%
kable_styling(c("striped", "bordered")) %>%
add_header_above(c(""," ", "X" = 3))
1 | 2 | Total | ||
1 | 50 | 0 | 50 | |
Y | 2 | 0 | 50 | 50 |
Total | 50 | 50 | 100 |
\[\mathcal{X}^2=\sum_{i}\sum_{j}\frac{(n_{ij}-\hat{n_{ij}})^2}{\hat{n_{ij}}}\]
或者是
\[\mathcal{X}^2=\sum_{i}\sum_{j}\frac{(O_{ij}-E_{ij})^2}{E_{ij}}\]
\[\mathcal{X}^2=\sum_{i}\sum_{j}\frac{(|O_{ij}-E_{ij}|-0.5)^2}{E_{ij}}\]
\[E_{k}=\frac{N}{n}\]
\(H_{0}\)為觀察值平均分佈在每一類別,如果拒斥這個假設,表示各類別之間的差距的確存在而非隨機分佈。
自由度為\(k-1\)。因為觀察值的總和為\(N\),只要知道\(k-1\)個類別的觀察值就知道最後一個類別的觀察值。
另一方面,因為單一分佈的總和是固定的,所以在這一個限制下理論上的數值變化就喪失一個自由度,寫成\(p=1\)。\(k-p=k-1\)就是自由度。
抽出100位1990年之後的出生的受訪者,觀察到男性為52人、女性為48人,請問這是平均分佈嗎?
計算為
\[\mathcal{X}^2=\frac{(52-50)^2}{50}+\frac{(48-50)^2}{50}=0.16\]
現代統計學的第696-697頁提供拒斥域的機率值(\(\alpha\))對應的卡方分配的臨界值表,左邊每一列是自由度,每一欄\(\mathcal{X^2}_{\alpha}\)則是拒斥域\(\alpha\)的機率值,\(P(\mathcal{X^2}>\mathcal{X_{a}^2})\alpha\)。表格的由左而右,代表\(\alpha\)越來越小,非拒斥域的機率值越來越大。在同樣的自由度下,拒斥域越小,卡方分配的臨界值也就越大。
右尾卡方值參考表 3.4:
rtchi<-matrix(c("","conf. level","",
"", "5%","1%",
"1","3.84","6.63",
"2","5.99","9.21",
"3","7.82","11.34",
"4","9.49","13.28","5","11.07","15.09"),
ncol=7, nrow=3)
knitr::kable(rtchi, 'html',caption='右尾卡方值表') %>%
kable_styling(c("striped", "bordered")) %>%
add_header_above(c("","","d.f." = 5))
1 | 2 | 3 | 4 | 5 | ||
conf. level | 5% | 3.84 | 5.99 | 7.82 | 9.49 | 11.07 |
1% | 6.63 | 9.21 | 11.34 | 13.28 | 15.09 |
alpha=0.05
qchisq(1-alpha, df=3)
## [1] 7.815
1-pchisq(2.9, 1)
## [1] 0.08858
1-pchisq(0.16, 1)
## [1] 0.6892
查表可知機率值為0.68,因此無法拒斥資料來自母體的假設。
我們也可以用卡方檢定測試觀察值是否符合特定的分佈。例如我們經常做加權檢定,想要知道是否樣本符合母體,虛無假設是樣本分佈等於母體分佈,以性別而言,自由度為1。假設母體的分佈是:
男:\(\frac{9003150}{9003150+9184065}=0.495\)
女:\(\frac{9184065}{9003150+9184065}=0.505\)
假設我們觀察到1,067人,男女分佈如表 3.5,可計算對應的理論值:
tmp<-matrix(c("性別","N","母體值",
"男", "531", "528.165",
"女","539","538.835"),
ncol=3, nrow=3)
knitr::kable(tmp, 'html',caption='男女性別分布') %>%
kable_styling(c("striped", "bordered"))
性別 | 男 | 女 |
N | 531 | 539 |
母體值 | 528.165 | 538.835 |
計算卡方值: \[\begin{align*} \mathcal{X}^2 & =\frac{(531-528.165)^2}{528.165}+\frac{(539-538.835)^2}{538.835} \nonumber \\ & =0.015+0 \nonumber \\ & =0.015 \nonumber \\ & <3.84 \end{align*}\]
結論:我們無法拒斥\(H_{0}\):觀察值與母體有相同分佈在每一類別這個假設。
-如果我們想知道政黨支持與婚姻狀態是否互相獨立?以表 3.6 的資料為例:
婚姻 | Total | |||
已婚 | 其他 | |||
政黨支持 | 國民黨 | 141 | 77 | 218 |
民進黨 | 91 | 72 | 163 | |
Total | 232 | 149 | 381 |
marri<-c(141,91,77,72)
marripid<-matrix(marri,2)
marripid
## [,1] [,2]
## [1,] 141 77
## [2,] 91 72
rt<-margin.table(marripid,1)
ct<-margin.table(marripid,2)
newtable<-cbind(marripid,rt)
rbind(newtable,ct)
## rt
## 141 77 218
## 91 72 163
## ct 232 149 232
n=sum(marri)
e11<-rt[1]*ct[1]/n; e21<-rt[2]*ct[1]/n
e12<-rt[1]*ct[2]/n;e22<-rt[2]*ct[2]/n
expec <- matrix(c(e11,e21,e12,e22),2)
print(expec)
## [,1] [,2]
## [1,] 132.75 85.25
## [2,] 99.25 63.75
dt<-c((marripid[1,1]-expec[1,1])^2/e11,
(marripid[2,1]-expec[2,1])^2/e21,
(marripid[1,2]-expec[1,2])^2/e12,
(marripid[2,2]-expec[2,2])^2/e22)
dt
## [1] 0.5133 0.6865 0.7992 1.0689
chisq<-sum(dt)
chisq
## [1] 3.068
R
的函數計算,可以用\(\tt{chisq.test()}\)這個函式,得到Pearson’s 卡方檢定:marri<-c(141,91,77,72)
marripid<-matrix(marri,2)
chisq.test(marripid)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: marripid
## X-squared = 2.7, df = 1, p-value = 0.1
# YATES <- min(0.5, abs(x - E))
dt<-c((abs(marripid[1,1]-expec[1,1])-0.5)^2/e11,
(abs(marripid[2,1]-expec[2,1])-0.5)^2/e21,
(abs(marripid[1,2]-expec[1,2])-0.5)^2/e12,
(abs(marripid[2,2]-expec[2,2])-0.5)^2/e22)
dt
## [1] 0.4530 0.6059 0.7053 0.9433
chisq<-sum(dt)
chisq
## [1] 2.708
z=curve(dchisq(x, df=1), from=0, to=6)
abline(v = chisq, lty = 3, lwd=1.5, col='red2')
Figure 3.2: 自由度為1的卡方分佈
\[z_{ij}=\frac{O_{ij}-E_{ij}}{\sqrt{E_{ij}(1-{\mathrm {row\hspace{.2em} prop.}})(1-{\mathrm {column\hspace{.2em} prop.}})}}\]
標準化殘差越大表示那一格的觀察值超過當虛無假設成立時的期待值越多,也就表示兩個變數的關聯越高。
有一筆觀察值為1067的資料,教育程度分佈如表 3.7,依據母體資料求出期待值以及殘差:
教育程度 | 理論值 | 殘差 |
小學及以下 | 192.66 | -6.66 |
國中 | 228.42 | -81.42 |
高中 | 216.56 | 94.43 |
專科 | 202.64 | -65.64 |
大學及以上 | 226.68 | 59.31 |
卡方檢定注意事項
\[\Phi=\sqrt{\frac{\mathcal{X}^2}{N}}\]
male.age<-car::recode(UsingR::too.young$Male,
"10:35=1; 36:100=2")
female.age<-car::recode(UsingR::too.young$Female,
"10:30=1; 31:100=2; NA=1")
chisq.test(male.age, female.age, correct = F) #chisquared=28.283
##
## Pearson's Chi-squared test
##
## data: male.age and female.age
## X-squared = 28, df = 1, p-value = 1e-07
DescTools::Phi(male.age, female.age)
## [1] 0.5946
phi=sqrt(28.283/80); phi
## [1] 0.5946
\[V=\sqrt{\frac{\mathcal{X}^2}{N*min_{r-1,c-1}}}\]
newdf<- UsingR::too.young %>% mutate(Male.age=case_when(Male >=10 & Male<=30 ~ 1, Male >=31 & Male<=50 ~ 2,
Male >= 51 ~ 3)) %>%
mutate(Female.age=case_when(Female>=10 & Female<=25 ~ 1,
Female>=26 & Female<=40 ~ 2,
Female>=41 & Female<=100 ~ 3,
Female=NA ~ 2))
n = nrow(newdf)
#expss package
library(expss)
newdf %>%
# cro_cpct(female.age, male.age) %>%
calc_cro_cpct(Female.age , Male.age)%>%
set_caption('性別與約會年齡交叉列表')
性別與約會年齡交叉列表 | |||
Male.age | |||
---|---|---|---|
1 | 2 | 3 | |
Female.age | |||
1 | 100 | 20 | |
2 | 75 | 50 | |
3 | 5 | 50 | |
#Total cases | 49 | 20 | 10 |
#chisq.test
ct <- chisq.test(newdf$Male.age, newdf$Female.age, correct = F) #chisquared=80.148
DescTools::CramerV(newdf$Male.age, newdf$Female.age)
## [1] 0.7253
v=sqrt(ct$statistic/(n*2)); v
## X-squared
## 0.7208
\[\begin{align*} r & =\frac{\sum (X-\bar{X})(Y-\bar{Y})}{\sqrt{\sum (X-\bar{X})}\sqrt{\sum (Y-\bar{Y})}} \\ & = \frac{S_{XY}}{S_{X}S_{Y}} \tag{5.1} \end{align*}\]
其中
\[S_{XY}=\frac{\sum (X-\bar{X})(Y-\bar{Y})}{n-1}\]
\[S_{X}=\sqrt{\frac{\sum (X-\bar{X})^2}{n-1}}\]
\[S_{Y}=\sqrt{\frac{\sum (Y-\bar{Y})^2}{n-1}}\]
\[r=\frac{n(\Sigma xy)-(\Sigma x)(\Sigma y)}{\sqrt{(n\Sigma x^2-(\Sigma x)^2)(n\Sigma y^2-(\Sigma y)^2)}}\]
\(r_{XY}\)=1代表正相關關係最高,X增加一個單位,Y也增加一個單位,或者Y增加一個單位,X也增加一個單位。
-1代表負相關關係最高,X減少一個單位,Y增加一個單位。
0代表兩者沒有線性相關。
如果Y是X加上一個單位,兩者還是高度相關:
df <- tibble(X=rnorm(20,0,1), Y=X+1)
with(df, cor(X, Y))
## [1] 1
ggplot(df, aes(x=X, y=Y)) +
geom_point(fill='brown', size=5, shape=1) +
geom_smooth(method="lm", se=F, col='blue')
df <- tibble(X=rnorm(20,0,1), Y=X^2)
with(df, cor(X, Y))
## [1] -0.1375
ggplot(df, aes(x=X, y=Y)) +
geom_point(fill='brown', size=5, shape=1) +
geom_smooth(method="lm", se=F, col='blue')
\[\frac{t}{\sqrt{\frac{1-r^2}{n-2}}} \sim\hspace{.15em}t_{n-2}\]
\(\textbf{trees}\)這筆資料中有Girth(周長)與Volume(體積)兩個變數,我們想知道兩者的相關程度。
檢查是否兩個變數之間有線性相關可用散佈圖,例如圖 5.1:
ggplot(trees, aes(x=Girth, y=Volume)) +
geom_point(size=3, col="darkgreen") +
labs(caption="Data: trees") +
theme_bw()
Figure 5.1: Girth與Volume散佈圖
ggplot(trees, aes(x=Girth, y=Volume)) +
geom_point(col='darkgreen', size=3) +
geom_smooth(method="lm", se=F, col='blue')
Figure 5.2: Girth與Volume散佈圖加上迴歸線
ggstatsplot::ggscatterstats(
data = trees,
x = Girth,
y = Volume,
xlab = "",
ylab = "",
title = "",
messages = FALSE
)
Figure 5.3: 結合相關分析的Girth與Volume散佈圖
ggplot(trees, aes(x=Girth, y=Volume)) +
geom_point(col='darkgreen', size=3) +
geom_smooth(method="loess", se=F) +
theme_bw()
Figure 5.4: Girth與Volume散佈圖加上非線性線
n=nrow(trees)
x<-trees$Girth; y<-trees$Volume
x_sd<-sd(trees$Girth)
y_sd<-sd(trees$Volume)
xy_sd<-sum((x-mean(x))*(y-mean(y)))/(n-1)
r=xy_sd/(x_sd*y_sd)
cat("correlation=",r)
## correlation= 0.9671
R
計算:with(trees, cor(Girth, Volume))
## [1] 0.9671
如同圖 5.3 所顯示,線性相關與迴歸係數跟\(R^2\)之間有關係。
相關係數與迴歸係數的關係可寫成:
\[\begin{align} \hat{\beta}=r\frac{S_{Y}}{S_{X}} \tag{5.2} \end{align}\]
方程式 (5.2) 顯示,當依變數\(Y\)相對於自變數\(X\)的離散程度越大,而且相關程度越大時,迴歸係數也越大。
相關係數與判定係數\(R^2\)的關係為:
\[\begin{align} r^2=R^2=\frac{\sum(\hat{Y}-\bar{Y})^2}{\sum (Y-\bar{Y})^2} \tag{5.3} \end{align}\]
\(\hat{Y}\) 是迴歸模型對\(Y\)的預測值。
vtable <- matrix(c("","","投票","",
"","民主黨","共和黨","總和",
"贊成", "46","41","87",
"反對", "39", "73", "112",
"總和","85","114","199"),
ncol=5, nrow=4)
kable(vtable, 'html', caption='墮胎議題與投票選擇') %>%
kable_styling(full_width=F, bootstrap_options = "striped", font_size=13) %>%
add_header_above(c("","","墮胎議題" = 3))
贊成 | 反對 | 總和 | ||
民主黨 | 46 | 39 | 85 | |
投票 | 共和黨 | 41 | 73 | 114 |
總和 | 87 | 112 | 199 |
PP0797B2.sav
這筆資料,然侯顯示年齡(age)與統獨立場(tondu)之間的交叉列表,再進行卡方檢定,請問這兩個變數互相獨立嗎?
20至29歲 | 30至39歲 | 40至49歲 | 50至59歲 | 60至69歲 | |
N | 351 | 488 | 610 | 374 | 235 |
% | 17.05 | 23.71 | 29.64 | 18.17 | 11.41 |
tab <- as.table(rbind(Yes=c(26,23,18,9), No=c(6,7,14,23)))
names(dimnames(tab)) <- c("Retired", "Department")
tab
## Department
## Retired A B C D
## Yes 26 23 18 9
## No 6 7 14 23
UsingR
這個套件中的nym.2002
這筆資料,裡面的age以及time之間的關係,並且計算兩者的相關程度。
## 最後更新日期 05/07/2021