## Warning: pacote 'tibble' foi compilado no R versão 4.4.3
tabela<-structure(c(160,40,60,12,40,26,140,84), .Dim=c(2L,2L,2L), .Dimnames=
structure(list(local=c("Juiz de Fora","Outros Municípios")
,sexo=c("Masculino","Feminino"),faculdade=c("Engenharia","Economia")), .Names=
c("local de nascimento","sexo","faculdade")),class="table")
mantelhaen.test(tabela)
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: tabela
## Mantel-Haenszel X-squared = 0.2455, df = 1, p-value = 0.6203
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5620944 1.3536126
## sample estimates:
## common odds ratio
## 0.8722718
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tabela[, , 1] + tabela[, , 2]
## X-squared = 3.6027, df = 1, p-value = 0.05769
# Totais marginais
N <- sum(tabela)
n_i <- apply(tabela, 1, sum)
n_j <- apply(tabela, 2, sum)
n_k <- apply(tabela, 3, sum)
# Inicializar array de esperados
esperados <- array(0, dim = c(2, 2, 2))
# Calcular Eijk = (ni.. * n.j. * n..k) / N^2
for (i in 1:2) {
for (j in 1:2) {
for (k in 1:2) {
esperados[i, j, k] <- ((n_i[i] * n_j[j] * n_k[k]) / (N^2))
}
}
}
# Estatística do teste
chi2 <- sum((tabela - esperados)^2 / esperados)
# Graus de liberdade: gl = lce - l - c - e + 2
gl <- 2*2*2 - 2 - 2 - 2 + 2 # = 4
# p-valor
p_valor <- 1 - pchisq(chi2, df = gl)
# Resultado
cat("Qui-quadrado =", chi2, "\nGL =", gl, "\np-valor =", p_valor, "\n")
## Qui-quadrado = 173.3094
## GL = 4
## p-valor = 0
n_jk <- apply(tabela, c(2,3), sum) # n_.jk
n_ik <- apply(tabela, c(1,3), sum) # n_.ik
n_ij <- apply(tabela, c(1,2), sum) # n_.ij
esperados <- array(0, dim = c(2,2,2))
for (i in 1:2) {
for (j in 1:2) {
for (k in 1:2) {
esperados[i,j,k] <- (n_i[i] * n_jk[j,k]) / N
}
}
}
chi2 <- sum((tabela - esperados)^2 / esperados)
gl <- 8 - 4 - 2 + 1 # lce - ce - l + 1 = 8 - 4 - 2 + 1 = 3
p_valor <- 1 - pchisq(chi2, df = gl)
p_valor
## [1] 1.881237e-05
esperados <- array(0, dim = c(2,2,2))
for (i in 1:2) {
for (j in 1:2) {
for (k in 1:2) {
esperados[i,j,k] <- (n_k[i] * n_ij[i,j]) / N
}
}
}
chi2 <- sum((tabela - esperados)^2 / esperados)
gl <- 8 - 4 - 2 + 1 # lce - ce - l + 1 = 8 - 4 - 2 + 1 = 3
p_valor <- 1 - pchisq(chi2, df = gl)
p_valor
## [1] 0
esperados <- array(0, dim = c(2,2,2))
for (i in 1:2) {
for (j in 1:2) {
for (k in 1:2) {
esperados[i,j,k] <- (n_j[i] * n_ik[i,k]) / N
}
}
}
chi2 <- sum((tabela - esperados)^2 / esperados)
gl <- 8 - 4 - 2 + 1 # lce - ce - l + 1 = 8 - 4 - 2 + 1 = 3
p_valor <- 1 - pchisq(chi2, df = gl)
p_valor
## [1] 0
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: tabela
## Mantel-Haenszel X-squared = 0.2455, df = 1, p-value = 0.6203
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5620944 1.3536126
## sample estimates:
## common odds ratio
## 0.8722718
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: aperm(tabela, c(1, 3, 2))
## Mantel-Haenszel X-squared = 19.708, df = 1, p-value = 9.024e-06
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.779383 4.387768
## sample estimates:
## common odds ratio
## 2.794194
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: aperm(tabela, c(2, 3, 1))
## Mantel-Haenszel X-squared = 139.77, df = 1, p-value < 2.2e-16
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 6.514787 14.389699
## sample estimates:
## common odds ratio
## 9.682243
Nij <- matrix(c(16,8,10,32,20,22,20,18,16,32,20,19,40,35,28,74,54,46),nrow = 6, byrow = T)
rownames(Nij) <- c("A","B","C","D","E","F")
colnames(Nij) <- c("(0-----100)","(100-----250)","(250+)")
names(dimnames(Nij)) <- c("Região Geográfica","Receita municipal pela arrecadação de impostos (milhões de coroas)")
knitr::kable(Nij, caption = "Tabela de dados") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = TRUE)
(0—–100) | (100—–250) | (250+) | |
---|---|---|---|
A | 16 | 8 | 10 |
B | 32 | 20 | 22 |
C | 20 | 18 | 16 |
D | 32 | 20 | 19 |
E | 40 | 35 | 28 |
F | 74 | 54 | 46 |
Tabela com os valores observados:
teste <- chisq.test(Nij)
knitr::kable(teste$observed, caption = "Valores observados") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = TRUE)
(0—–100) | (100—–250) | (250+) | |
---|---|---|---|
A | 16 | 8 | 10 |
B | 32 | 20 | 22 |
C | 20 | 18 | 16 |
D | 32 | 20 | 19 |
E | 40 | 35 | 28 |
F | 74 | 54 | 46 |
Tabela com os valores esperados:
knitr::kable(teste$expected, caption = "Valores esperados") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = TRUE)
(0—–100) | (100—–250) | (250+) | |
---|---|---|---|
A | 14.26667 | 10.33333 | 9.40000 |
B | 31.05098 | 22.49020 | 20.45882 |
C | 22.65882 | 16.41176 | 14.92941 |
D | 29.79216 | 21.57843 | 19.62941 |
E | 43.21961 | 31.30392 | 28.47647 |
F | 73.01176 | 52.88235 | 48.10588 |
Tabela com os resíduos ajustados padronizados:
knitr::kable(teste$stdres, caption = "Resíduos ajustados padronizados") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = TRUE)
(0—–100) | (100—–250) | (250+) | |
---|---|---|---|
A | 0.6235075 | -0.9005526 | 0.2381447 |
B | 0.2417790 | -0.6806905 | 0.4332372 |
C | -0.7753767 | 0.4969478 | 0.3444893 |
D | 0.5722810 | -0.4389747 | -0.1800139 |
E | -0.7195976 | 0.8863405 | -0.1175040 |
F | 0.1870332 | 0.2269529 | -0.4397667 |
## (0-----100) (100-----250) (250+)
## 0.4507042 0.2816901 0.2676056
## (0-----100) (100-----250) (250+)
## 0.4196078 0.3039216 0.2764706
df <- data.frame(
Receita = rep(c("(0–----100)", "(100–----250)", "(250+)"), 2),
Proporcao = c(perfil_D, perfil_medio),
Perfil = rep(c("Linha D", "Perfil Médio"), each = 3)
)
library(ggplot2)
## Warning: pacote 'ggplot2' foi compilado no R versão 4.4.3
ggplot(df, aes(x = Receita, y = Proporcao, group = Perfil, color = Perfil)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "Comparação dos Perfis: Linha D vs Perfil Médio",
x = "Faixa de Receita Municipal (milhões de coroas)",
y = "Proporção dentro da linha",
color = "Perfil"
) +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##
## Pearson's Chi-squared test
##
## data: Nij
## X-squared = 2.8517, df = 10, p-value = 0.9847
observado <- teste$observed
esperado <- teste$expected
# Calcular G² = -2 * ln razão de verossimilhança
G2 <- 2 * sum(observado * log(observado / esperado), na.rm = TRUE)
G2 # Estatística
## [1] 2.892284
## [1] 0.9838465
##
## Principal inertias (eigenvalues):
##
## dim value % cum% scree plot
## 1 0.004680 83.7 83.7 *********************
## 2 0.000912 16.3 100.0 ****
## -------- -----
## Total: 0.005592 100.0
##
##
## Rows:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | A | 67 1000 272 | -149 975 317 | 24 25 42 |
## 2 | B | 145 1000 148 | -64 717 126 | 40 283 256 |
## 3 | C | 106 1000 190 | 85 716 163 | 53 284 332 |
## 4 | D | 139 1000 105 | -59 838 105 | -26 162 104 |
## 5 | E | 202 1000 240 | 81 999 287 | -2 1 1 |
## 6 | F | 341 1000 45 | 6 48 3 | -27 952 264 |
##
## Columns:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | 0100 | 420 1000 340 | -64 896 363 | -22 104 217 |
## 2 | 1002 | 304 1000 537 | 98 980 629 | -14 20 67 |
## 3 | 250 | 276 1000 123 | -12 53 8 | 49 947 716 |