library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(table1)
##
## Anexando pacote: 'table1'
##
## Os seguintes objetos são mascarados por 'package:base':
##
## units, units<-
library(nortest)
library(patchwork)
library(readxl)
# Load the study data from an explicit path instead of calling setwd(), so
# reading the workbook does not change the session's working directory as a
# side effect.
# NOTE(review): the directory is specific to one machine; switch to a
# project-relative location to make the script portable.
data_dir <- "C:/Users/lucca/Documents/Ciencia de Dados/Prova Final"
df <- read_excel(file.path(data_dir, "df.xlsx"))
# Replace the coded values of `cidade` and `tabagismo` with their labels and
# store both columns as factors. as.factor() orders the levels alphabetically,
# which is the order the tables and plots below are rendered in.
df <- mutate(
  df,
  cidade = as.factor(recode(
    cidade,
    "0" = "Porto Alegre",
    "1" = "Canoas",
    "2" = "Guaíba",
    "3" = "Gravataí"
  )),
  tabagismo = as.factor(recode(
    tabagismo,
    "0" = "Não Fumante",
    "1" = "Ex-Fumante",
    "2" = "Fumante"
  ))
)
# Descriptive table of idade, altura, imc and tabagismo, stratified by cidade
# (table1 also adds an "Overall" column). table1 is attached once at the top
# of the script, so it is not loaded again here.
# NOTE(review): the rendered output reports an imc minimum of 0.00257 for
# Porto Alegre, which looks like a data-entry error -- confirm before
# reporting these summaries.
table1(~ idade + altura + imc + tabagismo | cidade, data = df)
| | Canoas (N=75) | Gravataí (N=75) | Guaíba (N=75) | Porto Alegre (N=75) | Overall (N=300) |
|---|---|---|---|---|---|
| idade | |||||
| Mean (SD) | 35.6 (11.2) | 29.8 (10.3) | 36.1 (11.6) | 34.0 (11.0) | 33.9 (11.3) |
| Median [Min, Max] | 35.7 [18.6, 62.9] | 26.1 [18.1, 54.2] | 34.8 [18.3, 55.9] | 35.5 [18.0, 53.8] | 33.0 [18.0, 62.9] |
| altura | |||||
| Mean (SD) | 175 (6.78) | 175 (8.13) | 172 (7.86) | 173 (7.78) | 174 (7.69) |
| Median [Min, Max] | 175 [162, 196] | 174 [150, 197] | 172 [157, 197] | 174 [157, 192] | 174 [150, 197] |
| imc | |||||
| Mean (SD) | 27.0 (3.96) | 26.6 (4.32) | 27.2 (4.37) | 26.9 (5.70) | 26.9 (4.62) |
| Median [Min, Max] | 26.6 [17.7, 39.0] | 25.8 [18.6, 41.7] | 26.1 [19.6, 40.9] | 27.0 [0.00257, 38.7] | 26.4 [0.00257, 41.7] |
| tabagismo | |||||
| Ex-Fumante | 13 (17.3%) | 5 (6.7%) | 17 (22.7%) | 14 (18.7%) | 49 (16.3%) |
| Fumante | 18 (24.0%) | 17 (22.7%) | 6 (8.0%) | 15 (20.0%) | 56 (18.7%) |
| Não Fumante | 44 (58.7%) | 53 (70.7%) | 52 (69.3%) | 46 (61.3%) | 195 (65.0%) |
# Descriptive table of the four lung-function variables (fev1z, fvcz,
# fev1fvcz, fef2575z), stratified by cidade. table1 is attached once at the
# top of the script, so it is not loaded again here.
table1(~ fev1z + fvcz + fev1fvcz + fef2575z | cidade, data = df)
| | Canoas (N=75) | Gravataí (N=75) | Guaíba (N=75) | Porto Alegre (N=75) | Overall (N=300) |
|---|---|---|---|---|---|
| fev1z | |||||
| Mean (SD) | -0.493 (0.848) | -1.16 (1.02) | -0.700 (0.847) | -0.760 (1.05) | -0.779 (0.972) |
| Median [Min, Max] | -0.467 [-2.73, 1.37] | -0.980 [-4.13, 1.23] | -0.589 [-3.39, 1.57] | -0.772 [-3.20, 1.34] | -0.724 [-4.13, 1.57] |
| fvcz | |||||
| Mean (SD) | -0.577 (0.830) | -1.24 (1.02) | -0.921 (0.920) | -0.980 (1.02) | -0.930 (0.974) |
| Median [Min, Max] | -0.537 [-2.84, 1.26] | -1.06 [-3.61, 0.700] | -0.872 [-3.82, 1.33] | -0.975 [-4.11, 1.61] | -0.883 [-4.11, 1.61] |
| fev1fvcz | |||||
| Mean (SD) | 0.192 (1.13) | 0.158 (1.30) | 0.444 (1.03) | 0.423 (1.18) | 0.304 (1.16) |
| Median [Min, Max] | 0.173 [-2.57, 2.46] | 0.176 [-3.65, 3.74] | 0.470 [-2.51, 3.61] | 0.348 [-2.48, 3.55] | 0.319 [-3.65, 3.74] |
| fef2575z | |||||
| Mean (SD) | -0.137 (0.899) | -0.491 (1.09) | -0.0772 (0.782) | -0.184 (0.954) | -0.222 (0.946) |
| Median [Min, Max] | -0.178 [-2.57, 1.59] | -0.440 [-4.38, 1.68] | -0.0775 [-2.53, 1.42] | -0.126 [-2.36, 1.93] | -0.175 [-4.38, 1.93] |
# Lilliefors (Kolmogorov-Smirnov) normality tests for each lung-function
# variable, computed on the pooled sample (all cities together). nortest is
# attached once at the top of the script, so it is not loaded again here.
# The calls keep the `df$<column>` form because the printed "data:" line
# echoes the argument expression.
# Echoed results: fvcz (p = 0.015) and fef2575z (p = 0.021) fall below 0.05;
# fev1z (p = 0.064) and fev1fvcz (p = 0.062) do not.
# NOTE(review): presumably this is why rank-based tests are used below --
# confirm the intended decision rule.
lillie.test(df$fvcz)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: df$fvcz
## D = 0.058471, p-value = 0.01504
lillie.test(df$fev1z)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: df$fev1z
## D = 0.050336, p-value = 0.06404
lillie.test(df$fev1fvcz)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: df$fev1fvcz
## D = 0.050541, p-value = 0.06195
lillie.test(df$fef2575z)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: df$fef2575z
## D = 0.056845, p-value = 0.02054
# Kruskal-Wallis rank sum tests comparing each lung-function variable across
# the four levels of `cidade` (df = 3 in every echoed result).
# The same four tests are recomputed and stored as teste1..teste4 in the
# plotting section further down; here the results are only printed.
kruskal.test(fvcz ~ cidade, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: fvcz by cidade
## Kruskal-Wallis chi-squared = 15.091, df = 3, p-value = 0.00174
kruskal.test(fev1z ~ cidade, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: fev1z by cidade
## Kruskal-Wallis chi-squared = 17.793, df = 3, p-value = 0.0004852
kruskal.test(fev1fvcz ~ cidade, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: fev1fvcz by cidade
## Kruskal-Wallis chi-squared = 4.733, df = 3, p-value = 0.1924
kruskal.test(fef2575z ~ cidade, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: fef2575z by cidade
## Kruskal-Wallis chi-squared = 7.9169, df = 3, p-value = 0.04776
# Pairwise Wilcoxon rank sum tests between cities for each lung-function
# variable, with Bonferroni-adjusted p-values. Each printed matrix is the
# lower triangle of city-by-city comparisons.
# NOTE(review): fev1fvcz is compared pairwise even though its Kruskal-Wallis
# test above was not significant (p = 0.1924) -- confirm this is intended.
pairwise.wilcox.test(df$fvcz, df$cidade, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$fvcz and df$cidade
##
## Canoas Gravataí Guaíba
## Gravataí 0.0013 - -
## Guaíba 0.2041 0.4600 -
## Porto Alegre 0.0540 1.0000 1.0000
##
## P value adjustment method: bonferroni
pairwise.wilcox.test(df$fev1z, df$cidade, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$fev1z and df$cidade
##
## Canoas Gravataí Guaíba
## Gravataí 0.00026 - -
## Guaíba 0.97223 0.01835 -
## Porto Alegre 0.45626 0.21576 1.00000
##
## P value adjustment method: bonferroni
pairwise.wilcox.test(df$fev1fvcz, df$cidade, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$fev1fvcz and df$cidade
##
## Canoas Gravataí Guaíba
## Gravataí 1.00 - -
## Guaíba 0.84 0.41 -
## Porto Alegre 1.00 0.85 1.00
##
## P value adjustment method: bonferroni
pairwise.wilcox.test(df$fef2575z, df$cidade, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$fef2575z and df$cidade
##
## Canoas Gravataí Guaíba
## Gravataí 0.250 - -
## Guaíba 1.000 0.035 -
## Porto Alegre 1.000 0.427 1.000
##
## P value adjustment method: bonferroni
# Format a p-value for a plot label. Values below 0.001 are reported as
# "p < 0.001" instead of being rounded, because round(p, 3) turns a value
# such as 0.0004852 into the misleading label "p = 0".
format_p_label <- function(p) {
  if (!is.na(p) && p < 0.001) {
    "p < 0.001"
  } else {
    paste0("p = ", round(p, 3))
  }
}

# Build one gray boxplot of a lung-function variable by city.
#   data    : data frame holding `cidade` and the column named by `var`
#   var     : name of the y column, as a string
#   y_label : y-axis title
#   test    : result of kruskal.test() whose p-value is written on the panel
# The annotation is left-aligned at x = 1.5 and placed at the maximum
# observed value of `var`.
plot_by_city <- function(data, var, y_label, test) {
  ggplot(data, aes(x = cidade, y = .data[[var]])) +
    geom_boxplot(fill = "gray") +
    theme_classic() +
    labs(x = "Municipio", y = y_label) +
    annotate(
      "text",
      x = 1.5,
      y = max(data[[var]], na.rm = TRUE),
      label = paste0("Kruskal-Wallis, ", format_p_label(test$p.value)),
      hjust = 0
    )
}

# FVCz
teste1 <- kruskal.test(fvcz ~ cidade, data = df)
g1 <- plot_by_city(df, "fvcz", "FVCz", teste1)

# FEV1z
teste2 <- kruskal.test(fev1z ~ cidade, data = df)
g2 <- plot_by_city(df, "fev1z", "FEV1z", teste2)

# FEV1/FVCz
teste3 <- kruskal.test(fev1fvcz ~ cidade, data = df)
g3 <- plot_by_city(df, "fev1fvcz", "FEV1/FVCz", teste3)

# FEF2575z
teste4 <- kruskal.test(fef2575z ~ cidade, data = df)
g4 <- plot_by_city(df, "fef2575z", "FEF2575z (z score)", teste4)

# Arrange the four panels in a 2 x 2 grid (patchwork) under a shared title.
(g1 + g2) / (g3 + g4) +
  plot_annotation(title = "Função pulmonar por município")