1. Ativando pacotes, recodificando variáveis (tabagismo e cidade) e definindo como fator:

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggpubr)
library(table1)

## 
## Anexando pacote: 'table1'
## 
## Os seguintes objetos são mascarados por 'package:base':
## 
##     units, units<-

library(nortest)
library(patchwork)
library(readxl)

# Load the study data from the Excel workbook (path is relative to the
# working directory).
df <- read_excel("df.xlsx")

# Recode the numeric city codes 0-3 as a labelled factor. Any value outside
# `levels` becomes NA.
df$cidade <- factor(
  df$cidade,
  levels = c(0, 1, 2, 3),
  labels = c("Porto Alegre", "Canoas", "Guaiba", "Gravatai")
)

# Recode the numeric smoking-status codes 0-2 as a labelled factor. Any value
# outside `levels` becomes NA.
df$tabagismo <- factor(
  df$tabagismo,
  levels = c(0, 1, 2),
  labels = c("Nao fumante", "Ex-fumante", "Fumante")
)

Tabela demográfica:

# Demographic summary (age, height, weight, smoking status) stratified by city.
table1(~ idade + altura + peso + tabagismo | cidade, data = df)

	Porto Alegre (N=75)	Canoas (N=75)	Guaiba (N=75)	Gravatai (N=75)	Overall (N=300)
idade
Mean (SD)	34.0 (11.0)	35.6 (11.2)	36.1 (11.6)	29.8 (10.3)	33.9 (11.3)
Median [Min, Max]	35.5 [18.0, 53.8]	35.7 [18.6, 62.9]	34.8 [18.3, 55.9]	26.1 [18.1, 54.2]	33.0 [18.0, 62.9]
altura
Mean (SD)	173 (7.78)	175 (6.78)	172 (7.86)	175 (8.13)	174 (7.69)
Median [Min, Max]	174 [157, 192]	175 [162, 196]	172 [157, 197]	174 [150, 197]	174 [150, 197]
peso
Mean (SD)	82.0 (15.4)	82.8 (14.2)	80.8 (15.6)	81.1 (13.5)	81.7 (14.6)
Median [Min, Max]	82.0 [54.0, 125]	82.0 [50.0, 132]	80.0 [58.0, 140]	80.0 [54.0, 122]	80.0 [50.0, 140]
tabagismo
Nao fumante	46 (61.3%)	44 (58.7%)	52 (69.3%)	53 (70.7%)	195 (65.0%)
Ex-fumante	14 (18.7%)	13 (17.3%)	17 (22.7%)	5 (6.7%)	49 (16.3%)
Fumante	15 (20.0%)	18 (24.0%)	6 (8.0%)	17 (22.7%)	56 (18.7%)

Tabela de Função Pulmonar:

# Summary of the four lung-function variables stratified by city.
table1(~ fvcz + fev1z + fev1fvcz + fef2575z | cidade, data = df)

	Porto Alegre (N=75)	Canoas (N=75)	Guaiba (N=75)	Gravatai (N=75)	Overall (N=300)
fvcz
Mean (SD)	-0.980 (1.02)	-0.577 (0.830)	-0.921 (0.920)	-1.24 (1.02)	-0.930 (0.974)
Median [Min, Max]	-0.975 [-4.11, 1.61]	-0.537 [-2.84, 1.26]	-0.872 [-3.82, 1.33]	-1.06 [-3.61, 0.700]	-0.883 [-4.11, 1.61]
fev1z
Mean (SD)	-0.760 (1.05)	-0.493 (0.848)	-0.700 (0.847)	-1.16 (1.02)	-0.779 (0.972)
Median [Min, Max]	-0.772 [-3.20, 1.34]	-0.467 [-2.73, 1.37]	-0.589 [-3.39, 1.57]	-0.980 [-4.13, 1.23]	-0.724 [-4.13, 1.57]
fev1fvcz
Mean (SD)	0.423 (1.18)	0.192 (1.13)	0.444 (1.03)	0.158 (1.30)	0.304 (1.16)
Median [Min, Max]	0.348 [-2.48, 3.55]	0.173 [-2.57, 2.46]	0.470 [-2.51, 3.61]	0.176 [-3.65, 3.74]	0.319 [-3.65, 3.74]
fef2575z
Mean (SD)	-0.184 (0.954)	-0.137 (0.899)	-0.0772 (0.782)	-0.491 (1.09)	-0.222 (0.946)
Median [Min, Max]	-0.126 [-2.36, 1.93]	-0.178 [-2.57, 1.59]	-0.0775 [-2.53, 1.42]	-0.440 [-4.38, 1.68]	-0.175 [-4.38, 1.93]

Teste de normalidade:

# Lilliefors (Kolmogorov-Smirnov) normality test on fvcz, all cities pooled.
lillie.test(df$fvcz)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  df$fvcz
## D = 0.058471, p-value = 0.01504

# Lilliefors (Kolmogorov-Smirnov) normality test on fev1z, all cities pooled.
lillie.test(df$fev1z)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  df$fev1z
## D = 0.050336, p-value = 0.06404

# Lilliefors (Kolmogorov-Smirnov) normality test on fev1fvcz, all cities pooled.
lillie.test(df$fev1fvcz)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  df$fev1fvcz
## D = 0.050541, p-value = 0.06195

# Lilliefors (Kolmogorov-Smirnov) normality test on fef2575z, all cities pooled.
lillie.test(df$fef2575z)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  df$fef2575z
## D = 0.056845, p-value = 0.02054

Testando diferenças na Função Pulmonar entre as cidades:

# Kruskal-Wallis rank sum test: does fvcz differ across the city groups?
kruskal.test(fvcz ~ cidade, data = df)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  fvcz by cidade
## Kruskal-Wallis chi-squared = 15.091, df = 3, p-value = 0.00174

# Kruskal-Wallis rank sum test: does fev1z differ across the city groups?
kruskal.test(fev1z ~ cidade, data = df)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  fev1z by cidade
## Kruskal-Wallis chi-squared = 17.793, df = 3, p-value = 0.0004852

# Kruskal-Wallis rank sum test: does fev1fvcz differ across the city groups?
kruskal.test(fev1fvcz ~ cidade, data = df)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  fev1fvcz by cidade
## Kruskal-Wallis chi-squared = 4.733, df = 3, p-value = 0.1924

# Kruskal-Wallis rank sum test: does fef2575z differ across the city groups?
kruskal.test(fef2575z ~ cidade, data = df)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  fef2575z by cidade
## Kruskal-Wallis chi-squared = 7.9169, df = 3, p-value = 0.04776

**Análise post-hoc entre Função Pulmonar e cidade:**

# Post-hoc pairwise Wilcoxon rank sum tests of fvcz between cities, with
# Holm-adjusted p-values (the function's default, spelled out here).
pairwise.wilcox.test(df$fvcz, df$cidade, p.adjust.method = "holm")

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$fvcz and df$cidade 
## 
##          Porto Alegre Canoas Guaiba
## Canoas   0.0450       -      -     
## Guaiba   0.5011       0.1360 -     
## Gravatai 0.4297       0.0013 0.2300
## 
## P value adjustment method: holm

# Post-hoc pairwise Wilcoxon rank sum tests of fev1z between cities, with
# Holm-adjusted p-values (the function's default, spelled out here).
pairwise.wilcox.test(df$fev1z, df$cidade, p.adjust.method = "holm")

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$fev1z and df$cidade 
## 
##          Porto Alegre Canoas  Guaiba 
## Canoas   0.22813      -       -      
## Guaiba   0.50107      0.32408 -      
## Gravatai 0.14384      0.00026 0.01529
## 
## P value adjustment method: holm

# Post-hoc pairwise Wilcoxon rank sum tests of fev1fvcz between cities, with
# Holm-adjusted p-values (the function's default, spelled out here).
pairwise.wilcox.test(df$fev1fvcz, df$cidade, p.adjust.method = "holm")

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$fev1fvcz and df$cidade 
## 
##          Porto Alegre Canoas Guaiba
## Canoas   0.70         -      -     
## Guaiba   1.00         0.70   -     
## Gravatai 0.70         1.00   0.41  
## 
## P value adjustment method: holm

# Post-hoc pairwise Wilcoxon rank sum tests of fef2575z between cities, with
# Holm-adjusted p-values (the function's default, spelled out here).
pairwise.wilcox.test(df$fef2575z, df$cidade, p.adjust.method = "holm")

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$fef2575z and df$cidade 
## 
##          Porto Alegre Canoas Guaiba
## Canoas   1.000        -      -     
## Guaiba   1.000        1.000  -     
## Gravatai 0.285        0.208  0.035 
## 
## P value adjustment method: holm

Gráfico da Função Pulmonar por cidade:

# Fill colours shared by all four panels (one per city level).
cores_cidade <- c("#F2A7B8", "#E5989B", "#B56576", "#6D6875")

# Build one boxplot panel of a lung-function variable by city, annotated with
# the Kruskal-Wallis p-value.
#   dados:    data frame containing `cidade` and the column named in `variavel`
#   variavel: name (string) of the column drawn on the y axis
#   rotulo:   short label used for the y axis and in the panel title
# Returns a ggplot object.
grafico_por_cidade <- function(dados, variavel, rotulo) {
  valores <- dados[[variavel]]

  # Put the p-value label just above this panel's own data. The previous code
  # used max(df$fvcz) * 4 for every panel, which tied three panels to the
  # wrong variable and stretched all y axes far beyond the data.
  limites <- range(valores, na.rm = TRUE)
  posicao_rotulo <- limites[2] + 0.1 * diff(limites)

  ggplot(dados, aes(x = cidade, y = .data[[variavel]], fill = cidade)) +
    geom_boxplot() +
    stat_compare_means(
      method = "kruskal.test",
      size = 2,
      label.x = 2,
      label.y = posicao_rotulo
    ) +
    scale_fill_manual(values = cores_cidade) +
    labs(
      title = paste(rotulo, "por Município"),
      x = "Município",
      y = rotulo
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
    )
}

g1 <- grafico_por_cidade(df, "fvcz", "FVC")
g2 <- grafico_por_cidade(df, "fev1z", "FEV1")
g3 <- grafico_por_cidade(df, "fev1fvcz", "FEV1FVC")
g4 <- grafico_por_cidade(df, "fef2575z", "FEF2575")

# Arrange the four panels in a 2 x 2 grid (patchwork operators).
(g1 + g2) / (g3 + g4)

Disciplina Introdução à ciência de dados

Amanda Paz

2026-04-25

1. Ativando pacotes, recodificando variáveis (tabagismo e cidade) e definindo como fator:

Tabela demográfica:

Tabela de Função Pulmonar:

Teste de normalidade:

Testando diferenças na Função Pulmonar entre as cidades:

**Análise post-hoc entre Função Pulmonar e cidade:**

Gráfico da Função Pulmonar por cidade: