# Read the gravida_2017 CSV from the sibling data directory.
gravida_2017 <- read_csv(file = "../data/gravida_2017.csv")
## Parsed with column specification:
## cols(
## ident = col_double(),
## pesomae = col_double(),
## altura = col_double(),
## idade = col_double(),
## fumo = col_double(),
## cigarros = col_double(),
## filhos = col_double(),
## escola = col_double(),
## pesorn = col_double()
## )
# Work on a copy of the dataset called df and keep its column names.
df <- gravida_2017
df_nome <- names(df)
# Convert the categorical variables to factors.
df$fumo <- factor(
  df$fumo,
  labels = c("Nunca Fumou", "Ex-fumante", "Fumante")
)
df$filhos <- as.factor(df$filhos)
# Derived variables, added in a single mutate():
#   imc           - pesomae / (altura / 100)^2
#   macrossomia   - factor: "macrossômico" when pesorn >= 4000,
#                   otherwise "não macrossômico"
#   obesidade_mae - TRUE when imc >= 30
#   cig10         - TRUE when cigarros >= 10
# Missing inputs propagate as NA in every derived column.
# NOTE(review): macrossomia is computed here from the raw pesorn, i.e. before
# the pesorn > 5000 cleanup that happens later in this script.
df <- df %>%
  mutate(
    imc = pesomae / (altura / 100)^2,
    macrossomia = factor(
      if_else(pesorn >= 4000, "macrossômico", "não macrossômico")
    ),
    obesidade_mae = imc >= 30,
    cig10 = cigarros >= 10
  )
# Describe the derived BMI, newborn weight, macrosomia and maternal-obesity
# columns in one Desc() call.
df %>%
select(imc, pesorn, macrossomia, obesidade_mae) %>%
Desc()
## -------------------------------------------------------------------------
## Describe . (spec_tbl_df, tbl_df, tbl, data.frame):
##
## data.frame: 1527 obs. of 4 variables
##
## Nr ColName Class NAs Levels
## 1 imc numeric 5 (0.3%)
## 2 pesorn numeric 2 (0.1%)
## 3 macrossomia factor 2 (0.1%) (2): 1-macrossômico, 2-não
## macrossômico
## 4 obesidade_mae logical 5 (0.3%)
##
##
## -------------------------------------------------------------------------
## 1 - imc (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'522 5 722 0 26.13576 25.91904
## 99.7% 0.3% 0.0% 26.35249
##
## .05 .10 .25 median .75 .90 .95
## 20.66116 21.49029 23.11450 25.53670 28.30600 31.24435 34.22140
##
## range sd vcoef mad IQR skew kurt
## 32.69704 4.31048 0.16493 3.77834 5.19149 1.30744 3.20284
##
## lowest : 17.06556, 17.6693, 17.74584, 17.91509, 18.25632
## highest: 46.06009, 47.20255, 48.07692, 48.89875, 49.7626
## -------------------------------------------------------------------------
## 2 - pesorn (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'525 2 287 0 3'230.70 3'200.82
## 99.9% 0.1% 0.0% 3'260.58
##
## .05 .10 .25 median .75 .90 .95
## 2'300.00 2'560.00 2'920.00 3'250.00 3'600.00 3'900.00 4'088.00
##
## range sd vcoef mad IQR skew kurt
## 9'349.00 594.93 0.18 518.91 680.00 0.40 11.92
##
## lowest : 650.0, 740.0, 790.0, 999.0, 1'020.0
## highest: 4'650.0 (3), 4'750.0 (2), 4'840.0, 4'870.0, 9'999.0
## -------------------------------------------------------------------------
## 3 - macrossomia (factor - dichotomous)
##
## length n NAs unique
## 1'527 1'525 2 2
## 99.9% 0.1%
##
## freq perc lci.95 uci.95'
## macrossômico 118 7.7% 6.5% 9.2%
## não macrossômico 1'407 92.3% 90.8% 93.5%
##
## ' 95%-CI Wilson
## -------------------------------------------------------------------------
## 4 - obesidade_mae (logical - dichotomous)
##
## length n NAs unique
## 1'527 1'522 5 2
## 99.7% 0.3%
##
## freq perc lci.95 uci.95'
## FALSE 1'301 85.5% 83.6% 87.2%
## TRUE 221 14.5% 12.8% 16.4%
##
## ' 95%-CI Wilson
# Association table between smoking (fumo and cig10) and macrosomia:
# a flattened three-way contingency table.
xtabs(~ macrossomia + fumo + cig10, data = df) %>%
  ftable()
## cig10 FALSE TRUE
## macrossomia fumo
## macrossômico Nunca Fumou 74 0
## Ex-fumante 30 0
## Fumante 8 6
## não macrossômico Nunca Fumou 828 0
## Ex-fumante 333 0
## Fumante 120 126
# Mother's weight (pesomae): binned frequency table.
Freq(df$pesomae)
## level freq perc cumfreq cumperc
## 1 [30,40] 4 0.3% 4 0.3%
## 2 (40,50] 171 11.2% 175 11.5%
## 3 (50,60] 535 35.1% 710 46.6%
## 4 (60,70] 468 30.7% 1'178 77.3%
## 5 (70,80] 230 15.1% 1'408 92.4%
## 6 (80,90] 75 4.9% 1'483 97.4%
## 7 (90,100] 24 1.6% 1'507 98.9%
## 8 (100,110] 11 0.7% 1'518 99.7%
## 9 (110,120] 4 0.3% 1'522 99.9%
## 10 (120,130] 1 0.1% 1'523 100.0%
# Descriptive statistics for the mother's weight (pesomae).
Desc(df$pesomae)
## -------------------------------------------------------------------------
## df$pesomae (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'523 4 72 0 63.24 62.65
## 99.7% 0.3% 0.0% 63.83
##
## .05 .10 .25 median .75 .90 .95
## 47.10 50.00 55.00 61.00 70.00 78.00 84.00
##
## range sd vcoef mad IQR skew kurt
## 91.00 11.78 0.19 10.38 15.00 1.07 2.24
##
## lowest : 39.0 (2), 40.0 (2), 41.0 (4), 42.0 (4), 43.0 (11)
## highest: 109.0, 112.0, 117.0, 119.0 (2), 130.0
# Mother's height (altura): binned frequency table.
Freq(df$altura)
## level freq perc cumfreq cumperc
## 1 [135,140] 13 0.9% 13 0.9%
## 2 (140,145] 69 4.5% 82 5.4%
## 3 (145,150] 249 16.3% 331 21.7%
## 4 (150,155] 441 28.9% 772 50.7%
## 5 (155,160] 451 29.6% 1'223 80.2%
## 6 (160,165] 206 13.5% 1'429 93.8%
## 7 (165,170] 73 4.8% 1'502 98.6%
## 8 (170,175] 21 1.4% 1'523 99.9%
## 9 (175,180] 1 0.1% 1'524 100.0%
# Descriptive statistics for the mother's height (altura).
Desc(df$altura)
## -------------------------------------------------------------------------
## df$altura (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'524 3 39 0 155.38 155.06
## 99.8% 0.2% 0.0% 155.70
##
## .05 .10 .25 median .75 .90 .95
## 145.00 147.00 151.00 155.00 160.00 164.00 166.00
##
## range sd vcoef mad IQR skew kurt
## 44.00 6.39 0.04 5.93 9.00 0.12 0.09
##
## lowest : 136.0, 138.0 (5), 139.0 (3), 140.0 (4), 141.0 (5)
## highest: 171.0 (8), 172.0 (7), 173.0 (3), 174.0 (3), 180.0
# Mother's age (idade): binned frequency table.
Freq(df$idade)
## level freq perc cumfreq cumperc
## 1 [15,20] 295 19.3% 295 19.3%
## 2 (20,25] 486 31.8% 781 51.1%
## 3 (25,30] 408 26.7% 1'189 77.9%
## 4 (30,35] 249 16.3% 1'438 94.2%
## 5 (35,40] 83 5.4% 1'521 99.6%
## 6 (40,45] 0 0.0% 1'521 99.6%
## 7 (45,50] 0 0.0% 1'521 99.6%
## 8 (50,55] 1 0.1% 1'522 99.7%
## 9 (55,60] 0 0.0% 1'522 99.7%
## 10 (60,65] 0 0.0% 1'522 99.7%
## 11 (65,70] 1 0.1% 1'523 99.7%
## 12 (70,75] 0 0.0% 1'523 99.7%
## 13 (75,80] 0 0.0% 1'523 99.7%
## 14 (80,85] 0 0.0% 1'523 99.7%
## 15 (85,90] 0 0.0% 1'523 99.7%
## 16 (90,95] 0 0.0% 1'523 99.7%
## 17 (95,100] 4 0.3% 1'527 100.0%
# Descriptive statistics for the mother's age (idade).
Desc(df$idade)
## -------------------------------------------------------------------------
## df$idade (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'527 0 26 0 26.16 25.82
## 100.0% 0.0% 0.0% 26.49
##
## .05 .10 .25 median .75 .90 .95
## 18.00 19.00 21.00 25.00 30.00 34.00 36.00
##
## range sd vcoef mad IQR skew kurt
## 81.00 6.69 0.26 5.93 9.00 3.74 35.83
##
## lowest : 18.0 (78), 19.0 (100), 20.0 (117), 21.0 (101), 22.0 (108)
## highest: 39.0 (10), 40.0 (9), 51.0, 66.0, 99.0 (4)
# Blank out ages above 50 (the summary above shows 51, 66 and 99) by setting
# them to NA; all other ages are kept unchanged.
df$idade <- replace(df$idade, which(df$idade > 50), NA)
# Smoking status (fumo): frequency of each factor level.
Desc(df$fumo)
## -------------------------------------------------------------------------
## df$fumo (factor)
##
## length n NAs unique levels dupes
## 1'527 1'527 0 3 3 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Nunca Fumou 904 59.2% 904 59.2%
## 2 Ex-fumante 363 23.8% 1'267 83.0%
## 3 Fumante 260 17.0% 1'527 100.0%
# Number of cigarettes (cigarros): binned frequency table.
Freq(df$cigarros)
## level freq perc cumfreq cumperc
## 1 [0,5] 1'360 89.1% 1'360 89.1%
## 2 (5,10] 80 5.2% 1'440 94.3%
## 3 (10,15] 30 2.0% 1'470 96.3%
## 4 (15,20] 49 3.2% 1'519 99.5%
## 5 (20,25] 2 0.1% 1'521 99.6%
## 6 (25,30] 4 0.3% 1'525 99.9%
## 7 (30,35] 1 0.1% 1'526 99.9%
## 8 (35,40] 1 0.1% 1'527 100.0%
# Descriptive statistics for the number of cigarettes (cigarros).
Desc(df$cigarros)
## -------------------------------------------------------------------------
## df$cigarros (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'527 0 26 1'269 1.66 1.43
## 100.0% 0.0% 83.1% 1.89
##
## .05 .10 .25 median .75 .90 .95
## 0.00 0.00 0.00 0.00 0.00 7.00 12.00
##
## range sd vcoef mad IQR skew kurt
## 40.00 4.66 2.80 0.00 0.00 3.35 12.28
##
## lowest : 0.0 (1'269), 1.0 (19), 2.0 (17), 3.0 (17), 4.0 (15)
## highest: 21.0 (2), 28.0 (2), 30.0 (2), 32.0, 40.0
# Number of children (filhos, already converted to a factor): level counts.
Freq(df$filhos)
## level freq perc cumfreq cumperc
## 1 0 571 37.4% 571 37.4%
## 2 1 454 29.7% 1'025 67.1%
## 3 2 255 16.7% 1'280 83.8%
## 4 3 119 7.8% 1'399 91.6%
## 5 4 59 3.9% 1'458 95.5%
## 6 5 28 1.8% 1'486 97.3%
## 7 6 20 1.3% 1'506 98.6%
## 8 7 8 0.5% 1'514 99.1%
## 9 8 7 0.5% 1'521 99.6%
## 10 9 2 0.1% 1'523 99.7%
## 11 10 1 0.1% 1'524 99.8%
## 12 11 2 0.1% 1'526 99.9%
## 13 15 1 0.1% 1'527 100.0%
# Factor summary for the number of children (filhos).
Desc(df$filhos)
## -------------------------------------------------------------------------
## df$filhos (factor)
##
## length n NAs unique levels dupes
## 1'527 1'527 0 13 13 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 0 571 37.4% 571 37.4%
## 2 1 454 29.7% 1'025 67.1%
## 3 2 255 16.7% 1'280 83.8%
## 4 3 119 7.8% 1'399 91.6%
## 5 4 59 3.9% 1'458 95.5%
## 6 5 28 1.8% 1'486 97.3%
## 7 6 20 1.3% 1'506 98.6%
## 8 7 8 0.5% 1'514 99.1%
## 9 8 7 0.5% 1'521 99.6%
## 10 9 2 0.1% 1'523 99.7%
## 11 11 2 0.1% 1'525 99.9%
## 12 10 1 0.1% 1'526 99.9%
## ... etc.
## [list output truncated]
# Schooling (escola), in completed (passed) years of study: frequency table.
Freq(df$escola)
## level freq perc cumfreq cumperc
## 1 [0,1] 76 5.0% 76 5.0%
## 2 (1,2] 54 3.5% 130 8.5%
## 3 (2,3] 76 5.0% 206 13.5%
## 4 (3,4] 118 7.7% 324 21.2%
## 5 (4,5] 158 10.3% 482 31.6%
## 6 (5,6] 105 6.9% 587 38.4%
## 7 (6,7] 116 7.6% 703 46.0%
## 8 (7,8] 190 12.4% 893 58.5%
## 9 (8,9] 55 3.6% 948 62.1%
## 10 (9,10] 73 4.8% 1'021 66.9%
## 11 (10,11] 375 24.6% 1'396 91.4%
## 12 (11,12] 8 0.5% 1'404 91.9%
## 13 (12,13] 18 1.2% 1'422 93.1%
## 14 (13,14] 31 2.0% 1'453 95.2%
## 15 (14,15] 62 4.1% 1'515 99.2%
## 16 (15,16] 12 0.8% 1'527 100.0%
# Descriptive statistics for schooling (escola).
Desc(df$escola)
## -------------------------------------------------------------------------
## df$escola (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'527 0 17 38 7.75 7.56
## 100.0% 0.0% 2.5% 7.94
##
## .05 .10 .25 median .75 .90 .95
## 2.00 3.00 5.00 8.00 11.00 11.00 14.00
##
## range sd vcoef mad IQR skew kurt
## 16.00 3.72 0.48 4.45 6.00 -0.04 -0.67
##
## lowest : 0.0 (38), 1.0 (38), 2.0 (54), 3.0 (76), 4.0 (118)
## highest: 12.0 (8), 13.0 (18), 14.0 (31), 15.0 (62), 16.0 (12)
# Newborn weight in grams (pesorn): binned frequency table.
Freq(df$pesorn)
## level freq perc cumfreq cumperc
## 1 [0,1e+03] 4 0.3% 4 0.3%
## 2 (1e+03,2e+03] 40 2.6% 44 2.9%
## 3 (2e+03,3e+03] 430 28.2% 474 31.1%
## 4 (3e+03,4e+03] 952 62.4% 1'426 93.5%
## 5 (4e+03,5e+03] 98 6.4% 1'524 99.9%
## 6 (5e+03,6e+03] 0 0.0% 1'524 99.9%
## 7 (6e+03,7e+03] 0 0.0% 1'524 99.9%
## 8 (7e+03,8e+03] 0 0.0% 1'524 99.9%
## 9 (8e+03,9e+03] 0 0.0% 1'524 99.9%
## 10 (9e+03,1e+04] 1 0.1% 1'525 100.0%
# Descriptive statistics for the newborn weight (pesorn), before cleaning.
Desc(df$pesorn)
## -------------------------------------------------------------------------
## df$pesorn (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'525 2 287 0 3'230.70 3'200.82
## 99.9% 0.1% 0.0% 3'260.58
##
## .05 .10 .25 median .75 .90 .95
## 2'300.00 2'560.00 2'920.00 3'250.00 3'600.00 3'900.00 4'088.00
##
## range sd vcoef mad IQR skew kurt
## 9'349.00 594.93 0.18 518.91 680.00 0.40 11.92
##
## lowest : 650.0, 740.0, 790.0, 999.0, 1'020.0
## highest: 4'650.0 (3), 4'750.0 (2), 4'840.0, 4'870.0, 9'999.0
# A newborn with a recorded weight of 9999 g was identified in the summary
# above (presumably a missing-value code); weights above 5000 g become NA.
df$pesorn <- ifelse(df$pesorn > 5000, NA, df$pesorn)
# macrossomia was derived earlier from the raw pesorn, so the 9999 record had
# been counted as "macrossômico". Re-derive it from the cleaned weights so that
# record is NA in every table that follows. Levels and labels are unchanged.
df$macrossomia <- factor(
  ifelse(df$pesorn >= 4000, "macrossômico", "não macrossômico")
)
# Exploratory analysis of BMI (imc): binned frequency table.
Freq(df$imc)
## level freq perc cumfreq cumperc
## 1 [16,18] 4 0.3% 4 0.3%
## 2 (18,20] 44 2.9% 48 3.2%
## 3 (20,22] 169 11.1% 217 14.3%
## 4 (22,24] 284 18.7% 501 32.9%
## 5 (24,26] 350 23.0% 851 55.9%
## 6 (26,28] 259 17.0% 1'110 72.9%
## 7 (28,30] 191 12.5% 1'301 85.5%
## 8 (30,32] 90 5.9% 1'391 91.4%
## 9 (32,34] 48 3.2% 1'439 94.5%
## 10 (34,36] 38 2.5% 1'477 97.0%
## 11 (36,38] 20 1.3% 1'497 98.4%
## 12 (38,40] 8 0.5% 1'505 98.9%
## 13 (40,42] 5 0.3% 1'510 99.2%
## 14 (42,44] 4 0.3% 1'514 99.5%
## 15 (44,46] 2 0.1% 1'516 99.6%
## 16 (46,48] 3 0.2% 1'519 99.8%
## 17 (48,50] 3 0.2% 1'522 100.0%
# Descriptive statistics for BMI (imc).
Desc(df$imc)
## -------------------------------------------------------------------------
## df$imc (numeric)
##
## length n NAs unique 0s mean meanCI
## 1'527 1'522 5 722 0 26.13576 25.91904
## 99.7% 0.3% 0.0% 26.35249
##
## .05 .10 .25 median .75 .90 .95
## 20.66116 21.49029 23.11450 25.53670 28.30600 31.24435 34.22140
##
## range sd vcoef mad IQR skew kurt
## 32.69704 4.31048 0.16493 3.77834 5.19149 1.30744 3.20284
##
## lowest : 17.06556, 17.6693, 17.74584, 17.91509, 18.25632
## highest: 46.06009, 47.20255, 48.07692, 48.89875, 49.7626
# Cross-tabulation of macrossomia (rows) by obesidade_mae (columns).
# table() drops rows with NA in either variable by default.
table(df$macrossomia, df$obesidade_mae)
##
## FALSE TRUE
## macrossômico 89 28
## não macrossômico 1211 192
# Same cross-tabulation as proportions of the grand total: no margin is
# given, so these are not row or column percentages.
prop.table(table(df$macrossomia, df$obesidade_mae))
##
## FALSE TRUE
## macrossômico 0.05855263 0.01842105
## não macrossômico 0.79671053 0.12631579
# Cross-tabulation of macrossomia (rows) by smoking status fumo (columns).
table(df$macrossomia, df$fumo)
##
## Nunca Fumou Ex-fumante Fumante
## macrossômico 74 30 14
## não macrossômico 828 333 246
# Same cross-tabulation as proportions of the grand total: no margin is
# given, so these are not row or column percentages.
prop.table(table(df$macrossomia, df$fumo))
##
## Nunca Fumou Ex-fumante Fumante
## macrossômico 0.048524590 0.019672131 0.009180328
## não macrossômico 0.542950820 0.218360656 0.161311475
# Flattened three-way table: fumo and obesidade_mae in the rows,
# macrossomia in the columns.
xtabs(~ fumo + obesidade_mae + macrossomia, data = df) %>%
  ftable()
## macrossomia macrossômico não macrossômico
## fumo obesidade_mae
## Nunca Fumou FALSE 56 719
## TRUE 17 107
## Ex-fumante FALSE 22 275
## TRUE 8 57
## Fumante FALSE 11 217
## TRUE 3 28
# Cross-tabulation of fumo by obesidade_mae with row, column and total
# proportions; chisq = TRUE adds Pearson's chi-squared test.
CrossTable(df$fumo, df$obesidade_mae, chisq = TRUE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1522
##
##
## | df$obesidade_mae
## df$fumo | FALSE | TRUE | Row Total |
## -------------|-----------|-----------|-----------|
## Nunca Fumou | 776 | 125 | 901 |
## | 0.044 | 0.260 | |
## | 0.861 | 0.139 | 0.592 |
## | 0.596 | 0.566 | |
## | 0.510 | 0.082 | |
## -------------|-----------|-----------|-----------|
## Ex-fumante | 297 | 65 | 362 |
## | 0.500 | 2.942 | |
## | 0.820 | 0.180 | 0.238 |
## | 0.228 | 0.294 | |
## | 0.195 | 0.043 | |
## -------------|-----------|-----------|-----------|
## Fumante | 228 | 31 | 259 |
## | 0.197 | 1.161 | |
## | 0.880 | 0.120 | 0.170 |
## | 0.175 | 0.140 | |
## | 0.150 | 0.020 | |
## -------------|-----------|-----------|-----------|
## Column Total | 1301 | 221 | 1522 |
## | 0.855 | 0.145 | |
## -------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 5.104148 d.f. = 2 p = 0.07791988
##
##
##
# Cochran-Mantel-Haenszel test on the fumo x obesidade_mae x macrossomia table.
# NOTE(review): with a 3-way table the last dimension (macrossomia) is used as
# the stratum - confirm this is the intended stratification.
mantelhaen.test(xtabs(~ df$fumo + df$obesidade_mae + df$macrossomia))
##
## Cochran-Mantel-Haenszel test
##
## data: xtabs(~df$fumo + df$obesidade_mae + df$macrossomia)
## Cochran-Mantel-Haenszel M^2 = 4.9092, df = 2, p-value = 0.0859