Directorio de trabajo:
library(knitr);library(tidyverse);library(dplyr);library(sjmisc);library(stargazer);library(car);library(RcmdrMisc);library(“RcmdrMisc”)
# - Indicaciones: Por favor usted va a registrar sus observaciones con respecto
#a este análisis.
# Es necesario que su trabajo final contenga un resumen de sus observaciones.
#Encabezado:
#Logo Universidad
#Nombre disciplina
#Nombre unidad
#Nombre Estudiante
#Fecha de entrega
#Título: Relación de Presión arterial en función de masa corporal, edad y hábito de fumar.
#Resumen:
#Indicar un resumen en máximo 300 palabras sobre los resultados mostrados a continuación.
#Intrucciones de entrega:
#Subir su PDF (1 página) nombrado "Monitoria1_nombre"
#Subir este PDF en la carpeta del drive
#EPIDEMIOLOGIA AVANZADA/Unidad I/Monitoria/Trabajos
#Fecha máxima de entrega: Viernes 25.01.22 18H00
##Relación de la Presión arterial sistólica en función de ìndice de masa corporal,
# edad y hábito de fumar en pacientes atendidos en consulta externa:
## Número de registro
## PAS Presión arterial sistólica (mm/Hg)
## QUET Indice de Masa corporal (kg/m 2 )
## AGE Edad (años) y
## SMK Hábito de fumar.
## Hospital, São Paulo, enero de 1995.
setwd("~/Dropbox/1.UNL/1 Modelización/Laboratorio Regresión linear/")
pas <- read.csv(file="PAS.csv")
pas$fSMK <- factor(pas$SMK, levels = c(0,1), labels = c("no", "si"))
knitr::kable(summary(pas))
| PAS | QUET | AGE | SMK | fSMK | |
|---|---|---|---|---|---|
| Min. :120.0 | Min. :23.00 | Min. :41.00 | Min. :0.0000 | no:15 | |
| 1st Qu.:134.8 | 1st Qu.:30.00 | 1st Qu.:48.00 | 1st Qu.:0.0000 | si:17 | |
| Median :143.0 | Median :33.50 | Median :53.50 | Median :1.0000 | NA | |
| Mean :144.5 | Mean :33.94 | Mean :53.25 | Mean :0.5312 | NA | |
| 3rd Qu.:152.0 | 3rd Qu.:37.25 | 3rd Qu.:58.25 | 3rd Qu.:1.0000 | NA | |
| Max. :180.0 | Max. :46.00 | Max. :65.00 | Max. :1.0000 | NA |
tapply(pas$PAS, pas$fSMK, mean)
## no si
## 140.8000 147.8235
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## ## Basic descriptive statistics
##
##
## Grouped by: no
##
## var label mean sd md range
## PAS PAS 140.80 12.90 138 44 (120-164)
## QUET QUET 34.33 4.32 34 14 (27-41)
## AGE AGE 54.27 7.63 56 24 (41-65)
## SMK SMK 0.00 0.00 0 0 (0-0)
## fSMK fSMK 1.00 0.00 1 0 (1-1)
##
##
## Grouped by: si
##
## var label mean sd md range
## PAS PAS 147.82 15.21 145 54 (126-180)
## QUET QUET 33.59 5.78 33 23 (23-46)
## AGE AGE 52.35 6.40 51 21 (43-64)
## SMK SMK 1.00 0.00 1 0 (1-1)
## fSMK fSMK 2.00 0.00 2 0 (2-2)
prop.table(table(pas$fSMK))
##
## no si
## 0.46875 0.53125
library(lattice)
par(mfrow= c(2,2))
plot(pas$AGE)
boxplot(pas$AGE, col = "red")
hist(pas$AGE)
qqplot(pas$PAS, pas$AGE)
library(lattice)
par(mfrow= c(2,2))
plot(pas$QUET)
boxplot(pas$QUET, col="green")
hist(pas$QUET, col="pink")
qqplot(pas$PAS, pas$QUET)
library(lattice)
par(mfrow= c(2,2))
plot(pas$QUET[pas$fSMK == "si"])
boxplot(pas$QUET[pas$fSMK == "si"])
hist(pas$QUET[pas$fSMK == "si"])
qqplot(pas$PAS, pas$QUET[pas$fSMK == "si"])
library(lattice)
par(mfrow= c(2,2))
plot(pas$QUET[pas$fSMK == "no"])
boxplot(pas$QUET[pas$fSMK == "no"])
hist(pas$QUET[pas$fSMK == "no"])
qqplot(pas$PAS, pas$QUET[pas$fSMK == "no"])
library(lattice)
par(mfrow= c(2,2))
plot(pas$AGE[pas$fSMK == "si"])
boxplot(pas$AGE[pas$fSMK == "si"])
hist(pas$AGE[pas$fSMK == "si"])
qqplot(pas$PAS, pas$AGE[pas$fSMK == "si"])
library(lattice)
par(mfrow= c(2,2))
plot(pas$AGE[pas$fSMK == "no"])
boxplot(pas$AGE[pas$fSMK == "no"])
hist(pas$AGE[pas$fSMK == "no"])
qqplot(pas$PAS, pas$AGE[pas$fSMK == "no"])
plot(pas$PAS ~ pas$QUET)
- En el gráfico se observa una relación linear
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
scatterplotMatrix(~PAS+QUET+AGE, data=pas)
RegModel <- lm(pas$PAS~pas$QUET)
summary(RegModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$QUET)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.30 -7.09 -1.79 7.00 22.51
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.0366 11.8761 6.150 9.18e-07 ***
## pas$QUET 2.1067 0.3462 6.085 1.10e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.791 on 30 degrees of freedom
## Multiple R-squared: 0.5524, Adjusted R-squared: 0.5375
## F-statistic: 37.03 on 1 and 30 DF, p-value: 1.1e-06
Confint(RegModel)
## Estimate 2.5 % 97.5 %
## (Intercept) 73.036568 48.782334 97.2908
## pas$QUET 2.106657 1.399615 2.8137
RegModel <- lm(pas$PAS~pas$AGE)
summary(RegModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$AGE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.548 -6.990 -2.481 5.765 23.892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.0916 12.8163 4.611 6.98e-05 ***
## pas$AGE 1.6045 0.2387 6.721 1.89e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.245 on 30 degrees of freedom
## Multiple R-squared: 0.6009, Adjusted R-squared: 0.5876
## F-statistic: 45.18 on 1 and 30 DF, p-value: 1.894e-07
Confint(RegModel)
## Estimate 2.5 % 97.5 %
## (Intercept) 59.09162 32.917327 85.265923
## pas$AGE 1.60450 1.116977 2.092023
RegModel <- lm(pas$PAS~pas$fSMK)
summary(RegModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$fSMK)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.824 -9.056 -2.812 11.200 32.176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 140.800 3.661 38.454 <2e-16 ***
## pas$fSMKsi 7.024 5.023 1.398 0.172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.18 on 30 degrees of freedom
## Multiple R-squared: 0.06117, Adjusted R-squared: 0.02988
## F-statistic: 1.955 on 1 and 30 DF, p-value: 0.1723
Confint(RegModel)
## Estimate 2.5 % 97.5 %
## (Intercept) 140.800000 133.322276 148.27772
## pas$fSMKsi 7.023529 -3.235823 17.28288
AS ~ QET Presión arterial sistólica en función del indice de masa corporal
qqplot(pas$PAS,pas$QUET, dist="norm")
## Warning in plot.window(...): "dist" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dist" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "dist" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "dist" is not a
## graphical parameter
## Warning in box(...): "dist" is not a graphical parameter
## Warning in title(...): "dist" is not a graphical parameter
shapiro.test(pas$PAS)
##
## Shapiro-Wilk normality test
##
## data: pas$PAS
## W = 0.97199, p-value = 0.5561
Independencia de valores: De acuerdo con la información de la recolección de datos, los valores son independientes entre sí, según diferentes pacientes.
linealidad Es posible observar linealidad en la dispersión de los datos
RegModel <- lm(pas$PAS~pas$QUET)
summary(RegModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$QUET)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.30 -7.09 -1.79 7.00 22.51
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.0366 11.8761 6.150 9.18e-07 ***
## pas$QUET 2.1067 0.3462 6.085 1.10e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.791 on 30 degrees of freedom
## Multiple R-squared: 0.5524, Adjusted R-squared: 0.5375
## F-statistic: 37.03 on 1 and 30 DF, p-value: 1.1e-06
Coeficiente Beta0 = 73,036. Es el incremento promedio de Y cuando X=0
Coeficiente B1 = 2.106 Es el incremento promedio de Y para cada valor de X
Incremento medio estimado de PAS al tener un valor QUET de 0 Incremento medio estimado de PAS estimado para cada unidad de QUET
Las pruebas de hipótesis: H0=B1 = 0 (No hay asociación entre X e Y) H1=B1 != 0 (Existe asociación entre X e Y)
Superior a 9,18 f tabla para 0,5%
anova(RegModel)
## Analysis of Variance Table
##
## Response: pas$PAS
## Df Sum Sq Mean Sq F value Pr(>F)
## pas$QUET 1 3549.8 3549.8 37.028 1.1e-06 ***
## Residuals 30 2876.1 95.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(RegModel)
## Analysis of Variance Table
##
## Response: pas$PAS
## Df Sum Sq Mean Sq F value Pr(>F)
## pas$QUET 1 3549.8 3549.8 37.028 1.1e-06 ***
## Residuals 30 2876.1 95.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sqm <- anova(RegModel)[1,2]
sqr <- anova(RegModel)[2,2]
sqt <- sqm + sqr
sqm; sqr; sqt
## [1] 3549.849
## [1] 2876.12
## [1] 6425.969
R2 <- sqm / sqt
R2
## [1] 0.5524224
sqrt(R2) #&& e o coeficiente de regresao
## [1] 0.7432512
Mide la proporción de la variabilidad total explicada por el modelo, es el poder explicativo del modelo. La proporción de la variabilidad total explicada por el modelo es del 55,24%
mean(residuals(RegModel))
## [1] -6.123574e-16
sd(residuals(RegModel))
## [1] 9.632136
respad <- (residuals(RegModel) - mean(residuals(RegModel))) / sd(residuals(RegModel))
respad
## 1 2 3 4 5 6
## 0.30907242 -1.91542155 -0.86615720 -0.30968083 1.23237151 -0.09513103
## 7 8 9 10 11 12
## 1.36249830 1.15486004 2.33700120 1.04412937 1.34035216 -2.00400609
## 13 14 15 16 17 18
## -0.76924961 -1.12916079 -0.70281121 -0.01345804 0.25370708 0.59838367
## 19 20 21 22 23 24
## -0.34706156 -0.27646163 0.11666875 -0.72495734 -0.35813462 -0.87723027
## 25 26 27 28 29 30
## 0.66898360 -0.43980762 -1.02950319 -0.84401106 0.82125652 1.09949471
## 31 32
## -0.33182696 0.69529126
library(car)
qqPlot(respad)
## [1] 9 12
En la gráfica de comparación de cuantiles se observa una distribución de los residuos entre 1 y 2 a excepción del valor 9 y 12
par(mfrow=c(2,2))
plot(RegModel)
PAS ~ AGE Presión arterial sistólica en función de la edad
RegModel <- lm(pas$PAS~pas$AGE)
plot(pas$PAS ~ pas$AGE, xlab="Edad", ylab="PAS", col="blue")
abline(RegModel, col="red")
cor(pas$PAS,pas$AGE)
## [1] 0.7752041
Gráficamente se puede observar una distribución de puntos concentrados en lo que podría ser una línea recta, mostrando linealidad en la distribución de los datos.
El coeficiente de correlación es alto 0.77.
Pruebas de hipótesis
el modelo a ajustar incluirá la variable dependiente PAS y la variable independiente AGE
los supuestos del modelo incluyen: Distribución normal: Graficando los cuantiles de cada variable contra ellos (QQPLOT) es posible observar una distribución normal.
A través de la prueba de adherencia de Shapiro-Wilk se contrasta la hipótesis de que los datos son normales.
qqplot(pas$PAS,pas$AGE, dist="norm")
## Warning in plot.window(...): "dist" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dist" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "dist" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "dist" is not a
## graphical parameter
## Warning in box(...): "dist" is not a graphical parameter
## Warning in title(...): "dist" is not a graphical parameter
shapiro.test(pas$PAS)
##
## Shapiro-Wilk normality test
##
## data: pas$PAS
## W = 0.97199, p-value = 0.5561
Independencia de valores De acuerdo con la información de la recolección de datos, los valores son independientes entre sí, según diferentes pacientes.
linealidad Hay una linealidad en la distribución de los datos.
RegModel <- lm(pas$PAS~pas$AGE)
summary(RegModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$AGE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.548 -6.990 -2.481 5.765 23.892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.0916 12.8163 4.611 6.98e-05 ***
## pas$AGE 1.6045 0.2387 6.721 1.89e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.245 on 30 degrees of freedom
## Multiple R-squared: 0.6009, Adjusted R-squared: 0.5876
## F-statistic: 45.18 on 1 and 30 DF, p-value: 1.894e-07
Coeficientes Beta0 = 59,0916. Es el incremento promedio de Y cuando X=0
Coeficiente B1 = 1.605 Es el incremento promedio de Y para cada valor de X
Incremento de PAS medio estimado al tener un valor de EDAD de 0
Incremento medio estimado de la PAS para cada valor de EDAD
H1=B1 != 0 (Existe asociación entre X e Y)
anova(RegModel)
## Analysis of Variance Table
##
## Response: pas$PAS
## Df Sum Sq Mean Sq F value Pr(>F)
## pas$AGE 1 3861.6 3861.6 45.177 1.894e-07 ***
## Residuals 30 2564.3 85.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(RegModel)
## 2.5 % 97.5 %
## (Intercept) 32.917327 85.265923
## pas$AGE 1.116977 2.092023
anova(RegModel)
## Analysis of Variance Table
##
## Response: pas$PAS
## Df Sum Sq Mean Sq F value Pr(>F)
## pas$AGE 1 3861.6 3861.6 45.177 1.894e-07 ***
## Residuals 30 2564.3 85.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sqm <- anova(RegModel)[1,2]
sqr <- anova(RegModel)[2,2]
sqt <- sqm + sqr
sqm; sqr; sqt
## [1] 3861.63
## [1] 2564.338
## [1] 6425.969
R2 <- sqm / sqt
R2
## [1] 0.6009414
sqrt(R2) #&& e o coeficiente de regresao
## [1] 0.7752041
Mide la proporción de la variabilidad total explicada por el modelo, es el poder explicativo del modelo. La proporción de la variabilidad total explicada por el modelo es del 60,1%
mean(residuals(RegModel))
## [1] 3.365364e-16
sd(residuals(RegModel))
## [1] 9.095086
respad <- (residuals(RegModel) - mean(residuals(RegModel))) / sd(residuals(RegModel))
respad
## 1 2 3 4 5 6
## 0.40745903 -0.31622845 -0.84794414 0.60190469 0.02917784 -0.60506573
## 7 8 9 10 11 12
## 0.72988588 2.62695422 1.57341829 2.00332076 1.34609774 -0.32117618
## 13 14 15 16 17 18
## -1.07526469 -1.20324588 -0.63051904 -0.58456016 0.80129807 1.00069145
## 19 20 21 22 23 24
## -1.70950826 -0.76344796 0.11614788 -0.89637688 -0.78395354 -0.80445912
## 25 26 27 28 29 30
## 0.35902628 -0.45163123 -0.88895529 -0.22925841 0.09069458 1.08023990
## 31 32
## -0.72243680 0.06771514
library(car)
qqPlot(respad)
## [1] 8 10
En la tabla de comparación cuantil se observa una distribución de residuos mayor a a1 y 2, a excepción del valor 10, 8.
par(mfrow=c(2,2))
plot(RegModel)
• 50 años.
• COMQUET=30.
• fumador
• no fumador
regModel <- lm(pas$PAS ~ pas$QUET + pas$AGE)
Intervalo de confianza para los parámetros del modelo:
confint(regModel)
## 2.5 % 97.5 %
## (Intercept) 30.9910545 81.802966
## pas$QUET -0.1080414 2.042265
## pas$AGE 0.2536203 1.823859
Intervalo de confianza para E(Y)
nd <- data.frame( PAS = 139:170 )
pr.co <- predict( lm(pas$PAS ~ pas$QUET + pas$AGE), newdata=nd, interval="conf" )
pr.co
## fit lwr upr
## 1 130.2194 125.0418 135.3970
## 2 129.9329 121.2674 138.5984
## 3 137.2757 133.4580 141.0934
## 4 146.1946 140.9665 151.4227
## 5 140.5352 133.9105 147.1599
## 6 131.3298 125.7242 136.9353
## 7 153.5374 148.5852 158.4896
## 8 141.0725 134.2240 147.9211
## 9 124.3451 116.3110 132.3792
## 10 167.3635 158.7377 175.9893
## 11 154.4329 150.1649 158.7009
## 12 148.0572 139.4313 156.6831
## 13 162.5279 156.4814 168.5745
## 14 149.3825 145.8812 152.8837
## 15 146.3379 143.0326 149.6431
## 16 136.3802 131.5491 141.2113
## 17 139.2099 135.0733 143.3466
## 18 133.1924 128.4951 137.8896
## 19 145.5856 138.9605 152.2107
## 20 147.4482 143.6032 151.2933
## 21 149.3825 145.8812 152.8837
## 22 152.4270 148.4994 156.3546
## 23 142.3978 138.6463 146.1493
## 24 139.2816 135.7200 142.8431
## 25 144.4036 140.8449 147.9623
## 26 135.2699 131.1100 139.4297
## 27 127.1748 121.2948 133.0548
## 28 129.1091 123.2060 135.0121
## 29 158.5879 152.7179 164.4578
## 30 161.4892 155.7033 167.2751
## 31 158.5162 153.2951 163.7374
## 32 162.5996 156.1499 169.0492
Intervalo de predicción
pr.pr <- predict( lm(pas$PAS ~ pas$QUET + pas$AGE), newdata=nd, interval="pred" )
pr.pr
## fit lwr upr
## 1 130.2194 111.2977 149.1411
## 2 129.9329 109.7757 150.0902
## 3 137.2757 118.6800 155.8714
## 4 146.1946 127.2590 165.1302
## 5 140.5352 121.1674 159.9030
## 6 131.3298 112.2865 150.3731
## 7 153.5374 134.6761 172.3987
## 8 141.0725 121.6271 160.5180
## 9 124.3451 104.4511 144.2391
## 10 167.3635 147.2233 187.5037
## 11 154.4329 135.7396 173.1262
## 12 148.0572 127.9169 168.1975
## 13 162.5279 143.3502 181.7057
## 14 149.3825 130.8492 167.9157
## 15 146.3379 127.8406 164.8351
## 16 136.3802 117.5504 155.2101
## 17 139.2099 120.5462 157.8737
## 18 133.1924 114.3964 151.9883
## 19 145.5856 126.2177 164.9535
## 20 147.4482 128.8469 166.0495
## 21 149.3825 130.8492 167.9157
## 22 152.4270 133.8085 171.0456
## 23 142.3978 123.8156 160.9800
## 24 139.2816 120.7368 157.8263
## 25 144.4036 125.8594 162.9479
## 26 135.2699 116.6009 153.9388
## 27 127.1748 108.0490 146.3007
## 28 129.1091 109.9761 148.2420
## 29 158.5879 139.4651 177.7106
## 30 161.4892 142.3921 180.5863
## 31 158.5162 139.5825 177.4499
## 32 162.5996 143.2910 181.9082
149.38 (130.849-167.91)
plotando matlines
respad <- (residuals(regModel) - mean(residuals(regModel))) / sd(residuals(regModel))
respad
## 1 2 3 4 5 6
## 0.55544785 -0.92171239 -0.84535403 0.20976605 0.63494759 -0.27069465
## 7 8 9 10 11 12
## 0.98325500 2.19915260 2.28366923 1.46821680 1.34396390 -1.16853032
## 13 14 15 16 17 18
## -1.22322343 -1.32250926 -0.73638613 -0.27655546 0.67273810 1.02334298
## 19 20 21 22 23 24
## -1.22992817 -0.63302115 0.07175195 -0.97912492 -0.62716034 -0.84603389
## 25 26 27 28 29 30
## 0.53404415 -0.37992044 -0.83363240 -0.36123617 0.28026358 0.98885793
## 31 32
## -0.75710998 0.16271543
qqPlot(respad)
## [1] 9 8
No fumadores: 56.3970+300.9671+501.0387 137.345
regModel <- lm(pas$PAS ~ pas$QUET + pas$AGE + factor(pas$SMK))
Fumadores: 45.9977+300.8633+501.2007+9.9647 141.89
confint(lm(pas\(PAS ~ pas\)QUET + pas\(AGE + factor(pas\)SMK)))
plotando matlines
respad <- (residuals(regModel) - mean(residuals(regModel))) / sd(residuals(regModel))
respad
## 1 2 3 4 5 6
## 1.53980813 -0.12204268 -0.22797638 1.08686187 0.02282418 -0.95685844
## 7 8 9 10 11 12
## 0.41538836 2.18544828 2.18923116 1.06634452 0.91093890 -1.95914454
## 13 14 15 16 17 18
## -0.89051463 -0.90153601 -1.57194053 -1.00378010 0.24397206 0.69933115
## 19 20 21 22 23 24
## -0.88499083 -0.08466114 -0.61121592 -0.51138468 -0.03773948 -0.23711943
## 25 26 27 28 29 30
## -0.04184907 -1.06965978 -0.13413306 -0.94597448 0.93400651 0.42694424
## 31 32
## -0.30165526 0.77307710
qqPlot(respad)
## [1] 9 8
| #V) Opinión —- |
|---|
| # VI) Ajustar un modelo de regresión lineal múltiple —- #AS ~ QET + EDAD + factor(SMK) Presión arterial sistólica en función del índice, edad y tabaquismo —- modelo múltiple |
r RegModel <- lm(pas$PAS~pas$QUET + pas$AGE) par(mfrow = c(2,2)) plot(RegModel) |
r RegModel <- lm(pas$PAS~pas$QUET + pas$AGE + factor(pas$SMK)) par(mfrow = c(2,2)) plot(RegModel) |
r cor(pas$PAS,pas$AGE) |
## [1] 0.7752041 a) Gráficamente e posível observar uma distribução dos pontos concentrados no que podería ser uma reta, evidenciandose linearidade na distribução dos dados. |
| b) O coeficiente de correlação e alto 0,77. |
| c) Os testes de hipoteses |
| d) o modelo a ser ajustado vai incluir a variavel dependente PAS e a variável independente AGE |
| e) as suposições do modelo incluem: Distribução normal: Ao plotar os quantils de cada variavei contra elas (QQPLOT) é posível observar uma distribução normal. |
| A través do teste de adherencia Shapiro-Wilk se testa a hipótese que os dados são normais. |
r qqplot(pas$PAS,pas$AGE, dist="norm") |
## Warning in plot.window(...): "dist" is not a graphical parameter |
## Warning in plot.xy(xy, type, ...): "dist" is not a graphical parameter |
| ``` ## Warning in axis(side = side, at = at, labels = labels, …): “dist” is not a ## graphical parameter |
| ## Warning in axis(side = side, at = at, labels = labels, …): “dist” is not a ## graphical parameter ``` |
## Warning in box(...): "dist" is not a graphical parameter |
## Warning in title(...): "dist" is not a graphical parameter |
r shapiro.test(pas$PAS) |
## ## Shapiro-Wilk normality test ## ## data: pas$PAS ## W = 0.97199, p-value = 0.5561 Independencia dos valores De acordo con as informações da coleta de dados os valores são independentes uns con outros obedecendo a pacientes diferentes. |
| Linearidade |
| Homocedasticidade |
| f) Modelo ajustado PAS em função do AGE |
r RegModel <- lm(pas$PAS~pas$AGE + factor(pas$SMK)) summary(RegModel) |
## ## Call: ## lm(formula = pas$PAS ~ pas$AGE + factor(pas$SMK)) ## ## Residuals: ## Min 1Q Median 3Q Max ## -10.639 -5.518 -1.637 4.900 19.616 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 48.0496 11.1296 4.317 0.000168 *** ## pas$AGE 1.7092 0.2018 8.471 2.47e-09 *** ## factor(pas$SMK)1 10.2944 2.7681 3.719 0.000853 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 7.738 on 29 degrees of freedom ## Multiple R-squared: 0.7298, Adjusted R-squared: 0.7112 ## F-statistic: 39.16 on 2 and 29 DF, p-value: 5.746e-09 Coeficientes Beta0 = 59,0916. Ele é o incremento medio de Y quando X=0 |
| Coeficiente B1 = 1.605 Ele é o incremento medio de Y por cada valor de X |
| g) Interprete os coeficientes estimados |
| Incremento médio estimado da PAS ao ter um valor AGE de 0 |
| Incremento médio estimado da PAS por cada valor de AGE |
| h) Construa a tabela de ANOVA, especifique e teste as hipóteses correspondentes, utilizando um teste F Os testes de hipótese: H0=B1 = 0 (no existe associação entre X e Y) |
| H1=B1 != 0 (Existe associação entre X e Y) |
| Un valor grande do quociente dos QMM/QMR pŕoximo de 1 indica que H0 e verdadeira Maior a 9.18 tabela f para 0.5% |
r anova(RegModel) |
## Analysis of Variance Table ## ## Response: pas$PAS ## Df Sum Sq Mean Sq F value Pr(>F) ## pas$AGE 1 3861.6 3861.6 64.498 7.407e-09 *** ## factor(pas$SMK) 1 828.1 828.1 13.830 0.0008532 *** ## Residuals 29 1736.3 59.9 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 |
| i) Teste novamente a hipótese do item anterior, porém utilizando um teste “t”. Comparar os resultados e comente |
| j) Construa o intervalo de confiança de 95% para a inclinação da reta. |
r confint(RegModel) |
## 2.5 % 97.5 % ## (Intercept) 25.287098 70.812108 ## pas$AGE 1.296517 2.121803 ## factor(pas$SMK)1 4.632978 15.955806 |
| k) Obtenha o coeficiente de determinação do modelo e interprete-o |
r anova(RegModel) |
## Analysis of Variance Table ## ## Response: pas$PAS ## Df Sum Sq Mean Sq F value Pr(>F) ## pas$AGE 1 3861.6 3861.6 64.498 7.407e-09 *** ## factor(pas$SMK) 1 828.1 828.1 13.830 0.0008532 *** ## Residuals 29 1736.3 59.9 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 |
| ```r sqm <- anova(RegModel)[1,2] sqr <- anova(RegModel)[2,2] sqt <- sqm + sqr |
| sqm; sqr; sqt ``` |
## [1] 3861.63 |
## [1] 828.0539 |
## [1] 4689.684 |
r R2 <- sqm / sqt R2 |
## [1] 0.8234308 |
r sqrt(R2) #&& e o coeficiente de regresao |
## [1] 0.9074309 Ele mede a proporcção da variabilidade total explicada pelo modelo, é o poder explicativo do modelo. A proporção da variabilidade total explicada pelo modelo é de 60.1% |
| l) Faça uma análise de resíduos. Você acha que o modelo está bem ajustado? Porquê? |
r mean(residuals(RegModel)) |
## [1] -7.632783e-16 |
r sd(residuals(RegModel)) |
## [1] 7.483928 |
r respad <- (residuals(RegModel) - mean(residuals(RegModel))) / sd(residuals(RegModel)) respad |
## 1 2 3 4 5 6 7 ## 1.3413027 0.5177564 -0.2403051 1.4797170 -0.6198103 -1.2927033 0.1478403 ## 8 9 10 11 12 13 14 ## 2.6211292 1.3967238 1.6394850 0.9106963 -1.0036357 -0.7263326 -0.7699892 ## 15 16 17 18 19 20 21 ## -1.4215284 -1.3097370 0.3884568 0.6727298 -1.3992255 -0.2355105 -0.5420863 ## 22 23 24 25 26 27 28 ## -0.4250258 -0.2184768 -0.2014431 -0.2189513 -1.1202216 -0.2062377 -0.7800529 ## 29 30 31 32 ## 0.7046218 0.5316656 -0.2695779 0.6487261 |
r library(car) qqPlot(respad) |
## [1] 8 10 No gráfico de comparação de quantiles se observa uma distribução dos residuos maior a1 e 2 a excepção do valor 10, 8. |
r par(mfrow=c(2,2)) plot(RegModel) |
• com 50anos.
• comQUET=30.
• fumante
• não fumante
Em cada situação, teste a hipótese de que o valor esperado é igual a 150.
regModel <- lm(pas$PAS ~ pas$QUET + pas$AGE)
Intervalo de confiança para os parâmetros do modelo:
confint(regModel)
## 2.5 % 97.5 %
## (Intercept) 30.9910545 81.802966
## pas$QUET -0.1080414 2.042265
## pas$AGE 0.2536203 1.823859
Intervalo de confiança para E(Y)
nd <- data.frame( PAS = 139:170 )
pr.co <- predict( lm(pas$PAS ~ pas$QUET + pas$AGE), newdata=nd, interval="conf" )
pr.co
## fit lwr upr
## 1 130.2194 125.0418 135.3970
## 2 129.9329 121.2674 138.5984
## 3 137.2757 133.4580 141.0934
## 4 146.1946 140.9665 151.4227
## 5 140.5352 133.9105 147.1599
## 6 131.3298 125.7242 136.9353
## 7 153.5374 148.5852 158.4896
## 8 141.0725 134.2240 147.9211
## 9 124.3451 116.3110 132.3792
## 10 167.3635 158.7377 175.9893
## 11 154.4329 150.1649 158.7009
## 12 148.0572 139.4313 156.6831
## 13 162.5279 156.4814 168.5745
## 14 149.3825 145.8812 152.8837
## 15 146.3379 143.0326 149.6431
## 16 136.3802 131.5491 141.2113
## 17 139.2099 135.0733 143.3466
## 18 133.1924 128.4951 137.8896
## 19 145.5856 138.9605 152.2107
## 20 147.4482 143.6032 151.2933
## 21 149.3825 145.8812 152.8837
## 22 152.4270 148.4994 156.3546
## 23 142.3978 138.6463 146.1493
## 24 139.2816 135.7200 142.8431
## 25 144.4036 140.8449 147.9623
## 26 135.2699 131.1100 139.4297
## 27 127.1748 121.2948 133.0548
## 28 129.1091 123.2060 135.0121
## 29 158.5879 152.7179 164.4578
## 30 161.4892 155.7033 167.2751
## 31 158.5162 153.2951 163.7374
## 32 162.5996 156.1499 169.0492
149.38 (145.88-152.88)
Intervalo de predição para Y
pr.pr <- predict( lm(pas$PAS ~ pas$QUET + pas$AGE), newdata=nd, interval="pred" )
pr.pr
## fit lwr upr
## 1 130.2194 111.2977 149.1411
## 2 129.9329 109.7757 150.0902
## 3 137.2757 118.6800 155.8714
## 4 146.1946 127.2590 165.1302
## 5 140.5352 121.1674 159.9030
## 6 131.3298 112.2865 150.3731
## 7 153.5374 134.6761 172.3987
## 8 141.0725 121.6271 160.5180
## 9 124.3451 104.4511 144.2391
## 10 167.3635 147.2233 187.5037
## 11 154.4329 135.7396 173.1262
## 12 148.0572 127.9169 168.1975
## 13 162.5279 143.3502 181.7057
## 14 149.3825 130.8492 167.9157
## 15 146.3379 127.8406 164.8351
## 16 136.3802 117.5504 155.2101
## 17 139.2099 120.5462 157.8737
## 18 133.1924 114.3964 151.9883
## 19 145.5856 126.2177 164.9535
## 20 147.4482 128.8469 166.0495
## 21 149.3825 130.8492 167.9157
## 22 152.4270 133.8085 171.0456
## 23 142.3978 123.8156 160.9800
## 24 139.2816 120.7368 157.8263
## 25 144.4036 125.8594 162.9479
## 26 135.2699 116.6009 153.9388
## 27 127.1748 108.0490 146.3007
## 28 129.1091 109.9761 148.2420
## 29 158.5879 139.4651 177.7106
## 30 161.4892 142.3921 180.5863
## 31 158.5162 139.5825 177.4499
## 32 162.5996 143.2910 181.9082
149.38 (130.849-167.91)
plotando matlines
respad <- (residuals(regModel) - mean(residuals(regModel))) / sd(residuals(regModel))
respad
## 1 2 3 4 5 6
## 0.55544785 -0.92171239 -0.84535403 0.20976605 0.63494759 -0.27069465
## 7 8 9 10 11 12
## 0.98325500 2.19915260 2.28366923 1.46821680 1.34396390 -1.16853032
## 13 14 15 16 17 18
## -1.22322343 -1.32250926 -0.73638613 -0.27655546 0.67273810 1.02334298
## 19 20 21 22 23 24
## -1.22992817 -0.63302115 0.07175195 -0.97912492 -0.62716034 -0.84603389
## 25 26 27 28 29 30
## 0.53404415 -0.37992044 -0.83363240 -0.36123617 0.28026358 0.98885793
## 31 32
## -0.75710998 0.16271543
qqPlot(respad)
## [1] 9 8
Não fumante: 56.3970+300.9671+501.0387 137.345
regModel <- lm(pas$PAS ~ pas$QUET + pas$AGE + factor(pas$SMK))
summary(regModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$QUET + pas$AGE + factor(pas$SMK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.7346 -6.2139 -0.7245 5.0319 15.3476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.9977 10.6607 4.315 0.000180 ***
## pas$QUET 0.8633 0.4366 1.977 0.057935 .
## pas$AGE 1.2007 0.3211 3.739 0.000841 ***
## factor(pas$SMK)1 9.9647 2.6442 3.769 0.000779 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.377 on 28 degrees of freedom
## Multiple R-squared: 0.7629, Adjusted R-squared: 0.7375
## F-statistic: 30.03 on 3 and 28 DF, p-value: 6.783e-09
Fumante: 45.9977+300.8633+501.2007+9.9647 141.89
confint(lm(pas\(PAS ~ pas\)QUET + pas\(AGE + factor(pas\)SMK)))
plotando matlines
respad <- (residuals(regModel) - mean(residuals(regModel))) / sd(residuals(regModel))
respad
## 1 2 3 4 5 6
## 1.53980813 -0.12204268 -0.22797638 1.08686187 0.02282418 -0.95685844
## 7 8 9 10 11 12
## 0.41538836 2.18544828 2.18923116 1.06634452 0.91093890 -1.95914454
## 13 14 15 16 17 18
## -0.89051463 -0.90153601 -1.57194053 -1.00378010 0.24397206 0.69933115
## 19 20 21 22 23 24
## -0.88499083 -0.08466114 -0.61121592 -0.51138468 -0.03773948 -0.23711943
## 25 26 27 28 29 30
## -0.04184907 -1.06965978 -0.13413306 -0.94597448 0.93400651 0.42694424
## 31 32
## -0.30165526 0.77307710
qqPlot(respad)
## [1] 9 8
| # V) Opinião —- |
| Na sua opinião,o modelos univariados estimados são bons? Porquê? Qual seria sua conduta em relação a eles? |
| Os modelos univariados permitem dar a conhecer a interação de cada variavel independente com a dependente, pemitindo um conhecimento mais amplo da sua interação individual. Sendo mais fãcil analizar posíveis relações causais. |
#AS ~ QET + AGE + factor(SMK) Presão arterial sistólica em função do indice, edade e fumo —- Modelo múltiplo
pas <- read.csv(file="PAS.csv")
RegModel <- lm(pas$PAS~pas$QUET + pas$AGE)
par(mfrow = c(2,2))
plot(RegModel)
pas <- read.csv(file="PAS.csv")
RegModel <- lm(pas$PAS~pas$QUET + pas$AGE + factor(pas$SMK))
par(mfrow = c(2,2))
plot(RegModel)
cor(pas$PAS,pas$AGE)
## [1] 0.7752041
summary(RegModel)
##
## Call:
## lm(formula = pas$PAS ~ pas$QUET + pas$AGE + factor(pas$SMK))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.7346 -6.2139 -0.7245 5.0319 15.3476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.9977 10.6607 4.315 0.000180 ***
## pas$QUET 0.8633 0.4366 1.977 0.057935 .
## pas$AGE 1.2007 0.3211 3.739 0.000841 ***
## factor(pas$SMK)1 9.9647 2.6442 3.769 0.000779 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.377 on 28 degrees of freedom
## Multiple R-squared: 0.7629, Adjusted R-squared: 0.7375
## F-statistic: 30.03 on 3 and 28 DF, p-value: 6.783e-09
par(mfrow=c(2,2))
plot(RegModel)