Cargar las librerías requeridas

rm(list=ls(all=TRUE)) 
library(survival)
library(KMsurv)
library(survMisc)
library(survminer)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:survMisc':
## 
##     autoplot
## Loading required package: ggpubr
## Loading required package: magrittr
library(flexsurv)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Leer los datos y resumen de estadísticas básicas

setwd("G:/UAAAN/MATERIAS/2019/REGRESION APLICADA/LECCION 7") 
mis_datos<-read.csv("inter.csv", header = T) 
attach(mis_datos)
head(mis_datos)
##      X   Y
## 1  1.6 0.4
## 2  1.5 0.0
## 3 14.4 2.9
## 4 15.5 0.2
## 5 10.3 4.3
## 6  4.0 0.8
summary(mis_datos)
##        X               Y        
##  Min.   : 0.70   Min.   :0.000  
##  1st Qu.: 1.60   1st Qu.:0.400  
##  Median :10.30   Median :1.600  
##  Mean   :11.44   Mean   :1.938  
##  3rd Qu.:17.50   3rd Qu.:2.900  
##  Max.   :32.50   Max.   :6.300

Figura de dispersión y prueba de normalidad

plot(X, Y, xlab="Precipitaci昼㸳n total (cm)", ylab="Precipitaci昼㸳n neta (cm)", pch=1)
abline(lm(Y~X), col="red")

#prueba de normalidad
ggqqplot(X, ylab = "X", conf.int = TRUE, conf.int.level = 0.05, color = "green")

ggqqplot(Y, ylab = "y", conf.int = TRUE, conf.int.level = 0.05, color = "red")

Cálculo del Coeficiente de correlación Pearson

r=cor(X,Y); r
## [1] 0.7592156
#Prueba de hip昼㸳tesis H0:rho=0 
cor.test(X, Y, alternative="two.sided", method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  X and Y
## t = 3.8689, df = 11, p-value = 0.002613
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3579788 0.9237718
## sample estimates:
##       cor 
## 0.7592156

El modelo de regresión lineal

#LA REGRESION
  n<-length(X)
  # Grafica
  plot(X, Y,ylab="Precipitaci昼㸳n BC (mm", xlab="Precipitaci昼㸳n total (mm)", main="Diagrama de dispersi昼㸳n" )

# Estimaci昼㸳n de par攼㸱metros y prueba de hip昼㸳tesis
fit1<-lm(Y~X)
summary(fit1)
## 
## Call:
## lm(formula = Y ~ X)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34540 -0.45330 -0.02689  0.51898  2.53167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.22914    0.57616   0.398  0.69846   
## X            0.14944    0.03862   3.869  0.00261 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.333 on 11 degrees of freedom
## Multiple R-squared:  0.5764, Adjusted R-squared:  0.5379 
## F-statistic: 14.97 on 1 and 11 DF,  p-value: 0.002613
abline(fit1,col="red")

# Podemos obtener el valor predicho de Y para cada valor de X
fitted <- predict(lm(Y ~ X))
fitted 
##         1         2         3         4         5         6         7 
## 0.4682387 0.4532951 2.3810227 2.5454026 1.7683341 0.8268857 2.8442751 
##         8         9        10        11        12        13 
## 3.4719074 0.3935206 0.3337461 5.0858189 3.3374148 1.2901381
# Con un bucle for y la funci昼㸳n lines() podemos representar los res攼㹤duos
for(i in 1:n)
{
  lines( c(X[i],X[i]), c(Y[i], fitted[i]), col="red")
}

#Análisis de varianza del lm

anova(fit1)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## X          1 26.613  26.613  14.968 0.002613 **
## Residuals 11 19.558   1.778                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Prueba de normalidad de los residuales

fit1$residuals
##            1            2            3            4            5 
## -0.068238713 -0.453295088  0.518977251 -2.345402627  2.531665888 
##            6            7            8            9           10 
## -0.026885720  1.255724868 -1.571907394  0.006479414 -0.133746085 
##           11           12           13 
##  1.214181076 -1.237414767  0.309861897
boxplot(fit1$residuals)

# Prueba de normalidad
shapiro.test(fit1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit1$residuals
## W = 0.97344, p-value = 0.9317
hist(fit1$residuals)

ggqqplot(fit1$residuals, ylab = "Cuantiles de la muestra", xlab = "Cuantiles te昼㸳ricos",conf.int = TRUE, conf.int.level = 0.05, color = "green")

qqnorm(fit1$residuals)
qqline(fit1$residuals)

Prueba de independencia de los residuales

# Prueba de Durbin -Watson 
library(lmtest, pos=4)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
dwtest(Y ~ X, alternative="two.sided")
## 
##  Durbin-Watson test
## 
## data:  Y ~ X
## DW = 3.1703, p-value = 0.02308
## alternative hypothesis: true autocorrelation is not 0
# Figuras de distribucion de residuos 
oldpar <- par(oma=c(0,0,3,0), mfrow=c(1,1))
plot(fit1)

par(oldpar)

Prueba de Breusch-Pagan para la heteroscedasticidad

a<-bptest(Y ~ X, varformula = ~ fitted.values(fit1), studentize=FALSE); a
## 
##  Breusch-Pagan test
## 
## data:  Y ~ X
## BP = 1.6184, df = 1, p-value = 0.2033

Analisis de Observaciones influyentes y atípicas

# Residuos estandarizados
ei=fit1$residuals/summary(fit1)$sigma
boxplot(ei)

Residuos estudentizados

ri=rstudent(fit1)
boxplot(ri)
data.frame(ei,ri)
##              ei           ri
## 1  -0.051176422 -0.053188304
## 2  -0.339953963 -0.355849055
## 3   0.389213071  0.390751664
## 4  -1.758962185 -2.116370699
## 5   1.898652500  2.348314098
## 6  -0.020163260 -0.020533394
## 7   0.941745580  0.996691440
## 8  -1.178870371 -1.335476862
## 9   0.004859312  0.005075218
## 10 -0.100304444 -0.105266738
## 11  0.910589326  1.259097189
## 12 -0.928013705 -1.007537283
## 13  0.232384561  0.233247869
par(new=T)
abline(h=3, col="red");abline(h=-3, col="red")

Estimados vs observados

# Banda de confianza para E(Y/X0)
new <- data.frame(X=seq(from=min(X), to=max(X), length=X)); new
## Warning in seq.default(from = min(X), to = max(X), length = X): first
## element used of 'length.out' argument
##      X
## 1  0.7
## 2 32.5
CI95 <- predict( fit1, newdata=new, se.fit1=TRUE,interval="confidence", level=0.95)
CI95
##         fit        lwr      upr
## 1 0.3337461 -0.8893415 1.556834
## 2 5.0858189  3.1189822 7.052656
X0<-seq(min(X),max(X),length=length(Y))
pred.m<-predict(fit1,data.frame(X=X0),interval="confidence",se.fit=T)
par(mfrow=c(1,1))
matplot(X0,cbind(pred.m$fit),lty=c(1,2,2,3,3),
        col=c("black","red","red","blue","blue"),type="l",xlab="Precipitaci昼㸳n total (cm)",
        ylab="Precipitaci昼㸳n neta (cm)")
points(X,Y)