rm(list=ls(all=TRUE))
library(survival)
library(KMsurv)
library(survMisc)
library(survminer)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:survMisc':
##
## autoplot
## Loading required package: ggpubr
## Loading required package: magrittr
library(flexsurv)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd("G:/UAAAN/MATERIAS/2019/REGRESION APLICADA/LECCION 7")
mis_datos<-read.csv("inter.csv", header = T)
attach(mis_datos)
head(mis_datos)
## X Y
## 1 1.6 0.4
## 2 1.5 0.0
## 3 14.4 2.9
## 4 15.5 0.2
## 5 10.3 4.3
## 6 4.0 0.8
summary(mis_datos)
## X Y
## Min. : 0.70 Min. :0.000
## 1st Qu.: 1.60 1st Qu.:0.400
## Median :10.30 Median :1.600
## Mean :11.44 Mean :1.938
## 3rd Qu.:17.50 3rd Qu.:2.900
## Max. :32.50 Max. :6.300
plot(X, Y, xlab="Precipitaciæ¼ã¸³n total (cm)", ylab="Precipitaciæ¼ã¸³n neta (cm)", pch=1)
abline(lm(Y~X), col="red")
#prueba de normalidad
ggqqplot(X, ylab = "X", conf.int = TRUE, conf.int.level = 0.05, color = "green")
ggqqplot(Y, ylab = "y", conf.int = TRUE, conf.int.level = 0.05, color = "red")
r=cor(X,Y); r
## [1] 0.7592156
#Prueba de hipæ¼ã¸³tesis H0:rho=0
cor.test(X, Y, alternative="two.sided", method="pearson")
##
## Pearson's product-moment correlation
##
## data: X and Y
## t = 3.8689, df = 11, p-value = 0.002613
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3579788 0.9237718
## sample estimates:
## cor
## 0.7592156
#LA REGRESION
n<-length(X)
# Grafica
plot(X, Y,ylab="Precipitaciæ¼ã¸³n BC (mm", xlab="Precipitaciæ¼ã¸³n total (mm)", main="Diagrama de dispersiæ¼ã¸³n" )
# Estimaciæ¼ã¸³n de paræ¼ã¸±metros y prueba de hipæ¼ã¸³tesis
fit1<-lm(Y~X)
summary(fit1)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.34540 -0.45330 -0.02689 0.51898 2.53167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.22914 0.57616 0.398 0.69846
## X 0.14944 0.03862 3.869 0.00261 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.333 on 11 degrees of freedom
## Multiple R-squared: 0.5764, Adjusted R-squared: 0.5379
## F-statistic: 14.97 on 1 and 11 DF, p-value: 0.002613
abline(fit1,col="red")
# Podemos obtener el valor predicho de Y para cada valor de X
fitted <- predict(lm(Y ~ X))
fitted
## 1 2 3 4 5 6 7
## 0.4682387 0.4532951 2.3810227 2.5454026 1.7683341 0.8268857 2.8442751
## 8 9 10 11 12 13
## 3.4719074 0.3935206 0.3337461 5.0858189 3.3374148 1.2901381
# Con un bucle for y la funciæ¼ã¸³n lines() podemos representar los resæ¼ã¹¤duos
for(i in 1:n)
{
lines( c(X[i],X[i]), c(Y[i], fitted[i]), col="red")
}
#Análisis de varianza del lm
anova(fit1)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 1 26.613 26.613 14.968 0.002613 **
## Residuals 11 19.558 1.778
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit1$residuals
## 1 2 3 4 5
## -0.068238713 -0.453295088 0.518977251 -2.345402627 2.531665888
## 6 7 8 9 10
## -0.026885720 1.255724868 -1.571907394 0.006479414 -0.133746085
## 11 12 13
## 1.214181076 -1.237414767 0.309861897
boxplot(fit1$residuals)
# Prueba de normalidad
shapiro.test(fit1$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit1$residuals
## W = 0.97344, p-value = 0.9317
hist(fit1$residuals)
ggqqplot(fit1$residuals, ylab = "Cuantiles de la muestra", xlab = "Cuantiles teæ¼ã¸³ricos",conf.int = TRUE, conf.int.level = 0.05, color = "green")
qqnorm(fit1$residuals)
qqline(fit1$residuals)
# Prueba de Durbin -Watson
library(lmtest, pos=4)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
dwtest(Y ~ X, alternative="two.sided")
##
## Durbin-Watson test
##
## data: Y ~ X
## DW = 3.1703, p-value = 0.02308
## alternative hypothesis: true autocorrelation is not 0
# Figuras de distribucion de residuos
oldpar <- par(oma=c(0,0,3,0), mfrow=c(1,1))
plot(fit1)
par(oldpar)
a<-bptest(Y ~ X, varformula = ~ fitted.values(fit1), studentize=FALSE); a
##
## Breusch-Pagan test
##
## data: Y ~ X
## BP = 1.6184, df = 1, p-value = 0.2033
# Residuos estandarizados
ei=fit1$residuals/summary(fit1)$sigma
boxplot(ei)
ri=rstudent(fit1)
boxplot(ri)
data.frame(ei,ri)
## ei ri
## 1 -0.051176422 -0.053188304
## 2 -0.339953963 -0.355849055
## 3 0.389213071 0.390751664
## 4 -1.758962185 -2.116370699
## 5 1.898652500 2.348314098
## 6 -0.020163260 -0.020533394
## 7 0.941745580 0.996691440
## 8 -1.178870371 -1.335476862
## 9 0.004859312 0.005075218
## 10 -0.100304444 -0.105266738
## 11 0.910589326 1.259097189
## 12 -0.928013705 -1.007537283
## 13 0.232384561 0.233247869
par(new=T)
abline(h=3, col="red");abline(h=-3, col="red")
# Banda de confianza para E(Y/X0)
new <- data.frame(X=seq(from=min(X), to=max(X), length=X)); new
## Warning in seq.default(from = min(X), to = max(X), length = X): first
## element used of 'length.out' argument
## X
## 1 0.7
## 2 32.5
CI95 <- predict( fit1, newdata=new, se.fit1=TRUE,interval="confidence", level=0.95)
CI95
## fit lwr upr
## 1 0.3337461 -0.8893415 1.556834
## 2 5.0858189 3.1189822 7.052656
X0<-seq(min(X),max(X),length=length(Y))
pred.m<-predict(fit1,data.frame(X=X0),interval="confidence",se.fit=T)
par(mfrow=c(1,1))
matplot(X0,cbind(pred.m$fit),lty=c(1,2,2,3,3),
col=c("black","red","red","blue","blue"),type="l",xlab="Precipitaciæ¼ã¸³n total (cm)",
ylab="Precipitaciæ¼ã¸³n neta (cm)")
points(X,Y)