# Load the example data and fit a simple linear regression of y on x.
# NOTE(review): read.csv2() assumes ';' separators and ',' decimal marks --
# confirm that matches the layout of out2.csv.
out2 <- read.csv2("out2.csv")
fit <- lm(y ~ x, out2)

# Draw the default set of lm diagnostic plots in a 2 x 2 grid, then restore
# the previous graphics settings so that later single plots are not squeezed
# into one cell of the grid.
old_par <- par(mfrow = c(2, 2))
plot(fit)
par(old_par)

# Residuals vs fitted values for the full model.
plot(fit, which = 1)
Now we remove the outlier (the first observation) and refit the model.
# Same model, fitted with the first row of the data left out.
fitno <- lm(y ~ x, data = out2[-1, ])
# Residuals vs fitted values for the reduced model.
plot(fitno, which = 1)
# Per-observation effect on each coefficient of the full fit (first rows only).
head(dfbeta(fit), n = 6L)
## (Intercept) x
## 1 -0.011678662 -0.5336301857
## 2 0.008636967 0.0045759242
## 3 0.010323864 -0.0003509441
## 4 0.003122096 -0.0033664451
## 5 0.001975966 -0.0008297575
## 6 0.002230518 -0.0005867041
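A measure of influence, sometimes called leverage and sometimes the hat value. For the first observation it equals one minus the ratio of its residual in the full fit to its residual in the fit that excludes it: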
# Residual of observation 1 relative to the model fitted without it.
resno <- out2[["y"]][1] - predict(fitno, newdata = out2[1, ])
# One minus (residual with the point included) / (residual with it excluded).
1 - resid(fit)[1] / resno
## 1
## 0.6311547
# Hat values of the full fit as reported by R (first six observations).
head(hatvalues(fit), n = 6L)
## 1 2 3 4 5 6
## 0.63115474 0.02324999 0.01962520 0.04326099 0.02255531 0.02071441
# Residual standard error of the full fit: sqrt(deviance / residual df).
sigma <- sqrt(deviance(fit) / df.residual(fit))
# Standardized residuals by hand: each residual divided by
# sigma * sqrt(1 - hat value).
rstd <- resid(fit) / (sigma * sqrt(1 - hatvalues(fit)))
# The built-in function for the same quantity.
rstandard(fit)
## 1 2 3 4 5
## -5.1928155525 0.9389600529 1.0450409352 0.2682743157 0.1893339106
## 6 7 8 9 10
## 0.2186961487 0.1720367742 0.8163204011 -0.4094964575 -1.2986555344
## 11 12 13 14 15
## 0.4229086603 -0.3836031428 2.0665817003 0.5208478049 -1.1529678925
## 16 17 18 19 20
## -0.3169371876 1.0486057341 0.0209237801 -1.5771373333 -0.6240192264
## 21 22 23 24 25
## -0.0029990747 -1.6609776388 0.3022706179 -0.5904468677 -1.6779775927
## 26 27 28 29 30
## -0.3160648106 1.9016099815 0.6456188040 0.0111124905 0.3452941661
## 31 32 33 34 35
## -0.3727240529 -0.5232628803 -0.1419522727 -0.7285161583 -0.5150214818
## 36 37 38 39 40
## -0.0304921562 1.0425440866 0.5312717811 0.8112964294 -0.0007805723
## 41 42 43 44 45
## 1.3655108902 -0.4208093576 -0.2280769529 0.0760031321 -0.3029951521
## 46 47 48 49 50
## -0.9158295310 0.4075785949 -0.5479516500 -0.5784869852 2.8085070283
## 51
## 0.5158536369
# Scale-Location plot for the full model.
plot(fit, which = 3)
Most of the diagnostic statistics under discussion were developed because of perceived shortcomings of other diagnostics and because their distributions under a null hypothesis could be characterized. The assumption that residuals are approximately normal is implicit in such characterizations. Since standardized residuals adjust for individual residual variances, a QQ plot of standardized residuals against normal with constant variance is of interest.
# Normal Q-Q plot of the standardized residuals.
plot(fit, which = 2)
# Residual standard error of the model fitted without observation 1.
sigma1 <- sqrt(deviance(fitno) / df.residual(fitno))
# Studentized residual of observation 1 by hand: its full-fit residual scaled
# by the leave-one-out sigma and its own hat value.
resid(fit)[1] / (sigma1 * sqrt(1 - hatvalues(fit)[1]))
## 1
## -7.664261
# The built-in function for studentized residuals (first six observations).
head(rstudent(fit), n = 6L)
## 1 2 3 4 5 6
## -7.6642608 0.9378046 1.0460451 0.2657179 0.1874606 0.2165588
# Cook's distance for observation 1, computed by hand: the summed squared
# change in every fitted value when observation 1 is dropped, scaled by
# p * sigma^2, where p is the number of estimated coefficients and sigma is
# the residual standard error of the full fit (computed earlier in this script).
dy <- predict(fitno, out2) - predict(fit, out2)
# fit$rank replaces the hard-coded 2 so the scaling stays correct if terms are
# added to or removed from the model.
sum(dy^2) / (fit$rank * sigma^2)
## [1] 23.07105
# The built-in function for Cook's distance, first observation only.
cooks.distance(fit)[1L]
## 1
## 23.07105
# Residuals vs Leverage plot for the full model.
plot(fit, which = 5)