response= mean across level of predictor
sd(response_i|predictor_i)=TRUE
OR heteroscedasticity_test(simple_lin_reg)=FAIL
response= mean across level of predictor
sd(response_i|predictor_i)=TRUE
clean<-read.table(here('data','cleaningwtd.txt'),header=TRUE)
head(clean)
## Case Crews Rooms StdDev
## 1 1 16 51 12.000463
## 2 2 10 37 7.927123
## 3 3 12 37 7.289910
## 4 4 16 46 12.000463
## 5 5 16 45 12.000463
## 6 6 4 11 4.966555
Note that the number of rooms cleaned is a mean, and the standard deviations of rooms cleaned per crew amount is given.
ggplot(clean, aes(x=Crews, y=Rooms))+geom_point()+geom_smooth(method='lm')
## `geom_smooth()` using formula 'y ~ x'
Note how the variance increases as the number of crews increases.
clean<-read.table(here('data','cleaningwtd.txt'),header=TRUE)
m.list<-list()
m.list[['wls_no_weight']]<-lm(Rooms~Crews,data=clean)
summary(m.list[['wls_no_weight']])
##
## Call:
## lm(formula = Rooms ~ Crews, data = clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.9990 -4.9901 0.8046 4.0010 17.0010
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7847 2.0965 0.851 0.399
## Crews 3.7009 0.2118 17.472 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.336 on 51 degrees of freedom
## Multiple R-squared: 0.8569, Adjusted R-squared: 0.854
## F-statistic: 305.3 on 1 and 51 DF, p-value: < 2.2e-16
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
lmtest::bptest(m.list[['wls_no_weight']])
##
## studentized Breusch-Pagan test
##
## data: m.list[["wls_no_weight"]]
## BP = 14.518, df = 1, p-value = 0.0001389
H_0: homoscedasitcity is present (residuals are distributed with equal variance)
H_A: heteroscedasticity is present (residuals are not distributed with equal variance)
if lmtest::bptest(model)==H_A perform Weighted least squares
m.list[['wls']]<-lm(Rooms~Crews,weights=1/StdDev^2,data=clean)
summary(m.list[['wls']])
##
## Call:
## lm(formula = Rooms ~ Crews, data = clean, weights = 1/StdDev^2)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.43184 -0.82013 0.03909 0.69029 2.01030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8095 1.1158 0.725 0.471
## Crews 3.8255 0.1788 21.400 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9648 on 51 degrees of freedom
## Multiple R-squared: 0.8998, Adjusted R-squared: 0.8978
## F-statistic: 458 on 1 and 51 DF, p-value: < 2.2e-16
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggplot(clean, aes(x=Crews,y=Rooms))+geom_point()+geom_abline(aes(intercept=m.list[['wls']]$coefficients[[1]],slope=m.list[['wls']]$coefficients[[2]],color='wls'))+geom_abline(aes(intercept=m.list[['wls_no_weight']]$coefficients[[1]],slope=m.list[['wls']]$coefficients[[2]],color='unweighted'))