# Scatter plot of diamond price against mass, followed by the fitted
# least-squares line. Expects a data frame `diamond` with columns `carat`
# and `price` to already exist in the session; it is not created here.
plot(
  x = diamond$carat,
  y = diamond$price,
  xlab = "Mass (carats)",
  ylab = "Price (Singapore $)",
  main = "Price of Diamonds By Weight",
  sub = "",
  bg = "lightblue",   # background (fill) colour of the point symbols
  col = "black",      # colour for lines and points
  cex = 1.1,          # scale plotting symbols relative to the default of 1
  pch = 21,           # plotting symbol (triangle, circle, square, ...)
  frame.plot = FALSE  # do not draw a box around the plot
)

?abline  # open the help page for abline()

# Overlay the regression line of price on carat.
abline(
  reg = lm(formula = price ~ carat, data = diamond),
  lwd = 2,  # line width, default = 1
  col = "blue"
)
3 Fitting the linear regression model
# Fit the simple linear regression of price on carat, then print the
# estimated intercept and slope.
fit <- lm(formula = price ~ carat, data = diamond)
coef(fit)
(Intercept) carat
-259.6259 3721.0249
We estimate an expected increase of 3721.02 Singapore dollars (SIN $) in price for every one-carat increase in the mass of a diamond.
The intercept -259.63 is the expected price of a 0 carat diamond.
# Residual diagnostics for the regression of diamond price on carat.
# Expects a data frame `diamond` with columns `price` and `carat`.
y <- diamond$price
x <- diamond$carat
n <- length(y)

fit <- lm(formula = y ~ x)
# Equivalent model written against the data frame; this is the fit kept.
fit <- lm(data = diamond, formula = price ~ carat)
e <- resid(fit)

# Residuals against the predictor, reusing `e` rather than refitting.
plot(
  x = diamond$carat,
  y = e,
  xlab = "Mass (carats)",
  ylab = "Residuals (SIN $) ",
  main = "Residual vs X",
  sub = "price ~ carat"
)
abline(h = 0, col = "blue", lwd = 2)  # h: y values for horizontal lines

?abline  # open the help page for abline()

yhat <- predict(fit)
# Largest absolute difference between resid(fit) and y - yhat, where
# y = diamond$price and yhat = predicted diamond$price.
max(abs(e - (y - yhat)))
# Simulate 100 points with x uniform on [-3, 3] and
# y = x + sin(x) + normal noise (sd 0.2), then plot the raw data together
# with the straight-line least-squares fit.
x <- runif(n = 100, min = -3, max = 3)
y <- x + sin(x) + rnorm(n = 100, sd = 0.2)

plot(
  x = x,
  y = y,
  xlab = "Real Number Line (Domain)",
  ylab = "Y axis (Range)",
  main = "Raw Data",
  col = "red",
  cex = 0.8,
  pch = 16
)
abline(reg = lm(formula = y ~ x), col = "blue", lwd = 2)
# Plot the residuals of the straight-line fit lm(y ~ x) against x and add
# a horizontal reference line at zero. Uses the `x` and `y` vectors
# currently defined in the session.
plot(
  x = x,
  y = residuals(lm(formula = y ~ x)),
  main = "Residuals vs Domain (Real Number Line)",
  xlab = "Real Number Line (Domain)",
  ylab = "Residuals: lm(y ~ x) ",
  col = "red",
  cex = 0.8,
  pch = 16
)
abline(h = 0, col = "blue", lwd = 2)
9 Heteroskedasticity
# Construct data: x uniform on [0, 6]; y equals x plus normal noise whose
# standard deviation (0.001 * x) grows with x.
x <- runif(n = 100, min = 0, max = 6)
y <- x + rnorm(n = 100, mean = 0, sd = 0.001 * x)

# Plot raw data with the least-squares line.
plot(
  x = x,
  y = y,
  xlab = "Real Number Line (Domain)",
  ylab = "Y axis (Range)",
  main = "Raw Data"
)
abline(reg = lm(formula = y ~ x))
9.1 Heteroskedasticity: Getting rid of the blank space can be helpful
# Plot the residuals of lm(y ~ x) against x with a horizontal reference
# line at zero. Uses the `x` and `y` vectors currently defined in the
# session.
plot(
  x = x,
  y = residuals(lm(formula = y ~ x)),
  main = "Residuals vs Domain (Real Number Line)",
  xlab = "Real Number Line (Domain)",
  ylab = "Residuals: lm(y ~ x) ",
  col = "red",
  cex = 0.8,
  pch = 16
)
abline(h = 0, col = "blue", lwd = 2)