#http://stats.stackexchange.com/questions/3944/explain-model-adjustment-in-plain-english

Set seed

# set seed ----------------------------------------------------------------
set.seed(69)

set variables

# set variables -----------------------------------------------------------
# x = weight, y = height
x <- rep(1:10,2)
y <- c(jitter(1:10, factor=4), (jitter(1:10, factor=4)+2))
sex <- rep(c("f", "m"), each=10)
df1 <- data.frame(x,y,sex)

regression

First evaluate

with(df1, plot(y~x, col=c(1,2)[sex]))

see if gender affects height

# lm1 <- lm(y~sex, data=df1)
lm1 <- lm(y~x, data=df1)

here we control for weight

lm2 <- lm(y~sex+x, data=df1)
summary(lm1)
## 
## Call:
## lm(formula = y ~ x, data = df1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.42551 -0.99052  0.07216  0.59272  1.80483 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.35675    0.53081   2.556   0.0199 *  
## x            0.93041    0.08555  10.876 2.42e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.099 on 18 degrees of freedom
## Multiple R-squared:  0.8679, Adjusted R-squared:  0.8606 
## F-statistic: 118.3 on 1 and 18 DF,  p-value: 2.417e-09
anova(lm1); 
## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x          1 142.834 142.834  118.28 2.417e-09 ***
## Residuals 18  21.736   1.208                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm2);
## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## sex        1  17.730  17.730  75.242 1.193e-07 ***
## x          1 142.834 142.834 606.146 9.784e-15 ***
## Residuals 17   4.006   0.236                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm2)
## 
## Call:
## lm(formula = y ~ sex + x, data = df1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4840 -0.3921 -0.2676  0.4519  0.8633 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.41520    0.25839   1.607    0.126    
## sexm         1.88309    0.21709   8.674 1.19e-07 ***
## x            0.93041    0.03779  24.620 9.78e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4854 on 17 degrees of freedom
## Multiple R-squared:  0.9757, Adjusted R-squared:  0.9728 
## F-statistic: 340.7 on 2 and 17 DF,  p-value: 1.923e-14

In case you want to add the fitted lines to the plot

coefs2 <- coef(lm2)
# abline(coefs2[1], coefs2[3], col=1)
# abline(coefs2[1]+coefs2[2], coefs2[3], col=2)

# below dash controls table header level
# Try test --------------------------------------------------------------------


plot(x,c(y[1:10],y[11:20] - coefs2[2]))

#http://stackoverflow.com/questions/6853204/plotting-multiple-curves-same-graph-and-same-scale
yy = c(y[1:10],y[-(1:10)] - coefs2[2])
plot(x,yy,col='red')

plot(x[1:10],y[1:10],ylim=range(c(0,12)))
par(new=TRUE)
plot(x[-(1:10)],y[-(1:10)] - coefs2[2],col='red',axes=FALSE,xlab="",ylab="",ylim=range(c(0,12)))

plot(x[1:10],y[1:10])
par(new=TRUE)
plot(x[-(1:10)],y[-(1:10)],col='red',axes=FALSE,xlab="",ylab="")

#%%
# ggplot(data = df1,aes(x=x,y=y)) + geom_point()
# ggplot(data = df1,aes(x=x,y=y)) + geom_line()
# ggplot(data = df1,aes(x=x,y=y)) + geom_line() + geom_point()
# qplot(df1$sex,df1$y)
# 
# 
# qplot(df1$sex,df1$y,geom='boxplot')
# 
# #  -----------------------------------------------------------------------
# 
# ggplot(data=df1,aes(x=x,y=y,fill=sex)) + geom_bar(stat = 'identity',fill='red' )