# Fetch the course-evaluation data and restore it into the workspace.
# .RData is a binary format, so the transfer mode is set to binary
# explicitly instead of relying on platform defaults or on download.file()
# guessing the mode from the URL extension.
download.file(
  "http://www.openintro.org/stat/data/evals.RData",
  destfile = "evals.RData",
  mode = "wb"
)
# load() restores the objects saved in the file; the rest of this script
# uses the `evals` data frame from it.
load("evals.RData")
# Exploratory plots of the evals data.
# Histogram of the evaluation scores.
hist(evals$score)
# Box plots of age, one box per distinct value of bty_f2upper.
boxplot(evals$age ~ evals$bty_f2upper)
# score plotted against bty_avg.
plot(evals$score ~ evals$bty_avg)
# Open the help page for jitter(); the line after it is the console message
# recorded when the help server started.
?jitter
## starting httpd help server ... done
# Same plot as above with jitter() applied to both variables, which adds a
# small amount of random noise so that coincident points become visible.
plot(jitter(evals$score) ~ jitter(evals$bty_avg))
# Simple linear regression of score on bty_avg.
# Variables are named bare and looked up through data = evals, the same way
# the m_bty_gen fit further down is written. This keeps the coefficient
# labels free of the "evals$" prefix and lets predict(m_bty, newdata = ...)
# find the predictor in a new data frame.
m_bty <- lm(score ~ bty_avg, data = evals)
# Jittered scatterplot with the fitted regression line drawn on top.
plot(jitter(evals$score) ~ jitter(evals$bty_avg))
abline(m_bty)
# Recorded output below: same fit as before, so all estimates are unchanged;
# only the Call line and the term label reflect the data = evals form.
summary(m_bty)
##
## Call:
## lm(formula = score ~ bty_avg, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9246 -0.3690 0.1420 0.3977 0.9309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.88034 0.07614 50.96 < 2e-16 ***
## bty_avg 0.06664 0.01629 4.09 5.08e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5348 on 461 degrees of freedom
## Multiple R-squared: 0.03502, Adjusted R-squared: 0.03293
## F-statistic: 16.73 on 1 and 461 DF, p-value: 5.083e-05
# Residual diagnostics for m_bty.
# Residuals against the predictor, with a horizontal reference line at zero.
plot(m_bty$residuals ~ evals$bty_avg)
abline(h = 0)
# Distribution of the residuals: histogram, then a normal Q-Q plot with its
# reference line.
hist(m_bty$residuals)
qqnorm(m_bty$residuals)
qqline(m_bty$residuals)
# Relationship between bty_avg and bty_f1lower: plot, then their correlation.
plot(evals$bty_avg ~ evals$bty_f1lower)
cor(evals$bty_avg, evals$bty_f1lower)
## [1] 0.8439112
# Pairwise plots of columns 13 through 19 of evals.
# NOTE(review): presumably these are the bty_* rating columns; the positions
# are hard-coded, so confirm against names(evals).
plot(evals[,13:19])
# Regress score on bty_avg and gender together; both predictors are looked
# up in the evals data frame.
m_bty_gen <- lm(
  formula = score ~ bty_avg + gender,
  data = evals
)
# Coefficient table and fit statistics for the two-predictor model.
summary(m_bty_gen)
##
## Call:
## lm(formula = score ~ bty_avg + gender, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8305 -0.3625 0.1055 0.4213 0.9314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.74734 0.08466 44.266 < 2e-16 ***
## bty_avg 0.07416 0.01625 4.563 6.48e-06 ***
## gendermale 0.17239 0.05022 3.433 0.000652 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5287 on 460 degrees of freedom
## Multiple R-squared: 0.05912, Adjusted R-squared: 0.05503
## F-statistic: 14.45 on 2 and 460 DF, p-value: 8.177e-07
# Fit score on bty_avg and gender, then check the model's residuals.
# The formula uses bare variable names with data = evals so the model
# carries plain term names ("bty_avg", "gendermale") rather than
# "evals$"-prefixed ones, and so predict(newdata = ...) can locate the
# predictors.
m_bty_gen <- lm(score ~ bty_avg + gender, data = evals)
# Normal Q-Q plot of the residuals with its reference line.
qqnorm(m_bty_gen$residuals)
qqline(m_bty_gen$residuals)
# Residuals against bty_avg, with a horizontal reference line at zero.
plot(m_bty_gen$residuals ~ evals$bty_avg)
abline(h = 0)
# score split by gender.
plot(evals$score ~ evals$gender)
# Recorded output below: identical fit, so every estimate is unchanged; the
# Call line and term labels reflect the data = evals form.
summary(m_bty_gen)
##
## Call:
## lm(formula = score ~ bty_avg + gender, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8305 -0.3625 0.1055 0.4213 0.9314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.74734 0.08466 44.266 < 2e-16 ***
## bty_avg 0.07416 0.01625 4.563 6.48e-06 ***
## gendermale 0.17239 0.05022 3.433 0.000652 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5287 on 460 degrees of freedom
## Multiple R-squared: 0.05912, Adjusted R-squared: 0.05503
## F-statistic: 14.45 on 2 and 460 DF, p-value: 8.177e-07
# NOTE(review): multiLines() is not defined in this script; presumably it is
# restored by load("evals.RData") -- confirm.
multiLines(m_bty_gen)
# Regress score on bty_avg and rank.
# Bare variable names with data = evals keep the term labels readable and
# let predict(newdata = ...) work; the fitted model itself is unchanged.
m_bty_rank <- lm(score ~ bty_avg + rank, data = evals)
# Recorded output below: estimates unchanged; only the Call line and term
# labels reflect the data = evals form.
summary(m_bty_rank)
##
## Call:
## lm(formula = score ~ bty_avg + rank, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8713 -0.3642 0.1489 0.4103 0.9525
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.98155 0.09078 43.860 < 2e-16 ***
## bty_avg 0.06783 0.01655 4.098 4.92e-05 ***
## ranktenure track -0.16070 0.07395 -2.173 0.0303 *
## ranktenured -0.12623 0.06266 -2.014 0.0445 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5328 on 459 degrees of freedom
## Multiple R-squared: 0.04652, Adjusted R-squared: 0.04029
## F-statistic: 7.465 on 3 and 459 DF, p-value: 6.88e-05
# Full model: score regressed on thirteen predictors at once.
# Bare variable names with data = evals replace the "evals$" prefix on every
# term; the fit is the same, the term labels are shorter, and
# predict(newdata = ...) can find the predictors.
m_full <- lm(
  score ~ rank + ethnicity + gender + language + age + cls_perc_eval +
    cls_students + cls_level + cls_profs + cls_credits + bty_avg +
    pic_outfit + pic_color,
  data = evals
)
# Recorded output below: estimates unchanged; only the Call lines and term
# labels reflect the data = evals form.
summary(m_full)
##
## Call:
## lm(formula = score ~ rank + ethnicity + gender + language + age +
## cls_perc_eval + cls_students + cls_level + cls_profs + cls_credits +
## bty_avg + pic_outfit + pic_color, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.77397 -0.32432 0.09067 0.35183 0.95036
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.0952141 0.2905277 14.096 < 2e-16 ***
## ranktenure track -0.1475932 0.0820671 -1.798 0.07278 .
## ranktenured -0.0973378 0.0663296 -1.467 0.14295
## ethnicitynot minority 0.1234929 0.0786273 1.571 0.11698
## gendermale 0.2109481 0.0518230 4.071 5.54e-05 ***
## languagenon-english -0.2298112 0.1113754 -2.063 0.03965 *
## age -0.0090072 0.0031359 -2.872 0.00427 **
## cls_perc_eval 0.0053272 0.0015393 3.461 0.00059 ***
## cls_students 0.0004546 0.0003774 1.205 0.22896
## cls_levelupper 0.0605140 0.0575617 1.051 0.29369
## cls_profssingle -0.0146619 0.0519885 -0.282 0.77806
## cls_creditsone credit 0.5020432 0.1159388 4.330 1.84e-05 ***
## bty_avg 0.0400333 0.0175064 2.287 0.02267 *
## pic_outfitnot formal -0.1126817 0.0738800 -1.525 0.12792
## pic_colorcolor -0.2172630 0.0715021 -3.039 0.00252 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.498 on 448 degrees of freedom
## Multiple R-squared: 0.1871, Adjusted R-squared: 0.1617
## F-statistic: 7.366 on 14 and 448 DF, p-value: 6.552e-14
# Refit with twelve predictors: the thirteen-predictor formula minus
# ethnicity. This overwrites m_full.
# Bare variable names with data = evals replace the "evals$" prefix on every
# term; the fit is the same, the term labels are shorter, and
# predict(newdata = ...) can find the predictors.
m_full <- lm(
  score ~ rank + gender + language + age + cls_perc_eval +
    cls_students + cls_level + cls_profs + cls_credits + bty_avg +
    pic_outfit + pic_color,
  data = evals
)
# Recorded output below: estimates unchanged; only the Call lines and term
# labels reflect the data = evals form.
summary(m_full)
##
## Call:
## lm(formula = score ~ rank + gender + language + age + cls_perc_eval +
## cls_students + cls_level + cls_profs + cls_credits + bty_avg +
## pic_outfit + pic_color, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.73681 -0.32734 0.08283 0.35834 0.98639
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2676351 0.2694274 15.840 < 2e-16 ***
## ranktenure track -0.1660677 0.0813523 -2.041 0.041801 *
## ranktenured -0.1127978 0.0657022 -1.717 0.086705 .
## gendermale 0.2241744 0.0512176 4.377 1.50e-05 ***
## languagenon-english -0.2862448 0.1055924 -2.711 0.006968 **
## age -0.0092040 0.0031385 -2.933 0.003534 **
## cls_perc_eval 0.0051119 0.0015357 3.329 0.000944 ***
## cls_students 0.0004785 0.0003777 1.267 0.205899
## cls_levelupper 0.0767503 0.0567182 1.353 0.176677
## cls_profssingle -0.0292174 0.0512393 -0.570 0.568817
## cls_creditsone credit 0.4589918 0.1128358 4.068 5.61e-05 ***
## bty_avg 0.0375980 0.0174661 2.153 0.031880 *
## pic_outfitnot formal -0.1208610 0.0738165 -1.637 0.102265
## pic_colorcolor -0.2400696 0.0701264 -3.423 0.000675 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4988 on 449 degrees of freedom
## Multiple R-squared: 0.1826, Adjusted R-squared: 0.159
## F-statistic: 7.717 on 13 and 449 DF, p-value: 6.792e-14
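##### Dropping one variable and refitting changed the coefficients and p-values of the remaining variables. Note that the variable removed above is ethnicity (p = 0.117), whereas the highest p-value in the full model belongs to cls_profs (p = 0.778). After the refit, rank (tenure track) moves from p = 0.073 to p = 0.042, which suggests that the status of the professor plays a role in the evaluation score.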
# Reduced model with seven predictors: gender, language, age, cls_perc_eval,
# cls_credits, bty_avg and pic_color. This overwrites m_full again.
# Bare variable names with data = evals replace the "evals$" prefix on every
# term; the fit is the same, the term labels are shorter, and
# predict(newdata = ...) can find the predictors.
m_full <- lm(
  score ~ gender + language + age + cls_perc_eval +
    cls_credits + bty_avg + pic_color,
  data = evals
)
# Recorded output below: estimates unchanged; only the Call lines and term
# labels reflect the data = evals form.
summary(m_full)
##
## Call:
## lm(formula = score ~ gender + language + age + cls_perc_eval +
## cls_credits + bty_avg + pic_color, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.81919 -0.32035 0.09272 0.38526 0.88213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.967255 0.215824 18.382 < 2e-16 ***
## gendermale 0.221457 0.049937 4.435 1.16e-05 ***
## languagenon-english -0.281933 0.098341 -2.867 0.00434 **
## age -0.005877 0.002622 -2.241 0.02551 *
## cls_perc_eval 0.004295 0.001432 2.999 0.00286 **
## cls_creditsone credit 0.444392 0.100910 4.404 1.33e-05 ***
## bty_avg 0.048679 0.016974 2.868 0.00432 **
## pic_colorcolor -0.216556 0.066625 -3.250 0.00124 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5014 on 455 degrees of freedom
## Multiple R-squared: 0.1631, Adjusted R-squared: 0.1502
## F-statistic: 12.67 on 7 and 455 DF, p-value: 6.996e-15
# Final model, stored under its own name: score on gender, language, age,
# cls_perc_eval, cls_credits, bty_avg and pic_color.
# Bare variable names with data = evals replace the "evals$" prefix on every
# term; the fit is the same, the term labels are shorter, and
# predict(newdata = ...) can find the predictors.
m_full_final <- lm(
  score ~ gender + language + age + cls_perc_eval +
    cls_credits + bty_avg + pic_color,
  data = evals
)
# Recorded output below: estimates unchanged; only the Call lines and term
# labels reflect the data = evals form.
summary(m_full_final)
##
## Call:
## lm(formula = score ~ gender + language + age + cls_perc_eval +
## cls_credits + bty_avg + pic_color, data = evals)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.81919 -0.32035 0.09272 0.38526 0.88213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.967255 0.215824 18.382 < 2e-16 ***
## gendermale 0.221457 0.049937 4.435 1.16e-05 ***
## languagenon-english -0.281933 0.098341 -2.867 0.00434 **
## age -0.005877 0.002622 -2.241 0.02551 *
## cls_perc_eval 0.004295 0.001432 2.999 0.00286 **
## cls_creditsone credit 0.444392 0.100910 4.404 1.33e-05 ***
## bty_avg 0.048679 0.016974 2.868 0.00432 **
## pic_colorcolor -0.216556 0.066625 -3.250 0.00124 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5014 on 455 degrees of freedom
## Multiple R-squared: 0.1631, Adjusted R-squared: 0.1502
## F-statistic: 12.67 on 7 and 455 DF, p-value: 6.996e-15
# Residual diagnostics for the final model: normal Q-Q plot with its
# reference line, then residuals against bty_avg with a line at zero.
qqnorm(m_full_final$residuals)
qqline(m_full_final$residuals)
plot(m_full_final$residuals ~ evals$bty_avg)
abline(h = 0)