output: html_notebook author: Adhislacy

This is an R Markdown Notebook.

UN11 DATA.

library(alr4)
## Warning: package 'alr4' was built under R version 3.3.3
## Loading required package: car
## Warning: package 'car' was built under R version 3.3.3
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.3.3
## 
## Attaching package: 'effects'
## The following object is masked from 'package:car':
## 
##     Prestige
library(car)
?alr4
## starting httpd help server ...
##  done
# attach(UN11) is deliberately not used: every model and plot in this notebook
# names the data frame explicitly (data = UN11 or UN11$column), and attaching
# would only risk masking objects that share a name with a column.
# Open the data in the viewer and show its help page.
View(UN11)
?UN11

Getting the structure of the data.

# Compact listing of each column's type and its first few values.
str(UN11)
## 'data.frame':    199 obs. of  6 variables:
##  $ region   : Factor w/ 8 levels "Africa","Asia",..: 2 4 1 1 3 5 2 3 8 4 ...
##  $ group    : Factor w/ 3 levels "oecd","other",..: 2 2 3 3 2 2 2 2 1 1 ...
##  $ fertility: num  5.97 1.52 2.14 5.13 2 ...
##  $ ppgdp    : num  499 3677 4473 4322 13750 ...
##  $ lifeExpF : num  49.5 80.4 75 53.2 81.1 ...
##  $ pctUrban : num  23 53 67 59 100 93 64 47 89 68 ...
##  - attr(*, "na.action")=Class 'omit'  Named int [1:34] 4 5 8 28 41 67 68 72 79 83 ...
##   .. ..- attr(*, "names")= chr [1:34] "Am Samoa" "Andorra" "Antigua and Barbuda" "Br Virigin Is" ...

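Try running this code: summary(model <- lm(lifeExpF ~ x1 + factor(x2), data = UN11)), replacing x1 and x2 with the chosen predictors. (Assign the fit to a new name rather than to UN11, otherwise the data set is overwritten.)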

# Dummy coding R uses for group: oecd is the reference level (all-zero row).
contrasts(UN11$group)
##        other africa
## oecd       0      0
## other      1      0
## africa     0      1
# Distribution of lifeExpF within each level of group.
boxplot(
  lifeExpF ~ group,
  data = UN11,
  main = "BOX PLOT OF LIFE EXPECTANCY FOR EACH GROUP",
  xlab = "Group",
  ylab = "Life Expectancy"
)

# Interaction model: each group gets its own intercept and log(ppgdp) slope.
# log(ppgdp) * factor(group) already expands to both main effects plus their
# interaction, so the two explicit main-effect terms are redundant but harmless.
un11_lm2 <- lm(lifeExpF~log(ppgdp)+factor(group) + log(ppgdp)*factor(group), data = UN11)
summary(un11_lm2)
## 
## Call:
## lm(formula = lifeExpF ~ log(ppgdp) + factor(group) + log(ppgdp) * 
##     factor(group), data = UN11)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.634  -2.089   0.301   2.255  14.489 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     59.2137    15.2203   3.890 0.000138 ***
## log(ppgdp)                       2.2425     1.4664   1.529 0.127844    
## factor(group)other             -11.1731    15.5948  -0.716 0.474572    
## factor(group)africa            -22.9848    15.7838  -1.456 0.146954    
## log(ppgdp):factor(group)other    0.9294     1.5177   0.612 0.540986    
## log(ppgdp):factor(group)africa   1.0950     1.5785   0.694 0.488703    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.129 on 193 degrees of freedom
## Multiple R-squared:  0.7498, Adjusted R-squared:  0.7433 
## F-statistic: 115.7 on 5 and 193 DF,  p-value: < 2.2e-16
# The same interaction model fitted again without assignment, so only the call
# and the coefficient estimates are printed.
lm(formula = lifeExpF ~ log(ppgdp) + factor(group) + log(ppgdp) * 
    factor(group), data = UN11)
## 
## Call:
## lm(formula = lifeExpF ~ log(ppgdp) + factor(group) + log(ppgdp) * 
##     factor(group), data = UN11)
## 
## Coefficients:
##                    (Intercept)                      log(ppgdp)  
##                        59.2137                          2.2425  
##             factor(group)other             factor(group)africa  
##                       -11.1731                        -22.9848  
##  log(ppgdp):factor(group)other  log(ppgdp):factor(group)africa  
##                         0.9294                          1.0950

Analysis without Interaction.

 # Additive model: one common log(ppgdp) slope, with a separate intercept
 # shift for each group and no interaction term.
 un11_lm3 <- lm(lifeExpF~log(ppgdp)+factor(group), data = UN11) 
summary(un11_lm3)
## 
## Call:
## lm(formula = lifeExpF ~ log(ppgdp) + factor(group), data = UN11)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.6348  -2.1741   0.2441   2.3537  14.6539 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           49.529      3.400  14.569  < 2e-16 ***
## log(ppgdp)             3.177      0.316  10.056  < 2e-16 ***
## factor(group)other    -1.535      1.174  -1.308    0.193    
## factor(group)africa  -12.170      1.557  -7.814 3.35e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.109 on 195 degrees of freedom
## Multiple R-squared:  0.7492, Adjusted R-squared:  0.7453 
## F-statistic: 194.1 on 3 and 195 DF,  p-value: < 2.2e-16
# F-test comparing the interaction model (un11_lm2) with the additive model
# (un11_lm3). The larger model is listed first, hence the negative Df below.
anova(un11_lm2, un11_lm3)
## Analysis of Variance Table
## 
## Model 1: lifeExpF ~ log(ppgdp) + factor(group) + log(ppgdp) * factor(group)
## Model 2: lifeExpF ~ log(ppgdp) + factor(group)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    193 5077.7                           
## 2    195 5090.4 -2   -12.675 0.2409 0.7862
  • Diagnostic plots. Diagnostic plots are produced automatically by applying the plot() function directly to the output from lm(). In the console, this command produces one plot at a time, and hitting the ‘Enter’ key generates the next plot. In an R Notebook, however, all the plots can be viewed simultaneously, and you can use the left and right arrow buttons to move between them, which is much more convenient (well, for me).
# which = 1:6 requests six diagnostic plots, so lay them out on a 2 x 3 grid;
# a 2 x 2 grid holds only four and would spill the last two onto a second page.
par(mfrow = c(2, 3))
plot(un11_lm3, which = 1:6)

(Out of curiosity, I also wanted to see the diagnostic plots for un11_lm2.)

# The same six diagnostic plots, this time for the interaction model.
plot(un11_lm2, which = 1:6)

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
# lifeExpF against log(ppgdp), coloured by group. The per-group lm line is
# added first, so the points are drawn on top of it.
ggplot(
  data = UN11,
  aes(x = log(ppgdp), y = lifeExpF, colour = factor(group))
) +
  stat_smooth(method = lm, fullrange = FALSE) +
  geom_point()

Apparently, by default the plot above fits a separate regression line for each group, i.e. it plots the model with an interaction.

Keep in mind that in this dataset the interaction is not significant: the ANOVA comparison of un11_lm2 and un11_lm3 above gives F = 0.2409, p = 0.7862.

The QQplot has an option of specifying the formula.

# Additive model once more, this time referencing the columns through UN11$
# instead of a data = argument; printing it shows the call and coefficients.
fit <- lm(
  UN11$lifeExpF ~ log(UN11$ppgdp) + factor(UN11$group)
)
fit
## 
## Call:
## lm(formula = UN11$lifeExpF ~ log(UN11$ppgdp) + factor(UN11$group))
## 
## Coefficients:
##              (Intercept)           log(UN11$ppgdp)  
##                   49.529                     3.177  
##  factor(UN11$group)other  factor(UN11$group)africa  
##                   -1.535                   -12.170

# Scatter plot of lifeExpF against log(ppgdp), grouped by group. The aes()
# mapping is closed before the layer is added, and geom_point() is called so
# that a layer object (not the bare function) is added to the plot.
ggplot(UN11, aes(log(ppgdp), lifeExpF, group = group)) +
  geom_point()

END