Regression Models

P.I.N.Kehelbedda, Department of Statistics and Computer Science, Faculty of Science, University of Peradeniya

2020-09-28

1:

library(UsingR)
library(reshape)
library(manipulate)

data("galton")
str(galton)
## 'data.frame':    928 obs. of  2 variables:
##  $ child : num  61.7 61.7 61.7 61.7 61.7 62.2 62.2 62.2 62.2 62.2 ...
##  $ parent: num  70.5 68.5 65.5 64.5 64 67.5 67.5 67.5 66.5 66.5 ...
head(galton)
##   child parent
## 1  61.7   70.5
## 2  61.7   68.5
## 3  61.7   65.5
## 4  61.7   64.5
## 5  61.7   64.0
## 6  62.2   67.5
hist(galton$child)

hist(galton$parent)

# We are unable to compare two variable(child & parent) one-by-one
# So, we need to melt the data set,
long <- melt(galton)
str(long)
## 'data.frame':    1856 obs. of  2 variables:
##  $ variable: Factor w/ 2 levels "child","parent": 1 1 1 1 1 1 1 1 1 1 ...
##  $ value   : num  61.7 61.7 61.7 61.7 61.7 62.2 62.2 62.2 62.2 62.2 ...
head(long)
##   variable value
## 1    child  61.7
## 2    child  61.7
## 3    child  61.7
## 4    child  61.7
## 5    child  61.7
## 6    child  62.2
dim(galton)
## [1] 928   2
dim(long)
## [1] 1856    2
# Plotting
g <- ggplot(long, aes(x = value, fill = variable))
g <- g + geom_histogram(colour = "black", binwidth = 1)
g <- g + facet_grid(. ~ variable)
g

# Scatter plot
library(dplyr)
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
freqData$child <- as.numeric(as.character(freqData$child))
freqData$parent <- as.numeric(as.character(freqData$parent))
g <- ggplot(filter(freqData, freq > 0), aes(x = parent, y = child))
g <- g  + scale_size(range = c(2, 20), guide = "none" )
g <- g + geom_point(colour="grey50", aes(size = freq+20, show_guide = FALSE))
g <- g + geom_point(aes(colour=freq, size = freq))
g <- g + scale_colour_gradient(low = "lightblue", high="white")                    
g

# Model fitting
library(manipulate)
y <- galton$child - mean(galton$child)
x <- galton$parent - mean(galton$parent)
freqData <- as.data.frame(table(x, y))
names(freqData) <- c("child", "parent", "freq")
freqData$child <- as.numeric(as.character(freqData$child))
freqData$parent <- as.numeric(as.character(freqData$parent))
myPlot <- function(beta){
      g <- ggplot(filter(freqData, freq > 0), aes(x = parent, y = child))
      g <- g  + scale_size(range = c(2, 20), guide = "none" )
      g <- g + geom_point(colour="grey50", aes(size = freq+20, show_guide = FALSE))
      g <- g + geom_point(aes(colour=freq, size = freq))
      g <- g + scale_colour_gradient(low = "lightblue", high="white")                     
      g <- g + geom_abline(intercept = 0, slope = beta, size = 3)
      mse <- mean( (y - beta * x) ^2 )
      g <- g + ggtitle(paste("beta = ", beta, "mse = ", round(mse, 3)))
      g
}


lm(I(child - mean(child)) ~ I(parent - mean(parent)) - 1, data = galton)
## 
## Call:
## lm(formula = I(child - mean(child)) ~ I(parent - mean(parent)) - 
##     1, data = galton)
## 
## Coefficients:
## I(parent - mean(parent))  
##                   0.6463
freqData <- as.data.frame(table(galton$child, galton$parent))
names(freqData) <- c("child", "parent", "freq")
freqData$child <- as.numeric(as.character(freqData$child))
freqData$parent <- as.numeric(as.character(freqData$parent))
g <- ggplot(filter(freqData, freq > 0), aes(x = parent, y = child))
g <- g  + scale_size(range = c(2, 20), guide = "none" )
g <- g + geom_point(colour="grey50", aes(size = freq+20, show_guide = FALSE))
g <- g + geom_point(aes(colour=freq, size = freq))
g <- g + scale_colour_gradient(low = "lightblue", high="white")                    
lm1 <- lm(galton$child ~ galton$parent)
g <- g + geom_abline(intercept = coef(lm1)[1], slope = coef(lm1)[2], size = 3, colour = grey(.5))
g

lmd1 <- lm(child ~ parent, data = galton)
par(mfrow = c(2, 2))
plot(lmd1)

summary(lmd1)
## 
## Call:
## lm(formula = child ~ parent, data = galton)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8050 -1.3661  0.0487  1.6339  5.9264 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 23.94153    2.81088   8.517   <2e-16 ***
## parent       0.64629    0.04114  15.711   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.239 on 926 degrees of freedom
## Multiple R-squared:  0.2105, Adjusted R-squared:  0.2096 
## F-statistic: 246.8 on 1 and 926 DF,  p-value: < 2.2e-16
# Ordering
x <- rnorm(100)
y <- rnorm(100)
odr <- order(x)   # Ordering data
x[odr[100]]       # Maximum
## [1] 2.71211
x[odr[1]]         # Minimum
## [1] -2.175858
y[odr[100]]
## [1] -1.265532
y[odr[1]]
## [1] -0.8484868
# RTTM
library(UsingR)
data(father.son)
y <- (father.son$sheight - mean(father.son$sheight)) / sd(father.son$sheight)
x <- (father.son$fheight - mean(father.son$fheight)) / sd(father.son$fheight)
rho <- cor(x, y)
library(ggplot2)
g <- ggplot(data.frame(x = x, y = y), aes(x = x, y = y))
g <- g + geom_point(size = 6, colour = "black", alpha = 0.2)
g <- g + geom_point(size = 4, colour = "salmon", alpha = 0.2)
g <- g + xlim(-4, 4) + ylim(-4, 4)
g <- g + geom_abline(intercept = 0, slope = 1)
g <- g + geom_vline(xintercept = 0)
g <- g + geom_hline(yintercept = 0)
g <- g + geom_abline(intercept = 0, slope = rho, size = 2)
g <- g + geom_abline(intercept = 0, slope = 1 / rho, size = 2)
g

g <- ggplot(data.frame(x, y), aes(x = x, y = y))
g <- g + geom_point(size = 5, alpha = .2, colour = "black")
g <- g + geom_point(size = 4, alpha = .2, colour = "red")
g <- g + geom_vline(xintercept = 0)
g <- g + geom_hline(yintercept = 0)
g <- g + geom_abline(position = "identity")
g <- g + geom_abline(intercept = 0, slope = rho, size = 2)
g <- g + geom_abline(intercept = 0, slope = 1 / rho, size = 2)
g <- g + xlab("Father's height, normalized")
g <- g + ylab("Son's height, normalized")
g

# Middle (MSE)
myHist <- function(mu){
      g <- ggplot(galton, aes(x = child))
      g <- g + geom_histogram(fill = "green", 
                              binwidth = 1, 
                              aes(y = ..density..), 
                              color = "black")
      g <- g + geom_density(size = 2)
      g <- g + geom_vline(xintercept = mu, size = 2)
      mse <- round(mean((galton$child - mu)^2), 3)
      g <- g + labs(title = paste('mu = ', mu,  ' MSE = ', mse))
      g
}



# Manipulate work or not ?
# manipulate(
#      {
#            if(resetSeed)
#                  set.seed(sample(1:1000))
#            
#            hist(rnorm(n=100, mean=0, sd=3), breaks=bins)
#      },
#      bins = slider(1, 20, step=1, initial =5, label="Bins"),
#      resetSeed = button("Reset Seed")
#)

# manipulate(myHist(mu), mu = slider(62, 74, step = 0.5))
# manipulate(myPlot(beta), beta = slider(0.6, 1.2, step = 0.02))

2:

library(UsingR)

data("diamond")
str(diamond)
## 'data.frame':    48 obs. of  2 variables:
##  $ carat: num  0.17 0.16 0.17 0.18 0.25 0.16 0.15 0.19 0.21 0.15 ...
##  $ price: int  355 328 350 325 642 342 322 485 483 323 ...
head(diamond)
##   carat price
## 1  0.17   355
## 2  0.16   328
## 3  0.17   350
## 4  0.18   325
## 5  0.25   642
## 6  0.16   342
g <- ggplot(diamond, aes(x = carat, y = price))
g <- g + geom_point(size = 5, col = "black", alpha = .2)
g <- g + geom_smooth(method = "lm", col = "red")
g

# Fitting
fit1 <- lm(price ~ carat, diamond)
summary(fit1)
## 
## Call:
## lm(formula = price ~ carat, data = diamond)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -85.159 -21.448  -0.869  18.972  79.370 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -259.63      17.32  -14.99   <2e-16 ***
## carat        3721.02      81.79   45.50   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.84 on 46 degrees of freedom
## Multiple R-squared:  0.9783, Adjusted R-squared:  0.9778 
## F-statistic:  2070 on 1 and 46 DF,  p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(fit1)

#     We estimate an expect 3721.02 (SIN $) increase in price 
#     for every carat increase in mass of diamond

fit2 <- lm(price ~ I(carat - mean(carat)), data = diamond)
summary(fit2)
## 
## Call:
## lm(formula = price ~ I(carat - mean(carat)), data = diamond)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -85.159 -21.448  -0.869  18.972  79.370 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             500.083      4.596   108.8   <2e-16 ***
## I(carat - mean(carat)) 3721.025     81.786    45.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.84 on 46 degrees of freedom
## Multiple R-squared:  0.9783, Adjusted R-squared:  0.9778 
## F-statistic:  2070 on 1 and 46 DF,  p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(fit2)

fit3 <- lm(price ~ I(carat * 10), data = diamond)
summary(fit3)
## 
## Call:
## lm(formula = price ~ I(carat * 10), data = diamond)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -85.159 -21.448  -0.869  18.972  79.370 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -259.626     17.319  -14.99   <2e-16 ***
## I(carat * 10)  372.102      8.179   45.50   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.84 on 46 degrees of freedom
## Multiple R-squared:  0.9783, Adjusted R-squared:  0.9778 
## F-statistic:  2070 on 1 and 46 DF,  p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(fit3)

# Predict
predict(fit1, newdata = data.frame(carat = c(0.16, 0.27, 0.34)))
##         1         2         3 
##  335.7381  745.0508 1005.5225
predict(fit2, newdata = data.frame(carat = c(0.16, 0.27, 0.34)))
##        1        2        3 
## 140.3843 549.6970 810.1687
predict(fit3, newdata = data.frame(carat = c(0.16, 0.27, 0.34)))
##         1         2         3 
##  335.7381  745.0508 1005.5225
# Residuals
data("diamond")
y <- diamond$price
x <- diamond$carat
fit <- lm(y ~ x)
e <- resid(fit)
yhat <- predict(fit)
max(abs(e - (y - yhat)))
## [1] 9.485746e-13
plot(x = x, y = y)
abline(fit, lwd = 2, col = "red")

for(i in 1:length(y)){
      lines(c(x[i], x[i]), c(y[i], yhat[i]), lwd = 2, col = "blue")
}

plot(x, e)
abline(h = 0, col = "red")
for(i in 1:length(y)){
      lines(c(x[i], x[i]), c(e[i], 0), lwd = 2, col = "blue")
}

3:

# Estimation
n = 100; x = rnorm(n); x2 = rnorm(n); x3 = rnorm(n)
y = 1 + x + x2 + x3 + rnorm(n, sd = .1)
ey = resid(lm(y ~ x2 + x3))
ex = resid(lm(x ~ x2 + x3))
sum(ey * ex) / sum(ex ^ 2)
## [1] 0.9867069
coef(lm(ey ~ ex - 1))
##        ex 
## 0.9867069
coef(lm(y ~ x + x2 + x3))
## (Intercept)           x          x2          x3 
##   1.0080901   0.9867069   1.0051380   0.9993590
## Example 
library(datasets) 
data(swiss)
head(swiss)
##              Fertility Agriculture Examination Education Catholic
## Courtelary        80.2        17.0          15        12     9.96
## Delemont          83.1        45.1           6         9    84.84
## Franches-Mnt      92.5        39.7           5         5    93.40
## Moutier           85.8        36.5          12         7    33.77
## Neuveville        76.9        43.5          17        15     5.16
## Porrentruy        76.1        35.3           9         7    90.57
##              Infant.Mortality
## Courtelary               22.2
## Delemont                 22.2
## Franches-Mnt             20.2
## Moutier                  20.3
## Neuveville               20.6
## Porrentruy               26.6
str(swiss)
## 'data.frame':    47 obs. of  6 variables:
##  $ Fertility       : num  80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ...
##  $ Agriculture     : num  17 45.1 39.7 36.5 43.5 35.3 70.2 67.8 53.3 45.2 ...
##  $ Examination     : int  15 6 5 12 17 9 16 14 12 16 ...
##  $ Education       : int  12 9 5 7 15 7 7 8 7 13 ...
##  $ Catholic        : num  9.96 84.84 93.4 33.77 5.16 ...
##  $ Infant.Mortality: num  22.2 22.2 20.2 20.3 20.6 26.6 23.6 24.9 21 24.4 ...
library(GGally)
library(ggplot2)
g <- ggpairs(data = swiss, lower = list(continuous = "smooth"))   # pairs()
g

# Fitting
summary(lm(Fertility ~ . , data = swiss))$coefficients
##                    Estimate  Std. Error   t value     Pr(>|t|)
## (Intercept)      66.9151817 10.70603759  6.250229 1.906051e-07
## Agriculture      -0.1721140  0.07030392 -2.448142 1.872715e-02
## Examination      -0.2580082  0.25387820 -1.016268 3.154617e-01
## Education        -0.8709401  0.18302860 -4.758492 2.430605e-05
## Catholic          0.1041153  0.03525785  2.952969 5.190079e-03
## Infant.Mortality  1.0770481  0.38171965  2.821568 7.335715e-03
summary(lm(Fertility ~ Agriculture, data = swiss))$coefficients
##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) 60.3043752 4.25125562 14.185074 3.216304e-18
## Agriculture  0.1942017 0.07671176  2.531577 1.491720e-02
# Try a simulation
n <- 100; x2 <- 1 : n; x1 <- .01 * x2 + runif(n, -.1, .1); y = -x1 + x2 + rnorm(n, sd = .01)
summary(lm(y ~ x1))$coef
##             Estimate Std. Error   t value     Pr(>|t|)
## (Intercept)  1.80534   1.101669  1.638731 1.044759e-01
## x1          94.67050   1.870753 50.605547 4.706694e-72
summary(lm(y ~ x1 + x2))$coef
##                  Estimate   Std. Error      t value      Pr(>|t|)
## (Intercept)  4.193221e-05 0.0016687774    0.0251275  9.800049e-01
## x1          -9.856695e-01 0.0147084902  -67.0136419  4.813523e-83
## x2           9.998259e-01 0.0001509347 6624.2293238 4.127510e-276
dat = data.frame(y = y, x1 = x1, x2 = x2, ey = resid(lm(y ~ x2)), ex1 = resid(lm(x1 ~ x2)))
library(ggplot2)
g = ggplot(dat, aes(y = y, x = x1, colour = x2))
g = g + geom_point(colour="grey50", size = 5) + geom_smooth(method = lm, se = FALSE, colour = "black") 
g = g + geom_point(size = 4) 
g

g2 = ggplot(dat, aes(y = ey, x = ex1, colour = x2))  
g2 = g2 + geom_point(colour="grey50", size = 5) + geom_smooth(method = lm, se = FALSE, colour = "black") + geom_point(size = 4) 
g2

z <- swiss$Agriculture + swiss$Education
lm(Fertility ~ . + z, data = swiss)
## 
## Call:
## lm(formula = Fertility ~ . + z, data = swiss)
## 
## Coefficients:
##      (Intercept)       Agriculture       Examination         Education  
##          66.9152           -0.1721           -0.2580           -0.8709  
##         Catholic  Infant.Mortality                 z  
##           0.1041            1.0770                NA
require(datasets);data(InsectSprays); require(stats); require(ggplot2)
g = ggplot(data = InsectSprays, aes(y = count, x = spray, fill  = spray))
g = g + geom_violin(colour = "black", size = 2)
g = g + xlab("Type of spray") + ylab("Insect count")
g

summary(lm(count ~ spray, data = InsectSprays))$coef
##                Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)  14.5000000   1.132156 12.8074279 1.470512e-19
## sprayB        0.8333333   1.601110  0.5204724 6.044761e-01
## sprayC      -12.4166667   1.601110 -7.7550382 7.266893e-11
## sprayD       -9.5833333   1.601110 -5.9854322 9.816910e-08
## sprayE      -11.0000000   1.601110 -6.8702352 2.753922e-09
## sprayF        2.1666667   1.601110  1.3532281 1.805998e-01
summary(lm(count ~ 
                 I(1 * (spray == 'B')) + I(1 * (spray == 'C')) +  
                 I(1 * (spray == 'D')) + I(1 * (spray == 'E')) +
                 I(1 * (spray == 'F')) + I(1 * (spray == 'A')), data = InsectSprays))$coef
##                          Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)            14.5000000   1.132156 12.8074279 1.470512e-19
## I(1 * (spray == "B"))   0.8333333   1.601110  0.5204724 6.044761e-01
## I(1 * (spray == "C")) -12.4166667   1.601110 -7.7550382 7.266893e-11
## I(1 * (spray == "D"))  -9.5833333   1.601110 -5.9854322 9.816910e-08
## I(1 * (spray == "E")) -11.0000000   1.601110 -6.8702352 2.753922e-09
## I(1 * (spray == "F"))   2.1666667   1.601110  1.3532281 1.805998e-01
summary(lm(count ~ spray - 1, data = InsectSprays))$coef
##         Estimate Std. Error   t value     Pr(>|t|)
## sprayA 14.500000   1.132156 12.807428 1.470512e-19
## sprayB 15.333333   1.132156 13.543487 1.001994e-20
## sprayC  2.083333   1.132156  1.840148 7.024334e-02
## sprayD  4.916667   1.132156  4.342749 4.953047e-05
## sprayE  3.500000   1.132156  3.091448 2.916794e-03
## sprayF 16.666667   1.132156 14.721181 1.573471e-22
library(dplyr)
summarise(group_by(InsectSprays, spray), mn = mean(count))
## # A tibble: 6 x 2
##   spray    mn
##   <fct> <dbl>
## 1 A     14.5 
## 2 B     15.3 
## 3 C      2.08
## 4 D      4.92
## 5 E      3.5 
## 6 F     16.7
spray2 <- relevel(InsectSprays$spray, "C")
summary(lm(count ~ spray2, data = InsectSprays))$coef
##              Estimate Std. Error  t value     Pr(>|t|)
## (Intercept)  2.083333   1.132156 1.840148 7.024334e-02
## spray2A     12.416667   1.601110 7.755038 7.266893e-11
## spray2B     13.250000   1.601110 8.275511 8.509776e-12
## spray2D      2.833333   1.601110 1.769606 8.141205e-02
## spray2E      1.416667   1.601110 0.884803 3.794750e-01
## spray2F     14.583333   1.601110 9.108266 2.794343e-13
swiss = mutate(swiss, CatholicBin = 1 * (Catholic > 50))
g = ggplot(swiss, aes(x = Agriculture, y = Fertility, colour = factor(CatholicBin)))
g = g + geom_point(size = 6, colour = "black") + geom_point(size = 4)
g = g + xlab("% in Agriculture") + ylab("Fertility")
g

fit = lm(Fertility ~ Agriculture, data = swiss)
g1 = g
g1 = g1 + geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], size = 2)
g1

summary(lm(Fertility ~ Agriculture + factor(CatholicBin), data = swiss))$coef
##                        Estimate Std. Error   t value     Pr(>|t|)
## (Intercept)          60.8322366  4.1058630 14.815944 1.032493e-18
## Agriculture           0.1241776  0.0810977  1.531210 1.328763e-01
## factor(CatholicBin)1  7.8843292  3.7483622  2.103406 4.118221e-02
fit = lm(Fertility ~ Agriculture + factor(CatholicBin), data = swiss)
g1 = g
g1 = g1 + geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], size = 2)
g1 = g1 + geom_abline(intercept = coef(fit)[1] + coef(fit)[3], slope = coef(fit)[2], size = 2)
g1

fit = lm(Fertility ~ Agriculture * factor(CatholicBin), data = swiss)
g1 = g
g1 = g1 + geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], size = 2)
g1 = g1 + geom_abline(intercept = coef(fit)[1] + coef(fit)[3], 
                      slope = coef(fit)[2] + coef(fit)[4], size = 2)
g1

## Adjustment
# Simulation 1
n <- 100
t <- rep(c(0, 1), c(n/2, n/2))
x <- c(runif(n/2), runif(n/2))
beta0 <- 0 
beta1 <- 2
tau <- 1
sigma <- .2
y <- beta0 + x * beta1 + t * tau + rnorm(n, sd = sigma)
plot(x, y, type = "n", frame = FALSE)
abline(lm(y ~ x), lwd = 2)
abline(h = mean(y[1 : (n/2)]), lwd = 3)
abline(h = mean(y[(n/2 + 1) : n]), lwd = 3)
fit <- lm(y ~ x + t)
abline(coef(fit)[1], coef(fit)[2], lwd = 3)
abline(coef(fit)[1] + coef(fit)[3], coef(fit)[2], lwd = 3)
points(x[1 : (n/2)], y[1 : (n/2)], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[(n/2 + 1) : n], y[(n/2 + 1) : n], pch = 21, col = "black", bg = "salmon", cex = 2)

# Simulation 2
n <- 100
t <- rep(c(0, 1), c(n/2, n/2))
x <- c(runif(n/2), 1.5 + runif(n/2))
beta0 <- 0
beta1 <- 2
tau <- 0
sigma <- .2
y <- beta0 + x * beta1 + t * tau + rnorm(n, sd = sigma)
plot(x, y, type = "n", frame = FALSE)
abline(lm(y ~ x), lwd = 2)
abline(h = mean(y[1 : (n/2)]), lwd = 3)
abline(h = mean(y[(n/2 + 1) : n]), lwd = 3)
fit <- lm(y ~ x + t)
abline(coef(fit)[1], coef(fit)[2], lwd = 3)
abline(coef(fit)[1] + coef(fit)[3], coef(fit)[2], lwd = 3)
points(x[1 : (n/2)], y[1 : (n/2)], pch = 21, col = "black", bg = "lightblue", cex = 2)
points(x[(n/2 + 1) : n], y[(n/2 + 1) : n], pch = 21, col = "black", bg = "salmon", cex = 2)