code 12.1

visit<-c(350,460,350,430,350,380,430,470,450,490,340,300,440,450,300)
fee<-c(5.5,7.5,8,8,6.8,7.5,4.5,6.4,7,5,7.2,7.9,5.9,5,7)
ad<-c(3.3,3.3,3,4.5,3,4,3,3.7,3.5,4,3.5,3.2,4,3.5,2.7)
region<-c("A","A","A","A","A","B","B","B","B","B","C","C","C","C","C")
interaction<-fee*ad
da<-c(1,1,1,1,1,0,0,0,0,0,0,0,0,0,0)
db<-c(0,0,0,0,0,1,1,1,1,1,0,0,0,0,0)

sale<-data.frame(visit, fee, ad, interaction, region, da, db)

sale
##    visit fee  ad interaction region da db
## 1    350 5.5 3.3       18.15      A  1  0
## 2    460 7.5 3.3       24.75      A  1  0
## 3    350 8.0 3.0       24.00      A  1  0
## 4    430 8.0 4.5       36.00      A  1  0
## 5    350 6.8 3.0       20.40      A  1  0
## 6    380 7.5 4.0       30.00      B  0  1
## 7    430 4.5 3.0       13.50      B  0  1
## 8    470 6.4 3.7       23.68      B  0  1
## 9    450 7.0 3.5       24.50      B  0  1
## 10   490 5.0 4.0       20.00      B  0  1
## 11   340 7.2 3.5       25.20      C  0  0
## 12   300 7.9 3.2       25.28      C  0  0
## 13   440 5.9 4.0       23.60      C  0  0
## 14   450 5.0 3.5       17.50      C  0  0
## 15   300 7.0 2.7       18.90      C  0  0

code 12.2

# code 12.1의 자료 사용

sale
##    visit fee  ad interaction region da db
## 1    350 5.5 3.3       18.15      A  1  0
## 2    460 7.5 3.3       24.75      A  1  0
## 3    350 8.0 3.0       24.00      A  1  0
## 4    430 8.0 4.5       36.00      A  1  0
## 5    350 6.8 3.0       20.40      A  1  0
## 6    380 7.5 4.0       30.00      B  0  1
## 7    430 4.5 3.0       13.50      B  0  1
## 8    470 6.4 3.7       23.68      B  0  1
## 9    450 7.0 3.5       24.50      B  0  1
## 10   490 5.0 4.0       20.00      B  0  1
## 11   340 7.2 3.5       25.20      C  0  0
## 12   300 7.9 3.2       25.28      C  0  0
## 13   440 5.9 4.0       23.60      C  0  0
## 14   450 5.0 3.5       17.50      C  0  0
## 15   300 7.0 2.7       18.90      C  0  0
cov(sale[,1:2])             #공분산행렬
##            visit        fee
## visit 4035.23810 -32.990476
## fee    -32.99048   1.372667
cov(sale[,1:2])*(nrow(sale)-1)  #공분산행렬, 제곱합
##            visit        fee
## visit 56493.3333 -461.86667
## fee    -461.8667   19.21733
summary(sale)
##      visit            fee              ad        interaction    region
##  Min.   :300.0   Min.   :4.500   Min.   :2.70   Min.   :13.50   A:5   
##  1st Qu.:350.0   1st Qu.:5.700   1st Qu.:3.10   1st Qu.:19.45   B:5   
##  Median :430.0   Median :7.000   Median :3.50   Median :23.68   C:5   
##  Mean   :399.3   Mean   :6.613   Mean   :3.48   Mean   :23.03         
##  3rd Qu.:450.0   3rd Qu.:7.500   3rd Qu.:3.85   3rd Qu.:24.98         
##  Max.   :490.0   Max.   :8.000   Max.   :4.50   Max.   :36.00         
##        da               db        
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000  
##  Mean   :0.3333   Mean   :0.3333  
##  3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000

code 12.3

# code 12.1의 자료 사용

result.12.3<-lm(visit ~ fee, data=sale)

aov(result.12.3)
## Call:
##    aov(formula = result.12.3)
## 
## Terms:
##                      fee Residuals
## Sum of Squares  11100.44  45392.90
## Deg. of Freedom        1        13
## 
## Residual standard error: 59.09113
## Estimated effects may be unbalanced
summary(aov(result.12.3))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## fee          1  11100   11100   3.179 0.0979 .
## Residuals   13  45393    3492                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(result.12.3)
## 
## Call:
## lm(formula = visit ~ fee, data = sale)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -90.040 -45.040   1.977  55.926  81.977 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   558.28      90.44   6.173 3.36e-05 ***
## fee           -24.03      13.48  -1.783   0.0979 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 59.09 on 13 degrees of freedom
## Multiple R-squared:  0.1965, Adjusted R-squared:  0.1347 
## F-statistic: 3.179 on 1 and 13 DF,  p-value: 0.09794
confint(result.12.3, level=0.95)
##                 2.5 %    97.5 %
## (Intercept) 362.89124 753.66326
## fee         -53.15468   5.08696
#각 회귀분석 모형 및 결과요약 객체. 필요한 결과물을 꺼내쓰면 된다.
ls(result.12.3)
##  [1] "assign"        "call"          "coefficients"  "df.residual"  
##  [5] "effects"       "fitted.values" "model"         "qr"           
##  [9] "rank"          "residuals"     "terms"         "xlevels"
ls(summary(result.12.3))
##  [1] "adj.r.squared" "aliased"       "call"          "coefficients" 
##  [5] "cov.unscaled"  "df"            "fstatistic"    "r.squared"    
##  [9] "residuals"     "sigma"         "terms"
#수정 r제곱
summary(result.12.3)$adj.r.squared
## [1] 0.1346827
#Root MSE
r.mse<-sqrt(sum(result.12.3$residuals^2)/(nrow(sale)-2))
r.mse
## [1] 59.09113
#종속변수 평균
d.mean<-mean(sale$visit)
d.mean
## [1] 399.3333
#Coeff. Var.
c.v<-r.mse/d.mean*100
c.v
## [1] 14.79744
#적합결여검정(Lack-of-fit test)
model.lm<-lm(visit ~ fee, data=sale)
model.factor<-lm(visit ~ as.factor(fee), data=sale)

anova(model.lm, model.factor)
## Analysis of Variance Table
## 
## Model 1: visit ~ fee
## Model 2: visit ~ as.factor(fee)
##   Res.Df   RSS Df Sum of Sq     F Pr(>F)
## 1     13 45393                          
## 2      4 18450  9     26943 0.649 0.7295

code 12.4

# code 12.1의 자료 사용

library(ggplot2)


sale$predict<-predict(result.12.3)

library(grid)

ggplot(sale, aes(x=fee, y=predict)) + geom_point(shape=3) + 
    stat_smooth(method=lm, se=FALSE) +
    geom_hline(yintercept=mean(sale$visit)) +
    geom_segment(aes(yend=mean(sale$visit)), xend=fee, color="red", size=1) +
    geom_segment(aes(yend=visit), xend=fee, linetype=3, size=1) +
    geom_point(data=sale, aes(x=fee, y=visit)) +
    annotate("text", parse=TRUE, x=5.9, y=410, label="hat(y[i]) - bar(y)", hjust=-.5) +
    annotate("text", parse=TRUE, x=5.9, y=430, label="y[i] - bar(y)", hjust=-.5) +
    annotate("segment", x=5.95, xend=5.95, y=mean(sale$visit), yend=416.4775, 
            arrow=arrow(ends="both", angle=45, length=unit(0.2, "cm")), color="blue") +
    annotate("segment", x=5.95, xend=5.95, y=415.4775, yend=440, 
            arrow=arrow(ends="both", angle=45, length=unit(0.2, "cm")), color="blue")

code 12.5

# code 12.1의 자료 사용

confint(result.12.3, level=0.9) #계수의 신뢰구간
##                   5 %       95 %
## (Intercept) 398.11218 718.442322
## fee         -47.90526  -0.162456
predict(result.12.3, interval="confidence", level=0.9)  #종속변수 평균의 신뢰구간
##         fit      lwr      upr
## 1  426.0910 388.1913 463.9907
## 2  378.0233 343.7005 412.3461
## 3  366.0064 323.2772 408.7355
## 4  366.0064 323.2772 408.7355
## 5  394.8470 367.4624 422.2316
## 6  378.0233 343.7005 412.3461
## 7  450.1249 392.8966 507.3532
## 8  404.4606 376.9652 431.9559
## 9  390.0402 361.4875 418.5929
## 10 438.1080 391.0625 485.1534
## 11 385.2335 354.8002 415.6668
## 12 368.4098 327.5021 409.3175
## 13 416.4775 384.5397 448.4152
## 14 438.1080 391.0625 485.1534
## 15 390.0402 361.4875 418.5929
predict(result.12.3, interval="prediction", level=0.9)  #종속변수 관측값의 신뢰구간
## Warning in predict.lm(result.12.3, interval = "prediction", level = 0.9): predictions on current data refer to _future_ responses
##         fit      lwr      upr
## 1  426.0910 314.7929 537.3891
## 2  378.0233 267.8919 488.1548
## 3  366.0064 252.9725 479.0403
## 4  366.0064 252.9725 479.0403
## 5  394.8470 286.6768 503.0172
## 6  378.0233 267.8919 488.1548
## 7  450.1249 330.8523 569.3975
## 8  404.4606 296.2623 512.6589
## 9  390.0402 281.5684 498.5121
## 10 438.1080 323.3728 552.8431
## 11 385.2335 276.2515 494.2154
## 12 368.4098 256.0518 480.7678
## 13 416.4775 307.0659 525.8891
## 14 438.1080 323.3728 552.8431
## 15 390.0402 281.5684 498.5121
ggplot(sale[1:15,], aes(x=fee, y=visit)) + geom_point() + stat_smooth(method=lm, level=0.90)

#좀 더 확실하게 그려보자
sale2<-sale[1:15,1:2]
sale2$mean.clm.l<-predict(result.12.3, interval="confidence", level=0.9)[,2]
sale2$mean.clm.u<-predict(result.12.3, interval="confidence", level=0.9)[,3]

sale2$cli.l<-predict(result.12.3, interval="prediction", level=0.9)[,2]
## Warning in predict.lm(result.12.3, interval = "prediction", level = 0.9): predictions on current data refer to _future_ responses
sale2$cli.u<-predict(result.12.3, interval="prediction", level=0.9)[,3]
## Warning in predict.lm(result.12.3, interval = "prediction", level = 0.9): predictions on current data refer to _future_ responses
ggplot(sale2, aes(x=fee, y=visit)) + geom_point() + stat_smooth(method=lm, level=0.90) +
    geom_point(data=sale2, aes(x=fee, y=mean.clm.l), shape=3) + 
    geom_point(data=sale2, aes(x=fee, y=mean.clm.u), shape=3) +
    geom_line(data=sale2, aes(x=fee, y=cli.l), linetype=5) + 
    geom_line(data=sale2, aes(x=fee, y=cli.u), linetype=5) 

# 독립변수가 10으로 주어졌을 때 예측치와 신뢰구간

sale3<-sale[,1:2]
sale3<-rbind(sale3, c(NA, 10))

result.12.5<-lm(visit ~ fee, data=sale3)

predict(result.12.5, newdata=data.frame(fee=sale3$fee))
##        1        2        3        4        5        6        7        8 
## 426.0910 378.0233 366.0064 366.0064 394.8470 378.0233 450.1249 404.4606 
##        9       10       11       12       13       14       15       16 
## 390.0402 438.1080 385.2335 368.4098 416.4775 438.1080 390.0402 317.9387
sale3$mean.clm.l<-predict(result.12.5, newdata=data.frame(fee=sale3$fee), interval="confidence", level=0.9)[,2]
sale3$mean.clm.u<-predict(result.12.5, newdata=data.frame(fee=sale3$fee), interval="confidence", level=0.9)[,3]

sale3$mean.cli.l<-predict(result.12.5, newdata=data.frame(fee=sale3$fee), interval="prediction", level=0.9)[,2]
sale3$mean.cli.u<-predict(result.12.5, newdata=data.frame(fee=sale3$fee), interval="prediction", level=0.9)[,3]

sale3
##    visit  fee mean.clm.l mean.clm.u mean.cli.l mean.cli.u
## 1    350  5.5   388.1913   463.9907   314.7929   537.3891
## 2    460  7.5   343.7005   412.3461   267.8919   488.1548
## 3    350  8.0   323.2772   408.7355   252.9725   479.0403
## 4    430  8.0   323.2772   408.7355   252.9725   479.0403
## 5    350  6.8   367.4624   422.2316   286.6768   503.0172
## 6    380  7.5   343.7005   412.3461   267.8919   488.1548
## 7    430  4.5   392.8966   507.3532   330.8523   569.3975
## 8    470  6.4   376.9652   431.9559   296.2623   512.6589
## 9    450  7.0   361.4875   418.5929   281.5684   498.5121
## 10   490  5.0   391.0625   485.1534   323.3728   552.8431
## 11   340  7.2   354.8002   415.6668   276.2515   494.2154
## 12   300  7.9   327.5021   409.3175   256.0518   480.7678
## 13   440  5.9   384.5397   448.4152   307.0659   525.8891
## 14   450  5.0   391.0625   485.1534   323.3728   552.8431
## 15   300  7.0   361.4875   418.5929   281.5684   498.5121
## 16    NA 10.0   232.6985   403.1789   182.9692   452.9082