# Load the prepaid-card data set (machine-specific absolute path).
StarbucksPrepaid <- read.csv(
  file = "/Users/ruiqianli/Desktop/EBAC_SB/Day\ 4/StarbucksPrepaid.csv"
)
# Preview the first three rows of the data frame.
head(StarbucksPrepaid, n = 3)
##   Amount Age Days Cups Income
## 1      5  25    4    1     20
## 2     25  30   12    5     35
## 3     10  27   10    4     30
# Pairwise scatterplot matrix of the prepaid data (uses the car package).
library(car)
## Loading required package: carData
scatterplotMatrix(
  StarbucksPrepaid,
  col = "blue",
  main = "ScatterplotMatrix"
)

# Pairwise correlations between all columns.
cor(StarbucksPrepaid)
##           Amount        Age       Days      Cups    Income
## Amount 1.0000000 0.21514123 0.40686371 0.2862270 0.8500323
## Age    0.2151412 1.00000000 0.03745681 0.2682888 0.1778494
## Days   0.4068637 0.03745681 1.00000000 0.5876009 0.3054375
## Cups   0.2862270 0.26828876 0.58760095 1.0000000 0.1594511
## Income 0.8500323 0.17784941 0.30543755 0.1594511 1.0000000
# Histogram of the response variable, requested with breaks = 20.
hist(StarbucksPrepaid$Amount, breaks = 20)

# Full model: regress Amount on all four candidate predictors.
StarbucksPrepaid_Fit <- lm(
  formula = Amount ~ Age + Days + Cups + Income,
  data = StarbucksPrepaid
)
# lm.beta() adds the standardized coefficients reported by summary().
library(lm.beta)
OPBeta <- lm.beta(StarbucksPrepaid_Fit)
summary(OPBeta)
## 
## Call:
## lm(formula = Amount ~ Age + Days + Cups + Income, data = StarbucksPrepaid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.874  -9.947  -2.351   7.332  56.872 
## 
## Coefficients:
##              Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept) -83.82574      0.00000   22.49445  -3.727  0.00133 ** 
## Age           0.23693      0.04878    0.57591   0.411  0.68515    
## Days          1.18966      0.11697    1.47393   0.807  0.42909    
## Cups          1.42161      0.07793    2.63105   0.540  0.59494    
## Income        2.40654      0.79320    0.35972   6.690 1.64e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.15 on 20 degrees of freedom
## Multiple R-squared:  0.7546, Adjusted R-squared:  0.7056 
## F-statistic: 15.38 on 4 and 20 DF,  p-value: 6.758e-06
# Variance inflation factors for the four-predictor Amount model.
library(car)
vif(mod = StarbucksPrepaid_Fit)
##      Age     Days     Cups   Income 
## 1.146044 1.711952 1.695650 1.145870
# Reduced model: same response, with Days left out of the predictors.
StarbucksPrepaid_Fit_1 <- lm(
  formula = Amount ~ Age + Cups + Income,
  data = StarbucksPrepaid
)
# Variance inflation factors for the reduced model.
library(car)
vif(mod = StarbucksPrepaid_Fit_1)
##      Age     Cups   Income 
## 1.099746 1.092743 1.047212
# Summary of the Age + Cups + Income model, including standardized
# coefficients from lm.beta().
library(lm.beta)
OPBeta <- lm.beta(StarbucksPrepaid_Fit_1)
summary(OPBeta)
## 
## Call:
## lm(formula = Amount ~ Age + Cups + Income, data = StarbucksPrepaid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.806 -12.493  -1.030   6.862  58.207 
## 
## Coefficients:
##              Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept) -76.72545      0.00000   20.53046  -3.737  0.00122 ** 
## Age           0.14350      0.02955    0.55945   0.257  0.80006    
## Cups          2.68789      0.14735    2.09452   1.283  0.21337    
## Income        2.49174      0.82128    0.34102   7.307 3.41e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.96 on 21 degrees of freedom
## Multiple R-squared:  0.7466, Adjusted R-squared:  0.7105 
## F-statistic: 20.63 on 3 and 21 DF,  p-value: 1.82e-06
# Two-predictor model: Amount explained by Cups and Income only.
StarbucksPrepaid_Fit_2 <- lm(
  formula = Amount ~ Cups + Income,
  data = StarbucksPrepaid
)
# Auto-print the call and the fitted coefficients.
StarbucksPrepaid_Fit_2
## 
## Call:
## lm(formula = Amount ~ Cups + Income, data = StarbucksPrepaid)
## 
## Coefficients:
## (Intercept)         Cups       Income  
##     -73.076        2.821        2.504
# Variance inflation factors for the Cups + Income model.
library(car)
vif(mod = StarbucksPrepaid_Fit_2)
##     Cups   Income 
## 1.026088 1.026088
# Residual diagnostics: default plot() output for the fitted lm object.
plot(x = StarbucksPrepaid_Fit_2)

# Data for the Days models: every column of StarbucksPrepaid except Amount.
# Amount is dropped by name rather than by position so the result does not
# depend on the column order of the CSV; drop = FALSE guarantees the result
# stays a data frame.
Starbucks_Q2 <- StarbucksPrepaid[
  , setdiff(names(StarbucksPrepaid), "Amount"), drop = FALSE
]
# Pairwise scatterplot matrix of the remaining columns.
library(car)
scatterplotMatrix(Starbucks_Q2, col = "blue", main = "ScatterplotMatrix")

# Histogram of Days, requested with breaks = 10.
hist(Starbucks_Q2$Days, breaks = 10)

# Regress Days on Age, Cups and Income.
StarbucksDays_Fit <- lm(
  formula = Days ~ Age + Cups + Income,
  data = Starbucks_Q2
)
# Summary including standardized coefficients from lm.beta().
library(lm.beta)
OPBeta <- lm.beta(StarbucksDays_Fit)
summary(OPBeta)
## 
## Call:
## lm(formula = Days ~ Age + Cups + Income, data = Starbucks_Q2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7442 -2.5016 -0.2539  2.4524  6.9865 
## 
## Coefficients:
##             Estimate Standardized Std. Error t value Pr(>|t|)   
## (Intercept)  5.96836      0.00000    3.06510   1.947  0.06501 . 
## Age         -0.07853     -0.16445    0.08352  -0.940  0.35777   
## Cups         1.06441      0.59344    0.31270   3.404  0.00267 **
## Income       0.07161      0.24006    0.05091   1.407  0.17418   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.279 on 21 degrees of freedom
## Multiple R-squared:  0.4159, Adjusted R-squared:  0.3324 
## F-statistic: 4.984 on 3 and 21 DF,  p-value: 0.009129
# Variance inflation factors for the three-predictor Days model.
library(car)
vif(mod = StarbucksDays_Fit)
##      Age     Cups   Income 
## 1.099746 1.092743 1.047212
# Reduced Days model: Age removed, Cups and Income kept.
StarbucksDays_Fit_2 <- lm(
  formula = Days ~ Cups + Income,
  data = Starbucks_Q2
)
# Summary including standardized coefficients from lm.beta().
library(lm.beta)
OPBeta <- lm.beta(StarbucksDays_Fit_2)
summary(OPBeta)
## 
## Call:
## lm(formula = Days ~ Cups + Income, data = Starbucks_Q2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1987 -2.5260 -0.1873  2.4608  6.4772 
## 
## Coefficients:
##             Estimate Standardized Std. Error t value Pr(>|t|)   
## (Intercept)  3.97135      0.00000    2.20414   1.802  0.08530 . 
## Cups         0.99179      0.55296    0.30222   3.282  0.00341 **
## Income       0.06481      0.21727    0.05026   1.289  0.21063   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.27 on 22 degrees of freedom
## Multiple R-squared:  0.3913, Adjusted R-squared:  0.3359 
## F-statistic: 7.071 on 2 and 22 DF,  p-value: 0.004252
# Variance inflation factors for the Cups + Income model of Days.
vif(mod = StarbucksDays_Fit_2)
##     Cups   Income 
## 1.026088 1.026088
# Single-predictor model: Days explained by Cups alone.
StarbucksDays_Fit_3 <- lm(
  formula = Days ~ Cups,
  data = Starbucks_Q2
)
# Summary including the standardized coefficient from lm.beta().
library(lm.beta)
OPBeta <- lm.beta(StarbucksDays_Fit_3)
summary(OPBeta)
## 
## Call:
## lm(formula = Days ~ Cups, data = Starbucks_Q2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3502 -3.2423  0.6498  2.7577  6.6498 
## 
## Coefficients:
##             Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept)   6.0806       0.0000     1.4985   4.058 0.000487 ***
## Cups          1.0539       0.5876     0.3026   3.483 0.002011 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.317 on 23 degrees of freedom
## Multiple R-squared:  0.3453, Adjusted R-squared:  0.3168 
## F-statistic: 12.13 on 1 and 23 DF,  p-value: 0.002011
# Load the growth data set (machine-specific absolute path).
StarbucksGrowth <- read.csv(
  file = "/Users/ruiqianli/Desktop/EBAC_SB/Day\ 4/StarbucksGrowth.csv"
)
# Preview the first three rows of the data frame.
head(StarbucksGrowth, n = 3)
##   Year Revenue Stores Drinks AveWeekEarnings
## 1    1     400    676     15             386
## 2    2     700   1015     15             394
## 3    3    1000   1412     18             407
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Pairwise plot matrix of the growth data (ggpairs comes from GGally).
ggpairs(StarbucksGrowth)

# Regress Revenue on Stores, Drinks and AveWeekEarnings.
StarbucksGrowth_Fit <- lm(
  formula = Revenue ~ Stores + Drinks + AveWeekEarnings,
  data = StarbucksGrowth
)
# Summary including standardized coefficients from lm.beta().
library(lm.beta)
OPBeta <- lm.beta(StarbucksGrowth_Fit)
summary(OPBeta)
## 
## Call:
## lm(formula = Revenue ~ Stores + Drinks + AveWeekEarnings, data = StarbucksGrowth)
## 
## Residuals:
##        1        2        3        4        5        6        7 
##  -3.6994  -6.6557  22.5877 -15.8762   3.9080  -0.1463  -0.1180 
## 
## Coefficients:
##                   Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept)     -1.350e+04    0.000e+00  9.462e+02 -14.268 0.000746 ***
## Stores          -2.642e-02   -4.689e-02  2.777e-02  -0.952 0.411447    
## Drinks          -7.520e+01   -5.830e-01  1.007e+01  -7.468 0.004972 ** 
## AveWeekEarnings  3.899e+01    1.615e+00  2.847e+00  13.696 0.000842 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.69 on 3 degrees of freedom
## Multiple R-squared:  0.9998, Adjusted R-squared:  0.9996 
## F-statistic:  4539 on 3 and 3 DF,  p-value: 5.549e-06
# Variance inflation factors for the three-predictor Revenue model.
vif(mod = StarbucksGrowth_Fit)
##          Stores          Drinks AveWeekEarnings 
##        33.06382        83.01337       189.28671
# Reduced Revenue model: Stores removed, Drinks and AveWeekEarnings kept.
StarbucksGrowth_Fit_2 <- lm(
  formula = Revenue ~ Drinks + AveWeekEarnings,
  data = StarbucksGrowth
)
# Summary including standardized coefficients from lm.beta().
library(lm.beta)
OPBeta <- lm.beta(StarbucksGrowth_Fit_2)
summary(OPBeta)
## 
## Call:
## lm(formula = Revenue ~ Drinks + AveWeekEarnings, data = StarbucksGrowth)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -11.780  -3.966  24.408 -12.110   5.621   7.773  -9.946 
## 
## Coefficients:
##                   Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept)     -12670.302        0.000    362.794  -34.92 4.01e-06 ***
## Drinks             -67.725       -0.525      6.222  -10.88 0.000404 ***
## AveWeekEarnings     36.523        1.512      1.165   31.36 6.17e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.49 on 4 degrees of freedom
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9996 
## F-statistic:  6973 on 2 and 4 DF,  p-value: 8.223e-08