# Load the prepaid-card data set.
# The path is an absolute, machine-specific location. The directory name
# contains a space ("Day 4"); a space needs no escaping inside an R string,
# and "\ " is not a valid escape sequence (it makes the script fail to parse).
StarbucksPrepaid <- read.csv(
  "/Users/ruiqianli/Desktop/EBAC_SB/Day 4/StarbucksPrepaid.csv"
)
# Check the first three rows of the data
head(StarbucksPrepaid, 3)
## Amount Age Days Cups Income
## 1 5 25 4 1 20
## 2 25 30 12 5 35
## 3 10 27 10 4 30
# Scatter plot matrix of every column in StarbucksPrepaid (car package).
library(car)
## Loading required package: carData
scatterplotMatrix(
  StarbucksPrepaid,
  col = "blue",
  main = "ScatterplotMatrix"
)

# Pairwise correlation matrix of all columns.
cor(x = StarbucksPrepaid)
## Amount Age Days Cups Income
## Amount 1.0000000 0.21514123 0.40686371 0.2862270 0.8500323
## Age 0.2151412 1.00000000 0.03745681 0.2682888 0.1778494
## Days 0.4068637 0.03745681 1.00000000 0.5876009 0.3054375
## Cups 0.2862270 0.26828876 0.58760095 1.0000000 0.1594511
## Income 0.8500323 0.17784941 0.30543755 0.1594511 1.0000000
# Histogram of the Amount column, requesting about 20 bins.
hist(x = StarbucksPrepaid$Amount, breaks = 20)

# Model fit: regress Amount on Age, Days, Cups and Income.
StarbucksPrepaid_Fit <- lm(
  formula = Amount ~ Age + Days + Cups + Income,
  data = StarbucksPrepaid
)
library(lm.beta)
# Attach standardized coefficients to the fit, then print the summary table.
OPBeta <- lm.beta(StarbucksPrepaid_Fit)
summary(OPBeta)
##
## Call:
## lm(formula = Amount ~ Age + Days + Cups + Income, data = StarbucksPrepaid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.874 -9.947 -2.351 7.332 56.872
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) -83.82574 0.00000 22.49445 -3.727 0.00133 **
## Age 0.23693 0.04878 0.57591 0.411 0.68515
## Days 1.18966 0.11697 1.47393 0.807 0.42909
## Cups 1.42161 0.07793 2.63105 0.540 0.59494
## Income 2.40654 0.79320 0.35972 6.690 1.64e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.15 on 20 degrees of freedom
## Multiple R-squared: 0.7546, Adjusted R-squared: 0.7056
## F-statistic: 15.38 on 4 and 20 DF, p-value: 6.758e-06
# VIF: variance inflation factors for the four-predictor Amount model.
library(car)
vif(mod = StarbucksPrepaid_Fit)
## Age Days Cups Income
## 1.146044 1.711952 1.695650 1.145870
# Model fit: Amount on Age, Cups and Income (Days is not in this formula).
StarbucksPrepaid_Fit_1 <- lm(
  formula = Amount ~ Age + Cups + Income,
  data = StarbucksPrepaid
)
library(car)
# Variance inflation factors for the three-predictor model.
vif(mod = StarbucksPrepaid_Fit_1)
## Age Cups Income
## 1.099746 1.092743 1.047212
# Standardized-coefficient summary for StarbucksPrepaid_Fit_1.
library(lm.beta)
OPBeta <- lm.beta(StarbucksPrepaid_Fit_1)
summary(OPBeta)
##
## Call:
## lm(formula = Amount ~ Age + Cups + Income, data = StarbucksPrepaid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.806 -12.493 -1.030 6.862 58.207
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) -76.72545 0.00000 20.53046 -3.737 0.00122 **
## Age 0.14350 0.02955 0.55945 0.257 0.80006
## Cups 2.68789 0.14735 2.09452 1.283 0.21337
## Income 2.49174 0.82128 0.34102 7.307 3.41e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.96 on 21 degrees of freedom
## Multiple R-squared: 0.7466, Adjusted R-squared: 0.7105
## F-statistic: 20.63 on 3 and 21 DF, p-value: 1.82e-06
# Model fit: Amount on Cups and Income only.
StarbucksPrepaid_Fit_2 <- lm(
  formula = Amount ~ Cups + Income,
  data = StarbucksPrepaid
)
# Auto-print the fitted model (call and coefficients).
StarbucksPrepaid_Fit_2
##
## Call:
## lm(formula = Amount ~ Cups + Income, data = StarbucksPrepaid)
##
## Coefficients:
## (Intercept) Cups Income
## -73.076 2.821 2.504
# Variance inflation factors for the two-predictor Amount model.
library(car)
vif(mod = StarbucksPrepaid_Fit_2)
## Cups Income
## 1.026088 1.026088
# Residual analyses: draw the standard lm diagnostic plots for Fit_2.
plot(x = StarbucksPrepaid_Fit_2)




# Working copy of the data without its first column
# (Amount, going by the head() output earlier in this script).
Starbucks_Q2 <- StarbucksPrepaid[, -1]
# Scatter plot matrix of the remaining columns.
library(car)
scatterplotMatrix(
  Starbucks_Q2,
  col = "blue",
  main = "ScatterplotMatrix"
)

# Histogram of the Days column, requesting about 10 bins.
hist(x = Starbucks_Q2$Days, breaks = 10)

# Model fit: regress Days on Age, Cups and Income.
StarbucksDays_Fit <- lm(
  formula = Days ~ Age + Cups + Income,
  data = Starbucks_Q2
)
library(lm.beta)
# Attach standardized coefficients to the fit, then print the summary table.
OPBeta <- lm.beta(StarbucksDays_Fit)
summary(OPBeta)
##
## Call:
## lm(formula = Days ~ Age + Cups + Income, data = Starbucks_Q2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7442 -2.5016 -0.2539 2.4524 6.9865
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 5.96836 0.00000 3.06510 1.947 0.06501 .
## Age -0.07853 -0.16445 0.08352 -0.940 0.35777
## Cups 1.06441 0.59344 0.31270 3.404 0.00267 **
## Income 0.07161 0.24006 0.05091 1.407 0.17418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.279 on 21 degrees of freedom
## Multiple R-squared: 0.4159, Adjusted R-squared: 0.3324
## F-statistic: 4.984 on 3 and 21 DF, p-value: 0.009129
# Variance inflation factors for the three-predictor Days model.
library(car)
vif(mod = StarbucksDays_Fit)
## Age Cups Income
## 1.099746 1.092743 1.047212
# Model fit: Days on Cups and Income (Age is not in this formula).
StarbucksDays_Fit_2 <- lm(
  formula = Days ~ Cups + Income,
  data = Starbucks_Q2
)
library(lm.beta)
# Attach standardized coefficients to the fit, then print the summary table.
OPBeta <- lm.beta(StarbucksDays_Fit_2)
summary(OPBeta)
##
## Call:
## lm(formula = Days ~ Cups + Income, data = Starbucks_Q2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1987 -2.5260 -0.1873 2.4608 6.4772
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 3.97135 0.00000 2.20414 1.802 0.08530 .
## Cups 0.99179 0.55296 0.30222 3.282 0.00341 **
## Income 0.06481 0.21727 0.05026 1.289 0.21063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.27 on 22 degrees of freedom
## Multiple R-squared: 0.3913, Adjusted R-squared: 0.3359
## F-statistic: 7.071 on 2 and 22 DF, p-value: 0.004252
# Variance inflation factors for the two-predictor Days model.
vif(mod = StarbucksDays_Fit_2)
## Cups Income
## 1.026088 1.026088
# Model fit: simple regression of Days on Cups alone.
StarbucksDays_Fit_3 <- lm(
  formula = Days ~ Cups,
  data = Starbucks_Q2
)
library(lm.beta)
# Attach the standardized coefficient to the fit, then print the summary.
OPBeta <- lm.beta(StarbucksDays_Fit_3)
summary(OPBeta)
##
## Call:
## lm(formula = Days ~ Cups, data = Starbucks_Q2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3502 -3.2423 0.6498 2.7577 6.6498
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 6.0806 0.0000 1.4985 4.058 0.000487 ***
## Cups 1.0539 0.5876 0.3026 3.483 0.002011 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.317 on 23 degrees of freedom
## Multiple R-squared: 0.3453, Adjusted R-squared: 0.3168
## F-statistic: 12.13 on 1 and 23 DF, p-value: 0.002011
# Load the growth data set.
# The path is an absolute, machine-specific location. The directory name
# contains a space ("Day 4"); a space needs no escaping inside an R string,
# and "\ " is not a valid escape sequence (it makes the script fail to parse).
StarbucksGrowth <- read.csv(
  "/Users/ruiqianli/Desktop/EBAC_SB/Day 4/StarbucksGrowth.csv"
)
# Check the first three rows of the data
head(StarbucksGrowth, 3)
## Year Revenue Stores Drinks AveWeekEarnings
## 1 1 400 676 15 386
## 2 2 700 1015 15 394
## 3 3 1000 1412 18 407
# Pairs plot of every column in StarbucksGrowth (GGally package).
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggpairs(data = StarbucksGrowth)

# Model fit: regress Revenue on Stores, Drinks and AveWeekEarnings.
StarbucksGrowth_Fit <- lm(
  formula = Revenue ~ Stores + Drinks + AveWeekEarnings,
  data = StarbucksGrowth
)
library(lm.beta)
# Attach standardized coefficients to the fit, then print the summary table.
OPBeta <- lm.beta(StarbucksGrowth_Fit)
summary(OPBeta)
##
## Call:
## lm(formula = Revenue ~ Stores + Drinks + AveWeekEarnings, data = StarbucksGrowth)
##
## Residuals:
## 1 2 3 4 5 6 7
## -3.6994 -6.6557 22.5877 -15.8762 3.9080 -0.1463 -0.1180
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) -1.350e+04 0.000e+00 9.462e+02 -14.268 0.000746 ***
## Stores -2.642e-02 -4.689e-02 2.777e-02 -0.952 0.411447
## Drinks -7.520e+01 -5.830e-01 1.007e+01 -7.468 0.004972 **
## AveWeekEarnings 3.899e+01 1.615e+00 2.847e+00 13.696 0.000842 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.69 on 3 degrees of freedom
## Multiple R-squared: 0.9998, Adjusted R-squared: 0.9996
## F-statistic: 4539 on 3 and 3 DF, p-value: 5.549e-06
# Variance inflation factors for the three-predictor Revenue model.
vif(mod = StarbucksGrowth_Fit)
## Stores Drinks AveWeekEarnings
## 33.06382 83.01337 189.28671
# Model fit: Revenue on Drinks and AveWeekEarnings (Stores is not included).
StarbucksGrowth_Fit_2 <- lm(
  formula = Revenue ~ Drinks + AveWeekEarnings,
  data = StarbucksGrowth
)
library(lm.beta)
# Attach standardized coefficients to the fit, then print the summary table.
OPBeta <- lm.beta(StarbucksGrowth_Fit_2)
summary(OPBeta)
##
## Call:
## lm(formula = Revenue ~ Drinks + AveWeekEarnings, data = StarbucksGrowth)
##
## Residuals:
## 1 2 3 4 5 6 7
## -11.780 -3.966 24.408 -12.110 5.621 7.773 -9.946
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) -12670.302 0.000 362.794 -34.92 4.01e-06 ***
## Drinks -67.725 -0.525 6.222 -10.88 0.000404 ***
## AveWeekEarnings 36.523 1.512 1.165 31.36 6.17e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.49 on 4 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9996
## F-statistic: 6973 on 2 and 4 DF, p-value: 8.223e-08