ANLY 510, Quasi Lab

#Research Question: What is the predictive power of the total amount of purchase and reward points in determining customer satisfaction, while accounting for the influence of annual income on reward points earned?
#H0: There is no significant relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.
#H1: There is a significant positive relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.

library(readxl)
df<- read_excel("C:/Users/apoor/Downloads/2SLSLab.xlsx")
summary(df)

##      Reward          Purchase        Rating           Income     
##  Min.   : 1.000   Min.   :18.0   Min.   :  7.90   Min.   :10.00  
##  1st Qu.: 4.000   1st Qu.:25.0   1st Qu.: 17.55   1st Qu.:12.65  
##  Median : 6.000   Median :33.0   Median : 25.00   Median :14.60  
##  Mean   : 5.897   Mean   :31.7   Mean   : 33.79   Mean   :14.91  
##  3rd Qu.: 8.000   3rd Qu.:39.0   3rd Qu.: 41.92   3rd Qu.:16.55  
##  Max.   :16.000   Max.   :44.0   Max.   :103.60   Max.   :25.40

# Correlation check
cor(df)

##             Reward  Purchase    Rating    Income
## Reward   1.0000000 0.8372003 0.8308538 0.8607945
## Purchase 0.8372003 1.0000000 0.8638276 0.9470202
## Rating   0.8308538 0.8638276 1.0000000 0.9468095
## Income   0.8607945 0.9470202 0.9468095 1.0000000

# Initial model building
model <- lm(Rating ~ Reward + Purchase + Income, data = df)
summary(model)

## 
## Call:
## lm(formula = Rating ~ Reward + Purchase + Income, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.147  -4.422   0.229   3.148  16.304 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -98.1425     4.2114 -23.304  < 2e-16 ***
## Reward        0.6592     0.2846   2.316   0.0214 *  
## Purchase     -0.9854     0.1795  -5.489 1.04e-07 ***
## Income       10.6816     0.5933  18.003  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.073 on 238 degrees of freedom
## Multiple R-squared:  0.9089, Adjusted R-squared:  0.9078 
## F-statistic: 791.9 on 3 and 238 DF,  p-value: < 2.2e-16

# Refine the model by removing Reward
model_1 <- lm(Rating ~ Purchase + Income, data = df)
summary(model_1)

## 
## Call:
## lm(formula = Rating ~ Purchase + Income, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.366  -4.399   0.338   3.193  16.455 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -103.9585     3.4117 -30.471  < 2e-16 ***
## Purchase      -0.9294     0.1795  -5.177 4.78e-07 ***
## Income        11.2132     0.5521  20.310  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared:  0.9069, Adjusted R-squared:  0.9061 
## F-statistic:  1164 on 2 and 239 DF,  p-value: < 2.2e-16

# Refine the model with predictions 
df$Predicted <- predict(model_1,df)
model_2 <- lm(Rating ~ Purchase + Predicted, data = df)
summary(model_2)

## 
## Call:
## lm(formula = Rating ~ Purchase + Predicted, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.366  -4.399   0.338   3.193  16.455 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.899e-14  2.954e+00    0.00        1    
## Purchase     1.837e-15  1.370e-01    0.00        1    
## Predicted    1.000e+00  4.924e-02   20.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared:  0.9069, Adjusted R-squared:  0.9061 
## F-statistic:  1164 on 2 and 239 DF,  p-value: < 2.2e-16

# Refine the model by using Reward & Purchase 
model_3 <- lm(Reward ~ Purchase, data = df)
summary(model_3)

## 
## Call:
## lm(formula = Reward ~ Purchase, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6618 -0.9120  0.0044  0.6712  6.0052 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.66812    0.45931  -10.16   <2e-16 ***
## Purchase     0.33325    0.01405   23.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.74 on 240 degrees of freedom
## Multiple R-squared:  0.7009, Adjusted R-squared:  0.6997 
## F-statistic: 562.4 on 1 and 240 DF,  p-value: < 2.2e-16

# Predict on model3 
predictions <- predict(model_3, newdata = df)
predictions

##        1        2        3        4        5        6        7        8 
## 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 
##        9       10       11       12       13       14       15       16 
## 1.663610 1.663610 1.663610 1.663610 1.663610 1.663610 1.996858 1.996858 
##       17       18       19       20       21       22       23       24 
## 1.996858 1.996858 1.996858 1.996858 1.996858 1.996858 1.996858 2.330107 
##       25       26       27       28       29       30       31       32 
## 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 
##       33       34       35       36       37       38       39       40 
## 2.330107 2.663356 2.663356 2.663356 2.663356 2.663356 2.663356 2.663356 
##       41       42       43       44       45       46       47       48 
## 2.663356 2.663356 2.663356 2.996605 2.996605 2.996605 2.996605 2.996605 
##       49       50       51       52       53       54       55       56 
## 2.996605 2.996605 2.996605 2.996605 3.329853 3.329853 3.329853 3.329853 
##       57       58       59       60       61       62       63       64 
## 3.329853 3.329853 3.663102 3.663102 3.663102 3.663102 3.663102 3.663102 
##       65       66       67       68       69       70       71       72 
## 3.663102 3.663102 3.663102 3.996351 3.996351 3.996351 3.996351 3.996351 
##       73       74       75       76       77       78       79       80 
## 3.996351 3.996351 3.996351 3.996351 3.996351 4.329599 4.329599 4.329599 
##       81       82       83       84       85       86       87       88 
## 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 
##       89       90       91       92       93       94       95       96 
## 4.329599 4.662848 4.662848 4.662848 4.662848 4.662848 4.662848 4.996097 
##       97       98       99      100      101      102      103      104 
## 4.996097 4.996097 4.996097 4.996097 4.996097 4.996097 4.996097 5.329345 
##      105      106      107      108      109      110      111      112 
## 5.329345 5.329345 5.329345 5.329345 5.329345 5.329345 5.329345 5.662594 
##      113      114      115      116      117      118      119      120 
## 5.662594 5.662594 5.662594 5.662594 5.662594 5.662594 5.995843 6.329091 
##      121      122      123      124      125      126      127      128 
## 6.329091 6.329091 6.329091 6.329091 6.329091 6.329091 6.662340 6.662340 
##      129      130      131      132      133      134      135      136 
## 6.662340 6.662340 6.662340 6.662340 6.662340 6.662340 6.995589 6.995589 
##      137      138      139      140      141      142      143      144 
## 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 
##      145      146      147      148      149      150      151      152 
## 6.995589 6.995589 6.995589 7.328837 7.328837 7.328837 7.328837 7.328837 
##      153      154      155      156      157      158      159      160 
## 7.328837 7.328837 7.328837 7.328837 7.328837 7.328837 7.662086 7.662086 
##      161      162      163      164      165      166      167      168 
## 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 
##      169      170      171      172      173      174      175      176 
## 7.662086 7.662086 7.662086 7.662086 7.995335 7.995335 7.995335 7.995335 
##      177      178      179      180      181      182      183      184 
## 7.995335 8.328583 8.328583 8.328583 8.328583 8.328583 8.328583 8.328583 
##      185      186      187      188      189      190      191      192 
## 8.328583 8.328583 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 
##      193      194      195      196      197      198      199      200 
## 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 
##      201      202      203      204      205      206      207      208 
## 8.661832 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 
##      209      210      211      212      213      214      215      216 
## 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 9.328329 
##      217      218      219      220      221      222      223      224 
## 9.328329 9.328329 9.328329 9.328329 9.328329 9.328329 9.661578 9.661578 
##      225      226      227      228      229      230      231      232 
## 9.661578 9.661578 9.661578 9.661578 9.994827 9.994827 9.994827 9.994827 
##      233      234      235      236      237      238      239      240 
## 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 
##      241      242 
## 9.994827 9.994827

# Predictions 
df$Predicted <- predict(model_3, df)
model_4 <- lm(Reward ~ Purchase + Predicted, data = df)
summary(model_4)

## 
## Call:
## lm(formula = Reward ~ Purchase + Predicted, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6618 -0.9120  0.0044  0.6712  6.0052 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.66812    0.45931  -10.16   <2e-16 ***
## Purchase     0.33325    0.01405   23.71   <2e-16 ***
## Predicted         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.74 on 240 degrees of freedom
## Multiple R-squared:  0.7009, Adjusted R-squared:  0.6997 
## F-statistic: 562.4 on 1 and 240 DF,  p-value: < 2.2e-16

#Summary: We fail to reject the alternate hypothesis, as there is a significant relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.

ANLY 510, Quasi Lab

2023-04-16