#Research Question: What is the predictive power of the total amount of purchase and reward points in determining customer satisfaction, while accounting for the influence of annual income on reward points earned?
#H0: There is no significant relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.
#H1: There is a significant positive relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.
library(readxl)
df<- read_excel("C:/Users/apoor/Downloads/2SLSLab.xlsx")
summary(df)
##      Reward          Purchase        Rating           Income     
##  Min.   : 1.000   Min.   :18.0   Min.   :  7.90   Min.   :10.00  
##  1st Qu.: 4.000   1st Qu.:25.0   1st Qu.: 17.55   1st Qu.:12.65  
##  Median : 6.000   Median :33.0   Median : 25.00   Median :14.60  
##  Mean   : 5.897   Mean   :31.7   Mean   : 33.79   Mean   :14.91  
##  3rd Qu.: 8.000   3rd Qu.:39.0   3rd Qu.: 41.92   3rd Qu.:16.55  
##  Max.   :16.000   Max.   :44.0   Max.   :103.60   Max.   :25.40
# Correlation check
cor(df)
##             Reward  Purchase    Rating    Income
## Reward   1.0000000 0.8372003 0.8308538 0.8607945
## Purchase 0.8372003 1.0000000 0.8638276 0.9470202
## Rating   0.8308538 0.8638276 1.0000000 0.9468095
## Income   0.8607945 0.9470202 0.9468095 1.0000000
# Initial model building
model <- lm(Rating ~ Reward + Purchase + Income, data = df)
summary(model)
## 
## Call:
## lm(formula = Rating ~ Reward + Purchase + Income, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.147  -4.422   0.229   3.148  16.304 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -98.1425     4.2114 -23.304  < 2e-16 ***
## Reward        0.6592     0.2846   2.316   0.0214 *  
## Purchase     -0.9854     0.1795  -5.489 1.04e-07 ***
## Income       10.6816     0.5933  18.003  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.073 on 238 degrees of freedom
## Multiple R-squared:  0.9089, Adjusted R-squared:  0.9078 
## F-statistic: 791.9 on 3 and 238 DF,  p-value: < 2.2e-16
# Refine the model by removing Reward
model_1 <- lm(Rating ~ Purchase + Income, data = df)
summary(model_1)
## 
## Call:
## lm(formula = Rating ~ Purchase + Income, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.366  -4.399   0.338   3.193  16.455 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -103.9585     3.4117 -30.471  < 2e-16 ***
## Purchase      -0.9294     0.1795  -5.177 4.78e-07 ***
## Income        11.2132     0.5521  20.310  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared:  0.9069, Adjusted R-squared:  0.9061 
## F-statistic:  1164 on 2 and 239 DF,  p-value: < 2.2e-16
# Refine the model with predictions 
df$Predicted <- predict(model_1,df)
model_2 <- lm(Rating ~ Purchase + Predicted, data = df)
summary(model_2)
## 
## Call:
## lm(formula = Rating ~ Purchase + Predicted, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.366  -4.399   0.338   3.193  16.455 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.899e-14  2.954e+00    0.00        1    
## Purchase     1.837e-15  1.370e-01    0.00        1    
## Predicted    1.000e+00  4.924e-02   20.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared:  0.9069, Adjusted R-squared:  0.9061 
## F-statistic:  1164 on 2 and 239 DF,  p-value: < 2.2e-16
# Refine the model by using Reward & Purchase 
model_3 <- lm(Reward ~ Purchase, data = df)
summary(model_3)
## 
## Call:
## lm(formula = Reward ~ Purchase, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6618 -0.9120  0.0044  0.6712  6.0052 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.66812    0.45931  -10.16   <2e-16 ***
## Purchase     0.33325    0.01405   23.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.74 on 240 degrees of freedom
## Multiple R-squared:  0.7009, Adjusted R-squared:  0.6997 
## F-statistic: 562.4 on 1 and 240 DF,  p-value: < 2.2e-16
# Predict on model3 
predictions <- predict(model_3, newdata = df)
predictions
##        1        2        3        4        5        6        7        8 
## 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 
##        9       10       11       12       13       14       15       16 
## 1.663610 1.663610 1.663610 1.663610 1.663610 1.663610 1.996858 1.996858 
##       17       18       19       20       21       22       23       24 
## 1.996858 1.996858 1.996858 1.996858 1.996858 1.996858 1.996858 2.330107 
##       25       26       27       28       29       30       31       32 
## 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 
##       33       34       35       36       37       38       39       40 
## 2.330107 2.663356 2.663356 2.663356 2.663356 2.663356 2.663356 2.663356 
##       41       42       43       44       45       46       47       48 
## 2.663356 2.663356 2.663356 2.996605 2.996605 2.996605 2.996605 2.996605 
##       49       50       51       52       53       54       55       56 
## 2.996605 2.996605 2.996605 2.996605 3.329853 3.329853 3.329853 3.329853 
##       57       58       59       60       61       62       63       64 
## 3.329853 3.329853 3.663102 3.663102 3.663102 3.663102 3.663102 3.663102 
##       65       66       67       68       69       70       71       72 
## 3.663102 3.663102 3.663102 3.996351 3.996351 3.996351 3.996351 3.996351 
##       73       74       75       76       77       78       79       80 
## 3.996351 3.996351 3.996351 3.996351 3.996351 4.329599 4.329599 4.329599 
##       81       82       83       84       85       86       87       88 
## 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 
##       89       90       91       92       93       94       95       96 
## 4.329599 4.662848 4.662848 4.662848 4.662848 4.662848 4.662848 4.996097 
##       97       98       99      100      101      102      103      104 
## 4.996097 4.996097 4.996097 4.996097 4.996097 4.996097 4.996097 5.329345 
##      105      106      107      108      109      110      111      112 
## 5.329345 5.329345 5.329345 5.329345 5.329345 5.329345 5.329345 5.662594 
##      113      114      115      116      117      118      119      120 
## 5.662594 5.662594 5.662594 5.662594 5.662594 5.662594 5.995843 6.329091 
##      121      122      123      124      125      126      127      128 
## 6.329091 6.329091 6.329091 6.329091 6.329091 6.329091 6.662340 6.662340 
##      129      130      131      132      133      134      135      136 
## 6.662340 6.662340 6.662340 6.662340 6.662340 6.662340 6.995589 6.995589 
##      137      138      139      140      141      142      143      144 
## 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 
##      145      146      147      148      149      150      151      152 
## 6.995589 6.995589 6.995589 7.328837 7.328837 7.328837 7.328837 7.328837 
##      153      154      155      156      157      158      159      160 
## 7.328837 7.328837 7.328837 7.328837 7.328837 7.328837 7.662086 7.662086 
##      161      162      163      164      165      166      167      168 
## 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 
##      169      170      171      172      173      174      175      176 
## 7.662086 7.662086 7.662086 7.662086 7.995335 7.995335 7.995335 7.995335 
##      177      178      179      180      181      182      183      184 
## 7.995335 8.328583 8.328583 8.328583 8.328583 8.328583 8.328583 8.328583 
##      185      186      187      188      189      190      191      192 
## 8.328583 8.328583 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 
##      193      194      195      196      197      198      199      200 
## 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 
##      201      202      203      204      205      206      207      208 
## 8.661832 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 
##      209      210      211      212      213      214      215      216 
## 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 9.328329 
##      217      218      219      220      221      222      223      224 
## 9.328329 9.328329 9.328329 9.328329 9.328329 9.328329 9.661578 9.661578 
##      225      226      227      228      229      230      231      232 
## 9.661578 9.661578 9.661578 9.661578 9.994827 9.994827 9.994827 9.994827 
##      233      234      235      236      237      238      239      240 
## 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 
##      241      242 
## 9.994827 9.994827
# Predictions 
df$Predicted <- predict(model_3, df)
model_4 <- lm(Reward ~ Purchase + Predicted, data = df)
summary(model_4)
## 
## Call:
## lm(formula = Reward ~ Purchase + Predicted, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6618 -0.9120  0.0044  0.6712  6.0052 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.66812    0.45931  -10.16   <2e-16 ***
## Purchase     0.33325    0.01405   23.71   <2e-16 ***
## Predicted         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.74 on 240 degrees of freedom
## Multiple R-squared:  0.7009, Adjusted R-squared:  0.6997 
## F-statistic: 562.4 on 1 and 240 DF,  p-value: < 2.2e-16

#Summary: We fail to reject the alternate hypothesis, as there is a significant relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.