#Research Question: What is the predictive power of the total amount of purchase and reward points in determining customer satisfaction, while accounting for the influence of annual income on reward points earned?
#H0: There is no significant relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.
#H1: There is a significant positive relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.
library(readxl)
df<- read_excel("C:/Users/apoor/Downloads/2SLSLab.xlsx")
summary(df)
## Reward Purchase Rating Income
## Min. : 1.000 Min. :18.0 Min. : 7.90 Min. :10.00
## 1st Qu.: 4.000 1st Qu.:25.0 1st Qu.: 17.55 1st Qu.:12.65
## Median : 6.000 Median :33.0 Median : 25.00 Median :14.60
## Mean : 5.897 Mean :31.7 Mean : 33.79 Mean :14.91
## 3rd Qu.: 8.000 3rd Qu.:39.0 3rd Qu.: 41.92 3rd Qu.:16.55
## Max. :16.000 Max. :44.0 Max. :103.60 Max. :25.40
# Correlation check
cor(df)
## Reward Purchase Rating Income
## Reward 1.0000000 0.8372003 0.8308538 0.8607945
## Purchase 0.8372003 1.0000000 0.8638276 0.9470202
## Rating 0.8308538 0.8638276 1.0000000 0.9468095
## Income 0.8607945 0.9470202 0.9468095 1.0000000
# Initial model building
model <- lm(Rating ~ Reward + Purchase + Income, data = df)
summary(model)
##
## Call:
## lm(formula = Rating ~ Reward + Purchase + Income, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.147 -4.422 0.229 3.148 16.304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -98.1425 4.2114 -23.304 < 2e-16 ***
## Reward 0.6592 0.2846 2.316 0.0214 *
## Purchase -0.9854 0.1795 -5.489 1.04e-07 ***
## Income 10.6816 0.5933 18.003 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.073 on 238 degrees of freedom
## Multiple R-squared: 0.9089, Adjusted R-squared: 0.9078
## F-statistic: 791.9 on 3 and 238 DF, p-value: < 2.2e-16
# Refine the model by removing Reward
model_1 <- lm(Rating ~ Purchase + Income, data = df)
summary(model_1)
##
## Call:
## lm(formula = Rating ~ Purchase + Income, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.366 -4.399 0.338 3.193 16.455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -103.9585 3.4117 -30.471 < 2e-16 ***
## Purchase -0.9294 0.1795 -5.177 4.78e-07 ***
## Income 11.2132 0.5521 20.310 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared: 0.9069, Adjusted R-squared: 0.9061
## F-statistic: 1164 on 2 and 239 DF, p-value: < 2.2e-16
# Refine the model with predictions
df$Predicted <- predict(model_1,df)
model_2 <- lm(Rating ~ Purchase + Predicted, data = df)
summary(model_2)
##
## Call:
## lm(formula = Rating ~ Purchase + Predicted, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.366 -4.399 0.338 3.193 16.455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.899e-14 2.954e+00 0.00 1
## Purchase 1.837e-15 1.370e-01 0.00 1
## Predicted 1.000e+00 4.924e-02 20.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared: 0.9069, Adjusted R-squared: 0.9061
## F-statistic: 1164 on 2 and 239 DF, p-value: < 2.2e-16
# Refine the model by using Reward & Purchase
model_3 <- lm(Reward ~ Purchase, data = df)
summary(model_3)
##
## Call:
## lm(formula = Reward ~ Purchase, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6618 -0.9120 0.0044 0.6712 6.0052
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.66812 0.45931 -10.16 <2e-16 ***
## Purchase 0.33325 0.01405 23.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.74 on 240 degrees of freedom
## Multiple R-squared: 0.7009, Adjusted R-squared: 0.6997
## F-statistic: 562.4 on 1 and 240 DF, p-value: < 2.2e-16
# Predict on model3
predictions <- predict(model_3, newdata = df)
predictions
## 1 2 3 4 5 6 7 8
## 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361 1.330361
## 9 10 11 12 13 14 15 16
## 1.663610 1.663610 1.663610 1.663610 1.663610 1.663610 1.996858 1.996858
## 17 18 19 20 21 22 23 24
## 1.996858 1.996858 1.996858 1.996858 1.996858 1.996858 1.996858 2.330107
## 25 26 27 28 29 30 31 32
## 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107 2.330107
## 33 34 35 36 37 38 39 40
## 2.330107 2.663356 2.663356 2.663356 2.663356 2.663356 2.663356 2.663356
## 41 42 43 44 45 46 47 48
## 2.663356 2.663356 2.663356 2.996605 2.996605 2.996605 2.996605 2.996605
## 49 50 51 52 53 54 55 56
## 2.996605 2.996605 2.996605 2.996605 3.329853 3.329853 3.329853 3.329853
## 57 58 59 60 61 62 63 64
## 3.329853 3.329853 3.663102 3.663102 3.663102 3.663102 3.663102 3.663102
## 65 66 67 68 69 70 71 72
## 3.663102 3.663102 3.663102 3.996351 3.996351 3.996351 3.996351 3.996351
## 73 74 75 76 77 78 79 80
## 3.996351 3.996351 3.996351 3.996351 3.996351 4.329599 4.329599 4.329599
## 81 82 83 84 85 86 87 88
## 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599 4.329599
## 89 90 91 92 93 94 95 96
## 4.329599 4.662848 4.662848 4.662848 4.662848 4.662848 4.662848 4.996097
## 97 98 99 100 101 102 103 104
## 4.996097 4.996097 4.996097 4.996097 4.996097 4.996097 4.996097 5.329345
## 105 106 107 108 109 110 111 112
## 5.329345 5.329345 5.329345 5.329345 5.329345 5.329345 5.329345 5.662594
## 113 114 115 116 117 118 119 120
## 5.662594 5.662594 5.662594 5.662594 5.662594 5.662594 5.995843 6.329091
## 121 122 123 124 125 126 127 128
## 6.329091 6.329091 6.329091 6.329091 6.329091 6.329091 6.662340 6.662340
## 129 130 131 132 133 134 135 136
## 6.662340 6.662340 6.662340 6.662340 6.662340 6.662340 6.995589 6.995589
## 137 138 139 140 141 142 143 144
## 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589 6.995589
## 145 146 147 148 149 150 151 152
## 6.995589 6.995589 6.995589 7.328837 7.328837 7.328837 7.328837 7.328837
## 153 154 155 156 157 158 159 160
## 7.328837 7.328837 7.328837 7.328837 7.328837 7.328837 7.662086 7.662086
## 161 162 163 164 165 166 167 168
## 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086 7.662086
## 169 170 171 172 173 174 175 176
## 7.662086 7.662086 7.662086 7.662086 7.995335 7.995335 7.995335 7.995335
## 177 178 179 180 181 182 183 184
## 7.995335 8.328583 8.328583 8.328583 8.328583 8.328583 8.328583 8.328583
## 185 186 187 188 189 190 191 192
## 8.328583 8.328583 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832
## 193 194 195 196 197 198 199 200
## 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832 8.661832
## 201 202 203 204 205 206 207 208
## 8.661832 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081
## 209 210 211 212 213 214 215 216
## 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 8.995081 9.328329
## 217 218 219 220 221 222 223 224
## 9.328329 9.328329 9.328329 9.328329 9.328329 9.328329 9.661578 9.661578
## 225 226 227 228 229 230 231 232
## 9.661578 9.661578 9.661578 9.661578 9.994827 9.994827 9.994827 9.994827
## 233 234 235 236 237 238 239 240
## 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827 9.994827
## 241 242
## 9.994827 9.994827
# Predictions
df$Predicted <- predict(model_3, df)
model_4 <- lm(Reward ~ Purchase + Predicted, data = df)
summary(model_4)
##
## Call:
## lm(formula = Reward ~ Purchase + Predicted, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6618 -0.9120 0.0044 0.6712 6.0052
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.66812 0.45931 -10.16 <2e-16 ***
## Purchase 0.33325 0.01405 23.71 <2e-16 ***
## Predicted NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.74 on 240 degrees of freedom
## Multiple R-squared: 0.7009, Adjusted R-squared: 0.6997
## F-statistic: 562.4 on 1 and 240 DF, p-value: < 2.2e-16
#Summary: We fail to reject the alternate hypothesis, as there is a significant relationship between customer satisfaction and the total amount of purchase and reward points, after controlling for the effect of annual income on reward points earned.