library(readxl)
# Load the lab data from the Excel workbook and show summary statistics
# for every column.
df <- read_excel(path = "2SLSLab.xlsx")
summary(df)
##      Reward          Purchase        Rating           Income     
##  Min.   : 1.000   Min.   :18.0   Min.   :  7.90   Min.   :10.00  
##  1st Qu.: 4.000   1st Qu.:25.0   1st Qu.: 17.55   1st Qu.:12.65  
##  Median : 6.000   Median :33.0   Median : 25.00   Median :14.60  
##  Mean   : 5.897   Mean   :31.7   Mean   : 33.79   Mean   :14.91  
##  3rd Qu.: 8.000   3rd Qu.:39.0   3rd Qu.: 41.92   3rd Qu.:16.55  
##  Max.   :16.000   Max.   :44.0   Max.   :103.60   Max.   :25.40
#We know that purchase amount and yearly income influence the reward points an individual earns.
#Now we want to use the total purchase amount and reward points to predict consumers' satisfaction (Rating).

# Correlation check: pairwise Pearson correlations between all columns.
cor(df, method = "pearson")
##             Reward  Purchase    Rating    Income
## Reward   1.0000000 0.8372003 0.8308538 0.8607945
## Purchase 0.8372003 1.0000000 0.8638276 0.9470202
## Rating   0.8308538 0.8638276 1.0000000 0.9468095
## Income   0.8607945 0.9470202 0.9468095 1.0000000
# First stage: regress Reward on Purchase and Income.
# The fitted values from this model feed the second-stage regression.
model <- lm(formula = Reward ~ Purchase + Income, data = df)
summary(model)
## 
## Call:
## lm(formula = Reward ~ Purchase + Income, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1237 -0.7162  0.1187  0.9961  4.4160 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.82233    0.76840 -11.481  < 2e-16 ***
## Purchase     0.08494    0.04043   2.101   0.0367 *  
## Income       0.80647    0.12435   6.485 5.04e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.608 on 239 degrees of freedom
## Multiple R-squared:  0.7457, Adjusted R-squared:  0.7435 
## F-statistic: 350.4 on 2 and 239 DF,  p-value: < 2.2e-16
# Second stage of the manual 2SLS: replace Reward with its first-stage
# prediction and regress Rating on Purchase and the predicted Reward.
df$Predicted <- predict(model, df)
model_2 <- lm(Rating ~ Purchase + Predicted, data = df)

# The coefficients of model_2 are the 2SLS point estimates, but its standard
# errors (and therefore its t and p values) are not valid 2SLS inference:
# lm() measures the residual variance around the *predicted* Reward.
# 2SLS residuals must be computed with the *observed* Reward and the same
# coefficients, so the covariance matrix is rescaled by the ratio of the
# two residual variances.
dropped <- na.action(model_2) # rows lm() removed for missing values, if any
reward_obs <- if (is.null(dropped)) {
  df$Reward
} else {
  df$Reward[-as.integer(dropped)]
}
x_obs <- model.matrix(model_2)
x_obs[, "Predicted"] <- reward_obs # swap observed Reward back in
beta_2sls <- coef(model_2)
resid_2sls <- model.response(model.frame(model_2)) - drop(x_obs %*% beta_2sls)
sigma2_2sls <- sum(resid_2sls^2) / df.residual(model_2)
se_2sls <- sqrt(diag(vcov(model_2)) * sigma2_2sls / sigma(model_2)^2)
t_2sls <- beta_2sls / se_2sls
coef_2sls <- cbind(
  "Estimate" = beta_2sls,
  "Std. Error" = se_2sls,
  "t value" = t_2sls,
  "Pr(>|t|)" = 2 * pt(abs(t_2sls), df.residual(model_2), lower.tail = FALSE)
)
# Corrected 2SLS coefficient table (the "Predicted" row is the Reward effect).
coef_2sls

# Naive second-stage summary: estimates match the table above, but the
# standard errors, t/p values, R-squared and F statistic it reports are
# based on the predicted regressor and are not 2SLS-adjusted.
summary(model_2)
## 
## Call:
## lm(formula = Rating ~ Purchase + Predicted, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.366  -4.399   0.338   3.193  16.455 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  18.7085     3.7101   5.043 9.08e-07 ***
## Purchase     -2.1103     0.2353  -8.968  < 2e-16 ***
## Predicted    13.9042     0.6846  20.310  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.138 on 239 degrees of freedom
## Multiple R-squared:  0.9069, Adjusted R-squared:  0.9061 
## F-statistic:  1164 on 2 and 239 DF,  p-value: < 2.2e-16
#Summary
#A two-stage least squares regression was conducted to predict ratings from purchase and reward, with reward first predicted from purchase and income.
#According to previous reports, purchase and income impact reward.
#A significant regression equation was found on the first model (F(2,239) = 350, p < .001), with an R-square of .7457.
#This suggests that the 2 predictors can explain 75% of the variance of reward. 
#All predictors are statistically significant at .05 level. 

#The second regression model was also statistically significant (F(2,239) = 1164, p < .001), with an R-square of .9069.
#This suggests that purchase and predicted reward points together can explain 91% of the variance of rating.
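#All predictors are statistically significant at the .05 level.
#Note: these second-stage statistics come from a plain lm() on the predicted reward, so the standard errors, F and R-square are not 2SLS-adjusted.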