#1 Build a multiple linear regression for predicting salary using age and MBA.

library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
# Load the employee workbook into EmployeeSalaries.
EmployeeSalaries <- read_excel(
  path = "C:/Users/yjiang34/Desktop/EmployeeSalaries.xlsx"
)

# Regress Salary on Age and MBA status. There is no Age:MBA interaction
# term, so MBA (reported as MBAYes in the coefficient table) only shifts
# the intercept while both groups share a single Age slope.
lm.fit <- lm(formula = Salary ~ Age + MBA, data = EmployeeSalaries)

#2 Get the summary of the coefficients for the model in 1C. Is having an MBA statistically significant? # Yes, because the p-value for MBAYes (2.5e-12) is less than 0.05

# Print the summary of the Salary ~ Age + MBA fit; the MBAYes row of the
# coefficient table carries the t-test used to judge the MBA effect.
summary(lm.fit)
## 
## Call:
## lm(formula = Salary ~ Age + MBA, data = EmployeeSalaries)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5256.5 -1555.3  -400.2  1622.4  9698.9 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   893.59    1824.58    0.49    0.628    
## Age          1044.15      42.14   24.78  < 2e-16 ***
## MBAYes      14767.23    1351.80   10.92  2.5e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2942 on 32 degrees of freedom
## Multiple R-squared:  0.9528, Adjusted R-squared:  0.9499 
## F-statistic:   323 on 2 and 32 DF,  p-value: < 2.2e-16

#3 What is the model equation for no MBA? # Salary = 944.02 * Age + 7441.70 (coefficients from the Age-only fit below)

# Refit with Age as the only predictor and print the result. This rebinds
# lm.fit, so the earlier Salary ~ Age + MBA fit is no longer reachable
# under that name.
# NOTE(review): this fit uses every row of EmployeeSalaries, i.e. it pools
# employees with and without an MBA. If "no MBA" is meant as MBA = No in
# the Age + MBA model, the equation would instead come from that model's
# coefficient table (Salary = 893.59 + 1044.15 * Age) -- confirm which
# reading the question intends.
lm.fit <- lm(formula = Salary ~ Age, data = EmployeeSalaries)
summary(lm.fit)
## 
## Call:
## lm(formula = Salary ~ Age, data = EmployeeSalaries)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6770.6 -3407.3 -1652.7   258.5 23625.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7441.70    3690.42   2.016   0.0519 .  
## Age           944.02      88.08  10.717 2.78e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6300 on 33 degrees of freedom
## Multiple R-squared:  0.7768, Adjusted R-squared:  0.7701 
## F-statistic: 114.9 on 1 and 33 DF,  p-value: 2.778e-12

#4 Using your model in 3C, what is the estimated salary for a 34-year-old employee with no MBA? # Estimated salary: 39538.38

# Evaluate the Age-only equation at Age = 34: intercept + slope * age.
# The two constants are the rounded estimates printed by summary(lm.fit),
# not values read back from the fitted object.
7441.70 + 944.02 * 34
## [1] 39538.38