library(readxl)
# Load the employee worksheet from Excel.
# NOTE(review): the absolute path is machine-specific; the script only runs
# where this exact file location exists.
employee <- read_excel("C:/Users/Kendall/Downloads/employeenumeric.xls")
# View() opens an interactive data viewer, so only call it in an interactive
# session; it is skipped when the script is run in batch mode.
if (interactive()) {
  View(employee)
}
# 1. What are the dimensions of the dataset?
# Report the number of rows and columns of the full dataset.
c(nrow(employee), ncol(employee))
## [1] 474 5
# 2. What variables are included in the data?
# Filter data so only individuals with exactly 15 yrs of education are
# included. subset() drops rows where the condition is NA, whereas logical
# `[` indexing would keep them as all-NA rows.
emp15 <- subset(employee, `Years of Education` == 15)
# 3. What are the dimensions of the new dataset?
# Report the number of rows and columns of the filtered dataset.
c(nrow(emp15), ncol(emp15))
## [1] 116 5
# Conduct a t-test for mean salary and gender
# 4. What is the null hypothesis?
# attach() places emp15's columns on the search path so they can be referenced
# by bare name (e.g. Gender).
# NOTE(review): attach() is fragile -- an object of the same name in the
# global environment masks the attached column, and the attached copy does
# not follow later changes to emp15. Prefer with(emp15, ...) or data = emp15,
# and drop this call once nothing relies on bare column names.
attach(emp15)
# Welch two-sample t-test of mean current salary, men (x) vs. women (y).
# Columns are looked up inside emp15 via with() rather than through the
# search path, so the test does not depend on attach().
with(
  emp15,
  t.test(`Current Salary`[Gender == "m"], `Current Salary`[Gender == "f"])
)
##
## Welch Two Sample t-test
##
## data: `Current Salary`[Gender == "m"] and `Current Salary`[Gender == "f"]
## t = 5.0443, df = 102.38, p-value = 1.977e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3930.779 9024.884
## sample estimates:
## mean of x mean of y
## 33527.83 27050.00
# 5. Why include only those with 15 years of education?
# 6. What is the t-statistic for difference in salaries btw men and women?
# 7. What is the p-value?
# 8. What are the limits of the 95% CI?
# 9. Does the 95% CI include 0?
# 10. What are the mean salaries for men and women?
# 11. Interpret results (conclusion)
#Conduct another t-test to determine if salary differs by minority status
# Welch two-sample t-test of mean current salary by minority classification:
# x = classification 0, y = classification 1.
# Columns are looked up inside emp15 via with() rather than through the
# search path, so the test does not depend on attach().
with(
  emp15,
  t.test(
    `Current Salary`[`Minority Classification` == 0],
    `Current Salary`[`Minority Classification` == 1]
  )
)
##
## Welch Two Sample t-test
##
## data: `Current Salary`[`Minority Classification` == 0] and `Current Salary`[`Minority Classification` == 1]
## t = 2.4432, df = 59.458, p-value = 0.01755
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 664.4916 6673.2519
## sample estimates:
## mean of x mean of y
## 32507.33 28838.46
# 12. What is the t-statistic for difference in salaries by minority status?
# 13. What is the p-value?
# 14. What are the limits of the 95% CI?
# 15. Does the 95% CI contain the value 0?
# 16. What are the mean salaries for minorities and non-minorities
# 17. Conclusion
# 18. Compare, using a t-test for the difference in means, minority vs. non-minority men. Is there a significant difference at 95%?
# Keep only the male rows of emp15 (rows with a missing Gender are dropped),
# then compare mean salary across minority classification among men.
men <- emp15[which(emp15$Gender == "m"), ]
t.test(
  `Current Salary` ~ `Minority Classification`,
  data = men
)
##
## Welch Two Sample t-test
##
## data: Current Salary by Minority Classification
## t = 2.4005, df = 40.643, p-value = 0.02104
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 702.7293 8164.9289
## sample estimates:
## mean in group 0 mean in group 1
## 34489.38 30055.56
# 19. Compare, using a t-test for the difference in means, minority vs non-minority women. Is there a significance at 95%?
# Keep only the female rows of emp15 (rows with a missing Gender are dropped),
# then compare mean salary across minority classification among women.
women <- emp15[which(emp15$Gender == "f"), ]
t.test(
  `Current Salary` ~ `Minority Classification`,
  data = women
)
##
## Welch Two Sample t-test
##
## data: Current Salary by Minority Classification
## t = 0.62398, df = 11.646, p-value = 0.5447
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -3139.546 5647.546
## sample estimates:
## mean in group 0 mean in group 1
## 27354 26100
# 20. Fill in the following table
# | Mean Salaries | Male     | Female |
# | Non-Minority  | 34489.38 | 27354  |
# | Minority      | 30055.56 | 26100  |
# Note: not sure what happened to questions 21-22; they appear to be missing.
# 23. Include the plot and describe what it tells you about how differences in salary relate to the interaction btw gender and minority status.
# Interaction plot of mean current salary: Gender on the x-axis, one trace per
# minority classification. Columns are looked up inside emp15 via with()
# rather than through the search path, so the plot does not depend on
# attach(); the bare column names are kept so the default labels are unchanged.
with(
  emp15,
  interaction.plot(Gender, `Minority Classification`, `Current Salary`)
)