library(readr)
library(plotly)
library(dplyr)



# Load the HR attrition dataset (one row per employee) straight from GitHub.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

1. Perform the t-test (.5 point) Choose any two appropriate variables from the data and perform the t-test, displaying the results.

# Welch two-sample t-test (two-sided, unequal variances, 95% CI) comparing
# mean satisfaction_level between employees who stayed (left = 0) and those
# who left (left = 1). The arguments below are t.test()'s defaults, written
# out so the choice of test is explicit.
t.test(
  hr$satisfaction_level ~ hr$left,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$satisfaction_level by hr$left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  0.2171815 0.2362417
## sample estimates:
## mean in group 0 mean in group 1 
##       0.6668096       0.4400980
# Technical: the difference in mean satisfaction level between employees who stayed (0.667) and those who left (0.440) is highly statistically significant (t = 46.64, p < 2.2e-16).

# Non-Technical: employees who stayed at the company are happier than those who left 

Create a graph to help visualize the difference between means, if any. The title must be the non-technical interpretation

# Label the 0/1 `left` flag for plotting. The level order (Stayed, Left)
# mirrors the group order (0, 1) reported by the t-test.
plot_data <- hr %>%
  mutate(Left = factor(left, levels = c(0, 1), labels = c("Stayed", "Left")))

# Box plot of satisfaction level by employment status. The chart title is set
# through layout(); the `name` argument only labels the trace in the legend
# and never produces a title.
plot_ly(plot_data,
        x = ~Left,
        y = ~satisfaction_level,
        type = "box") %>%
  layout(
    title = "Employees who stayed are happier than those who left",
    xaxis = list(title = "Employment status"),
    yaxis = list(title = "Satisfaction level")
  )

2. Perform the t-test (.5 point) Choose any two appropriate variables from the data and perform the t-test, displaying the results.

# Welch two-sample t-test (two-sided, unequal variances, 95% CI) comparing
# mean average_montly_hours between employees who stayed (left = 0) and those
# who left (left = 1). The arguments below are t.test()'s defaults, written
# out so the choice of test is explicit.
t.test(
  hr$average_montly_hours ~ hr$left,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$average_montly_hours by hr$left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -10.534631  -6.183384
## sample estimates:
## mean in group 0 mean in group 1 
##        199.0602        207.4192
# Technical: employees who left worked significantly more hours per month on average (207.4) than those who stayed (199.1); t = -7.53, p = 5.907e-14.

# Non-Technical: employees who work longer hours are the ones who leave the company

Create a graph to help visualize the difference between means, if any. The title must be the non-technical interpretation.

# Label the 0/1 `left` flag for plotting. The level order (Stayed, Left)
# mirrors the group order (0, 1) reported by the t-test.
plot_data <- hr %>%
  mutate(Left = factor(left, levels = c(0, 1), labels = c("Stayed", "Left")))

# Box plot of average monthly hours by employment status. The chart title is
# set through layout(); the `name` argument only labels the trace in the
# legend and never produces a title.
plot_ly(plot_data,
        x = ~Left,
        y = ~average_montly_hours,
        type = "box") %>%
  layout(
    title = "Employees who work longer hours leave the company",
    xaxis = list(title = "Employment status"),
    yaxis = list(title = "Average monthly hours")
  )

3. Perform the t-test (.5 point) Choose any two appropriate variables from the data and perform the t-test, displaying the results.

# Welch two-sample t-test (two-sided, unequal variances, 95% CI) comparing
# mean last_evaluation between employees who stayed (left = 0) and those who
# left (left = 1). The arguments below are t.test()'s defaults, written out
# so the choice of test is explicit.
t.test(
  hr$last_evaluation ~ hr$left,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$last_evaluation by hr$left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.009772224  0.004493874
## sample estimates:
## mean in group 0 mean in group 1 
##       0.7154734       0.7181126
## Technical: the difference in last evaluation scores between employees who stayed and those who left is not statistically significant  


# Non-Technical: there is no difference in the performance of employees who stayed at the company and those who left.  

Create a graph to help visualize the difference between means, if any. The title must be the non-technical interpretation.

# Label the 0/1 `left` flag for plotting. The level order (Stayed, Left)
# mirrors the group order (0, 1) reported by the t-test.
plot_data <- hr %>%
  mutate(Left = factor(left, levels = c(0, 1), labels = c("Stayed", "Left")))

# Box plot of last evaluation score by employment status. The chart title is
# set through layout(); the `name` argument only labels the trace in the
# legend and never produces a title.
plot_ly(plot_data,
        x = ~Left,
        y = ~last_evaluation,
        type = "box") %>%
  layout(
    title = "Employees who stayed and those who left perform equally well",
    xaxis = list(title = "Employment status"),
    yaxis = list(title = "Last evaluation score")
  )

4. Perform the t-test (.5 point) Choose any two appropriate variables from the data and perform the t-test, displaying the results.

# Welch two-sample t-test (two-sided, unequal variances, 95% CI) comparing
# mean Work_accident between employees who stayed (left = 0) and those who
# left (left = 1). Work_accident is presumably a 0/1 indicator, which would
# make each group mean an accident rate -- confirm against the data.
t.test(
  hr$Work_accident ~ hr$left,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$Work_accident by hr$left
## t = 25.403, df = 10883, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  0.1178305 0.1375356
## sample estimates:
## mean in group 0 mean in group 1 
##      0.17500875      0.04732568
# Technical: employees who stayed with the company have a significantly higher work-accident rate (17.5%) than those who left (4.7%); t = 25.40, p < 2.2e-16.


# Non-Technical: employees who stayed with the company had more work accidents than those who left.

Create a graph to help visualize the difference between means, if any. The title must be the non-technical interpretation.

# Label the 0/1 `left` flag for plotting. The level order (Stayed, Left)
# mirrors the group order (0, 1) reported by the t-test.
plot_data <- hr %>%
  mutate(Left = factor(left, levels = c(0, 1), labels = c("Stayed", "Left")))

# The t-test compares mean Work_accident per group, so plot exactly those
# group means. Work_accident appears to be a 0/1 indicator; a box plot of it
# (or of `left` against it) cannot show a difference in means.
accident_rates <- plot_data %>%
  group_by(Left) %>%
  summarise(accident_rate = mean(Work_accident))

# Bar chart of the accident rate by employment status. The title describes the
# variable actually tested (work accidents) and is set through layout();
# `name` only labels the trace.
plot_ly(accident_rates,
        x = ~Left,
        y = ~accident_rate,
        type = "bar") %>%
  layout(
    title = "Employees who stayed had more work accidents than those who left",
    xaxis = list(title = "Employment status"),
    yaxis = list(title = "Share of employees with a work accident")
  )