Assignment 7

library(readr)

# Download the HR comma-separated dataset and keep it as `hr`, the data frame
# used by every correlation test and plot below.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

##Correlation 1

#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.

# Pearson correlation test: satisfaction level vs. average monthly hours.
cor.test(
  x = hr$satisfaction_level,
  y = hr$average_montly_hours,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$average_montly_hours
## t = -2.4556, df = 14997, p-value = 0.01408
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.036040356 -0.004045605
## sample estimates:
##         cor 
## -0.02004811

#The correlation is -0.02004811.

#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).

#For this correlation, the p-value is 0.01408. Because the p-value is below 0.05, the correlation is statistically significant: a correlation this far from zero would be unlikely if the true correlation were zero.

#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-technical terms.

#Satisfaction level and average monthly hours have a very weak negative correlation, which means the relationship is not strong. In other words, while there is a tendency for satisfaction to decrease as monthly hours increase, this tendency is not consistent or strong.

#4. Create a plot that helps visualize the correlation (.5 point) For each correlation, create a graph to help visualize the relationship between the two variables. The title must be the non-technical interpretation.

library(ggplot2)

# Scatter plot of satisfaction level against average monthly hours with a
# straight-line (linear model) trend. The assignment requires the title to be
# the non-technical interpretation, so the title states the finding
# (r = -0.02, a very slight downward trend) instead of naming the chart type.
ggplot(hr, aes(x = average_montly_hours, y = satisfaction_level)) +
  geom_point() +  # One point per row of hr
  geom_smooth(method = "lm", color = "blue", se = FALSE) +  # Linear fit, no confidence band
  labs(title = "Employees who work more hours are only very slightly less satisfied",
       x = "Average Monthly Hours",
       y = "Satisfaction Level") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

#Correlation 2

#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.

# Pearson correlation test: number of projects vs. promotion in the last 5 years.
cor.test(
  x = hr$number_project,
  y = hr$promotion_last_5years,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$number_project and hr$promotion_last_5years
## t = -0.74262, df = 14997, p-value = 0.4577
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.022065623  0.009940813
## sample estimates:
##          cor 
## -0.006063958

#The correlation is -0.006063958

#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).

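#For this correlation, the p-value is 0.4577. Because the p-value is above 0.05, the correlation is not statistically significant: we cannot conclude that the true correlation differs from zero.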

#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-technical terms.

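#Number of projects and promotion in the last 5 years have a very weak negative correlation, which means the relationship is not strong. In other words, while the sample shows a slight tendency for one variable to decrease as the other increases, this tendency is not consistent or strong, and because the result is not statistically significant there is no reliable evidence that the two are related at all.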

library(ggplot2)

# Scatter plot of number of projects against promotion in the last 5 years
# with a straight-line (linear model) trend. The assignment requires the title
# to be the non-technical interpretation, so the title states the finding
# (r = -0.006, not statistically significant) instead of naming the chart type.
ggplot(hr, aes(x = promotion_last_5years, y = number_project)) +
  geom_point() +  # One point per row of hr
  geom_smooth(method = "lm", color = "blue", se = FALSE) +  # Linear fit, no confidence band
  labs(title = "Number of projects is essentially unrelated to being promoted",
       x = "Promotion over the last 5 years",
       y = "Number of Project") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

#Correlation 3

#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.

# Pearson correlation test: time spent at the company vs. work accident.
cor.test(
  x = hr$time_spend_company,
  y = hr$Work_accident,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$time_spend_company and hr$Work_accident
## t = 0.25967, df = 14997, p-value = 0.7951
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01388386  0.01812361
## sample estimates:
##         cor 
## 0.002120418

#The correlation is 0.002120418

#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).

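#For this correlation, the p-value is 0.7951. Because the p-value is above 0.05, the correlation is not statistically significant: we cannot conclude that the true correlation differs from zero.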

#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-technical terms.

#Time spent at the company and work accidents have a very weak positive correlation, which refers to a statistical relationship between two variables where an increase in one variable is associated with a slight increase in the other variable. However, the relationship is not strong, meaning that the two variables do not consistently move together, and because the result is not statistically significant there is no reliable evidence that the two are related at all.

library(ggplot2)

# Scatter plot of time spent at the company against work accident with a
# straight-line (linear model) trend. The assignment requires the title to be
# the non-technical interpretation, so the title states the finding
# (r = 0.002, not statistically significant) instead of naming the chart type.
ggplot(hr, aes(x = Work_accident, y = time_spend_company)) +
  geom_point() +  # One point per row of hr
  geom_smooth(method = "lm", color = "blue", se = FALSE) +  # Linear fit, no confidence band
  labs(title = "Time at the company is essentially unrelated to having a work accident",
       x = "Work Accident",
       y = "Time Spend Company") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

#Correlation 4

#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.

# Pearson correlation test: satisfaction level vs. last evaluation score.
cor.test(
  x = hr$satisfaction_level,
  y = hr$last_evaluation,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08916727 0.12082195
## sample estimates:
##       cor 
## 0.1050212

#The correlation is 0.1050212

#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).

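#For this correlation, the p-value is < 2.2e-16. Because the p-value is far below 0.05, the correlation is statistically significant: a correlation this far from zero would be very unlikely if the true correlation were zero.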

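#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-technical terms.

#Satisfaction level and last evaluation have a weak positive correlation. In other words, employees with higher evaluation scores tend to be slightly more satisfied, but the relationship is not strong.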

library(ggplot2)

# Scatter plot of satisfaction level against last evaluation with a
# straight-line (linear model) trend. The assignment requires the title to be
# the non-technical interpretation, so the title states the finding
# (r = 0.105, a weak upward trend) instead of naming the chart type.
ggplot(hr, aes(x = last_evaluation, y = satisfaction_level)) +
  geom_point() +  # One point per row of hr
  geom_smooth(method = "lm", color = "blue", se = FALSE) +  # Linear fit, no confidence band
  labs(title = "Employees with higher evaluations tend to be slightly more satisfied",
       x = "Last Evaluation",
       y = "Satisfaction Level") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

Assignment 7

Michael O’Keefe and Owen Henderson

2024-11-04