library(readr)
# Load the HR dataset from the remote CSV into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Correlation 1
#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.
# Two-sided Pearson correlation test: satisfaction level vs. average monthly hours.
cor.test(
  x = hr$satisfaction_level,
  y = hr$average_montly_hours,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: hr$satisfaction_level and hr$average_montly_hours
## t = -2.4556, df = 14997, p-value = 0.01408
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.036040356 -0.004045605
## sample estimates:
## cor
## -0.02004811
#The correlation is -0.02004811.
#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).
#For this correlation, the p-value is 0.01408. Because the p-value is below 0.05, we reject the null hypothesis that the true correlation is zero: the correlation is statistically significant.
#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-techical terms.
#Satisfaction level and average monthly hours have a very weak negative correlation (about -0.02). Employees who work more hours tend to be very slightly less satisfied, but the relationship is so weak that hours worked tell us almost nothing about how satisfied an employee is.
#4. Create a plot that helps visualize the correlation (.5 point) For each correlation, create a graph to help visualize the realtionship between the two variables. The title must be the non-technical interpretation.
# Visualize correlation 1: satisfaction level against average monthly hours.
# The title states the non-technical interpretation, as the assignment requires.
library(ggplot2)
ggplot(hr, aes(x = average_montly_hours, y = satisfaction_level)) +
  # Semi-transparent points: with ~15,000 rows, opaque points hide the density.
  geom_point(alpha = 0.2) +
  # Straight-line (lm) trend without a confidence band.
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Employees who work more hours are only very slightly less satisfied",
    x = "Average Monthly Hours",
    y = "Satisfaction Level"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#Correlation 2
#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.
# Two-sided Pearson correlation test: number of projects vs. promotion in the last 5 years.
cor.test(
  x = hr$number_project,
  y = hr$promotion_last_5years,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: hr$number_project and hr$promotion_last_5years
## t = -0.74262, df = 14997, p-value = 0.4577
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.022065623 0.009940813
## sample estimates:
## cor
## -0.006063958
#The correlation is -0.006063958
#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).
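#For this correlation, the p-value is 0.4577. Because the p-value is above 0.05, we fail to reject the null hypothesis that the true correlation is zero: the correlation is not statistically significant.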
#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-techical terms.
#There is no meaningful relationship between the number of projects and being promoted in the last 5 years. The correlation is slightly negative (about -0.006), but it is so close to zero, and not statistically significant, that it cannot be distinguished from no relationship at all.
#4. Create a plot that helps visualize the correlation (.5 point) For each correlation, create a graph to help visualize the realtionship between the two variables. The title must be the non-technical interpretation.
# Visualize correlation 2: number of projects against promotion in the last 5 years.
# The title states the non-technical interpretation, as the assignment requires.
library(ggplot2)
ggplot(hr, aes(x = promotion_last_5years, y = number_project)) +
  # NOTE(review): both variables appear to take only a few distinct values
  # (verify against the data), so plain points would stack on top of each
  # other. Jitter and transparency reveal how many rows share each value.
  geom_jitter(width = 0.1, height = 0.2, alpha = 0.1) +
  # Straight-line (lm) trend without a confidence band; fitted on the
  # original (un-jittered) data.
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Number of projects is not related to being promoted in the last 5 years",
    x = "Promotion over the last 5 years",
    y = "Number of Projects"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#Correlation 3
#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.
# Two-sided Pearson correlation test: time spent at the company vs. work accident.
cor.test(
  x = hr$time_spend_company,
  y = hr$Work_accident,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: hr$time_spend_company and hr$Work_accident
## t = 0.25967, df = 14997, p-value = 0.7951
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01388386 0.01812361
## sample estimates:
## cor
## 0.002120418
#The correlation is 0.002120418
#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).
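#For this correlation, the p-value is 0.7951. Because the p-value is above 0.05, we fail to reject the null hypothesis that the true correlation is zero: the correlation is not statistically significant.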
#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-techical terms.
#There is no meaningful relationship between time spent at the company and having a work accident. The correlation is slightly positive (about 0.002), but it is so close to zero, and not statistically significant, that the two variables do not move together in any consistent way.
#4. Create a plot that helps visualize the correlation (.5 point) For each correlation, create a graph to help visualize the realtionship between the two variables. The title must be the non-technical interpretation.
# Visualize correlation 3: time spent at the company against work accident.
# The title states the non-technical interpretation, as the assignment requires.
library(ggplot2)
ggplot(hr, aes(x = Work_accident, y = time_spend_company)) +
  # NOTE(review): both variables appear to take only a few distinct values
  # (verify against the data), so plain points would stack on top of each
  # other. Jitter and transparency reveal how many rows share each value.
  geom_jitter(width = 0.1, height = 0.2, alpha = 0.1) +
  # Straight-line (lm) trend without a confidence band; fitted on the
  # original (un-jittered) data.
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Time spent at the company is not related to having a work accident",
    x = "Work Accident",
    y = "Time Spent at Company"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#Correlation 4
#1. Perform the correlation (.5 point) Choose any two appropriate #variables from the data and perform the correlation, displaying the results.
# Two-sided Pearson correlation test: satisfaction level vs. last evaluation.
cor.test(
  x = hr$satisfaction_level,
  y = hr$last_evaluation,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.08916727 0.12082195
## sample estimates:
## cor
## 0.1050212
#The correlation is 0.1050212
#2. Interpret the results in technical terms (.5 point) For each correlation, #explain what the test’s p-value means (significance).
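#For this correlation, the p-value is < 2.2e-16. Because the p-value is far below 0.05, we reject the null hypothesis that the true correlation is zero: the correlation is statistically significant.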
#3. Interpret the results in non-technical terms (1 point) For each correlation, what do the results mean in non-techical terms.
#Satisfaction level and last evaluation have a weak positive correlation (about 0.11): employees with higher evaluation scores tend to be slightly more satisfied. However, the relationship is not strong, meaning that the two variables do not consistently move together.
#4. Create a plot that helps visualize the correlation (.5 point) For each correlation, create a graph to help visualize the realtionship between the two variables. The title must be the non-technical interpretation.
# Visualize correlation 4: satisfaction level against last evaluation.
# The title states the non-technical interpretation, as the assignment requires.
library(ggplot2)
ggplot(hr, aes(x = last_evaluation, y = satisfaction_level)) +
  # Semi-transparent points: with ~15,000 rows, opaque points hide the density.
  geom_point(alpha = 0.2) +
  # Straight-line (lm) trend without a confidence band.
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Employees with higher evaluations tend to be slightly more satisfied",
    x = "Last Evaluation",
    y = "Satisfaction Level"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'