# Load the garment worker productivity dataset into the data frame `data`.
# NOTE: the path is absolute and machine-specific; adjust it before running
# on another computer.
data <- read.csv(
  file = "C:\\Users\\Krishna\\Downloads\\productivity+prediction+of+garment+employees\\garments_worker_productivity.csv"
)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Building three pairs of columns

# Pair 1: Explanatory variable - SMV (Standard Minute Value),
# Response variable - Actual Productivity.
# Both are existing columns. The ratio of actual to targeted productivity is
# stored as `efficiency`; the Pair 1 plot and correlation do not use it.
data[["efficiency"]] <- with(data, actual_productivity / targeted_productivity)

# Pair 2: Explanatory variable - Overtime, Response variable - Idle Time.
# The response is the idle time multiplied by the number of idle men.
data[["total_idle_time"]] <- with(data, idle_time * idle_men)

# Pair 3: Explanatory variable - Number of Workers,
# Response variable - Targeted Productivity per Worker.
# The response is the targeted productivity divided by the number of workers.
data[["targeted_productivity_per_worker"]] <-
  with(data, targeted_productivity / no_of_workers)

Visualizing each relationship

# Pair 1: scatter plot of Actual Productivity against SMV, overlaid with a
# fitted linear-model smoother.
ggplot(data = data, mapping = aes(x = smv, y = actual_productivity)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Actual Productivity vs. SMV",
    x = "SMV",
    y = "Actual Productivity"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Pair 2: scatter plot of Total Idle Time against Overtime, overlaid with a
# fitted linear-model smoother.
ggplot(data = data, mapping = aes(x = over_time, y = total_idle_time)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Total Idle Time vs. Overtime",
    x = "Overtime",
    y = "Total Idle Time"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Pair 3: scatter plot of Targeted Productivity per Worker against the Number
# of Workers, overlaid with a fitted linear-model smoother.
ggplot(
  data = data,
  mapping = aes(x = no_of_workers, y = targeted_productivity_per_worker)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Targeted Productivity per Worker vs. Number of Workers",
    x = "Number of Workers",
    y = "Targeted Productivity per Worker"
  )
## `geom_smooth()` using formula = 'y ~ x'

Calculating correlation coefficients

# Correlation coefficient (cor() with its default method) for each
# explanatory/response pair.
cor1 <- with(data, cor(smv, actual_productivity))
cor2 <- with(data, cor(over_time, total_idle_time))
cor3 <- with(data, cor(no_of_workers, targeted_productivity_per_worker))

# Report each coefficient on its own line.
cat(
  "Correlation between SMV and Actual Productivity:",
  cor1, "\n"
)
## Correlation between SMV and Actual Productivity: -0.1220888
cat(
  "Correlation between Overtime and Total Idle Time:",
  cor2, "\n"
)
## Correlation between Overtime and Total Idle Time: 0.02919401
cat(
  "Correlation between Number of Workers and Targeted Productivity per Worker:",
  cor3, "\n"
)
## Correlation between Number of Workers and Targeted Productivity per Worker: -0.8168895

Explanation of the correlation coefficients

  1. Correlation between SMV and Actual Productivity:

    • Correlation Coefficient: This coefficient measures the strength and direction of the linear relationship between SMV and Actual Productivity. If it’s close to 1, it indicates a strong positive linear relationship, while if it’s close to -1, it indicates a strong negative linear relationship.

    • Interpretation: If the correlation coefficient between SMV and Actual Productivity is positive and close to 1, it means that as SMV increases, Actual Productivity tends to increase as well. This would make sense if higher SMV values correspond to more efficient processes, resulting in higher productivity. Conversely, if the coefficient is negative, it suggests that higher SMV values are associated with lower productivity, which would be counterintuitive. Here the coefficient is -0.122, a weak negative linear relationship: higher SMV is associated with only slightly lower Actual Productivity.

  2. Correlation between Overtime and Total Idle Time:

    • Correlation Coefficient: This coefficient measures the strength and direction of the linear relationship between Overtime and Total Idle Time. Similar to the previous case, positive values indicate a positive linear relationship, while negative values indicate a negative linear relationship.

    • Interpretation: A positive correlation coefficient between Overtime and Total Idle Time would make sense if increased overtime hours are associated with higher levels of idle time. This could occur if workers are required to stay longer to compensate for inefficiencies or delays in the production process. Conversely, a negative correlation would suggest that overtime is effectively reducing idle time, which may seem counterintuitive and would warrant further investigation. Here the coefficient is 0.029, which is essentially zero: there is no meaningful linear relationship between Overtime and Total Idle Time in this data.

  3. Correlation between Number of Workers and Targeted Productivity per Worker:

    • Correlation Coefficient: This coefficient measures the strength and direction of the linear relationship between the Number of Workers and Targeted Productivity per Worker.

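    • Interpretation: A positive correlation coefficient here would make sense if increasing the number of workers leads to higher targeted productivity per worker. However, a negative correlation would suggest that adding more workers actually decreases targeted productivity per worker. Here the coefficient is -0.817, a strong negative linear relationship. This is partly a consequence of how the response is built: Targeted Productivity per Worker is Targeted Productivity divided by the Number of Workers, so it falls as the number of workers grows unless Targeted Productivity rises in proportion. The underlying dynamics therefore need closer examination before drawing conclusions.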

Confidence intervals for the regression coefficients of each pair

# Fit one simple linear regression per explanatory/response pair and take the
# confidence intervals of its coefficients (confint() default level, 95%).
# Each result is a matrix with one row per coefficient ("(Intercept)" and the
# explanatory variable) and one column per bound (lower, upper).
confidence_interval_1 <- confint(lm(actual_productivity ~ smv, data = data))
confidence_interval_2 <- confint(lm(total_idle_time ~ over_time, data = data))
confidence_interval_3 <- confint(
  lm(targeted_productivity_per_worker ~ no_of_workers, data = data)
)

# Print confidence intervals
# Passing a whole matrix to cat() flattens it column by column, which mixes
# the intercept and slope bounds together. Print each coefficient's
# (lower, upper) pair separately and label it instead.
cat(
  "95% CI, Actual Productivity ~ SMV:",
  "intercept [", confidence_interval_1["(Intercept)", ], "]",
  "slope [", confidence_interval_1["smv", ], "]\n"
)
## 95% CI, Actual Productivity ~ SMV: intercept [ 0.7476927 0.7811322 ] slope [ -0.002844867 -0.001048507 ]
cat(
  "95% CI, Total Idle Time ~ Overtime:",
  "intercept [", confidence_interval_2["(Intercept)", ], "]",
  "slope [", confidence_interval_2["over_time", ], "]\n"
)
## 95% CI, Total Idle Time ~ Overtime: intercept [ -41.7705 50.45413 ] slope [ -0.003952599 0.01233354 ]
cat(
  "95% CI, Targeted Productivity per Worker ~ Number of Workers:",
  "intercept [", confidence_interval_3["(Intercept)", ], "]",
  "slope [", confidence_interval_3["no_of_workers", ], "]\n"
)
## 95% CI, Targeted Productivity per Worker ~ Number of Workers: intercept [ 0.09393249 0.09901953 ] slope [ -0.001605732 -0.001481995 ]

Explanation for each response variable

1)Actual Productivity (SMV):

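Confidence Interval: The confidence intervals computed here are for the regression coefficients (intercept and slope) of the linear model of Actual Productivity on SMV, not for the mean of Actual Productivity itself. The slope interval gives a range of values within which we are confident the true change in Actual Productivity per unit of SMV lies.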

Conclusion: If the confidence interval for the SMV slope does not include zero, it suggests that there is a statistically significant relationship between SMV and Actual Productivity; a narrow interval additionally means the slope is estimated precisely. This indicates that changes in SMV are associated with changes in Actual Productivity. Conversely, if the interval includes zero, the relationship between SMV and Actual Productivity may not be statistically significant. Here the slope interval is roughly -0.0028 to -0.0010, which excludes zero, so there is a statistically significant, though small, negative relationship.

2)Total Idle Time (Overtime):

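Confidence Interval: Similar to the previous case, the confidence intervals are for the intercept and slope of the linear model of Total Idle Time on Overtime, not for the mean of Total Idle Time. The slope interval gives the plausible range for the change in Total Idle Time per unit of Overtime.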

Conclusion: If the confidence interval for the Overtime slope does not include zero, it suggests a statistically significant relationship between Overtime and Total Idle Time. This implies that changes in Overtime are associated with changes in Total Idle Time. If the interval includes zero, it suggests that the relationship between Overtime and Total Idle Time may not be statistically significant. Here the slope interval is roughly -0.0040 to 0.0123, which includes zero, so the data show no statistically significant linear relationship between Overtime and Total Idle Time.

3)Targeted Productivity per Worker (Number of Workers):

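Confidence Interval: Once again, the confidence intervals are for the intercept and slope of the linear model of Targeted Productivity per Worker on the Number of Workers, not for the mean of the response. The slope interval gives the plausible range for the change in Targeted Productivity per Worker for each additional worker.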

Conclusion: A slope confidence interval that does not include zero indicates a statistically significant relationship between the Number of Workers and Targeted Productivity per Worker; a narrow interval additionally means the slope is estimated precisely. This suggests that changes in the Number of Workers are associated with changes in Targeted Productivity per Worker. Conversely, if the interval includes zero, the relationship may not be statistically significant. Here the slope interval is roughly -0.0016 to -0.0015, which is narrow and excludes zero, so the negative relationship is statistically significant.