# Load the garment worker productivity dataset from its local CSV file.
data <- read.csv(
  file = "C:\\Users\\Krishna\\Downloads\\productivity+prediction+of+garment+employees\\garments_worker_productivity.csv"
)

Numeric summary for Column1

INSIGHT = It provides a brief overview of actual_productivity, including its minimum, 1st quartile, median, mean, 3rd quartile, and maximum.

# Min, quartiles, median, mean and max of the actual_productivity column.
summary_column1 <- summary(data[["actual_productivity"]])
print(summary_column1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2337  0.6503  0.7733  0.7351  0.8503  1.1204

Numeric summary for Column2

INSIGHT = It provides a brief overview of the no_of_style_change column, offering insight into its central tendency, spread, and distribution characteristics.

# Min, quartiles, median, mean and max of the no_of_style_change column.
summary_column2 <- summary(data[["no_of_style_change"]])
print(summary_column2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1504  0.0000  2.0000

Categorical summary of a column

INSIGHT=

It gives an overview of the distribution of days in the dataset, indicating the frequency of each unique day.

# Distinct day names, in order of first appearance in the data.
unique_values_column <- unique(data$day)
# Frequency of each day; the table lists the days in sorted order.
count_values_column <- table(data$day)
print(count_values_column)
## 
##    Monday  Saturday    Sunday  Thursday   Tuesday Wednesday 
##       199       187       203       199       201       208
print(unique_values_column)
## [1] "Thursday"  "Saturday"  "Sunday"    "Monday"    "Tuesday"   "Wednesday"
cat("categorical summary for day : \n")
## categorical summary for day :
# Take the labels from the table itself. Pairing the counts with
# unique_values_column mislabels rows, because unique() keeps first-appearance
# order while table() sorts its levels (e.g. "Thursday" was printed next to
# Monday's count).
print(data.frame(
  value = names(count_values_column),
  count = as.vector(count_values_column)
))
##       value count
## 1    Monday   199
## 2  Saturday   187
## 3    Sunday   203
## 4  Thursday   199
## 5   Tuesday   201
## 6 Wednesday   208

Hypothesis

1) The data helps to compare employee productivity across departments.

2) It helps to assess the impact of overtime on employee productivity.

3) The data helps us to find out whether certain days of the week have a noticeable impact on productivity.

Aggregate Functions for hypothesis/question 1

The data helps to compare employee productivity across departments.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Re-read the raw CSV so this section works from a freshly loaded data frame.
data <- read.csv("C:\\Users\\Krishna\\Downloads\\productivity+prediction+of+garment+employees\\garments_worker_productivity.csv")

# Mean actual_productivity per department.
# Some department values carry trailing whitespace ("finishing " vs
# "finishing"), which made aggregate() report one department as two groups.
# Trim the labels before grouping; transform() works on a copy, so `data`
# itself is left unchanged.
# NOTE(review): "sweing" looks like a misspelling of "sewing" in the raw data;
# it is left untouched here - confirm before relabelling.
aggregate_result <- aggregate(
  actual_productivity ~ department,
  data = transform(data, department = trimws(department)),
  FUN = mean
)

print("Mean Productivity Across Departments:")
## [1] "Mean Productivity Across Departments:"
print(aggregate_result)
# Output recorded BEFORE the whitespace fix, kept for reference. Re-run to
# refresh it: the two "finishing" rows now collapse into a single group.
##   department actual_productivity
## 1  finishing           0.7228757
## 2 finishing            0.7820895
## 3     sweing           0.7220130

Visual summaries

INSIGHT=

The scatter plot shows the apparent relationship between overtime hours and actual productivity.

library(ggplot2)


# Reload the dataset, then draw one point per row: over_time on the x-axis
# against actual_productivity on the y-axis.
data <- read.csv(
  file = "C:\\Users\\Krishna\\Downloads\\productivity+prediction+of+garment+employees\\garments_worker_productivity.csv"
)

# NOTE(review): the axis label says "Hours"; confirm the unit of over_time.
ggplot(data = data, mapping = aes(x = over_time, y = actual_productivity)) +
  geom_point() +
  ggtitle("Scatter Plot: Overtime Hours vs. Actual Productivity") +
  xlab("Overtime Hours") +
  ylab("Actual Productivity")

INSIGHT=

It helps to understand the spread and frequency of actual productivity values in the dataset.

# Histogram of actual_productivity using bins of width 0.1.
ggplot(data = data, mapping = aes(x = actual_productivity)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", colour = "black") +
  ggtitle("Histogram: Distribution of Actual Productivity") +
  xlab("Actual Productivity") +
  ylab("Frequency")