# Load the garment-worker productivity data set.
# NOTE(review): the path is absolute and specific to one Windows machine;
# the script will fail elsewhere until it is adjusted.
data <- read.csv("C:\\Users\\Krishna\\Downloads\\productivity+prediction+of+garment+employees\\garments_worker_productivity.csv")

# Some raw `department` values differ only by trailing whitespace
# ("finishing" vs "finishing "), which would split a single department into
# two separate groups in every later group_by()/merge(). Normalise them once,
# right after loading.
data$department <- trimws(data$department)

# Use a fixed CRAN mirror so install.packages() does not prompt for one.
options(repos = c(CRAN = "https://cran.rstudio.com/"))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean actual productivity for each quarter.
groupset_1 <- summarise(
  group_by(data, quarter),
  avg_productivity = mean(actual_productivity)
)

# Keep only the quarter with the smallest mean and show it.
lowest_count_group_df1 <- groupset_1[
  which.min(groupset_1[["avg_productivity"]]),
]
print(lowest_count_group_df1)
## # A tibble: 1 × 2
## quarter avg_productivity
## <chr> <dbl>
## 1 Quarter3 0.705
# Label the lowest-average quarter, then left-merge that label onto `data`.
# Rows belonging to any other quarter receive NA in `tag` (all.x = TRUE).
lowest_count_group_df1[["tag"]] <- "Lowest_Probability_Group_DF1"
data_merged <- merge(
  x = data,
  y = lowest_count_group_df1[, c("quarter", "tag")],
  by = "quarter",
  all.x = TRUE
)
# Install ggplot2 only when it is not already available, so that re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## Installing package into 'C:/Users/Krishna/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Krishna\AppData\Local\Temp\Rtmpw17dCg\downloaded_packages
library(ggplot2)
# Bar chart of mean productivity per quarter; stat = "identity" makes the bar
# height equal to the precomputed avg_productivity value.
ggplot(
  data = groupset_1,
  mapping = aes(x = quarter, y = avg_productivity)
) +
  geom_bar(stat = "identity")
INSIGHT=Grouping the data by quarter allows us to gain insight into overall trends and performance.
# Total work-in-progress (wip) for each department.
# NOTE(review): sum() is called without na.rm, so a department whose `wip`
# contains any NA gets an NA total, and which.min() below skips NA totals.
# Confirm that this is the intended treatment of missing wip values.
groupset_2 <- summarise(
  group_by(data, department),
  total_wip = sum(wip)
)

# Keep only the department with the smallest (non-NA) total and show it.
lowest_count_group_df2 <- groupset_2[
  which.min(groupset_2[["total_wip"]]),
]
print(lowest_count_group_df2)
## # A tibble: 1 × 2
## department total_wip
## <chr> <int>
## 1 sweing 822612
# Label the lowest-total department, then left-merge that label onto
# `data_merged`; rows from other departments receive NA for the new label.
# NOTE(review): when `data_merged` already has a column named `tag`, merge()
# disambiguates the clash with its default ".x"/".y" suffixes.
lowest_count_group_df2[["tag"]] <- "Lowest_Probability_Group_DF2"
data_merged <- merge(
  x = data_merged,
  y = lowest_count_group_df2[, c("department", "tag")],
  by = "department",
  all.x = TRUE
)
# Bar chart of total wip per department; stat = "identity" makes the bar
# height equal to the precomputed total_wip value.
ggplot(
  data = groupset_2,
  mapping = aes(x = department, y = total_wip)
) +
  geom_bar(stat = "identity")
## Warning: Removed 2 rows containing missing values (`position_stack()`).
INSIGHT=We are able to analyze the productivity levels of each department and identify which departments perform well and which need improvement.
# Total incentive paid out on each day of the week.
groupset_3 <- summarise(
  group_by(data, day),
  total_incentive = sum(incentive)
)

# Keep only the day with the smallest total and show it.
lowest_count_group_df3 <- groupset_3[
  which.min(groupset_3[["total_incentive"]]),
]
print(lowest_count_group_df3)
## # A tibble: 1 × 2
## day total_incentive
## <chr> <int>
## 1 Sunday 4906
# Label the lowest-incentive day, then left-merge that label onto
# `data_merged`; rows from other days receive NA for the new label.
# NOTE(review): when `data_merged` already has a column named `tag`, merge()
# disambiguates the clash with its default ".x"/".y" suffixes.
lowest_count_group_df3[["tag"]] <- "Lowest_Probability_Group_DF3"
data_merged <- merge(
  x = data_merged,
  y = lowest_count_group_df3[, c("day", "tag")],
  by = "day",
  all.x = TRUE
)
# Bar chart of total incentive per weekday; stat = "identity" makes the bar
# height equal to the precomputed total_incentive value.
ggplot(
  data = groupset_3,
  mapping = aes(x = day, y = total_incentive)
) +
  geom_bar(stat = "identity")
INSIGHTS=Grouping the data by day helps to find the peak and off-peak days of the week, which helps the organization deploy its workforce according to those trends.
1)For group by quarter=
The average monthly sales revenue in a given business varies across different quarters of the year. Specifically, I hypothesize that the average monthly sales revenue is significantly higher in the fourth quarter (October, November, December) compared to the other three quarters.
Conclusion=The data reveals noticeable seasonal variation in sales revenue across the four quarters, which helps the company use its resources in a timely manner.
2)For group by department=
The hypothesis aims to explore whether there are statistically significant differences in sales performance across various departments within the organization.
Conclusion=
This is crucial for organizational decision-making about how resources are allocated to the respective departments.
3)For group by day=
The hypothesis aims to investigate whether there is a significant difference in the mean values of a specific variable, such as daily sales, across different days of the week.
Conclusion=
Businesses can use the insights gained from grouping by day to create more accurate sales forecasts, marketing calendars, and operational plans, aligning resources with anticipated demand.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Department/day pairs that actually occur in the data.
existing_combinations <- distinct(data, department, day)

# Every possible pairing of an observed department with an observed day.
all_combinations <- crossing(
  department = unique(data$department),
  day = unique(data$day)
)

# Pairings that are possible but never observed (empty when all pairs occur).
missing_combinations <- setdiff(all_combinations, existing_combinations)
print(missing_combinations)
## # A tibble: 0 × 2
## # ℹ 2 variables: department <chr>, day <chr>
library(dplyr)
# Count the rows for every department/day pair, most frequent first.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and the "summarise() has grouped output" message is not emitted.
data %>%
  group_by(department, day) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count))
## `summarise()` has grouped output by 'department'. You can override using the
## `.groups` argument.
## # A tibble: 18 × 3
## # Groups: department [3]
## department day count
## <chr> <chr> <int>
## 1 "sweing" Wednesday 119
## 2 "sweing" Thursday 118
## 3 "sweing" Tuesday 118
## 4 "sweing" Monday 116
## 5 "sweing" Sunday 116
## 6 "sweing" Saturday 104
## 7 "finishing" Wednesday 52
## 8 "finishing " Saturday 51
## 9 "finishing " Sunday 44
## 10 "finishing" Sunday 43
## 11 "finishing" Tuesday 43
## 12 "finishing " Monday 43
## 13 "finishing " Thursday 42
## 14 "finishing" Monday 40
## 15 "finishing " Tuesday 40
## 16 "finishing" Thursday 39
## 17 "finishing " Wednesday 37
## 18 "finishing" Saturday 32
Insight= We are able to see which combinations of categorical variables occur most and least frequently, so that the organization can optimize its resources for production.
library(ggplot2)
# Plot how often each department/day pair occurs, coloured by department.
# `.groups = "drop"` hands ggplot an ungrouped tibble and avoids the
# "summarise() has grouped output" message.
data %>%
  group_by(department, day) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(
    aes(x = interaction(department, day), y = count, fill = department)
  ) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Frequency of Department-Day Combinations",
    x = "Department-Day",
    y = "Count"
  )
## `summarise()` has grouped output by 'department'. You can override using the
## `.groups` argument.