- We use below libraries for Data import and wrangling.
library(readr)
library(dplyr)
library(tidyr)
- Import reuired datasets of Employee survey and employee profile data.(readr is best suited to import data in tibble and where datatypes are preserved while importing without converting strings to factors by default)
head(survey)
head(employee)
- Glimpse of dataset to understand which columns can be used for joining
glimpse(survey)
Observations: 165
Variables: 8
$ EMPID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 2...
$ BRANCH <chr> "HO", "NORTH", "NORTH", "SOUTH", "SOUTH", "SOUTH", "SOUTH", "SOUTH", "EAST",...
$ FUNCTION <chr> "SALES", "SALES", "FINANCE", "FINANCE", "FINANCE", "FINANCE", "FINANCE", "SA...
$ EES <dbl> 46.60, 41.73, 52.64, 41.56, 42.68, 40.07, 33.30, 62.44, 55.25, 52.18, 34.30,...
$ RR <dbl> 44.73, 33.80, 57.90, 54.40, 53.19, 40.59, 63.28, 55.11, 57.60, 62.47, 46.66,...
$ PP <dbl> 58.38, 35.10, 55.87, 61.95, 53.47, 44.83, 41.07, 62.99, 60.81, 47.18, 42.14,...
$ CG <dbl> 54.51, 51.79, 54.53, 48.69, 48.79, 50.20, 41.34, 58.27, 54.63, 55.14, 47.18,...
$ LT <dbl> 45.00, 52.14, 53.70, 47.86, 48.60, 44.40, 39.11, 53.22, 54.30, 49.72, 47.17,...
glimpse(employee)
Observations: 165
Variables: 6
$ EMPID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 2...
$ BRANCH <chr> "HO", "NORTH", "NORTH", "SOUTH", "SOUTH", "SOUTH", "SOUTH", "SOUTH", "EAST",...
$ FUNCTION <chr> "SALES", "SALES", "FINANCE", "FINANCE", "FINANCE", "FINANCE", "FINANCE", "SA...
$ GENDER <chr> "M", "M", "M", "M", "F", "F", "M", "M", "M", "M", "M", "M", "F", "M", "M", "...
$ EDUQUAL <chr> "GRAD", "PG", "PRO", "PRO", "GRAD", "PG", "PRO", "GRAD", "GRAD", "GRAD", "GR...
$ EXP <int> 2, 3, 2, 5, 8, 3, 4, 5, 6, 1, 0, 1, 2, 3, 6, 4, 3, 11, 14, 2, 6, 3, 4, 5, 2,...
- We will use dplyr Inner_Join method to join survey and employee dataset and mention keys for joining.
mergedata <-
survey %>% inner_join(employee, by = c("EMPID", "BRANCH", "FUNCTION"))
head(mergedata)
- We have analyse some insights which are as follows:
#.What is the count of employees in each Function Branch wise, having experience >2 years and EES more than 40. Output Format: Branches in rows and Functions in columns.
ans1 <-
mergedata %>%
filter(EXP > 2 & EES > 40) %>%
group_by(BRANCH, FUNCTION) %>%
summarize(count = n()) %>%
spread(FUNCTION, count)
ans1
- In above insight we are using filter method of dplyr for data filtering, group_by method of dplyr clubs our datset by BRANCH and than by FUNCTION. Summarize function is used to carry out statistics on our dataset. Than we use tidyr packag’s spread method to convert long dataset to wide.(As per requirement)
#.Recode M as ‘Male’ and F as ‘Female’ and derive a new column EXP_CATEGORY (using EXP column) with 3 categories: <3, >=3 & <=5 and >5. Calculate average EES of FEMALES in each Function Branch wise, having experience between 3 to 5 years Output Format: Branches in rows and Functions in columns
mergedata$GENDER <-
ifelse(mergedata$GENDER == "M", "Male", "Female")
ans2 <-
mergedata %>% mutate(EXP_CATEGORY = ifelse(
EXP < 3,
"RED BAND",
ifelse(EXP >= 3 & EXP <= 5, "YELLOW BAND", "BLACK BAND")
))
ans2
- We will break above example into 2 parts. part a and part b. part a uses mutate function of tidyr to add a column EXP_CATEGORY which depends on Exp of employee. It also transforms existing GENDER column from M/F to Male/Female
ans22 <- ans2 %>%
filter(EXP_CATEGORY == "YELLOW BAND" & GENDER == "Female") %>%
group_by(FUNCTION, BRANCH) %>%
summarize(avgees = mean(EES, na.rm = TRUE)) %>%
spread(BRANCH, avgees)
ans22
- In above part b is just some more data manipulation to get result as per required. It uses already known methods of filter, group_by, summarize, spread.
#.Calculate average EES of employees in each Branch Gender wise, having EDUQUAL as PG or PRO Output Format: Branches in rows and Gender in columns
ans3 <- mergedata %>%
filter(EDUQUAL %in% c("PG", "PRO")) %>%
group_by(BRANCH, GENDER) %>%
summarize(avgees = mean(EES, na.rm = TRUE)) %>%
spread(BRANCH, avgees)
ans3
LS0tDQp0aXRsZTogIkVNUExPWUVFIFNVUlZFWSINCmF1dGhvcjogIkFrc2hheTkxIg0KZGF0ZTogIjI3IEp1bmUgMjAxOCINCm91dHB1dDoNCiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdA0KICBodG1sX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KIy4gV2UgdXNlIGJlbG93IGxpYnJhcmllcyBmb3IgRGF0YSBpbXBvcnQgYW5kIHdyYW5nbGluZy4NCmBgYHtyfQ0KbGlicmFyeShyZWFkcikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHRpZHlyKQ0KYGBgDQoNCiMuIEltcG9ydCByZXVpcmVkIGRhdGFzZXRzIG9mIEVtcGxveWVlIHN1cnZleSBhbmQgZW1wbG95ZWUgcHJvZmlsZSBkYXRhLihyZWFkciBpcyBiZXN0IHN1aXRlZCB0byBpbXBvcnQgZGF0YSBpbiB0aWJibGUgYW5kIHdoZXJlIGRhdGF0eXBlcyBhcmUgcHJlc2VydmVkIHdoaWxlIGltcG9ydGluZyB3aXRob3V0IGNvbnZlcnRpbmcgc3RyaW5ncyB0byBmYWN0b3JzIGJ5IGRlZmF1bHQpDQpgYGB7cn0NCnN1cnZleSA8LSByZWFkX2NzdihmaWxlLmNob29zZSgpLCBjb2xfbmFtZXMgPSBUUlVFKQ0KZW1wbG95ZWUgPC0gcmVhZF9jc3YoZmlsZS5jaG9vc2UoKSwgY29sX25hbWVzID0gVFJVRSkNCmhlYWQoc3VydmV5KQ0KaGVhZChlbXBsb3llZSkNCmBgYA0KDQojLiBHbGltcHNlIG9mIGRhdGFzZXQgdG8gdW5kZXJzdGFuZCB3aGljaCBjb2x1bW5zIGNhbiBiZSB1c2VkIGZvciBqb2luaW5nDQpgYGB7cn0NCmdsaW1wc2Uoc3VydmV5KQ0KZ2xpbXBzZShlbXBsb3llZSkNCmBgYA0KDQojLiBXZSB3aWxsIHVzZSBkcGx5ciBJbm5lcl9Kb2luIG1ldGhvZCB0byBqb2luIHN1cnZleSBhbmQgZW1wbG95ZWUgZGF0YXNldCBhbmQgbWVudGlvbiBrZXlzIGZvciBqb2luaW5nLg0KYGBge3J9DQptZXJnZWRhdGEgPC0NCiAgc3VydmV5ICU+JSBpbm5lcl9qb2luKGVtcGxveWVlLCBieSA9IGMoIkVNUElEIiwgIkJSQU5DSCIsICJGVU5DVElPTiIpKQ0KaGVhZChtZXJnZWRhdGEpDQpgYGANCiMuIFdlIGhhdmUgYW5hbHlzZSBzb21lIGluc2lnaHRzIHdoaWNoIGFyZSBhcyBmb2xsb3dzOg0KDQojLldoYXQgaXMgdGhlIGNvdW50IG9mIGVtcGxveWVlcyBpbiBlYWNoIEZ1bmN0aW9uIEJyYW5jaCB3aXNlLCBoYXZpbmcgZXhwZXJpZW5jZSA+MiB5ZWFycyBhbmQgRUVTIG1vcmUgdGhhbiA0MC4gT3V0cHV0IEZvcm1hdDogQnJhbmNoZXMgaW4gcm93cyBhbmQgRnVuY3Rpb25zIGluIGNvbHVtbnMuDQpgYGB7cn0NCmFuczEgPC0NCiAgbWVyZ2VkYXRhICU+JQ0KICBmaWx0ZXIoRVhQID4gMiAmIEVFUyA+IDQwKSAlPiUNCiAgZ3JvdXBfYnkoQlJBTkNILCBGVU5DVElPTikgJT4lDQogIHN1bW1hcml6ZShjb3VudCA9IG4oKSkgJT4lDQogIHNwcmVhZChGVU5DVElPTiwgY291bnQpDQphbnMxDQpgYGANCiMuIEluIGFib3ZlIGluc2lnaHQgd2UgYXJlIHVzaW5nIGZpbHRlciBtZXRob2Qgb2YgZHBseXIgZm9yIGRhdGEgZmlsdGVyaW5nLCBncm91cF9ieSBtZXRob2Qgb2YgZHBseXIgY2x1YnMgb3VyIGRhdHNldCBieSBCUkFOQ0ggYW5kIHRoYW4gYnkgRlVOQ1RJT04uIFN1bW1hcml6ZSBmdW5jdGlvbiBpcyB1c2VkIHRvIGNhcnJ5IG91dCBzdGF0aXN0aWNzIG9uIG91ciBkYXRhc2V0LiBUaGFuIHdlIHVzZSB0aWR5ciBwYWNrYWcncyBzcHJlYWQgbWV0aG9kIHRvIGNvbnZlcnQgbG9uZyBkYXRhc2V0IHRvIHdpZGUuKEFzIHBlciByZXF1aXJlbWVudCkNCg0KDQojLlJlY29kZSBNIGFzICdNYWxlJyBhbmQgRiBhcyAnRmVtYWxlJyBhbmQgZGVyaXZlIGEgbmV3IGNvbHVtbiBFWFBfQ0FURUdPUlkgKHVzaW5nIEVYUCBjb2x1bW4pIHdpdGggMyBjYXRlZ29yaWVzOiA8MywgPj0zICYgPD01IGFuZCA+NS4gQ2FsY3VsYXRlIGF2ZXJhZ2UgRUVTIG9mIEZFTUFMRVMgaW4gZWFjaCBGdW5jdGlvbiBCcmFuY2ggd2lzZSwgaGF2aW5nIGV4cGVyaWVuY2UgYmV0d2VlbiAzIHRvIDUgeWVhcnMgT3V0cHV0IEZvcm1hdDogQnJhbmNoZXMgaW4gcm93cyBhbmQgRnVuY3Rpb25zIGluIGNvbHVtbnMNCg0KYGBge3IgUGFydCBhfQ0KbWVyZ2VkYXRhJEdFTkRFUiA8LQ0KICBpZmVsc2UobWVyZ2VkYXRhJEdFTkRFUiA9PSAiTSIsICJNYWxlIiwgIkZlbWFsZSIpDQphbnMyIDwtDQogIG1lcmdlZGF0YSAlPiUgbXV0YXRlKEVYUF9DQVRFR09SWSA9IGlmZWxzZSgNCiAgICBFWFAgPCAzLA0KICAgICJSRUQgQkFORCIsDQogICAgaWZlbHNlKEVYUCA+PSAzICYgRVhQIDw9IDUsICJZRUxMT1cgQkFORCIsICJCTEFDSyBCQU5EIikNCiAgKSkNCmFuczINCmBgYA0KDQojLiBXZSB3aWxsIGJyZWFrIGFib3ZlIGV4YW1wbGUgaW50byAyIHBhcnRzLiBwYXJ0IGEgYW5kIHBhcnQgYi4gcGFydCBhIHVzZXMgbXV0YXRlIGZ1bmN0aW9uIG9mIHRpZHlyIHRvIGFkZCBhIGNvbHVtbiBFWFBfQ0FURUdPUlkgd2hpY2ggZGVwZW5kcyBvbiBFeHAgb2YgZW1wbG95ZWUuIEl0IGFsc28gdHJhbnNmb3JtcyBleGlzdGluZyBHRU5ERVIgY29sdW1uIGZyb20gTS9GIHRvIE1hbGUvRmVtYWxlDQoNCmBgYHtyIHBhcnQgYn0NCmFuczIyIDwtIGFuczIgJT4lDQogIGZpbHRlcihFWFBfQ0FURUdPUlkgPT0gIllFTExPVyBCQU5EIiAmIEdFTkRFUiA9PSAiRmVtYWxlIikgJT4lDQogIGdyb3VwX2J5KEZVTkNUSU9OLCBCUkFOQ0gpICU+JQ0KICBzdW1tYXJpemUoYXZnZWVzID0gbWVhbihFRVMsIG5hLnJtID0gVFJVRSkpICU+JQ0KICBzcHJlYWQoQlJBTkNILCBhdmdlZXMpDQphbnMyMg0KYGBgDQojLiBJbiBhYm92ZSBwYXJ0IGIgaXMganVzdCBzb21lIG1vcmUgZGF0YSBtYW5pcHVsYXRpb24gdG8gZ2V0IHJlc3VsdCBhcyBwZXIgcmVxdWlyZWQuIEl0IHVzZXMgYWxyZWFkeSBrbm93biBtZXRob2RzIG9mIGZpbHRlciwgZ3JvdXBfYnksIHN1bW1hcml6ZSwgc3ByZWFkLg0KDQoNCiMuQ2FsY3VsYXRlIGF2ZXJhZ2UgRUVTIG9mIGVtcGxveWVlcyBpbiBlYWNoIEJyYW5jaCBHZW5kZXIgd2lzZSwgaGF2aW5nIEVEVVFVQUwgYXMgUEcgb3IgUFJPIE91dHB1dCBGb3JtYXQ6IEJyYW5jaGVzIGluIHJvd3MgYW5kIEdlbmRlciBpbiBjb2x1bW5zDQpgYGB7cn0NCmFuczMgPC0gbWVyZ2VkYXRhICU+JQ0KICBmaWx0ZXIoRURVUVVBTCAlaW4lIGMoIlBHIiwgIlBSTyIpKSAlPiUNCiAgZ3JvdXBfYnkoQlJBTkNILCBHRU5ERVIpICU+JQ0KICBzdW1tYXJpemUoYXZnZWVzID0gbWVhbihFRVMsIG5hLnJtID0gVFJVRSkpICU+JQ0KICBzcHJlYWQoQlJBTkNILCBhdmdlZXMpDQphbnMzDQpgYGANCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=