Content
This is textual data in the form of a JSON file. It has more than 145k records. Each record has attributes such as:
- Review Title
- Review Body
- Review Rating
- Reviewed Company
- Review description
Every organization has its pros and cons, which its employees feel should be made public so that other people who want to join the organization can make decisions based on reviews from the people who work there.
This is textual data in the form of a JSON file. It has more than 145k records. Each record has the attributes listed above.
All thanks to indeed.com for making this data public and easily available.
library(tidyverse)
library(jsonlite)
library(lubridate)
#download.file('https://www.kaggle.com/takkimsncn/employer-review-about-their-organisation/data?select=results.json')
# Parse the local JSON dump of reviews into `er`, then print a compact
# overview of the parsed object's structure.
er <- jsonlite::fromJSON(txt = "results.json")
utils::str(er)
'data.frame': 145209 obs. of 5 variables:
$ ReviewTitle : chr "Productive" "Stressful" "Good Company for Every employee" "Productive" ...
$ CompleteReview: chr "Good company, cool workplace, work load little bit higher. Clean environment, disciplined, good cantin, big cam"| __truncated__ "1. Need to work on boss's whims and fancies 2. Priorities keep changing 3. No regards for work life balance 4. "| __truncated__ "Good company for every Engineers dream, Full Mediclaim for entired family, Free transport services from company"| __truncated__ "I am just pass out bsc in chemistry Typical day at work Mangement Work place good The most enjoyable part of th"| __truncated__ ...
$ URL : chr "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" ...
$ Rating : chr "3.0" "3.0" "5.0" "5.0" ...
$ ReviewDetails : chr "(Current Employee) - Ghansoli - August 30, 2021" "(Former Employee) - - August 26, 2021" "(Former Employee) - - August 17, 2021" "(Current Employee) - - August 17, 2021" ...
# Derive the company slug, employment status text, location and review date
# from the raw `URL` and `ReviewDetails` columns.

# Company slug: the 5th "/"-separated piece of the review URL, e.g.
# "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" gives
# "Reliance-Industries-Ltd". `vapply()` pins the result to exactly one string
# per row; a URL with fewer than five pieces yields NA instead of an error.
url_parts <- strsplit(er$URL, "/", fixed = TRUE)
er$com <- trimws(vapply(url_parts, function(p) p[5L], character(1)))

# `ReviewDetails` has the shape
#   "<optional title> (<status>) - <location> - <Month d, yyyy>".
# Splitting on every "-" shifts the pieces as soon as the job title or the
# location itself contains a hyphen, so match the structure instead:
#   group 1: everything up to a ")" that is followed by a separator,
#   group 2: the (possibly empty) location,
#   group 3: the text after the LAST hyphen, i.e. the date.
# Rows that do not fit this shape get NA in all three columns rather than
# silently shifted values.
details <- str_match(
  er$ReviewDetails,
  "^(.*?\\))\\s*-\\s*(.*?)\\s*-\\s*([^-]*)$"
)
er$CF <- trimws(details[, 2])
er$location <- trimws(details[, 3])

# "%B" is the full month name in the current LC_TIME locale, so this expects an
# English locale; values that cannot be parsed become NA.
er$date <- as.Date(trimws(details[, 4]), format = "%B %d, %Y")
str(er)
'data.frame': 145209 obs. of 9 variables:
$ ReviewTitle : chr "Productive" "Stressful" "Good Company for Every employee" "Productive" ...
$ CompleteReview: chr "Good company, cool workplace, work load little bit higher. Clean environment, disciplined, good cantin, big cam"| __truncated__ "1. Need to work on boss's whims and fancies 2. Priorities keep changing 3. No regards for work life balance 4. "| __truncated__ "Good company for every Engineers dream, Full Mediclaim for entired family, Free transport services from company"| __truncated__ "I am just pass out bsc in chemistry Typical day at work Mangement Work place good The most enjoyable part of th"| __truncated__ ...
$ URL : chr "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" "https://in.indeed.com/cmp/Reliance-Industries-Ltd/reviews" ...
$ Rating : chr "3.0" "3.0" "5.0" "5.0" ...
$ ReviewDetails : chr "(Current Employee) - Ghansoli - August 30, 2021" "(Former Employee) - - August 26, 2021" "(Former Employee) - - August 17, 2021" "(Current Employee) - - August 17, 2021" ...
$ com : chr "Reliance-Industries-Ltd" "Reliance-Industries-Ltd" "Reliance-Industries-Ltd" "Reliance-Industries-Ltd" ...
$ CF : chr "(Current Employee)" "(Former Employee)" "(Former Employee)" "(Current Employee)" ...
$ location : chr "Ghansoli" "" "" "" ...
$ date : Date, format: "2021-08-30" "2021-08-26" ...
# Normalise column types and derive a clean employment-status factor `CF01`.
# Columns are addressed by name so the step keeps working if the column order
# changes.
er <- er %>%
  mutate(
    # Lower-case every text column; `Rating` and `date` are left untouched.
    across(-c(Rating, date), tolower),
    # Ratings arrive as strings such as "3.0".
    Rating = as.numeric(Rating),
    # `CF` may carry a job title in front of the status, e.g.
    # "officer (former employee)". Extract only the status itself instead of
    # taking a fixed-width suffix, which lets unrelated text through whenever
    # the value does not end in a status. Values without a recognisable
    # status become NA.
    CF01 = factor(str_extract(CF, "\\((?:current|former) employee\\)")),
    across(c(com, CF, location), as.factor)
  )
summary(er)
ReviewTitle CompleteReview URL Rating
Length:145209 Length:145209 Length:145209 Min. :1.000
Class :character Class :character Class :character 1st Qu.:4.000
Mode :character Mode :character Mode :character Median :4.000
Mean :4.054
3rd Qu.:5.000
Max. :5.000
ReviewDetails com
Length:145209 tata-consultancy-services-(tcs):14441
Class :character ibm :10820
Mode :character infosys :10696
accenture :10137
cognizant-technology-solutions : 9626
hdfc-bank : 6749
(Other) :82740
CF location
(former employee) :79193 :129943
(current employee) :65493 india : 4367
officer (former employee) : 103 bangalore urban, karnataka: 989
officer (current employee) : 91 hp : 279
health care (current employee): 20 in : 242
employee (current employee) : 19 kerala : 228
(Other) : 290 (Other) : 9161
date CF01
Min. :2011-08-31 (current employee):65747
1st Qu.:2015-06-11 (former employee) :79461
Median :2017-04-18 head row : 1
Mean :2017-02-09
3rd Qu.:2018-09-21
Max. :2021-09-08
NA's :142
# `CF` (the 7th column) has been superseded by `CF01`, so remove it, then
# discard every row that still has a missing value in any remaining column.
er <- drop_na(select(er, -CF))
names(er)
[1] "ReviewTitle" "CompleteReview" "URL" "Rating"
[5] "ReviewDetails" "com" "location" "date"
[9] "CF01"
# Title and full text of Accenture reviews rated above 4.5.
er %>%
  filter(com == "accenture", Rating > 4.5) %>%
  select(ReviewTitle, CompleteReview) %>%
  as_tibble()

# Title and full text of Accenture reviews rated below 3.
er %>%
  filter(com == "accenture", Rating < 3) %>%
  select(ReviewTitle, CompleteReview) %>%
  as_tibble()