library(readr)
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(stringr)
# Load the raw job-postings table into `da`.
# NOTE(review): the path is absolute and machine-specific; consider a
# project-relative path so the script runs elsewhere.
da <- read.csv("/Users/nastyamurach/Desktop/DataAnalyst.csv")

# Remove embedded newline characters from the company names. The earlier
# pattern "/n" (forward slash followed by "n") only matched that literal
# two-character text; "\n" is the newline escape.
# NOTE(review): presumably the raw names carry a newline followed by extra
# text (later code strips a trailing "5.0") -- confirm against the CSV.
da$Company.Name <- str_replace_all(da$Company.Name, "\n", "")

Exploratory Analysis

library(DT)
# Interactive table of the most frequent job titles: count each title, order
# by descending frequency, keep the top 20 by the last column (the count `n`),
# and render the result with bordered, striped cells.
da %>%
  count(Job.Title, sort = TRUE) %>%
  top_n(20) %>%
  datatable(class = "cell-border stripe")

The job title "Data Analyst" appears 405 times in the data.

# Keep only postings titled exactly "Data Analyst" with a non-negative Rating.
# Comma-separated filter() conditions are combined with logical AND.
# NOTE(review): presumably negative ratings encode "no rating" -- confirm.
daa <- filter(da, Job.Title == "Data Analyst", Rating >= 0)

# Mean rating across the retained postings (recorded output shown below).
summarize(daa, mean(Rating))
##   mean(Rating)
## 1     3.850143

# Histogram of the ratings using the default binning; white bars with black
# outlines on the black-and-white theme.
ggplot(daa, aes(x = Rating)) +
  geom_histogram(color = "black", fill = "white") +
  ylab("Count") +
  theme_bw()

# Remove the literal text "5.0" from the company names. The pattern is wrapped
# in fixed() so the dot is matched literally; as a regular expression "5.0"
# would also delete sequences such as "500" or "5,0" inside a name.
# NOTE(review): presumably "5.0" is a rating value appended to the name in the
# raw data -- confirm; names carrying other appended values are untouched.
daa$Company.Name <- str_replace_all(daa$Company.Name, fixed("5.0"), "")

# Interactive table of the best-rated companies: average Rating per company,
# then keep the top 10 by the last column (`mean`). top_n() keeps ties, so
# more than 10 rows can be shown when several companies share the cutoff value.
daa %>%
  group_by(Company.Name) %>%
  summarize(mean = mean(Rating)) %>%
  top_n(10) %>%
  datatable(class = "cell-border stripe")
# Horizontal bar chart of the locations with the most "Data Analyst" postings.
# Locations are counted, sorted by descending count, and the top 10 (by `n`)
# are kept. Bars are ordered by count, and their transparency also follows it.
daa %>%
  count(Location, sort = TRUE) %>%
  top_n(10) %>%
  ggplot(aes(x = reorder(Location, n), y = n, alpha = n)) +
  geom_col(color = "black", fill = "#85c8f5") +
  labs(x = "Location", y = "Count") +
  coord_flip() +
  theme_bw()

# Drop the word "employees" from the company-size labels.
daa$Size <- str_replace_all(daa$Size, "employees", "")

# Mean rating per company-size category, excluding the "Unknown" category.
rat <- daa %>%
  filter(Size != "Unknown") %>%
  group_by(Size) %>%
  summarize(mean = mean(Rating))

# Horizontal bar chart of posting counts per company size, with each bar
# filled and labelled by that size category's mean rating.
#
# The per-size means in `rat` are joined onto the counts by `Size`, so every
# bar is paired with its own category's value. Previously `rat$mean` was
# referenced directly inside aes(), which matched values to bars only by row
# position in two separately computed tables.
daa %>%
  filter(Size != "Unknown") %>%
  count(Size) %>%
  left_join(rat, by = "Size") %>%
  ggplot(aes(x = reorder(Size, n), y = n, fill = mean)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black",
           alpha = 0.4) +
  ggtitle("Size of the company with the mean rating") +
  labs(x = "Size of company", y = "Count") +
  # Print the mean rating (two decimals) on each bar.
  geom_label(aes(label = round(mean, digits = 2)), fill = "white") +
  theme_minimal() +
  # Centre the title and hide the fill legend (the labels carry the values).
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "none") +
  coord_flip()