library(readr)
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(stringr)
# Load the Data Analyst job postings.
# NOTE(review): this absolute path only resolves on the original author's
# machine; point it at a local copy of DataAnalyst.csv when running elsewhere.
da <- read.csv("/Users/nastyamurach/Desktop/DataAnalyst.csv")

# Strip embedded line breaks from the company names. The pattern has to be the
# newline escape "\n": the earlier "/n" was a literal slash followed by "n",
# so it never matched a line break (and would instead have eaten a genuine
# "/n" inside a name).
da$Company.Name <- str_replace_all(da$Company.Name, "\n", "")
# Exploratory Analysis
# Interactive table of the most frequent job titles.
library(DT)

# count(sort = TRUE) orders the titles by descending frequency; top_n() without
# `wt` ranks by the last column (the count `n`).
da %>%
  count(Job.Title, sort = TRUE) %>%
  top_n(20) %>%
  datatable(class = "cell-border stripe")
# There are 405 postings with the exact title "Data Analyst".
# Keep only the postings titled exactly "Data Analyst" whose rating is
# non-negative.
# NOTE(review): negative ratings presumably mark a missing value - confirm
# against the data set.
daa <- filter(da, Job.Title == "Data Analyst", Rating >= 0)

# Average rating over the retained postings (recorded output below).
summarize(daa, mean(Rating))
## mean(Rating)
## 1 3.850143
# Histogram of the ratings of the "Data Analyst" postings.
ggplot(daa, aes(x = Rating)) +
  geom_histogram(colour = "black", fill = "white") +
  labs(y = "Count") +
  theme_bw()
# Remove the rating that trails each company name so the table shows the bare
# name. The earlier pattern "5.0" was an unanchored regex whose "." matched any
# character, so it also deleted pieces such as "500" from inside a name, and it
# only covered companies rated exactly 5.0.
# NOTE(review): assumes the rating is appended at the very end of the name as a
# "d.d" token, optionally preceded by whitespace or a line break - confirm
# against the CSV.
daa$Company.Name <- str_replace(daa$Company.Name, "\\s*\\d\\.\\d$", "")

# Mean rating per company, keeping the ten best (ties are all kept by top_n(),
# so more than ten rows can appear).
daa %>%
  group_by(Company.Name) %>%
  summarize(mean = mean(Rating)) %>%
  top_n(10, wt = mean) %>%
  datatable(class = "cell-border stripe")
# Horizontal bar chart of the locations with the most "Data Analyst" postings.
# Bars are ordered by count, and the same count also drives their transparency.
daa %>%
  count(Location, sort = TRUE) %>%
  top_n(10) %>%
  ggplot(aes(x = reorder(Location, n), y = n)) +
  geom_col(aes(alpha = n), colour = "black", fill = "#85c8f5") +
  labs(x = "Location", y = "Count") +
  coord_flip() +
  theme_bw()
# Shorten the company-size labels by dropping the word "employees".
daa$Size <- str_replace_all(daa$Size, "employees", "")

# Posting count and mean rating per company size, ignoring the "Unknown" size.
# Both statistics come from one summary so every bar is paired with its own
# mean by construction. Previously the plot pulled `rat$mean` from a separate
# data frame inside aes(), which only worked while the two tables happened to
# share the same row order. `rat` keeps its `Size` and `mean` columns and gains
# the count `n`.
rat <- daa %>%
  filter(Size != "Unknown") %>%
  group_by(Size) %>%
  summarize(n = n(), mean = mean(Rating))

# Horizontal bars: length = number of postings, fill = mean rating, with the
# rounded mean rating printed as a label on each bar.
rat %>%
  ggplot(aes(x = reorder(Size, n), y = n, fill = mean)) +
  geom_bar(
    stat = "identity", position = position_dodge(), color = "black",
    alpha = 0.4
  ) +
  ggtitle("Size of the company with the mean rating") +
  labs(x = "Size of company", y = "Count") +
  geom_label(aes(label = round(mean, digits = 2)), fill = "white") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.position = "none") +
  coord_flip()