Question 1

Create a graph that shows the distribution of companies in the dataset by State (i.e., how many are in each state). There are a lot of States, so consider which axis you should use assuming I am using a ‘portrait’ oriented screen (i.e., taller than wide).

# Number of companies per state: one row per State, with its row count.
q1 <- MyData %>%
  group_by(State) %>%
  summarise(Count = n())

# Horizontal bars (states on the vertical axis after the flip) so the long
# list of states stays readable on a portrait screen; bars are ordered by
# count, largest at the top.
ggplot(q1, aes(x = reorder(State, Count), y = Count)) +
  geom_col(fill = "sky blue") +
  coord_flip() +
  labs(title = "Total Companies by State", x = "State", y = "Count") +
  theme(plot.title = element_text(hjust = 0.5, size = 20))

# Write the most recently created plot to disk.
ggsave("Figure1.png")
## Saving 8 x 11 in image

Question 2

Let’s dig in on the State with the 3rd most companies in the data set. Imagine you work for the state and are interested in how many people are employed by companies in different industries. Create a plot of average employment by industry for companies in this state (only use cases with full data; use R’s complete.cases() function). Your graph should show how variable the ranges are, and exclude outliers.

# Question 2: spread of employment by industry for companies in NY.
# "NY" is hard-coded as the state with the 3rd most companies.
# NOTE(review): presumably read off the Question 1 ranking -- confirm.
MyDataNY <- subset(MyData, State == "NY")

# Keep only rows with no missing values in any column.
MyDataNY <- MyDataNY[complete.cases(MyDataNY), ]

# Exclude outliers within each industry using Tukey's fences
# (values outside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]). This removes every
# extreme value per industry rather than only the single most extreme value
# overall, and it cannot accidentally drop all rows the way negative
# indexing with an empty index vector would.
MyDataNY <- MyDataNY %>%
  group_by(Industry) %>%
  dplyr::filter(
    Employees >= quantile(Employees, 0.25) - 1.5 * IQR(Employees),
    Employees <= quantile(Employees, 0.75) + 1.5 * IQR(Employees)
  ) %>%
  ungroup()

# Average employment per industry, computed on the outlier-free data.
q2 <- MyDataNY %>%
  group_by(Industry) %>%
  summarise(Average = mean(Employees))

# Boxplots show how variable employment is within each industry; the black
# diamond marks the industry average. Industries are ordered by average.
# outlier.shape = NA suppresses the points the boxplot would re-flag on the
# already filtered data.
ggplot(
  MyDataNY,
  aes(x = reorder(Industry, Employees, FUN = mean), y = Employees)
) +
  geom_boxplot(fill = "dodgerblue", outlier.shape = NA) +
  geom_point(
    data = q2,
    aes(x = reorder(Industry, Average), y = Average),
    shape = 18,
    size = 3
  ) +
  coord_flip() +
  labs(
    title = "Average Employment by Industry",
    x = "Industry",
    y = "Employees (diamond = average)"
  ) +
  theme(plot.title = element_text(hjust = 0.5, size = 20))

# Write the most recently created plot to disk.
ggsave("Figure2.png")

Question 3

Now imagine you work for an investor and want to see which industries generate the most revenue per employee. Create a chart that makes this information clear.

# Question 3: revenue per employee by industry.
# Restrict to rows with no missing values in any column.
Fin <- MyData[complete.cases(MyData), ]

# Total revenue and total headcount per industry, and their ratio.
q3 <- Fin %>%
  group_by(Industry) %>%
  summarise(
    Rev = sum(Revenue),
    Emp = sum(Employees),
    RevPEmp = Rev / Emp
  )

# Penalise scientific notation so large values print in fixed notation.
options(scipen = 20)

# Horizontal bars ordered by revenue per employee, largest at the top.
ggplot(q3, aes(x = reorder(Industry, RevPEmp), y = RevPEmp)) +
  geom_col(fill = "seagreen") +
  coord_flip() +
  labs(
    title = "Revenue Per Employee by Industry",
    x = "Industry",
    y = "Revenue Per Employee"
  ) +
  theme(plot.title = element_text(hjust = 0.5, size = 20))

# Write the most recently created plot to disk.
ggsave("Figure3.png")