# ---- Setup ------------------------------------------------------------------
# Packages this analysis relies on:
#   rmarkdown - dynamic documents
#   knitr     - running R code chunks
#   ggplot2   - plotting
#   DT        - interactive HTML tables
#   tidyverse - dplyr verbs, the %>% pipe, ggplot2, ...
required_packages <- c("rmarkdown", "knitr", "ggplot2", "DT", "tidyverse")

# Install only what is missing, so re-running the script does not reinstall
# every package each time.
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# Attach the packages whose functions are called unqualified below
# (filter, group_by, summarise, %>%, ggplot, datatable, ...).
library(tidyverse)
library(DT)

# ---- Load data --------------------------------------------------------------
# The assignment used to sit on the same line as a trailing comment, so it was
# never executed and ConsData was never created.
ConsData <- read.csv(
  "https://raw.githubusercontent.com/DATA607/Project3/master/dataframe.csv",
  stringsAsFactors = FALSE
)
# Drop the C++ and analytics rows: the data recorded against them is spurious.
# Rows with a missing skill are dropped as well, which is what the chained
# `skill != ...` comparisons did implicitly.
spurious_skills <- c("C++", "(C++)", "analytics", "Analytics")
ConsData <- ConsData %>%
  filter(!is.na(skill) & !(skill %in% spurious_skills))

# Coerce the counts to integer. Values that cannot be parsed become NA, and R
# reports this with "Warning: NAs introduced by coercion".
ConsData$count <- as.integer(ConsData$count)
# Preview the first three rows of the cleaned data.
datatable(head(ConsData, 3))

# Number of records per location.
RecordCounts <- ConsData %>%
  group_by(location) %>%
  summarise(Number = n())
datatable(RecordCounts)

# Total count per skill, largest first.
# na.rm = TRUE: the integer coercion of `count` can introduce NAs, and a single
# NA would otherwise turn a skill's whole total into NA.
TopSkillsData <- ConsData %>%
  group_by(skill) %>%
  summarise(Count = sum(count, na.rm = TRUE)) %>%
  arrange(desc(Count))
# Show the full per-skill totals table.
datatable(TopSkillsData)

# Keep the first ten rows, then sort ascending by Count for the horizontal
# bar chart.
TopSkillsData <- head(TopSkillsData, 10)
TopSkillsData <- TopSkillsData %>% arrange(Count)
barplot(
  TopSkillsData$Count,
  main = "Top 10 Skills",
  horiz = TRUE,
  names.arg = TopSkillsData$skill,
  las = 1,
  cex.axis = 0.5,
  cex.names = 0.5
)

# Load the skill -> group lookup. It is read with header = FALSE, so the
# columns arrive unnamed.
SkillGroups <- read.csv(
  "https://raw.githubusercontent.com/DATA607/Project3/master/SkillGroupings.txt",
  stringsAsFactors = FALSE,
  header = FALSE
)
# Name the two lookup columns and display the table.
colnames(SkillGroups) <- c("Skill", "Group")
datatable(SkillGroups)

# Total count per skill.
# na.rm = TRUE: `count` may contain NAs from the integer coercion, which would
# otherwise make the affected totals NA.
TopSkillsGroupData <- ConsData %>%
  group_by(skill) %>%
  summarise(Count = sum(count, na.rm = TRUE)) %>%
  arrange(desc(Count))

# Attach each skill's group; skills without a group are dropped by the
# inner join.
TopSkillsGroupData <- inner_join(
  TopSkillsGroupData, SkillGroups,
  by = c("skill" = "Skill")
)

# Roll the per-skill totals up to one total per group, largest first.
TopSkillsGroupData <- TopSkillsGroupData %>%
  group_by(Group) %>%
  summarise(GroupedCount = sum(Count, na.rm = TRUE)) %>%
  arrange(desc(GroupedCount))
datatable(TopSkillsGroupData)

# Re-sort ascending for plotting.
TopSkillsGroupData <- TopSkillsGroupData %>% arrange(GroupedCount)
# Vertical bar chart of the total count per skill group.
barplot(
  TopSkillsGroupData$GroupedCount,
  main = "Top Skills by Groups",
  horiz = FALSE,
  names.arg = TopSkillsGroupData$Group,
  las = 1,
  cex.axis = 0.5,
  cex.names = 0.5
)

# Row-level data with each skill's group attached; rows whose skill has no
# group are dropped by the inner join.
TopSkillsGroupData1 <- inner_join(
  ConsData, SkillGroups,
  by = c("skill" = "Skill")
)
# Total count of "Programming" skills per location, largest first.
# na.rm = TRUE: `count` may contain NAs from the integer coercion.
TopSkillsGroupDataProgByLocation <- TopSkillsGroupData1 %>%
  filter(Group == "Programming") %>%
  group_by(location) %>%
  summarise(Count = sum(count, na.rm = TRUE)) %>%
  arrange(desc(Count))
datatable(TopSkillsGroupDataProgByLocation)

# Same join as above, this time keeping everything that is NOT in the
# "Programming" group.
TopSkillsGroupData2 <- inner_join(
  ConsData, SkillGroups,
  by = c("skill" = "Skill")
)
TopSkillsGroupData2 <- TopSkillsGroupData2 %>% filter(Group != "Programming")

# Total count per non-programming skill, largest first.
TopNonProgSkills <- TopSkillsGroupData2 %>%
  group_by(skill) %>%
  summarise(Count = sum(count, na.rm = TRUE)) %>%
  arrange(desc(Count))
# Show the non-programming skill totals.
datatable(TopNonProgSkills)

# Return the single skill with the highest total count at one location.
#
# data            - data frame with `location`, `skill` and `count` columns.
# target_location - value of `location` to keep (exact match).
#
# Returns a one-row table with columns `skill` and `totalCount`, or zero rows
# when the location does not occur in `data`. When several skills tie for the
# top total, the first row after sorting is returned.
top_skill_for_location <- function(data, target_location) {
  data %>%
    filter(location == target_location) %>%
    group_by(skill) %>%
    # na.rm = TRUE: `count` may contain NAs from the integer coercion, which
    # would otherwise make the affected totals NA.
    summarise(totalCount = sum(count, na.rm = TRUE)) %>%
    arrange(desc(totalCount)) %>%
    head(1)
}

# Top skill in each of the four cities compared below.
aggDataLocNY <- top_skill_for_location(ConsData, "New York, NY")
aggDataLocSF <- top_skill_for_location(ConsData, "San Francisco, CA")
aggDataLocLdn <- top_skill_for_location(ConsData, "London")
aggDataLocSyd <- top_skill_for_location(ConsData, "Sydney")
# Stack the four one-row results into a single table with a `city` column so
# that one layer can dodge the bars. With a separate layer per city, bars for
# cities sharing the same top skill were drawn on top of each other.
top_skill_by_city <- bind_rows(
  NY = aggDataLocNY,
  SF = aggDataLocSF,
  Ldn = aggDataLocLdn,
  Syd = aggDataLocSyd,
  .id = "city"
)

# Colours are keyed by city name. Unnamed values/labels are matched to the
# alphabetically sorted fill levels (Ldn, NY, SF, Syd), which attached the
# wrong legend label to three of the four cities. The last colour was also
# missing its leading "#".
city_colours <- c(
  NY = "#999999",
  SF = "#E69F00",
  Ldn = "#A69F55",
  Syd = "#444444"
)

ggplot(top_skill_by_city, aes(x = skill, y = totalCount, fill = city)) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(values = city_colours, breaks = names(city_colours))
# Alternative palette: replace scale_fill_manual(...) with
# scale_fill_brewer(palette = "Paired")