Stack Overflow recently released the anonymized results of its 2019 annual developer survey. The goal of this project is to analyse various trends among developers worldwide.
The dataset has 90,000 rows. Our goal is to filter R responses so that we can drill down further and analyze the trends related to R developers.
#reading necessary libraries
require(tidyverse)
require(countrycode)
require(highcharter)
require(ggplot2)
# Load the public survey responses from disk.
data <- read_csv("data/survey_results_public.csv")
# Put the survey columns on the search path: later chunks refer to some of
# them by bare name (for example table(OpenSourcer)).
attach(data)
# Count respondents per MainBranch answer; as.data.frame() on the table
# yields the columns Var1 (category) and Freq (count).
branchCounts <- table(data$MainBranch)
devTable <- as.data.frame(branchCounts)
Let’s create a bar plot of the profession of the respondents.
# Column chart of the respondent count for each MainBranch category.
hchart(
  devTable,
  mapping = hcaes(x = Var1, y = Freq),
  type = "column",
  name = "Count",
  color = "#9B6ED8"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot of occupation of participants", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
So we can see that most of the participants are developers.
Now let’s see how many respondents code as a hobby.
# Tabulate the answers to the Hobbyist question.
table(data$Hobbyist)
##
## No Yes
## 17626 71257
# Tabulate the answers to the OpenSourcer question.
# OpenSourcer is referenced by bare name, so this relies on the earlier
# attach(data) call having put the survey columns on the search path.
table(OpenSourcer)
## OpenSourcer
## Less than once a month but more than once per year
## 20561
## Less than once per year
## 24972
## Never
## 32295
## Once a month or more often
## 11055
Let’s check the Employment of the respondents.
# Percentage of respondents per Employment category, rounded to two decimals.
# table() leaves NA answers out by default, so the shares are relative to the
# rows that have an answer. Employment is resolved through attach(data).
round(prop.table(table(Employment))*100,2)
## Employment
## Employed full-time
## 73.92
## Employed part-time
## 5.13
## Independent contractor, freelancer, or self-employed
## 9.76
## Not employed, and not looking for work
## 4.36
## Not employed, but looking for work
## 6.42
## Retired
## 0.41
We can notice that 74 % of the respondents were Full time employees.
Now let’s check to which country the respondents belonged.
# Respondent count per country, reduced to the ten largest counts and sorted
# from most to fewest respondents. top_n() is called without a weight, so it
# ranks by the last column, which is the `n` column produced by count().
countryDf <- data %>%
  count(Country) %>%
  top_n(10) %>%
  arrange(desc(n))

# Column chart of those countries.
hchart(
  countryDf,
  mapping = hcaes(x = Country, y = n),
  type = "column",
  name = "Count"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot of Country of respondents", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
From the above plot we can see that most of the respondents were from the US and India, followed by Germany.
# Tabulate the answers to the Student question.
# Student is resolved by bare name through the earlier attach(data).
table(Student)
## Student
## No Yes, full-time Yes, part-time
## 65816 15769 5429
We can notice that most of the respondents are not Students.
Next, let’s see where the students come from by creating a data frame of the countries with the highest number of full-time student respondents.
# Keep only respondents who answered "Yes, full-time" to the Student question.
fullTimeStudents <- filter(data, Student == "Yes, full-time")

# Count them per country, keep the 30 largest counts (top_n() ranks by the
# last column, `n`), and list the biggest first.
StudentDf <- fullTimeStudents %>%
  count(Country) %>%
  top_n(30) %>%
  arrange(desc(n))

StudentDf
So most of the full-time students were from India, not from the US.
Let’s check the Education level for the major countries like India and US.
# Respondents whose Country is India.
indiaRows <- filter(data, Country == "India")

# Count per education level, most common first; rows containing NA (the
# respondents without an EdLevel answer) are dropped.
EdlevelIndia <- indiaRows %>%
  group_by(EdLevel) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  na.omit()

# Column chart of the education levels.
hchart(
  EdlevelIndia,
  mapping = hcaes(x=EdlevelIndia$EdLevel,y=EdlevelIndia$count),
  type = "column",
  color = "red",
  name = "Count"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot Education level of Indian Respondents", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
Let’s check the results for USA.
# Education level of respondents whose Country is "United States":
# count per EdLevel, most common first.
EdlevelUS <- data %>%
  filter(Country == "United States") %>%
  group_by(EdLevel) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Drop the row for respondents without an EdLevel answer. This must be
# applied to EdlevelUS itself so the NA category does not end up in the chart.
EdlevelUS <- na.omit(EdlevelUS)

# Column chart of the education levels.
hchart(
  EdlevelUS,
  mapping = hcaes(x=EdlevelUS$EdLevel,y=EdlevelUS$count),
  type = "column",
  color = "Green",
  name = "Count"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot Education level of American Respondents", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
Let’s check the results for Germany.
# Education level of respondents whose Country is "Germany":
# count per EdLevel, most common first.
Edlevelger <- data %>%
  filter(Country == "Germany") %>%
  group_by(EdLevel) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Drop the row for respondents without an EdLevel answer, as is done for the
# India table, so the NA category is not plotted.
Edlevelger <- na.omit(Edlevelger)

# Column chart of the education levels.
hchart(
  Edlevelger,
  mapping = hcaes(x=Edlevelger$EdLevel,y=Edlevelger$count),
  type = "column",
  color = "purple",
  name = "Count"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot Education level of german Respondents", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
# Tabulate the answers to the UndergradMajor question.
# UndergradMajor is resolved by bare name through the earlier attach(data).
table(UndergradMajor)
## UndergradMajor
## A business discipline (ex. accounting, finance, marketing)
## 1841
## A health science (ex. nursing, pharmacy, radiology)
## 323
## A humanities discipline (ex. literature, history, philosophy)
## 1571
## A natural science (ex. biology, chemistry, physics)
## 3232
## A social science (ex. anthropology, psychology, political science)
## 1352
## Another engineering discipline (ex. civil, electrical, mechanical)
## 6222
## Computer science, computer engineering, or software engineering
## 47214
## Fine arts or performing arts (ex. graphic design, music, studio art)
## 1233
## I never declared a major
## 976
## Information systems, information technology, or system administration
## 5253
## Mathematics or statistics
## 2975
## Web development or web design
## 3422
As expected, most of them are Computer Science graduates, followed by graduates of another engineering discipline and of Information Systems.
Country and the under-graduate programmes in which the participants were enrolled.
# Undergraduate majors of respondents from India: count per UndergradMajor,
# most common first, with rows containing NA removed.
UGIndia <- data %>%
  filter(Country == "India") %>%
  group_by(UndergradMajor) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  na.omit()
UGIndia

# Same summary for respondents from the United States.
UGUS <- data %>%
  filter(Country == "United States") %>%
  group_by(UndergradMajor) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  na.omit()
UGUS

# Same summary for respondents from Germany.
UGGermany <- data %>%
  filter(Country == "Germany") %>%
  group_by(UndergradMajor) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  na.omit()
UGGermany
We see almost similar trends for all the 3 major countries.
# Restrict to respondents whose country appears in countryDf (the most
# frequent countries computed earlier).
topCountryRows <- filter(data, Country %in% countryDf$Country)

# Count each distinct DevType answer, keep answers given more than 100 times,
# sort from most to least common, and drop rows containing NA.
DevTypeDf <- topCountryRows %>%
  group_by(DevType) %>%
  summarise(Count = n()) %>%
  filter(Count > 100) %>%
  arrange(desc(Count)) %>%
  na.omit()

DevTypeDf
So most of the respondents from the top 10 countries were Full stack developers and backend developers.
# Most common DevType answers per country. Each table is built once; the
# weight column for top_n() is named explicitly instead of relying on
# "last column" selection. top_n() keeps ties, so a table can hold more
# than ten rows.

# India
DevIndia <- data %>%
  filter(Country == "India") %>%
  group_by(DevType) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  top_n(10, wt = Count)

# United States
DevUs <- data %>%
  filter(Country == "United States") %>%
  group_by(DevType) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  top_n(10, wt = Count)

# Germany
DevGer <- data %>%
  filter(Country == "Germany") %>%
  group_by(DevType) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  top_n(10, wt = Count)
Creating the Plot:
# Column chart of the most common DevType answers in India.
hchart(
  DevIndia,
  mapping = hcaes(x=DevIndia$DevType,y=DevIndia$Count),
  type = "column"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot of top developers in India", align = "center") %>%
  hc_add_theme(hc_theme_elementary())

# Same chart for the United States.
hchart(
  DevUs,
  mapping = hcaes(x=DevUs$DevType,y=DevUs$Count),
  type = "column"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot of top developers in US", align = "center") %>%
  hc_add_theme(hc_theme_elementary())

# Same chart for Germany.
hchart(
  DevGer,
  mapping = hcaes(x=DevGer$DevType,y=DevGer$Count),
  type = "column"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Barplot of top developers in Germany", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
This is a really interesting feature to analyze. We can analyze which specific Job roles were most satisfied or in the list of top 10 countries how many respondents were satisfied with their Jobs etc.
# Tabulate the answers to the JobSat question.
# JobSat is resolved by bare name through the earlier attach(data).
table(JobSat)
## JobSat
## Neither satisfied nor dissatisfied Slightly dissatisfied
## 8720 10752
## Slightly satisfied Very dissatisfied
## 24207 4857
## Very satisfied
## 22452
Let’s create a helper function
# DrillDown(data, fil_var, val, gp_var)
#
# Filter `data` to the rows where column `fil_var` equals `val`, then count
# the rows for each distinct value of column `gp_var`.
#
# Arguments:
#   data    - data frame / tibble to summarise.
#   fil_var - column to filter on, given as a bare column name
#             (e.g. Country).
#   val     - value that `fil_var` must equal for a row to be kept.
#   gp_var  - column to group by, given as a bare column name (e.g. JobSat).
#
# Returns a tibble with two columns, sorted by descending Count:
#   Satisfaction - the distinct values of `gp_var`. The column always has
#                  this name, whatever `gp_var` is, because callers read it
#                  as `$Satisfaction`.
#   Count        - number of matching rows.
# Rows containing NA (respondents with no `gp_var` answer) are removed.
#
# The columns are captured with tidy evaluation and looked up inside `data`,
# so the function does not attach() anything and does not depend on the
# survey columns being visible on the search path.
DrillDown <- function(data, fil_var, val, gp_var) {
  data %>%
    # `!!val` inlines the value so a column that happens to be called `val`
    # cannot shadow the argument.
    filter({{ fil_var }} == !!val) %>%
    group_by(Satisfaction = {{ gp_var }}) %>%
    summarise(Count = n()) %>%
    arrange(desc(Count)) %>%
    na.omit()
}
# !diagnostics off
# Job satisfaction counts per country. Each result has the columns
# Satisfaction and Count (see DrillDown).
JobSatIndia <- DrillDown(data, Country, "India", JobSat)
JobSatUS <- DrillDown(data, Country, "United States", JobSat)
# The UK table filters on "United Kingdom", matching CarSatUk below.
JobSatUk <- DrillDown(data, Country, "United Kingdom", JobSat)
JobSatChina <- DrillDown(data, Country, "China", JobSat)
JobSatJapan <- DrillDown(data, Country, "Japan", JobSat)

# Career satisfaction counts for the same countries.
CarSatIndia <- DrillDown(data, Country, "India", CareerSat)
CarSatUS <- DrillDown(data, Country, "United States", CareerSat)
CarSatUk <- DrillDown(data, Country, "United Kingdom", CareerSat)
CarSatChina <- DrillDown(data, Country, "China", CareerSat)
CarSatJapan <- DrillDown(data, Country, "Japan", CareerSat)
For almost all the countries the job and career satisfaction results were the same: most of the respondents were Slightly Satisfied.
In a country like the US, however, most of the respondents were Very Satisfied.
Only in China were most of the respondents either Slightly Satisfied or Slightly Dissatisfied.
# Count each distinct JobFactors answer over all respondents, keep the 11
# largest counts (top_n() ranks by the last column, Count), then drop rows
# containing NA.
TOPJF <- data %>%
  group_by(JobFactors) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  top_n(11) %>%
  na.omit()

# Column chart of those answers.
hchart(
  TOPJF,
  mapping = hcaes(x=TOPJF$JobFactors,y=TOPJF$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most", align = "center") %>%
  hc_add_theme(hc_theme_elementary())

# JobFactors counts for India. DrillDown() names its grouping column
# "Satisfaction", so that column holds the JobFactors answers here.
JfIndia <- DrillDown(data, Country, "India", JobFactors) %>%
  top_n(10)

hchart(
  JfIndia,
  mapping = hcaes(x=JfIndia$Satisfaction,y=JfIndia$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most for Indians", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
# JobFactors counts for the United Kingdom. The filter value has to be
# "United Kingdom" so that the data matches the JfUk name and the chart
# title. DrillDown() names its grouping column "Satisfaction", so that
# column holds the JobFactors answers here.
JfUk <- DrillDown(data, Country, "United Kingdom", JobFactors)

# Keep the ten largest counts; the weight column is named explicitly.
JfUk <- JfUk %>%
  top_n(10, wt = Count)

hchart(
  JfUk,
  mapping = hcaes(x=JfUk$Satisfaction,y=JfUk$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most for UK", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
So in India respondents mostly cared for factors such as ‘Languages, frameworks, and other technologies I’d be working with;Office environment or company culture;Opportunities for professional development’
# JobFactors counts per country, each reduced to its ten largest counts and
# charted. DrillDown() names its grouping column "Satisfaction", so that
# column holds the JobFactors answers in every table below.

# United States
JfUS <- DrillDown(data, Country, "United States", JobFactors) %>%
  top_n(10)
hchart(
  JfUS,
  mapping = hcaes(x=JfUS$Satisfaction,y=JfUS$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most for US", align = "center") %>%
  hc_add_theme(hc_theme_elementary())

# Germany
JfGer <- DrillDown(data, Country, "Germany", JobFactors) %>%
  top_n(10)
hchart(
  JfGer,
  mapping = hcaes(x=JfGer$Satisfaction,y=JfGer$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most for Germany", align = "center") %>%
  hc_add_theme(hc_theme_elementary())

# China
JfChina <- DrillDown(data, Country, "China", JobFactors) %>%
  top_n(10)
hchart(
  JfChina,
  mapping = hcaes(x=JfChina$Satisfaction,y=JfChina$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most for China", align = "center") %>%
  hc_add_theme(hc_theme_elementary())

# Japan (rows containing NA are dropped once more before taking the top ten)
JfJapan <- DrillDown(data, Country, "Japan", JobFactors)
JfJapan <- JfJapan %>%
  na.omit() %>%
  top_n(10)
hchart(
  JfJapan,
  mapping = hcaes(x=JfJapan$Satisfaction,y=JfJapan$Count),
  type = "column",
  color = "#FDE725"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Job Factors which matter the most for Japan", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
# Overall mean of WorkWeekHrs, ignoring missing answers. WorkWeekHrs is
# resolved by bare name through the earlier attach(data).
mean(WorkWeekHrs, na.rm = TRUE)
## [1] 42.1272

# Mean weekly work hours per country, ignoring missing answers.
CountryWorkHrs <- data %>%
  group_by(Country) %>%
  summarise(Mean = mean(WorkWeekHrs, na.rm = TRUE))

# Countries with the 20 highest means (top_n() ranks by the last column,
# Mean), sorted from highest to lowest.
TopCountryWork <- CountryWorkHrs %>%
  arrange(desc(Mean)) %>%
  top_n(20)

hchart(
  TopCountryWork,
  mapping = hcaes(x=TopCountryWork$Country,y=TopCountryWork$Mean),
  type = "column",
  color = "#ADE554"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Highest weekly work hours", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
So the average working hours were 42 hours.
We can notice that the Country where the weekly work hours were highest is Finland followed by Norway.
# Count each distinct WorkChallenge answer, sort from most to least common,
# keep the 15 largest counts (top_n() ranks by the last column, Count), and
# drop rows containing NA.
WorkChallTop <- data %>%
  group_by(WorkChallenge) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  top_n(15) %>%
  na.omit()

hchart(
  WorkChallTop,
  mapping = hcaes(x=WorkChallTop$WorkChallenge,y=WorkChallTop$Count),
  type = "column",
  color = "#FEA554"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Top Work challenges", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
The top Work challenges included things like :
# Count each distinct LanguageWorkedWith answer, sort from most to least
# common, keep the 20 largest counts (top_n() ranks by the last column,
# Count), and drop rows containing NA.
# NOTE(review): the answers look like ";"-separated lists of languages, so
# this counts combinations rather than individual languages - confirm
# whether that is intended.
TopLang <- data %>%
  group_by(LanguageWorkedWith) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  top_n(20) %>%
  na.omit()

hchart(
  TopLang,
  mapping = hcaes(x=TopLang$LanguageWorkedWith,y=TopLang$Count),
  type = "column",
  color = "#EAF253"
) %>%
  hc_exporting(enabled = TRUE) %>%
  hc_title(text = "Top Programming languages used By developers", align = "center") %>%
  hc_add_theme(hc_theme_elementary())
So we can notice that the most used languages are HTML/CSS;JavaScript;PHP;SQL;C#.