# ******************************************************
# Package setup ----
# Run this script in a fresh R session rather than clearing the workspace with
# rm(list = ls()), which would silently delete the caller's objects.
#
# Each package is installed only if it is missing and is then attached with
# library(). The previous `if (!require(pkg)) install.packages(pkg)` pattern
# installed a missing package but never attached it, so the first run on a new
# machine failed later with "could not find function".
#
# Note: the original run reported that corrplot, vcd, vcdExtra and gnm were
# built under R version 3.5.1 (vcd attaches grid; vcdExtra attaches gnm).
required_packages <- c("readxl", "ggplot2", "corrplot", "vcd", "vcdExtra")
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# Step 1: Load the Data
library(readxl)

# Folder that holds the survey workbook. Adjust this one value to run the
# script on another machine; the working directory is deliberately left alone
# (no setwd()) so the script does not change global session state.
data_dir <- "D:/HU/Sem 4/545-91 Analytical Methods II/LABS/LAB 1"
survey_path <- file.path(data_dir, "SatisfactionSurveylab1v1.xlsx")

# Fail early with a readable message instead of a low-level read error.
if (!file.exists(survey_path)) {
  stop("Survey workbook not found: ", survey_path, call. = FALSE)
}
surveydata <- read_excel(survey_path)

# View() opens a data viewer window, so only call it in an interactive
# session; it is skipped under Rscript / batch runs.
if (interactive()) {
  View(surveydata)
}
# Step 2: Data Diagnostics, Basic Univariate Analysis

# Structure of the data set: column types and the first few values.
utils::str(object = surveydata)

# Per-column summary: min, max, quartiles and mean for the numeric columns;
# length/class/mode for the character columns.
summary(object = surveydata)

# Results recorded from the original run:
#   - tibble with 40 observations of 7 variables (one column per question).
#   - Character columns:
#       "How long have you been at your current job?"
#       "How long is your commute to work?"
#   - Numeric rating columns, all ranging from 0 to 5:
#       question                                        median   mean
#       satisfied with current work environment           3      2.975
#       satisfied with compensation and benefits          2      2.25
#       how supportive is your supervisor                 4      3.05
#       would recommend your workplace to others          3      2.875
#       how was this survey                               4      3.45
# Step 3: Distribution of Each Variable
library(ggplot2)

# Draw a bar chart of response counts for one survey question.
#
# data:     data frame holding the survey responses.
# question: column name (a single string); also used as the plot title and
#           the x-axis label.
# fill:     optional bar fill colour; NULL keeps ggplot2's default fill.
#
# Returns the ggplot object, so a top-level call auto-prints the chart.
#
# The column is looked up through the `.data` pronoun rather than
# `surveydata$...` inside aes(): ggplot2 discourages `$` in aesthetics because
# it bypasses the plot's own data. The former `labs(colour = ...)` calls were
# dropped because no colour aesthetic is mapped, so they had no effect.
plot_question_counts <- function(data, question, fill = NULL) {
  stopifnot(
    is.character(question),
    length(question) == 1L,
    question %in% names(data)
  )
  bars <- if (is.null(fill)) geom_bar() else geom_bar(fill = fill)
  ggplot(data, aes(x = .data[[question]])) +
    bars +
    ggtitle(question) +
    labs(x = question, y = "Count")
}

plot_question_counts(
  surveydata, "How long have you been at your current job?",
  fill = "#FF6666"
)
# Most observations fall in the 1-3 years and 3-5 years groups, which suggests
# that the majority of the survey takers are experienced employees.

plot_question_counts(
  surveydata, "Are you satisfied with your current work environment?"
)
# Responses are spread across the scale; about 45% of respondents are
# satisfied with the current work environment (rated 4 or 5).

plot_question_counts(
  surveydata,
  "How satisfied are with your compensation and employee benefits?",
  fill = "#FF6666"
)
# Responses are spread across the scale; about 65% of respondents are not
# satisfied or neutral about compensation (rated 1, 2 or 3).

plot_question_counts(surveydata, "How long is your commute to work?")
# Most observations fall between 10 minutes and 1 hour, which suggests that a
# typical commute is around 30 minutes.

plot_question_counts(
  surveydata, "How supportive is your supervisor?",
  fill = "#FF6666"
)
# Responses are spread across the scale; about 55% of respondents find their
# supervisor supportive (rated 4 or 5).

plot_question_counts(
  surveydata, "Would you recommend your workplace to others?"
)
# Most observations are rated 4 or 5, so the majority of the survey takers
# would recommend their workplace to their friends/others.

plot_question_counts(surveydata, "How was this survey?", fill = "#FF6666")
# Most observations are rated 3, 4 or 5, so the majority of the survey takers
# were satisfied/happy with the survey.
# Step 4: Correlation Matrix
library(corrplot)

# Correlation is only meaningful for the numeric rating questions. The two
# categorical questions (job tenure, commute length) hold labels such as
# "1-3 years" and "<10 minutes"; calling as.numeric() on them, as the script
# previously did, turns every value into NA ("NAs introduced by coercion"),
# overwrites the original answers in `surveydata`, and leaves NA rows/columns
# in the correlation matrix. The numeric columns are therefore selected
# instead, and `surveydata` is left unmodified.
# TODO: to include tenure/commute, recode them first as ordered factors with
# explicitly listed levels (the full set of levels is not visible here).
is_rating <- vapply(surveydata, is.numeric, logical(1))
stopifnot(sum(is_rating) >= 2L)  # need at least two columns to correlate
rating_data <- surveydata[is_rating]

# Pairwise-complete observations keep a single missing answer from blanking
# out a whole row/column of the matrix.
Corr <- cor(rating_data, use = "pairwise.complete.obs")

# Lower triangle only: the matrix is symmetric.
corrplot(Corr, type = "lower")
