Team:
Shyam Kumar Voleti
Sudhanshu Sharma
Nikhil Dandapanthula
Xueqing Wang
Priyanka Bodepudi
******* Step by step approach************
Step 1: Load the Data
Step 2: Data Diagnostics, Basic Univariate Analysis
Step 3: Distribution of Each Variable
Step 4: Correlation Matrix
Step 5: Conclusion
******************************************************
    # Attach every package the analysis uses, installing any that are missing.
    # requireNamespace() only checks availability; library() then attaches the
    # package, so a freshly installed package is actually loaded. The earlier
    # `if (!require(pkg)) install.packages(pkg)` pattern installed a missing
    # package but never attached it.
    # The workspace is deliberately not cleared with rm(list = ls()): that call
    # removes the caller's objects but leaves attached packages and options in
    # place, so it does not give a clean session. Restart R instead.
    required_packages <- c("readxl", "ggplot2", "corrplot", "vcd", "vcdExtra")
    for (pkg in required_packages) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
      # character.only = TRUE makes library() use the string held in `pkg`.
      library(pkg, character.only = TRUE)
    }
#install.packages("readxl")
#install.packages("ggplot2")
# Step 1: Load the Data
library(readxl)
# Folder holding the lab workbook. It is kept in one variable so the script can
# be pointed at another copy of the data without calling setwd(), which would
# change the working directory for the rest of the session.
data_dir <- "D:/HU/Sem 4/545-91 Analytical Methods II/LABS/LAB 1"
survey_path <- file.path(data_dir, "SatisfactionSurveylab1v1.xlsx")
# Fail early with a clear message if the workbook is not where we expect it.
if (!file.exists(survey_path)) {
  stop("Survey workbook not found: ", survey_path, call. = FALSE)
}
surveydata <- read_excel(survey_path)
# View() opens the interactive data viewer, so only call it in an interactive
# session (not when the script is run in batch mode).
if (interactive()) {
  View(surveydata)
}

# Step 2: Data Diagnostics, Basic Univariate Analysis
# str() prints the object's classes, its dimensions, and each column's type
# together with its first few values.
str(surveydata)
## Classes 'tbl_df', 'tbl' and 'data.frame':    40 obs. of  7 variables:
##  $ How long have you  been at your current job?                   : chr  "1-3 years" "<  1 year" "<  1 year" "1-3 years" ...
##  $ Are you satisfied with your current work environment?          : num  5 4 3 4 5 3 4 5 3 4 ...
##  $ How satisfied are with your compensation and employee benefits?: num  4 2 3 3 5 3 4 4 2 3 ...
##  $ How long is your commute to work?                              : chr  "<10 minutes" "30 minutes-1 hour" "10-20 minutes" "10-20 minutes" ...
##  $ How supportive is your supervisor?                             : num  5 4 4 4 5 3 4 4 1 4 ...
##  $ Would you recommend your workplace to others?                  : num  5 3 4 4 5 3 4 4 4 5 ...
##  $ How was this survey?                                           : num  5 3 4 3 5 3 5 4 3 5 ...
# As can be seen, the survey consists of 7 questions and 40 observations were collected.

# summary() reports length/class/mode for the character columns and the
# minimum, quartiles, mean and maximum for the numeric columns.
# NOTE(review): the rating columns show a minimum of 0 - confirm whether 0 is a
# valid rating or a code for a missing answer.
summary(surveydata)
##  How long have you  been at your current job?
##  Length:40                                   
##  Class :character                            
##  Mode  :character                            
##                                              
##                                              
##                                              
##  Are you satisfied with your current work environment?
##  Min.   :0.000                                        
##  1st Qu.:1.000                                        
##  Median :3.000                                        
##  Mean   :2.975                                        
##  3rd Qu.:4.000                                        
##  Max.   :5.000                                        
##  How satisfied are with your compensation and employee benefits?
##  Min.   :0.00                                                   
##  1st Qu.:1.00                                                   
##  Median :2.00                                                   
##  Mean   :2.25                                                   
##  3rd Qu.:3.00                                                   
##  Max.   :5.00                                                   
##  How long is your commute to work? How supportive is your supervisor?
##  Length:40                         Min.   :0.00                      
##  Class :character                  1st Qu.:1.75                      
##  Mode  :character                  Median :4.00                      
##                                    Mean   :3.05                      
##                                    3rd Qu.:4.00                      
##                                    Max.   :5.00                      
##  Would you recommend your workplace to others? How was this survey?
##  Min.   :0.000                                 Min.   :0.00        
##  1st Qu.:1.000                                 1st Qu.:3.00        
##  Median :3.000                                 Median :4.00        
##  Mean   :2.875                                 Mean   :3.45        
##  3rd Qu.:4.250                                 3rd Qu.:5.00        
##  Max.   :5.000                                 Max.   :5.00
# The summary statistics provide a univariate analysis: they give the minimum, maximum, mean and quartiles of each numeric variable.

# Step 3: Distribution of Each Variable 
library(ggplot2)

# Draw a bar chart of response counts for one survey question.
#
# data:     data frame holding the survey responses.
# question: exact column name of the question; it is also used as the plot
#           title and the x-axis label.
# fill:     optional bar fill colour; NULL keeps ggplot2's default fill.
# Returns the ggplot object, which is printed automatically when the call is
# made at top level.
plot_question_counts <- function(data, question, fill = NULL) {
  # Only pass `fill` to geom_bar() when a colour was actually requested.
  bars <- if (is.null(fill)) {
    geom_bar(stat = "count")
  } else {
    geom_bar(stat = "count", fill = fill)
  }
  # .data[[question]] looks the column up inside `data`. Writing data$column
  # inside aes() bypasses the `data` argument and is discouraged by ggplot2.
  # No colour aesthetic is mapped, so no colour legend title is set.
  ggplot(data = data, aes(x = .data[[question]])) +
    bars +
    ggtitle(question) +
    labs(x = question, y = "Count")
}

plot_question_counts(
  surveydata,
  "How long have you  been at your current job?",
  fill = "#FF6666"
)

# Most of the observations fall in the 1-3 years and 3-5 years categories, from
# which we can conclude that the majority of the survey takers are experienced
# employees.

plot_question_counts(
  surveydata,
  "Are you satisfied with your current work environment?"
)

# The responses are spread across satisfied and not satisfied with the current
# work environment, with 45% of the responses on the satisfied side (rated 4
# or 5).

plot_question_counts(
  surveydata,
  "How satisfied are with your compensation and employee benefits?",
  fill = "#FF6666"
)

# The responses are spread across satisfied and not satisfied with
# compensation, with 65% of the responses on the not satisfied/neutral side
# (rated 1, 2 or 3).

plot_question_counts(
  surveydata,
  "How long is your commute to work?"
)

# Most of the observations fall between 10 minutes and 1 hour, from which we
# can conclude that the majority of the survey takers spend an average of
# 30 minutes on their commute to work.

plot_question_counts(
  surveydata,
  "How supportive is your supervisor?",
  fill = "#FF6666"
)

# The responses are spread across supportive and not supportive supervisors,
# with 55% of the responses on the supportive side (rated 4 or 5).

plot_question_counts(
  surveydata,
  "Would you recommend your workplace to others?"
)

# Most of the observations are rated 4 or 5, from which we can conclude that
# the majority of the survey takers would recommend their workplace to their
# friends/others.

plot_question_counts(
  surveydata,
  "How was this survey?",
  fill = "#FF6666"
)

# Most of the observations are rated 3, 4 or 5, from which we can conclude that
# the majority of the survey takers were satisfied/happy with the survey.


# Step 4: Correlation Matrix
# Correlate the numeric rating questions only.
# The job-tenure and commute questions are stored as text categories (e.g.
# "1-3 years", "<10 minutes"). as.numeric() cannot parse such labels and turns
# them into NA ("NAs introduced by coercion"), which both destroys the original
# answers and leaves NA rows/columns in the correlation matrix. Those two
# columns are therefore left untouched and excluded here. To include them,
# first map each category to an explicit ordinal rank.
# The rating columns are already numeric, so they need no conversion.
rating_columns <- vapply(surveydata, is.numeric, logical(1))
Corr <- cor(surveydata[, rating_columns])
#install.packages("corrplot")
library(corrplot)
# type = "lower" draws only the lower triangle of the symmetric matrix.
corrplot(Corr, type = "lower")

From the graph we can conclude that there is a strong correlation between the “Are you satisfied with your current work environment” and “How satisfied are you with your compensation and employee benefits” variables.
From the graph we can also conclude that there is a strong correlation between the “Are you satisfied with your current work environment” and “How supportive is your supervisor” variables.
From the graph we can also conclude that there is a strong correlation between the “How supportive is your supervisor” and “How satisfied are you with your compensation and employee benefits” variables.

Step 5: Conclusion/Observation

The majority of the survey takers are experienced employees.
The survey takers are spread across satisfied and not satisfied with the current work environment.
The survey takers are spread across satisfied and not satisfied with compensation.
The majority of the survey takers spend an average of 30 minutes on their commute to work.
The survey takers are spread across supportive and not supportive supervisors.
The majority of the survey takers would recommend their workplace to their friends/others.
The majority of the survey takers were satisfied/happy with the survey.
A strong correlation is seen between the “Are you satisfied with your current work environment” and “Compensation and employee benefits” variables.
A strong correlation is seen between the “Are you satisfied with your current work environment” and “How supportive is your supervisor” variables.
A strong correlation is seen between the “How supportive is your supervisor” and “Compensation and employee benefits” variables.