How to access a column in R: We have a survey dataset containing multiple columns of information. We are interested in isolating the satisfaction scores of the survey:

So, we need to extract the data from the Satisfaction column:

# Question 1 & 2
# Q1: Mean and Median of Satisfaction Ratings
# Q2: Find the average age of someone working in HR

# Pulls the Satisfaction column out of the survey data as a vector named "Sat"
Sat <- SatisfactionSurvey$Satisfaction

# Ages of the respondents whose Department is "HR"
# (a missing Department yields an NA entry here, handled by na.rm below)
HR_Ages <- SatisfactionSurvey$Age[SatisfactionSurvey$Department == "HR"]

# Average age of those in the HR department
# na.rm = TRUE matches the later sections, so a single missing value
# cannot turn the whole summary into NA
mean(HR_Ages, na.rm = TRUE)
## [1] 30
# Calculates the mean and median of the Satisfaction variable
mean(Sat, na.rm = TRUE)
## [1] 6.75
median(Sat, na.rm = TRUE)
## [1] 7
# Creates a histogram and boxplot displaying Satisfaction scores
hist(Sat,
     col = rainbow(5),
     border = "grey",
     main = "Satisfaction Distribution",
     xlab = "Satisfaction Score"
)

boxplot(Sat,
        col = "lightsteelblue3",
        border = "black",
        main = "Boxplot of Satisfaction",
        ylab = "Satisfaction Score"
)

Converting columns within the data set into matrices:

Determining the average satisfaction rating of each age group.

# Question 3

# Keeps the full survey data frame under the name "Sat"
Sat <- SatisfactionSurvey

# Matrix holding only the Age and Satisfaction columns
Age_Sat_Matrix <- as.matrix(Sat[, c("Age", "Satisfaction")])

# Mean satisfaction for each distinct age, ignoring missing scores
Age_Mean_Sat <- tapply(
  Age_Sat_Matrix[, "Satisfaction"],
  Age_Sat_Matrix[, "Age"],
  mean,
  na.rm = TRUE
)

# Builds a data frame from the per-age means; check.names = FALSE keeps the
# "Mean Satisfaction" header as written instead of converting it to a
# syntactic name
Age_Mean_Sat_df <- data.frame(
  Age = as.numeric(names(Age_Mean_Sat)),
  "Mean Satisfaction" = as.numeric(Age_Mean_Sat),
  check.names = FALSE
)

# Displays data frame
Age_Mean_Sat_df
##    Age Mean Satisfaction
## 1   22               7.0
## 2   25               8.0
## 3   26               5.0
## 4   27               7.0
## 5   28               6.5
## 6   29               8.0
## 7   30               9.0
## 8   31               7.0
## 9   33               8.0
## 10  34               6.0
## 11  35               6.0
## 12  38               6.0
## 13  39               7.0
## 14  40               4.0
## 15  42               8.0
## 16  45               5.0
## 17  46               7.0
## 18  50               7.0

Creating a matrix sorting named elements and a numeric value:

Determining the average satisfaction score per department:

# Question 4

# Works on the full survey data frame
Sat <- SatisfactionSurvey

# Average satisfaction for each department, ignoring missing scores
Department_Mean_Sat <- with(
  Sat,
  tapply(Satisfaction, Department, mean, na.rm = TRUE)
)

# One-row matrix: a column per department, labelled with the department name
Department_Sat_Matrix <- matrix(Department_Mean_Sat, nrow = 1)
colnames(Department_Sat_Matrix) <- names(Department_Mean_Sat)

# Flips the matrix so each department becomes a row
Department_Sat_Matrix_T <- t(Department_Sat_Matrix)

# Labels the single value column
colnames(Department_Sat_Matrix_T) <- "Mean Satisfaction"

# View Matrix
Department_Sat_Matrix_T
##           Mean Satisfaction
## Finance                 6.4
## HR                      7.8
## IT                      6.4
## Marketing               6.4

Analyzing Data frames:

Summarizing the Gender and Department columns:

Filtering the Data frame to only measure the Satisfaction Scores of those aged 30-40:

# Question 5 & 6

# Works on the full survey data frame
Sat <- SatisfactionSurvey

# Size of the data set: both dimensions at once, then rows and columns
dim(Sat)
## [1] 20  5
nrow(Sat)
## [1] 20
ncol(Sat)
## [1] 5
# Turns the two categorical columns into factors in a single step
Sat[c("Gender", "Department")] <- lapply(
  Sat[c("Gender", "Department")],
  as.factor
)

# Counts per level for each factor column
message("Demographics: Gender and Department Demographics")
## Demographics: Gender and Department Demographics
summary(Sat$Gender)
##     Female       Male Non-binary 
##          8          8          4
summary(Sat$Department)
##   Finance        HR        IT Marketing 
##         5         5         5         5
# Keeps only respondents aged 30 to 40 (inclusive) and averages their scores
message("Average Satisfaction for individuals aged 30-40")
## Average Satisfaction for individuals aged 30-40
Filtered_Sat <- Sat[with(Sat, Age >= 30 & Age <= 40), ]
mean(Filtered_Sat$Satisfaction, na.rm = TRUE)
## [1] 6.666667

Histogram Distribution:

The range of different columns within the data set (Age, Satisfaction)

# Question 7 & 8

library(RColorBrewer)

# Works on the full survey data frame
Sat <- SatisfactionSurvey

# Histogram of respondent ages, bars coloured with the 8-colour "Set2" palette
hist(
  Sat[["Age"]],
  main = "Employee Age Distribution",
  xlab = "Age",
  col = brewer.pal(8, "Set2"),
  border = "grey"
)

# Histogram of satisfaction scores, bars coloured with the 7-colour "RdYlGn"
# palette
hist(
  Sat[["Satisfaction"]],
  main = "Satisfaction Distribution",
  xlab = "Satisfaction Scores",
  col = brewer.pal(7, "RdYlGn"),
  border = "grey"
)

Distribution analysis through box plots:

Assessing data spread through box plot, determining IQR to identify outliers.

# Question 9-12

# Keeps full data set
Sat <- SatisfactionSurvey

# Returns the quartiles and the 1.5 * IQR outlier fences of a numeric vector
# as a named vector (Q1, Q3, Lower, Upper); missing values are ignored.
# quantile() is called with its default method, the same one IQR() uses, so
# Q3 - Q1 here equals the IQR printed below.
# NOTE(review): boxplot() draws its box from hinges (see boxplot.stats), which
# can differ slightly from these quartiles for small samples.
outlier_bounds <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  c(
    Q1 = quartiles[1],
    Q3 = quartiles[2],
    Lower = quartiles[1] - 1.5 * spread,
    Upper = quartiles[2] + 1.5 * spread
  )
}

# Satisfaction across Departments
boxplot(Satisfaction ~ Department,
        data = Sat,
        col = c("orchid3", "plum4", "lightpink3", "lightsteelblue"),
        border = "grey",
        main = "Satisfaction Ratings by Department",
        xlab = "Department",
        ylab = "Satisfaction Score"
)

# Age Distribution across Gender
# (bare column names: the formula is already evaluated inside data = Sat)
boxplot(Age ~ Gender,
        data = Sat,
        col = c("pink2", "skyblue2", "springgreen2"),
        border = "grey",
        main = "Different Age Range by Gender",
        xlab = "Gender",
        ylab = "Ages"
)

# Overall Satisfaction Scores
boxplot(Sat$Satisfaction,
        col = "lightsteelblue3",
        border = "black",
        main = "Boxplot of Satisfaction",
        ylab = "Satisfaction Score"
)

# Explaining findings- overall satisfaction
# Q1, Q3 and the fences are computed from the data instead of being typed in
# by hand, so they stay correct if the survey data changes. Values outside
# the fences are the ones treated as outliers. Re-knit to regenerate the
# printed output of this cat() call.
message("IQR and Outliers")
## IQR and Outliers
Sat_Bounds <- outlier_bounds(Sat$Satisfaction)
cat("Satisfaction Scores:",
    "\nIQR:", IQR(Sat$Satisfaction, na.rm = TRUE),
    "\nQ1:", Sat_Bounds[["Q1"]], "Q3:", Sat_Bounds[["Q3"]],
    "\nOutlier fences (Q1 - 1.5 * IQR to Q3 + 1.5 * IQR):",
    Sat_Bounds[["Lower"]], "to", Sat_Bounds[["Upper"]],
    "\n")
# Explaining Satisfaction per Department
cat("\nSatisfaction Scores per Department:",
    "\nIQR per Department:\n")
## 
## Satisfaction Scores per Department: 
## IQR per Department:
# breakdown per department
tapply(Sat$Satisfaction, Sat$Department, IQR, na.rm = TRUE)
##   Finance        HR        IT Marketing 
##         1         1         1         2
# One row per department with its quartiles and outlier fences, computed from
# the data rather than hard-coded. Re-knit to regenerate the printed table.
cat("Quartiles and outlier fences per Department:\n")
do.call(
  rbind,
  lapply(split(Sat$Satisfaction, Sat$Department), outlier_bounds)
)