# Employee records: one vector per column, assembled into a data frame below.
emp_id <- c(101, 102, 103, 104, 105, 106)
emp_name <- c("john", "anna", "mary", "ryan", "peter", "ruth")
gender <- c("male", "female", "female", "male", "male", "female")
salary <- c(65000, 80000, 54000, 75000, 95000, 56000)
# Start dates are written as ISO strings and parsed into Date values.
start_date <- as.Date(
  c(
    "2022-01-01", "2023-09-23", "2024-11-22",
    "2022-05-11", "2019-03-27", "2020-03-02"
  ),
  format = "%Y-%m-%d"
)
# The argument order here fixes the column order of the resulting table.
emp.data <- data.frame(
  emp_id = emp_id,
  emp_name = emp_name,
  salary = salary,
  start_date = start_date,
  gender = gender
)
# Display the full table.
print(emp.data)
## emp_id emp_name salary start_date gender
## 1 101 john 65000 2022-01-01 male
## 2 102 anna 80000 2023-09-23 female
## 3 103 mary 54000 2024-11-22 female
## 4 104 ryan 75000 2022-05-11 male
## 5 105 peter 95000 2019-03-27 male
## 6 106 ruth 56000 2020-03-02 female
# Per-column summary of emp.data, printed explicitly.
print(summary(emp.data))
## emp_id emp_name salary start_date
## Min. :101.0 Length:6 Min. :54000 Min. :2019-03-27
## 1st Qu.:102.2 Class :character 1st Qu.:58250 1st Qu.:2020-08-16
## Median :103.5 Mode :character Median :70000 Median :2022-03-07
## Mean :103.5 Mean :70833 Mean :2022-01-23
## 3rd Qu.:104.8 3rd Qu.:78750 3rd Qu.:2023-05-21
## Max. :106.0 Max. :95000 Max. :2024-11-22
## gender
## Length:6
## Class :character
## Mode :character
##
##
##
# Structure of the data frame: its dimensions plus each column's type and
# first few values.
str(emp.data)
## 'data.frame': 6 obs. of 5 variables:
## $ emp_id : num 101 102 103 104 105 106
## $ emp_name : chr "john" "anna" "mary" "ryan" ...
## $ salary : num 65000 80000 54000 75000 95000 56000
## $ start_date: Date, format: "2022-01-01" "2023-09-23" ...
## $ gender : chr "male" "female" "female" "male" ...
# High earners: employees whose salary is at least 75000.
# which() turns the comparison into row positions and drops NA results, so a
# record with a missing salary is skipped; indexing with the bare logical mask
# would instead insert an all-NA row for every missing salary.
high_earners <- emp.data[which(emp.data$salary >= 75000), ]
print(high_earners)
## emp_id emp_name salary start_date gender
## 2 102 anna 80000 2023-09-23 female
## 4 104 ryan 75000 2022-05-11 male
## 5 105 peter 95000 2019-03-27 male
# Employees sorted from the highest salary to the lowest.
emp.data_sorted_desc <- emp.data[
  order(emp.data$salary, decreasing = TRUE),
]
print(emp.data_sorted_desc)
## emp_id emp_name salary start_date gender
## 5 105 peter 95000 2019-03-27 male
## 2 102 anna 80000 2023-09-23 female
## 4 104 ryan 75000 2022-05-11 male
## 1 101 john 65000 2022-01-01 male
## 6 106 ruth 56000 2020-03-02 female
## 3 103 mary 54000 2024-11-22 female
# Bar chart of employee salaries: one bar per employee, bar height = salary.
library(ggplot2)
# The plot is wrapped in print() so it is also drawn when the script is run
# through source(), where top-level values are not auto-printed.
# geom_col() draws bars whose heights are the y values themselves.
print(
  ggplot(emp.data, aes(x = emp_name, y = salary)) +
    geom_col(fill = "steelblue") +
    theme_minimal() +
    labs(title = "Employee Salaries", x = "Employee Name", y = "Salary")
)
