# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Example dataset: 1000 simulated respondents described by three
# categorical attributes (Gender, Location, Profession).
set.seed(123)  # Fixed seed so the simulated sample is reproducible
data <- local({
  n_obs <- 1000

  # Draw n_obs labels with replacement, weighting each category by `weights`.
  draw <- function(categories, weights) {
    sample(categories, size = n_obs, replace = TRUE, prob = weights)
  }

  # The draws run in the order Gender, Location, Profession so the
  # random-number stream is consumed in the same sequence on every run.
  gender <- draw(
    c("Male", "Female", "Other"),
    c(0.51, 0.32, 0.17)
  )

  # NOTE(review): these weights add up to 1.01 rather than 1; sample() accepts
  # weights that do not sum to one, so the realised shares differ slightly
  # from the listed values.
  location <- draw(
    c("India", "United States", "Vietnam", "Brazil", "Philippines",
      "Indonesia", "Pakistan", "Japan", "Mexico", "Germany"),
    c(0.23, 0.17, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.07, 0.06)
  )

  # NOTE(review): these weights add up to 0.99 rather than 1 (see above).
  profession <- draw(
    c("Students", "Waiting Staff", "Labor/Worker", "Driver", "Engineer",
      "Cashier", "Manager", "Artist", "Teacher"),
    c(0.25, 0.19, 0.18, 0.11, 0.06, 0.06, 0.05, 0.05, 0.04)
  )

  data.frame(Gender = gender, Location = location, Profession = profession)
})

# View data: show the first six rows as a quick sanity check
head(data, n = 6L)
##   Gender      Location   Profession
## 1   Male United States     Students
## 2 Female     Indonesia     Students
## 3   Male         India     Students
## 4  Other         Japan Labor/Worker
## 5  Other         Japan Labor/Worker
## 6   Male   Philippines Labor/Worker
# Frequency table for Gender
# One row per Gender level with its row count and its share of all rows (in
# percent, rounded to two decimals). summarise() drops the single grouping
# level, so prop.table() below is computed over the whole Count column.
gender_count <- data %>%
  group_by(Gender) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(prop.table(Count) * 100, 2))
# Bar Chart for Gender
# One bar per Gender level; bar heights come straight from the precomputed
# Count column, so no counting statistic is applied by the plot itself.
gender_count %>%
  ggplot(aes(Gender, Count, fill = Gender)) +
  geom_col() +
  labs(title = "Frequency of Gender", x = "Gender", y = "Count") +
  theme_minimal()

# Donut Chart for Gender
# Build the slice labels, e.g. "Male (51.2%)", from the frequency table.
gender_count <- gender_count %>%
  mutate(Label = paste0(Gender, " (", Percentage, "%)"))

# A donut is a single stacked bar bent into a ring by coord_polar(). The bar
# is placed at x = 2 (covering 1.5-2.5 with width = 1) and the x axis is
# extended down to 0.5, so the empty range 0.5-1.5 becomes the hole in the
# middle. Without that empty range the plot would be a solid pie chart.
ggplot(data = gender_count, aes(x = 2, y = Percentage, fill = Gender)) +
  geom_col(width = 1, color = "white") +
  # Centre each label within its own slice of the stacked bar.
  geom_text(aes(label = Label), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) +
  theme_void() +
  labs(title = "Frequency of Gender")

# Frequency table for Location
# One row per Location level with its row count and its share of all rows (in
# percent, rounded to two decimals). summarise() drops the single grouping
# level, so prop.table() below is computed over the whole Count column.
location_count <- data %>%
  group_by(Location) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(prop.table(Count) * 100, 2))
# Frequency table for Profession
# One row per Profession level with its row count and its share of all rows
# (in percent, rounded to two decimals). summarise() drops the single grouping
# level, so prop.table() below is computed over the whole Count column.
profession_count <- data %>%
  group_by(Profession) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(prop.table(Count) * 100, 2))
# Bar Chart for Profession
# One bar per Profession level; bar heights come straight from the precomputed
# Count column, so no counting statistic is applied by the plot itself.
profession_count %>%
  ggplot(aes(Profession, Count, fill = Profession)) +
  geom_col() +
  labs(title = "Frequency of Profession", x = "Profession", y = "Count") +
  theme_minimal() +
  # Applied after theme_minimal() so the rotation is not reset by it; the
  # category names are slanted to keep them from overlapping.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Donut Chart for Profession
# Build the slice labels, e.g. "Students (25.1%)", from the frequency table.
profession_count <- profession_count %>%
  mutate(Label = paste0(Profession, " (", Percentage, "%)"))

# A donut is a single stacked bar bent into a ring by coord_polar(). The bar
# is placed at x = 2 (covering 1.5-2.5 with width = 1) and the x axis is
# extended down to 0.5, so the empty range 0.5-1.5 becomes the hole in the
# middle. Without that empty range the plot would be a solid pie chart.
ggplot(data = profession_count, aes(x = 2, y = Percentage, fill = Profession)) +
  geom_col(width = 1, color = "white") +
  # Centre each label within its own slice of the stacked bar.
  geom_text(aes(label = Label), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) +
  theme_void() +
  labs(title = "Frequency of Profession")