# This document explores factors related to unemployment fluctuations in Malaysia.
# Load necessary libraries
# Set CRAN mirror
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install and load necessary packages
if (!requireNamespace("GGally", quietly = TRUE)) {
install.packages("GGally")
}
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(GGally)
## Loading required package: ggplot2
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# ---- Data import ----
# Read the yearly indicator table from CSV into a data frame.
# NOTE(review): the file name is spelled "Malasiya" -- confirm that this
# matches the file on disk before "correcting" it.
malaysia_data <- read.csv(file = "Malasiya_Data.csv", header = TRUE)
# Show each column's name, type and first few values
str(object = malaysia_data)
## 'data.frame': 21 obs. of 16 variables:
## $ Year : int 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 ...
## $ Population : num 24030 24542 25038 25542 26046 ...
## $ Labour_force : num 9699 9886 10240 10346 10413 ...
## $ Employed : num 9357 9543 9870 9980 10045 ...
## $ Working_less_than_30_hours : num 525 517 529 599 484 ...
## $ Unemployed : num 342 344 370 367 368 ...
## $ Outside_labour._force : num 5240 5474 5459 5730 6048 ...
## $ Labour_force_participation.rate: num 64.9 64.4 65.2 64.4 63.3 63.1 63.2 62.6 62.9 63.7 ...
## $ Unemployment_rate : num 3.5 3.5 3.6 3.5 3.5 3.3 3.2 3.3 3.7 3.3 ...
## $ GDP : num 0.518 5.391 5.788 6.783 5.332 ...
## $ Trade : num 203 199 194 210 204 ...
## $ FDI_inflows : num 0.597 3.166 2.921 3.508 2.734 ...
## $ FDI_outflows : num 0.288 1.878 1.919 1.453 2.042 ...
## $ net_migration : int 198361 224999 226003 244359 245376 251101 247032 217538 194571 114491 ...
## $ Inflation_Rate : num 1.42 1.81 1.09 1.42 2.98 ...
## $ Dollar_Exchange_rate : num 3.8 3.8 3.8 3.8 3.79 ...
# Per-column descriptive statistics (min, quartiles, mean, max)
summary(object = malaysia_data)
## Year Population Labour_force Employed
## Min. :2001 Min. :24030 Min. : 9699 Min. : 9357
## 1st Qu.:2006 1st Qu.:26550 1st Qu.:10629 1st Qu.:10275
## Median :2011 Median :29062 Median :12741 Median :12352
## Mean :2011 Mean :28922 Mean :12736 Mean :12296
## 3rd Qu.:2016 3rd Qu.:31634 3rd Qu.:14668 3rd Qu.:14164
## Max. :2021 Max. :32581 Max. :15797 Max. :15074
## Working_less_than_30_hours Unemployed Outside_labour._force
## Min. :337.6 Min. :342.4 Min. :5240
## 1st Qu.:471.9 1st Qu.:368.1 1st Qu.:6205
## Median :511.3 Median :404.4 Median :6821
## Mean :506.0 Mean :439.9 Mean :6565
## 3rd Qu.:534.8 3rd Qu.:503.3 3rd Qu.:7023
## Max. :645.8 Max. :733.0 Max. :7226
## Labour_force_participation.rate Unemployment_rate GDP
## Min. :62.60 Min. :2.900 Min. :-5.534
## 1st Qu.:63.70 1st Qu.:3.200 1st Qu.: 4.450
## Median :65.20 Median :3.300 Median : 5.294
## Mean :65.73 Mean :3.433 Mean : 4.289
## 3rd Qu.:67.90 3rd Qu.:3.500 3rd Qu.: 5.788
## Max. :68.70 Max. :4.600 Max. : 7.425
## Trade FDI_inflows FDI_outflows net_migration
## Min. :116.8 Min. :0.05669 Min. :0.2876 Min. : 46165
## 1st Qu.:131.4 1st Qu.:2.73439 1st Qu.:1.8783 1st Qu.: 80603
## Median :154.9 Median :3.16612 Median :3.3517 Median :114491
## Mean :160.9 Mean :3.17221 Mean :3.4016 Mean :146246
## 3rd Qu.:194.2 3rd Qu.:4.26865 3rd Qu.:4.7505 3rd Qu.:224999
## Max. :210.4 Max. :5.42793 Max. :6.6721 Max. :251101
## Inflation_Rate Dollar_Exchange_rate
## Min. :-1.139 Min. :3.058
## 1st Qu.: 1.417 1st Qu.:3.331
## Median : 2.027 Median :3.800
## Mean : 2.049 Mean :3.695
## 3rd Qu.: 2.975 3rd Qu.:4.033
## Max. : 5.441 Max. :4.298
# Count the NA entries in every column of the data frame
missing_values <- vapply(
  malaysia_data,
  function(column) sum(is.na(column)),
  numeric(1)
)
# Report only those columns that contain at least one NA
cat("Columns with missing data:\n")
## Columns with missing data:
print(missing_values[which(missing_values > 0)])
## named numeric(0)
# Columns to standardise (centred on the mean, divided by the standard
# deviation) so that series with very different magnitudes can share an axis
columns_to_scale <- c(
  "Unemployment_rate", "GDP", "Trade", "FDI_inflows", "FDI_outflows",
  "net_migration", "Inflation_Rate", "Dollar_Exchange_rate"
)
# Work on a copy so the raw values in malaysia_data stay available
malaysia_data_scaled <- malaysia_data
malaysia_data_scaled[columns_to_scale] <- lapply(
  malaysia_data[columns_to_scale],
  function(column) as.numeric(scale(column))
)
# Plot the unemployment rate over time.
# The unscaled data frame is used here so that the y-axis shows the actual
# rate in percent; the standardised copy would display z-scores under an
# "Unemployment Rate" label, which is misleading for a stand-alone trend plot.
ggplot(malaysia_data, aes(x = Year, y = Unemployment_rate)) +
  geom_line() +
  labs(
    title = "Unemployment Trends Over Time",
    x = "Year",
    y = "Unemployment Rate (%)"
  )
# Variables entering the correlation analysis: the unemployment rate plus the
# seven candidate explanatory factors
correlation_data <- malaysia_data_scaled[
  c(
    "Unemployment_rate", "GDP", "Trade", "FDI_inflows", "FDI_outflows",
    "net_migration", "Inflation_Rate", "Dollar_Exchange_rate"
  )
]
# Pairwise correlations between all selected variables (cor() defaults)
correlation_matrix <- cor(x = correlation_data)
# Plot the correlation matrix as a heatmap.
# as.table() followed by as.data.frame() reshapes the matrix into long form,
# one row per variable pair: Var1 and Var2 hold the variable names and Freq
# holds the correlation value.
ggplot(
  data = as.data.frame(as.table(correlation_matrix)),
  mapping = aes(x = Var1, y = Var2, fill = Freq)
) +
  geom_tile() +
  # Diverging palette centred on zero: blue = negative, red = positive
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  labs(
    title = "Correlation Heatmap of Factors and Unemployment",
    x = "Factors",
    y = "Factors"
  ) +
  # theme_minimal() is a complete theme and replaces every theme setting added
  # before it, so it has to come first; placing it after the axis.text.x tweak
  # silently resets the rotated x-axis labels.
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  )
# Columns included in the pairwise plot matrix
selected_columns <- c(
  "Unemployment_rate", "GDP", "Trade", "FDI_inflows", "FDI_outflows",
  "net_migration", "Inflation_Rate", "Dollar_Exchange_rate"
)
# Draw the GGally pairs plot for the selected (standardised) variables
ggpairs(
  data = malaysia_data_scaled,
  columns = selected_columns,
  title = "Pair Plot of Selected Variables"
)
# Correlation of every selected variable with the unemployment rate.
# correlation_matrix was already computed from correlation_data earlier in the
# script and the data have not changed since, so its "Unemployment_rate" row
# is extracted directly instead of calling cor() a second time.
unemployment_correlations <- correlation_matrix["Unemployment_rate", ]
# Display correlations (the first entry is the self-correlation, always 1)
unemployment_correlations
## Unemployment_rate GDP Trade
## 1.0000000 -0.6512398 -0.1202328
## FDI_inflows FDI_outflows net_migration
## -0.1438791 -0.4456116 -0.1420961
## Inflation_Rate Dollar_Exchange_rate
## -0.3960882 0.5711119
# Factors to compare against the unemployment rate, one plot per factor
factors_of_interest <- c(
  "GDP", "Trade", "FDI_inflows", "FDI_outflows",
  "net_migration", "Inflation_Rate", "Dollar_Exchange_rate"
)

# Build a time-series plot of the unemployment rate and one factor.
#
# factor_name: name of the column in `data` to draw next to the
#              unemployment rate.
# data:        data frame with Year, Unemployment_rate and the factor column;
#              defaults to the standardised data so both series share a scale.
# Returns a ggplot object.
plot_factor_vs_unemployment <- function(factor_name,
                                        data = malaysia_data_scaled) {
  unemployment_label <- "Unemployment rate"
  series_labels <- c(unemployment_label, factor_name)
  series_colours <- stats::setNames(c("red", "blue"), series_labels)
  series_linetypes <- stats::setNames(c("solid", "dashed"), series_labels)

  # Colour and linetype are mapped inside aes() so that ggplot2 draws a
  # legend; with constant colours outside aes() no legend is produced and the
  # two lines cannot be told apart.
  ggplot(data, aes(x = Year)) +
    geom_line(
      aes(
        y = Unemployment_rate,
        colour = unemployment_label,
        linetype = unemployment_label
      )
    ) +
    geom_line(
      aes(
        y = .data[[factor_name]],
        colour = factor_name,
        linetype = factor_name
      )
    ) +
    # Identical name/breaks on both scales make ggplot2 merge them into a
    # single legend
    scale_colour_manual(
      name = NULL, values = series_colours, breaks = series_labels
    ) +
    scale_linetype_manual(
      name = NULL, values = series_linetypes, breaks = series_labels
    ) +
    labs(
      title = paste("Unemployment Rate and", factor_name, "Over Time"),
      x = "Year",
      y = "Scaled Value"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5),
      legend.position = "bottom",
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(color = "black")
    )
}

# One plot per factor, stored in a list named by factor
plots_list <- lapply(factors_of_interest, plot_factor_vs_unemployment)
names(plots_list) <- factors_of_interest
# Display the list of plots
plots_list
## $GDP
##
## $Trade
##
## $FDI_inflows
##
## $FDI_outflows
##
## $net_migration
##
## $Inflation_Rate
##
## $Dollar_Exchange_rate