Prompt: [insert only the number of the prompt you chose to answer for this assignment; do not copy/paste the text of the prompt here]

ChatGPT/AI disclosure statement: [describe whether and how you used ChatGPT or other AI tools for this assignment. If you did not, please write “I did not use ChatGPT or other AI for this assignment.”]

1. Female Labor Force Participation Rate (% of female population aged 15 and above)

The global median female labor force participation rate is 52.3%, with a staggering gap of nearly 78 percentage points between Solomon Islands (highest, 82.7%) and Afghanistan (lowest, 4.8%). On average, women's participation rate is 20.3 percentage points lower than men's, with a median gap of 15.75 percentage points.

The distribution of the female–male gap is heavily left-skewed, highlighting the significant barriers women face in the labor market and calling for urgent corrective measures.

library(httr)
library(jsonlite)
library(dplyr)
library(janitor)
library(rnaturalearth)
library(sf)
library(dplyr)
library(classInt)
library(ggplot2)
library(viridis)
library(tidyverse)
library(kableExtra)
library(ggthemes)

## Code for retrieving labor force participation data

# World Bank API v2 request: labor force participation indicators (total,
# female, male; all ages and ages 15-24; modelled and national estimates)
# for every economy, taken from source 14.
base_url_flfpr <- "https://api.worldbank.org/v2/country/all/indicator/SL.TLF.CACT.FE.ZS;SL.TLF.CACT.FE.NE.ZS;SL.TLF.CACT.MA.ZS;SL.TLF.CACT.MA.NE.ZS;SL.TLF.CACT.ZS;SL.TLF.CACT.NE.ZS;SL.TLF.ACTI.1524.FE.ZS;SL.TLF.ACTI.1524.FE.NE.ZS;SL.TLF.ACTI.1524.MA.ZS;SL.TLF.ACTI.1524.MA.NE.ZS;SL.TLF.ACTI.1524.ZS;SL.TLF.ACTI.1524.NE.ZS;SL.TLF.ACTI.FE.ZS;SL.TLF.ACTI.MA.ZS;SL.TLF.ACTI.ZS?source=14"
# Query-string suffix shared by every page request (JSON, 10000 rows per page).
params_flfpr <- "&format=json&per_page=10000"

# Fetch the first page to determine the total number of pages. The first
# element of the parsed response holds the paging metadata.
first_page_flfpr <- paste0(base_url_flfpr, params_flfpr, "&page=1") %>%
  GET() %>%
  # Fail immediately on an HTTP error instead of trying to parse an error page.
  stop_for_status() %>%
  # Explicit encoding avoids httr's "No encoding supplied" guess and message.
  content(as = "text", encoding = "UTF-8") %>%
  fromJSON()
total_pages_flfpr <- first_page_flfpr[[1]]$pages

# Fetch a single page of the World Bank response.
#
# page_flfpr: page number to request (appended as "&page=<n>").
# Returns the second element of the parsed JSON response, i.e. the page's
# observations (the first element is the paging metadata).
# Relies on the globals `base_url_flfpr` and `params_flfpr`.
# Stops with an error if the HTTP request fails.
fetch_data_flfpr <- function(page_flfpr) {
  url_flfpr <- paste0(base_url_flfpr, params_flfpr, "&page=", page_flfpr)
  response_flfpr <- GET(url_flfpr)
  # Surface HTTP errors here rather than as an obscure JSON parsing failure.
  stop_for_status(response_flfpr)
  # Explicit encoding avoids httr's "No encoding supplied" guess and message.
  body_flfpr <- content(response_flfpr, as = "text", encoding = "UTF-8")
  fromJSON(body_flfpr)[[2]]
}

# Fetch every page, stack the pages into one data frame, and drop
# observations that have no value. seq_len() is used instead of 1:n so that
# a page count of zero yields no requests rather than pages 1 and 0.
all_data_flfpr <- lapply(seq_len(total_pages_flfpr), fetch_data_flfpr) %>%
  bind_rows() %>%
  filter(!is.na(value))

# Copy of the raw data with janitor-standardized (snake_case) column names.
cleaned_data_flfpr <- all_data_flfpr %>%
  clean_names()

# Flatten the nested `indicator` and `country` data-frame columns into plain
# columns. The explicit select() keeps only the flat columns, so the nested
# ones are dropped without a separate select(-indicator, -country) step.
flatten_data_flfpr <- all_data_flfpr %>%
  mutate(
    indicator_id = indicator$id,
    indicator_value = indicator$value,
    country_id = country$id,
    country_name = country$value
  ) %>%
  select(
    country_id, country_name, countryiso3code, date, value, unit,
    obs_status, decimal, indicator_id, indicator_value
  ) %>%
  clean_names()  # Standardize column names using janitor



# Build one row per country with the latest female and male labor force
# participation rates (modelled estimates SL.TLF.CACT.FE.ZS / SL.TLF.CACT.MA.ZS).

# The indicator endpoint returns regional/income aggregates (e.g. "Arab World")
# alongside real countries. The World Bank country metadata endpoint labels
# those with region "Aggregates"; collect their 2-character codes so they can
# be excluded from country-level rankings.
wb_country_meta_flfpr <- GET(
  "https://api.worldbank.org/v2/country?format=json&per_page=1000"
) %>%
  stop_for_status() %>%
  content(as = "text", encoding = "UTF-8") %>%
  fromJSON()
wb_country_meta_flfpr <- wb_country_meta_flfpr[[2]]
# %in% never yields NA, so entries with a missing region are kept as countries.
aggregate_ids_flfpr <- wb_country_meta_flfpr$iso2Code[
  wb_country_meta_flfpr$region$value %in% "Aggregates"
]

map_flfpr <- flatten_data_flfpr %>%
  # Narrow to the two series of interest BEFORE picking the latest year, so a
  # later observation of some other indicator cannot make a country vanish.
  filter(
    indicator_id %in% c("SL.TLF.CACT.FE.ZS", "SL.TLF.CACT.MA.ZS"),
    !country_id %in% aggregate_ids_flfpr
  ) %>%
  mutate(
    date = as.numeric(date),
    indicator_id = case_when(
      indicator_id == "SL.TLF.CACT.FE.ZS" ~ "Female",
      indicator_id == "SL.TLF.CACT.MA.ZS" ~ "Male"
    )
  ) %>%
  # Keep each country's most recent year only.
  group_by(country_name) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  select(country_name, value, indicator_id) %>%
  # One column per sex: `Female` and `Male`.
  pivot_wider(names_from = indicator_id, values_from = value) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))


# Observations for four selected economies, restricted to 1990-2023 inclusive.
time_series <- filter(
  flatten_data_flfpr,
  country_name %in% c("United Kingdom", "United States", "China", "India"),
  date %in% c(1990:2023)
)


# Rank every row by female participation rate (1 = highest; ties share a rank).
map_flfpr <- mutate(map_flfpr, Rank = dense_rank(desc(Female)))

# The twenty highest female participation rates, highest first.
top_20 <- map_flfpr %>%
  arrange(desc(Female)) %>%
  head(20) %>%
  mutate(Cat = "Top 20")

# The twenty lowest female participation rates, lowest first.
bottom_20 <- map_flfpr %>%
  arrange(Female) %>%
  head(20) %>%
  mutate(Cat = "Bottom 20")

# Stack both groups and keep only the columns shown in the table.
merged_data <- bind_rows(top_20, bottom_20) %>%
  select(country_name, Female, Cat, Rank)


# Female-minus-male participation gap in percentage points (negative values
# mean women participate less than men), ranked from largest to smallest.
difference <- map_flfpr %>%
  mutate(Difference = Female - Male) %>%
  select(country_name, Difference) %>%
  mutate(Rank = dense_rank(desc(Difference)))

# Summary statistics of the gap. na.rm = TRUE keeps a single row that lacks
# either the female or the male rate from turning both statistics into NA.
mean_value <- mean(difference$Difference, na.rm = TRUE)
median_value <- median(difference$Difference, na.rm = TRUE)

# Render the top/bottom-20 table as a striped HTML table with a spanning
# header over the three data columns and a source footnote.
merged_data %>%
  kable(
    format = "html",
    caption = "Female Labor Force Participation Rate",
    col.names = c("Country", "Female (%)", "Category", "Rank")
  ) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(
    header = c(
      " " = 1,
      "Huge difference between top 1 and bottom most country" = 3
    )
  ) %>%
  footnote(
    general = "Data source: World Bank",
    general_title = "Twenty countries with highest and lowest female Labor Force Participation Rate (%)"
  )
Female Labor Force Participation Rate
Huge difference between top 1 and bottom most country
Country Female (%) Category Rank
Solomon Islands 82.73 Top 20 1
Madagascar 82.58 Top 20 2
Burundi 78.78 Top 20 3
Mozambique 78.42 Top 20 4
Tanzania 77.14 Top 20 5
Ethiopia 74.75 Top 20 6
Angola 72.76 Top 20 7
Liberia 72.42 Top 20 8
Kenya 72.20 Top 20 9
Moldova 72.17 Top 20 10
Eritrea 71.37 Top 20 11
South Sudan 70.33 Top 20 12
Iceland 70.07 Top 20 13
Korea, Dem. People’s Rep.  69.65 Top 20 14
Cambodia 69.46 Top 20 15
Viet Nam 68.48 Top 20 16
Uganda 67.59 Top 20 17
New Zealand 67.57 Top 20 18
Azerbaijan 67.40 Top 20 19
Cameroon 67.29 Top 20 20
Afghanistan 4.83 Bottom 20 224
Yemen, Rep.  5.10 Bottom 20 223
Iraq 11.38 Bottom 20 222
Jordan 14.05 Bottom 20 221
Syrian Arab Republic 14.14 Bottom 20 220
Iran, Islamic Rep.  14.38 Bottom 20 219
Middle East & North Africa (excluding high income) 15.60 Bottom 20 218
Middle East & North Africa (IDA & IBRD) 15.60 Bottom 20 218
Egypt, Arab Rep.  16.45 Bottom 20 217
Algeria 16.81 Bottom 20 216
Djibouti 18.53 Bottom 20 215
West Bank and Gaza 18.89 Bottom 20 214
Middle East & North Africa 18.94 Bottom 20 213
Morocco 19.81 Bottom 20 212
Arab World 19.98 Bottom 20 211
Somalia 21.15 Bottom 20 210
Pakistan 24.46 Bottom 20 209
Mauritania 26.41 Bottom 20 208
Tunisia 26.85 Bottom 20 207
Lebanon 27.48 Bottom 20 206
Twenty countries with highest and lowest female Labor Force Participation Rate (%)
Data source: World Bank
# Density of the female-minus-male participation gap, with the mean (red,
# dashed) and median (blue, dotted) marked by labelled vertical lines.
ggplot(difference, aes(x = Difference)) +
  geom_density(fill = "lightblue", alpha = 0.5) +
  # The intercepts are constants, so they are passed directly rather than
  # through aes(). `linewidth` replaces `size`, which is deprecated for line
  # geoms since ggplot2 3.4.0.
  geom_vline(
    xintercept = mean_value,
    color = "red", linetype = "dashed", linewidth = 1
  ) +
  geom_vline(
    xintercept = median_value,
    color = "blue", linetype = "dotted", linewidth = 1
  ) +
  # Rotated labels; the opposite vjust values place them on opposite sides
  # of their lines.
  annotate(
    "text",
    x = mean_value, y = 0.02,
    label = paste("Mean =", round(mean_value, 2)),
    color = "red", angle = 90, vjust = -0.5
  ) +
  annotate(
    "text",
    x = median_value, y = 0.02,
    label = paste("Median =", round(median_value, 2)),
    color = "blue", angle = 90, vjust = 1.5
  ) +
  labs(
    title = "Density distribution of the difference of Female and Male LFPR",
    subtitle = "Difference is in percentage points",
    x = "Value",
    y = "Density",
    caption = "Data Source:World Bank"
  ) +
  theme_stata()

2. [Insert a section title here]

[The text and code for this section goes here.]

3. [Insert a section title here]

[The text and code for this section goes here.]

4. [Insert a section title here]

[The text and code for this section goes here.]

5. [Insert a section title here]

[The text and code for this section goes here.]

6. [Insert a section title here]

[The text and code for this section goes here.]