Introduction

This report analyzes H1B visa application trends, focusing on gender differences, total applicant trends, and geographic distribution across U.S. states.

Load and Prepare Data

# Directory holding the three CSV tables used in this report. Change it here
# (and only here) when running on another machine.
data_dir <- "/Users/yii/Desktop/H1bTableau/FromTableau"

# All three files share this file-name prefix; only the table name differs.
export_prefix <- "TRK_13139_FY2021.csv++ (Multiple Connections)_"

# Read one table from `data_dir` and normalise its column names.
#
# table_name: the part of the file name between `export_prefix` and ".csv".
# Returns the data frame produced by read.csv() after clean_names().
read_export <- function(table_name) {
  csv_path <- file.path(data_dir, paste0(export_prefix, table_name, ".csv"))
  read.csv(csv_path) %>%
    clean_names()
}

country <- read_export("Country Codes")

job <- read_export("I-129 H1B Job Codes")

trk <- read_export("TRK_13139_FY2021")

Graph 2: Geographic Distribution of Beneficiaries

# Base layers for the map: world outline polygons (drawn as the gray backdrop)
# and the U.S. state boundary shapefile (joined to the per-state totals).
world_map <- map_data(map = "world")
us_states <- st_read(
  dsn = "/Users/yii/Desktop/H1bTableau/us-state-boundaries/us-state-boundaries.shp"
)
## Reading layer `us-state-boundaries' from data source 
##   `/Users/yii/Desktop/H1bTableau/us-state-boundaries/us-state-boundaries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.44069
## Geodetic CRS:  WGS 84
# Lookup table of region codes and names.
state_table <- read.csv(file = "/Users/yii/Desktop/H1bTableau/state_abb.csv")

# Keep only the region types listed below and expose two columns:
# `pet_state` (taken from ANSI) and `name` (taken from Name), so the table can
# be joined by `pet_state` on one side and by `name` on the other.
state_abb <- state_table %>%
  filter(
    Status.of.region %in% c(
      "State",
      "Federal district",
      "Insular area (Commonwealth)",
      "Insular area (Territory)",
      "US military mail code"
    )
  ) %>%
  select(pet_state = ANSI, name = Name)

# Sum `number_of_beneficiaries` per `pet_state`, using only rows whose gender
# is "female" or "male" and whose `pet_state` is non-empty. Missing counts are
# ignored in the sum, and the result is returned ungrouped.
trk_map <- trk %>%
  filter(
    gender %in% c("female", "male"),
    pet_state != ""
  ) %>%
  group_by(pet_state) %>%
  summarise(
    number_of_beneficiaries_sum = sum(number_of_beneficiaries, na.rm = TRUE),
    .groups = "drop"
  )

# Attach region names and boundary geometry to the per-state totals.
#
# Inner joins keep only the states present in both lookups, which is what the
# earlier left_join() + drop_na() combination was used for. Unlike an unscoped
# drop_na(), they do not also discard a state merely because one of the
# shapefile's unrelated attribute columns is NA for it.
merged_map <- trk_map %>%
  inner_join(state_abb, by = "pet_state") %>%
  # na_matches = "never": a missing name must not pair with another missing
  # name in the shapefile.
  inner_join(us_states, by = "name", na_matches = "never") %>%
  st_as_sf() %>%
  # Extra point column used later to position the state labels.
  mutate(centroid = st_centroid(geometry))

# Label positions: one row per state with the centroid coordinates (columns
# X and Y from st_coordinates()) and the state code to print. Building this
# data frame up front keeps `$` and function calls out of aes().
state_labels <- data.frame(
  st_coordinates(merged_map$centroid),
  pet_state = merged_map$pet_state
)

# Choropleth of beneficiaries per state on a black background:
#   1. world polygons as a light-gray backdrop,
#   2. state shapes filled by number_of_beneficiaries_sum,
#   3. white state-code labels at the state centroids.
p3 <- ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "lightgray", color = "white"
  ) +
  geom_sf(
    data = merged_map,
    aes(fill = number_of_beneficiaries_sum),
    color = "lightgray"
  ) +
  scale_fill_gradient(low = "#bfa554", high = "#994926") +
  labs(
    title = "H1B Applications in Different States",
    fill = "Number of Beneficiaries"
  ) +
  geom_text(
    data = state_labels,
    aes(x = X, y = Y, label = pet_state),
    size = 3, color = "white"
  ) +
  # Crop the view to longitudes -170..-65 and latitudes 20..70.
  coord_sf(xlim = c(-170, -65), ylim = c(20, 70)) +
  theme_void(base_family = "Arial") +
  # Single theme() call: the second, duplicate theme() block that used to
  # follow set the same values again and has been removed.
  theme(
    plot.background = element_rect(fill = "black", color = NA),
    panel.background = element_rect(fill = "black", color = NA),
    plot.title = element_text(size = 16, color = "white", face = "bold"),
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10, face = "bold", color = "white"),
    legend.text = element_text(size = 8, color = "white"),
    legend.key.size = unit(0.2, "cm"),
    legend.key.width = unit(0.5, "cm"),
    legend.key.height = unit(0.5, "cm"),
    legend.spacing.y = unit(0.1, "cm")
  )

# Render the static ggplot as an interactive plotly widget.
ggplotly(p3)

Conclusion

This analysis provides insights into H1B application trends by gender, application volume, and geographic distribution across the United States. Future work could include analysis by occupation or salary levels.