This report analyzes H1B visa application trends, focusing on gender differences, total applicant trends, and geographic distribution across U.S. states.
# Read the three CSV files named in the paths below (country codes, I-129 job
# codes, and the TRK_13139_FY2021 records) and pass each one through
# clean_names() so that later code can rely on the cleaned column names.
country <- clean_names(
  read.csv("/Users/yii/Desktop/H1bTableau/FromTableau/TRK_13139_FY2021.csv++ (Multiple Connections)_Country Codes.csv")
)
job <- clean_names(
  read.csv("/Users/yii/Desktop/H1bTableau/FromTableau/TRK_13139_FY2021.csv++ (Multiple Connections)_I-129 H1B Job Codes.csv")
)
trk <- clean_names(
  read.csv("/Users/yii/Desktop/H1bTableau/FromTableau/TRK_13139_FY2021.csv++ (Multiple Connections)_TRK_13139_FY2021.csv")
)
# Line chart of the summed `num_of_emp_in_us` per lottery year, one line per
# gender, styled with white text on a black background and then passed to
# ggplotly().
# NOTE(review): the title refers to applicants, but the plotted measure is the
# sum of `num_of_emp_in_us` -- confirm this is the intended column.

# Keep only rows whose gender is "female" or "male", then total the column for
# every (lottery_year, gender) pair; NA values are ignored by the sum.
gender_totals <- trk %>%
  filter(gender %in% c("female", "male")) %>%
  group_by(lottery_year, gender) %>%
  summarise(
    num_of_emp_in_us_sum = sum(num_of_emp_in_us, na.rm = TRUE),
    .groups = "drop"
  )

p1 <- ggplot(
  gender_totals,
  aes(x = lottery_year, y = num_of_emp_in_us_sum, color = gender)
) +
  geom_line(linewidth = 1) +
  # Shape 21 takes a separate fill, so the markers are white-filled circles
  # outlined in the gender colour.
  geom_point(shape = 21, size = 3, stroke = 1, fill = "white") +
  labs(
    title = "Trends in Applicants of Different Genders",
    x = "Year of Lottery",
    y = "Number of Employees in US"
  ) +
  theme_classic(base_family = "Arial") +
  theme(
    # Black backgrounds for the plot, the panel and the legend.
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    legend.background = element_rect(fill = "black"),
    # White text throughout so it stays readable on the black backgrounds.
    text = element_text(color = "white"),
    plot.title = element_text(color = "white", size = 16, face = "bold"),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )

ggplotly(p1)
# World map polygon data; drawn as the light-gray backdrop of the state map.
world_map <- map_data(map = "world")
# State boundary features read from the local shapefile.
us_states <- st_read(
  dsn = "/Users/yii/Desktop/H1bTableau/us-state-boundaries/us-state-boundaries.shp"
)
## Reading layer `us-state-boundaries' from data source
## `/Users/yii/Desktop/H1bTableau/us-state-boundaries/us-state-boundaries.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 20 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.44069
## Geodetic CRS: WGS 84
# Build a two-column lookup (pet_state, name) from the state table: `ANSI`
# supplies the state code and `Name` the full region name. Only rows whose
# `Status.of.region` is one of the listed categories are kept.
state_table <- read.csv("/Users/yii/Desktop/H1bTableau/state_abb.csv")
state_abb <- state_table %>%
  filter(
    Status.of.region %in% c(
      "State",
      "Federal district",
      "Insular area (Commonwealth)",
      "Insular area (Territory)",
      "US military mail code"
    )
  ) %>%
  # Rename while selecting so the columns match the join keys used later
  # (`pet_state` and `name`).
  select(pet_state = ANSI, name = Name)
# Total `number_of_beneficiaries` per petitioner state code. Rows are kept only
# when gender is "female" or "male" and `pet_state` is not the empty string;
# NA values are ignored by the sum.
trk_map <- trk %>%
  filter(
    gender %in% c("female", "male"),
    pet_state != ""
  ) %>%
  group_by(pet_state) %>%
  summarise(
    number_of_beneficiaries_sum = sum(number_of_beneficiaries, na.rm = TRUE),
    .groups = "drop"
  )
# Attach the full state name and the boundary geometry to the per-state
# totals, convert the result to an sf object, and add each state's centroid
# (used to position the state-code labels on the map).
#
# inner_join() is used instead of left_join() followed by a bare drop_na():
# drop_na() with no column arguments inspects every column, so a state that
# matched correctly would still be removed if any unrelated shapefile
# attribute happened to be NA. inner_join() removes only the rows that have
# no match in the lookup table or in the shapefile.
merged_map <- trk_map %>%
  inner_join(state_abb, by = "pet_state") %>%
  inner_join(us_states, by = "name") %>%
  st_as_sf() %>%
  mutate(centroid = st_centroid(geometry))
# Choropleth of `number_of_beneficiaries_sum` per state, drawn over a
# light-gray world backdrop, labelled with the state codes at the state
# centroids, and then passed to ggplotly().

# Label positions as a plain data frame (columns pet_state, X, Y) so that
# geom_text() can map columns by name instead of evaluating `merged_map$...`
# expressions inside aes().
state_labels <- data.frame(
  pet_state = merged_map$pet_state,
  st_coordinates(merged_map$centroid)
)

p3 <- ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    color = "white"
  ) +
  geom_sf(
    data = merged_map,
    aes(fill = number_of_beneficiaries_sum),
    color = "lightgray"
  ) +
  scale_fill_gradient(low = "#bfa554", high = "#994926") +
  labs(
    title = "H1B Applications in Different States",
    fill = "Number of Beneficiaries"
  ) +
  geom_text(
    data = state_labels,
    aes(x = X, y = Y, label = pet_state),
    size = 3,
    color = "white"
  ) +
  # Restrict the view to the longitude/latitude window below.
  coord_sf(xlim = c(-170, -65), ylim = c(20, 70)) +
  theme_void(base_family = "Arial") +
  # Single theme() call: black backgrounds without a border, white text, and
  # a compact legend on the right.
  theme(
    plot.background = element_rect(fill = "black", color = NA),
    panel.background = element_rect(fill = "black", color = NA),
    plot.title = element_text(size = 16, color = "white", face = "bold"),
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10, face = "bold", color = "white"),
    legend.text = element_text(size = 8, color = "white"),
    legend.key.size = unit(0.2, "cm"),
    legend.key.width = unit(0.5, "cm"),
    legend.key.height = unit(0.5, "cm"),
    legend.spacing.y = unit(0.1, "cm")
  )

ggplotly(p3)
This analysis provides insights into H1B application trends by gender, application volume, and geographic distribution across the United States. Future work could include analysis by occupation or salary levels.