testing final project

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(readr)
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(broom)
library(magrittr)

Attaching package: 'magrittr'

The following object is masked from 'package:purrr':

    set_names

The following object is masked from 'package:tidyr':

    extract
# Import the "All States Totals 2022" immigration sheet (Table 1) as a tibble.
# NOTE(review): the path is absolute and machine-specific — confirm it resolves
# wherever this script is run.
data <- readr::read_csv(
  file = "/Users/jasonlaucel/Data 110 Folder/2023_0824_plcy_state_immigration_data_sheets_fy2022/2023_0824_plcy_state_immigration_data_sheets_fy2022 2/All States Totals 2022-Table 1 7.csv"
)
New names:
Rows: 52 Columns: 16
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(1): States dbl (15): Population, Lawful Permanent Residents, Lawful Permanent
Residents...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `Rank` -> `Rank...6`
• `Rank` -> `Rank...8`
# Preview the first six rows to check the import and the column names
head(data, n = 6L)
# A tibble: 6 × 16
  States Population Lawful Permanent Res…¹ Lawful Permanent Res…² `New Arrivals`
  <chr>       <dbl>                  <dbl>                  <dbl>          <dbl>
1 Alaba…    5074296                   3747                     34           1655
2 Alaska     733583                    673                     49            386
3 Arizo…    7359197                  16984                     15           7156
4 Arkan…    3045637                   3391                     35           1419
5 Calif…   39029342                 182921                      1          78493
6 Color…    5839926                  11410                     17           5266
# ℹ abbreviated names: ¹​`Lawful Permanent Residents`,
#   ²​`Lawful Permanent Residents Rank`
# ℹ 11 more variables: Rank...6 <dbl>, Adjustments <dbl>, Rank...8 <dbl>,
#   Nonimmigrants <dbl>, `Nonimmigrants Rank` <dbl>, Naturalizations <dbl>,
#   `Naturalizations Rank` <dbl>, Refugees <dbl>, `Refugees Rank` <dbl>,
#   Asylees <dbl>, `Asylee Rank` <dbl>
library(dplyr)

# Three dplyr verbs: filter -> group_by -> summarise.
# The verbs are namespace-qualified because plotly is attached after dplyr.
# NOTE(review): if the sheet holds one row per state, the per-state mean is
# simply that state's Naturalizations value — confirm this is intended.
# NOTE(review): the sheet has 52 rows; if one of them is a national total it
# also passes the population filter — verify.
data <- data %>%
  dplyr::filter(Population > 5e6) %>%  # keep states with more than 5 million residents
  dplyr::group_by(States) %>%  # one group per state
  dplyr::summarise(mean_Naturalizations = mean(Naturalizations))  # mean naturalizations within each state

# Load ggplot2
library(ggplot2)


# Display label for each state: every space becomes a line break so that
# multi-word names wrap onto separate lines.
# NOTE(review): State_ is not referenced by the plotting code visible in this
# script (the plot maps States) — confirm whether it is still needed.
data[["State_"]] <- gsub(" ", "\n", data[["States"]])


# Custom fill palette: 53 hex colors, each appearing exactly once.
# scale_fill_manual() hands these out in order, one per state, so a repeated
# entry would give two different states the same fill. Every value is therefore
# distinct; the length is kept at 53.
my_colors <- c(
  "#FF5733", "#7FFF00", "#33FFB5", "#FF3386", "#336BFF", "#FFD700",
  "#33FF33", "#FF00FF", "#DC143C", "#00FFFF", "#8A2BE2", "#FF4500",
  "#00FF7F", "#ADFF2F", "#008080", "#1E90FF", "#FF6347", "#BA55D3",
  "#4169E1", "#00FA9A", "#20B2AA", "#FFA500", "#800080", "#FFC0CB",
  "#2E8B57", "#32CD32", "#4682B4", "#9400D3", "#FFA07A", "#FF69B4",
  "#8B008B", "#4B0082", "#7FFFD4", "#000080", "#FF1493", "#8B4513",
  "#00BFFF", "#FF7F50", "#6495ED", "#708090", "#40E0D0", "#9ACD32",
  "#CD5C5C", "#BDB76B", "#5F9EA0", "#B22222", "#D2691E", "#B0C4DE",
  "#6A5ACD", "#DAA520", "#F08080", "#0000FF", "#556B2F"
)

# Bar chart with one bar per state, filled from the custom palette.
# geom_col() draws bar heights directly from the y values, which is what
# geom_bar(stat = "identity") does.
p <- ggplot(data, aes(x = States, y = mean_Naturalizations, fill = States)) +
  geom_col() +
  scale_fill_manual(values = my_colors) +  # Use custom color palette
  # Rotate the state names so they do not overlap along the x axis
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 3)) +
  # The plotted value is mean_Naturalizations (a count); no ratio to population
  # is computed, so the labels name the quantity that is actually shown.
  labs(
    x = "State",
    y = "Mean Naturalizations",
    title = "Mean Naturalizations by State"
  )


# Interactivity: convert the ggplot to an interactive plotly widget once, then
# print it. p already holds the converted widget, so it is displayed directly
# instead of being passed through ggplotly() a second time.
p <- ggplotly(p)

p