library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
fifa_Untidy<-read.csv("https://raw.githubusercontent.com/NikoletaEm/607LABS/main/Project1/fifa21_raw_data.csv")
head(fifa_Untidy)
## photoUrl LongName
## 1 https://cdn.sofifa.com/players/158/023/21_60.png Lionel Messi
## 2 https://cdn.sofifa.com/players/020/801/21_60.png C. Ronaldo dos Santos Aveiro
## 3 https://cdn.sofifa.com/players/200/389/21_60.png Jan Oblak
## 4 https://cdn.sofifa.com/players/192/985/21_60.png Kevin De Bruyne
## 5 https://cdn.sofifa.com/players/190/871/21_60.png Neymar da Silva Santos Jr.
## 6 https://cdn.sofifa.com/players/188/545/21_60.png Robert Lewandowski
## playerUrl
## 1 http://sofifa.com/player/158023/lionel-messi/210005/
## 2 http://sofifa.com/player/20801/c-ronaldo-dos-santos-aveiro/210005/
## 3 http://sofifa.com/player/200389/jan-oblak/210005/
## 4 http://sofifa.com/player/192985/kevin-de-bruyne/210005/
## 5 http://sofifa.com/player/190871/neymar-da-silva-santos-jr/210005/
## 6 http://sofifa.com/player/188545/robert-lewandowski/210005/
## Nationality Positions Name Age X.OVA POT
## 1 Argentina RW ST CF L. Messi 33 93 93
## 2 Portugal ST LW Cristiano Ronaldo 35 92 92
## 3 Slovenia GK J. Oblak 27 91 93
## 4 Belgium CAM CM K. De Bruyne 29 91 91
## 5 Brazil LW CAM Neymar Jr 28 91 91
## 6 Poland ST R. Lewandowski 31 91 91
## Team...Contract ID Height Weight foot BOV
## 1 \n\n\n\nFC Barcelona\n2004 ~ 2021\n\n 158023 5'7" 159lbs Left 93
## 2 \n\n\n\nJuventus\n2018 ~ 2022\n\n 20801 6'2" 183lbs Right 92
## 3 \n\n\n\nAtlético Madrid\n2014 ~ 2023\n\n 200389 6'2" 192lbs Right 91
## 4 \n\n\n\nManchester City\n2015 ~ 2023\n\n 192985 5'11" 154lbs Right 91
## 5 \n\n\n\nParis Saint-Germain\n2017 ~ 2022\n\n 190871 5'9" 150lbs Right 91
## 6 \n\n\n\nFC Bayern München\n2014 ~ 2023\n\n 188545 6'0" 176lbs Right 91
## BP Growth Joined Loan.Date.End Value Wage Release.Clause Attacking
## 1 RW 0 Jul 1, 2004 N/A €67.5M €560K €138.4M 429
## 2 ST 0 Jul 10, 2018 N/A €46M €220K €75.9M 437
## 3 GK 2 Jul 16, 2014 N/A €75M €125K €159.4M 95
## 4 CAM 0 Aug 30, 2015 N/A €87M €370K €161M 407
## 5 LW 0 Aug 3, 2017 N/A €90M €270K €166.5M 408
## 6 ST 0 Jul 1, 2014 N/A €80M €240K €132M 423
## Crossing Finishing Heading.Accuracy Short.Passing Volleys Skill Dribbling
## 1 85 95 70 91 88 470 96
## 2 84 95 90 82 86 414 88
## 3 13 11 15 43 13 109 12
## 4 94 82 55 94 82 441 88
## 5 85 87 62 87 87 448 95
## 6 71 94 85 84 89 407 85
## Curve FK.Accuracy Long.Passing Ball.Control Movement Acceleration
## 1 93 94 91 96 451 91
## 2 81 76 77 92 431 87
## 3 13 14 40 30 307 43
## 4 85 83 93 92 398 77
## 5 88 89 81 95 453 94
## 6 79 85 70 88 407 77
## Sprint.Speed Agility Reactions Balance Power Shot.Power Jumping Stamina
## 1 80 91 94 95 389 86 68 72
## 2 91 87 95 71 444 94 95 84
## 3 60 67 88 49 268 59 78 41
## 4 76 78 91 76 408 91 63 89
## 5 89 96 91 83 357 80 62 81
## 6 78 77 93 82 420 89 84 76
## Strength Long.Shots Mentality Aggression Interceptions Positioning Vision
## 1 69 94 347 44 40 93 95
## 2 78 93 353 63 29 95 82
## 3 78 12 140 34 19 11 65
## 4 74 91 408 76 66 88 94
## 5 50 84 356 51 36 87 90
## 6 86 85 391 81 49 94 79
## Penalties Composure Defending Marking Standing.Tackle Sliding.Tackle
## 1 75 96 91 32 35 24
## 2 84 95 84 28 32 24
## 3 11 68 57 27 12 18
## 4 84 91 186 68 65 53
## 5 92 93 94 35 30 29
## 6 88 88 96 35 42 19
## Goalkeeping GK.Diving GK.Handling GK.Kicking GK.Positioning GK.Reflexes
## 1 54 6 11 15 14 8
## 2 58 7 11 15 14 11
## 3 437 87 92 78 90 90
## 4 56 15 13 5 10 13
## 5 59 9 9 15 15 11
## 6 51 15 6 12 8 10
## Total.Stats Base.Stats W.F SM A.W D.W IR PAC SHO PAS DRI DEF PHY Hits
## 1 2231 466 4 ★ 4★ Medium Low 5 ★ 85 92 91 95 38 65 \n372
## 2 2221 464 4 ★ 5★ High Low 5 ★ 89 93 81 89 35 77 \n344
## 3 1413 489 3 ★ 1★ Medium Medium 3 ★ 87 92 78 90 52 90 \n86
## 4 2304 485 5 ★ 4★ High High 4 ★ 76 86 93 88 64 78 \n163
## 5 2175 451 5 ★ 5★ High Medium 5 ★ 91 85 86 94 36 59 \n273
## 6 2195 457 4 ★ 4★ High Medium 4 ★ 78 91 78 85 43 82 \n182
fifa_Untidy <- fifa_Untidy %>% rename(Full_Name=LongName,
Team=Team...Contract,
Foot=foot,
Loan_Date_End=Loan.Date.End,
Release_Clause=Release.Clause,
Heading_Accuracy=Heading.Accuracy,
Short_Passing=Short.Passing,
FK_Accuracy=FK.Accuracy,
Long_Passing=Long.Passing,
Ball_Control=Ball.Control,
Sprint_Speed=Sprint.Speed,
Shot_Power=Shot.Power,
Long_Shots=Long.Shots,
Standing_Tackle=Standing.Tackle,
Sliding_Tackle=Sliding.Tackle,
GK_Diving=GK.Diving,
GK_Handling=GK.Handling)
Columns_to_remove<- c('photoUrl',"playerUrl")
fifa_Untidy <- fifa_Untidy %>% select(-any_of(Columns_to_remove))
selected_Data <- subset(fifa_Untidy, select = c("Full_Name", "Nationality","Team","Attacking","Crossing","Short_Passing","Dribbling","Long_Passing","Shot_Power","Penalties","Foot","Value"))
selected_Data <- selected_Data %>%
pivot_longer(cols=!c("Full_Name","Nationality","Team","Foot","Value"),
names_to = "Gameplay_stats",
values_to="Count" )
dribbling_data <- selected_Data[selected_Data$Gameplay_stats == "Dribbling", ]
index_max_dribbling <- which.max(dribbling_data$Count)
player_max_dribbling <- dribbling_data[index_max_dribbling, ]
print(player_max_dribbling)
## # A tibble: 1 × 7
## Full_Name Nationality Team Foot Value Gameplay_stats Count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Lionel Messi Argentina "\n\n\n\nFC Barcelo… Left €67.… Dribbling 96
penalties_data <- selected_Data[selected_Data$Gameplay_stats == "Penalties", ]
max_penalty_index <- which.max(penalties_data$Count)
player_max_penalty <- penalties_data[max_penalty_index, ]
print(player_max_penalty)
## # A tibble: 1 × 7
## Full_Name Nationality Team Foot Value Gameplay_stats Count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Neymar da Silva Santos Jr. Brazil "\n\n… Right €90M Penalties 92
shot_power_data <- selected_Data[selected_Data$Gameplay_stats == "Shot_Power", ]
max_shot_power_index <- which.max(shot_power_data$Count)
player_max_shot_power <- shot_power_data[max_shot_power_index, ]
print(player_max_shot_power)
## # A tibble: 1 × 7
## Full_Name Nationality Team Foot Value Gameplay_stats Count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Aleksandar Kolarov Serbia "\n\n\n\nInte… Left €8M Shot_Power 95
passing_data <- selected_Data %>%
filter(Gameplay_stats %in% c("Short_Passing", "Long_Passing"))
total_passing <- passing_data %>%
group_by(Full_Name, Nationality, Team) %>%
summarise(Total_Passing = sum(Count)) %>%
arrange(desc(Total_Passing))
## `summarise()` has grouped output by 'Full_Name', 'Nationality'. You can
## override using the `.groups` argument.
player_max_total_passing <- total_passing[1, ]
print(player_max_total_passing)
## # A tibble: 1 × 4
## # Groups: Full_Name, Nationality [1]
## Full_Name Nationality Team Total_Passing
## <chr> <chr> <chr> <int>
## 1 Kevin Berlaso Ecuador "\n Ecuador\nFree\n\n" 282
crossing_data <- selected_Data[selected_Data$Gameplay_stats == "Crossing", ]
max_crossing_count <- max(crossing_data$Count)
max_crossing_player <- crossing_data[crossing_data$Count == max_crossing_count, ]
print(max_crossing_player)
## # A tibble: 1 × 7
## Full_Name Nationality Team Foot Value Gameplay_stats Count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Kevin De Bruyne Belgium "\n\n\n\nManches… Right €87M Crossing 94
attacking_data <- selected_Data[selected_Data$Gameplay_stats == "Attacking", ]
# Find the row with the maximum attacking count
max_attacking_row <- attacking_data[which.max(attacking_data$Count), ]
# Print the player's name with the maximum attacking count
print(max_attacking_row)
## # A tibble: 1 × 7
## Full_Name Nationality Team Foot Value Gameplay_stats Count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 C. Ronaldo dos Santos Avei… Portugal "\n\… Right €46M Attacking 437
percentage <- prop.table(table(selected_Data$Foot)) * 100
foot_distribution <- data.frame(
Foot = names(percentage),
Count = as.numeric(table(selected_Data$Foot)),
Percentage = percentage
)
ggplot(foot_distribution, aes(x = Foot, y = Count)) +
geom_bar(stat = "identity", fill = "orange", color = "black") +
geom_text(aes(label = paste0(sprintf("%.1f", percentage), "%")),
vjust = -0.5, size = 4, color = "black") +
labs(title = "Distribution of Foot Attribute", x = "Foot", y = "Count")
greece_players <- selected_Data[selected_Data$Nationality == "Greece", ]
# Create a bar plot of wages of players from Greece
ggplot(greece_players, aes(x = factor(Value))) +
geom_bar(fill = "lightblue", color = "black") +
labs(title = "Distribution of Values for Players from Greece", x = "Value", y = "Frequency") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
value_frequency <- table(greece_players$Value)
max_frequency_value <- names(which.max(value_frequency))
max_frequency_count <- max(value_frequency)
cat("Value category with the highest frequency:", max_frequency_value, "\n")
## Value category with the highest frequency: €6.5M
cat("Count:", max_frequency_count, "\n")
## Count: 42
library(tidyr)
library(dplyr)
library(ggplot2)
library(readr)
Untidy_Covid19<- read.csv("https://raw.githubusercontent.com/NikoletaEm/607LABS/main/Rates_of_COVID-19_Cases_or_Deaths_by_Age_Group_and_Updated__Bivalent__Booster_Status_20240225.csv")
Tidy_Covid19 <- Untidy_Covid19 %>%
mutate(unvaccinated_population = as.character(unvaccinated_population)) %>%
pivot_longer(
cols = c("age_group", "unvaccinated_population","month"),
names_to = "Covid19_stats",
values_to = "Counts"
)
Tidy_Covid19<- Tidy_Covid19 %>%
select("outcome","mmwr_week","Covid19_stats","Counts","unvaccinated_with_outcome","vaccinated_with_outcome")
outcome_counts <- Tidy_Covid19 %>%
group_by(outcome) %>%
summarise(Count = n())
Outcome_counts <- as.data.frame(outcome_counts)
colnames(Outcome_counts) <- c("Outcome", "Count")
Outcome_counts$Percentage <- (Outcome_counts$Count / sum(Outcome_counts$Count)) * 100
ggplot(Outcome_counts, aes(x = Outcome, y = Count, fill = Outcome)) +
geom_bar(stat = "identity",fill="darkred") +
geom_text(aes(label = paste0(round(Percentage, 2), "%")),
position = position_stack(vjust = 0.5),
color = "black", size = 4) +
labs(x = "Outcome", y = "Count", title = "Counts of Cases and Deaths") +
theme_minimal()
cases_data <- Tidy_Covid19 %>%
filter(outcome == "case")
cases_data$date <- as.Date(paste(substr(cases_data$mmwr_week, 1, 4), "-W", substr(cases_data$mmwr_week, 5, 6), "-1", sep = ""), format = "%Y-W%U-%u")
# Create a time series plot
ggplot(cases_data, aes(x = date, y = unvaccinated_with_outcome)) +
geom_line() +
labs(x = "Date", y = "Cases", title = "Time Series of COVID-19 Cases (Unvaccinated)") +
theme_minimal()
Tidy_Covid19 <- Tidy_Covid19 %>%
mutate(year = substr(mmwr_week, 1, 4))
cases_unvaccinated <- Tidy_Covid19 %>%
filter(outcome == "case" & unvaccinated_with_outcome > 0) %>%
group_by(year) %>%
summarise(total_cases = sum(unvaccinated_with_outcome))
max_cases_year_data <- cases_unvaccinated %>%
filter(total_cases == max(total_cases))
max_cases_year <- max_cases_year_data %>%
pull(year)
max_cases_number <- max_cases_year_data %>%
pull(total_cases)
print(paste("Year with the highest number of cases:", max_cases_year))
## [1] "Year with the highest number of cases: 2022"
print(paste("Number of cases in the highest year:", max_cases_number))
## [1] "Number of cases in the highest year: 77598648"
deaths_data <- Tidy_Covid19 %>%
filter(outcome == "death")
deaths_data$date <- as.Date(paste(substr(deaths_data$mmwr_week, 1, 4), "-W", substr(deaths_data$mmwr_week, 5, 6), "-1", sep = ""), format = "%Y-W%U-%u")
ggplot(deaths_data, aes(x = date, y = unvaccinated_with_outcome)) +
geom_line() +
labs(x = "Date", y = "Deaths", title = "Time Series of COVID-19 Deaths (Unvaccinated)") +
theme_minimal()
deaths_unvaccinated <- Tidy_Covid19 %>%
filter(outcome == "death" & unvaccinated_with_outcome > 0) %>%
group_by(year) %>%
summarise(total_deaths = sum(unvaccinated_with_outcome))
max_deaths_year_data <- deaths_unvaccinated %>%
filter(total_deaths == max(total_deaths))
max_deaths_year <- max_deaths_year_data %>%
pull(year)
max_deaths_number <- max_deaths_year_data %>%
pull(total_deaths)
# Print the results
print(paste("Year with the highest number of deaths:", max_deaths_year))
## [1] "Year with the highest number of deaths: 2022"
print(paste("Number of deaths in the highest year:", max_deaths_number))
## [1] "Number of deaths in the highest year: 500034"
vaccinated_data <- Tidy_Covid19 %>%
filter(outcome == "case" & !is.na(vaccinated_with_outcome))
vaccinated_data <- vaccinated_data %>%
group_by(mmwr_week) %>%
summarise(mean_cases = mean(vaccinated_with_outcome, na.rm = TRUE))
ggplot(vaccinated_data, aes(x = mmwr_week, y = mean_cases)) +
geom_line(color = "green") +
geom_smooth(method = "loess", color = "orange") +
labs(x = "Week", y = "Mean Cases", title = "Time Series of COVID-19 Cases (Vaccinated)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
cases_vaccinated <- Tidy_Covid19 %>%
filter(outcome == "case" & vaccinated_with_outcome > 0)
cases_vaccinated <- cases_vaccinated %>%
mutate(year = substr(mmwr_week, 1, 4))
cases_vaccinated_summary <- cases_vaccinated %>%
group_by(year) %>%
summarise(total_cases = sum(vaccinated_with_outcome))
max_cases_vaccinated_year <- cases_vaccinated_summary %>%
filter(total_cases == max(total_cases)) %>%
pull(year)
max_cases_vaccinated_number <- max(cases_vaccinated_summary$total_cases)
print(paste("Year with the highest number of cases (vaccinated):", max_cases_vaccinated_year))
## [1] "Year with the highest number of cases (vaccinated): 2022"
print(paste("Number of cases in the highest year (vaccinated):", max_cases_vaccinated_number))
## [1] "Number of cases in the highest year (vaccinated): 52375464"
deaths_vaccinated_data <- Tidy_Covid19 %>%
filter(outcome == "death" & !is.na(vaccinated_with_outcome))
deaths_vaccinated_data <- deaths_vaccinated_data %>%
group_by(mmwr_week) %>%
summarise(mean_deaths = mean(vaccinated_with_outcome, na.rm = TRUE))
ggplot(deaths_vaccinated_data, aes(x = mmwr_week, y = mean_deaths)) +
geom_line(color = "red") +
geom_smooth(method = "loess", color = "blue") +
labs(x = "Week", y = "Mean Deaths", title = "Time Series of COVID-19 Deaths (Vaccinated)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
deaths_vaccinated <- Tidy_Covid19 %>%
filter(outcome == "death" & vaccinated_with_outcome > 0) %>%
group_by(year) %>%
summarise(total_deaths = sum(vaccinated_with_outcome))
max_deaths_vaccinated_year_data <- deaths_vaccinated %>%
filter(total_deaths == max(total_deaths))
max_deaths_vaccinated_year <- max_deaths_vaccinated_year_data %>%
pull(year)
max_deaths_vaccinated_number <- max_deaths_vaccinated_year_data %>%
pull(total_deaths)
print(paste("Year with the highest number of deaths in vaccinated individuals:", max_deaths_vaccinated_year))
## [1] "Year with the highest number of deaths in vaccinated individuals: 2022"
print(paste("Number of deaths in the highest year in vaccinated individuals:", max_deaths_vaccinated_number))
## [1] "Number of deaths in the highest year in vaccinated individuals: 246813"
marriage_messy <- read.csv("https://raw.githubusercontent.com/NikoletaEm/607LABS/main/Project1/marriage_2022.csv.csv")
marriage_messy <- marriage_messy %>%
rename("Demographics"="Label..Grouping.",
"Total"="United.States..Total..Estimate",
"Total_margin.of.error"="United.States..Total..Margin.of.Error",
"Now_Married"="United.States..Now.married..except.separated...Estimate",
"Now_Married_margin.of.error"="United.States..Now.married..except.separated...Margin.of.Error",
"Widowed"="United.States..Widowed..Estimate",
"Widowed_margin.of.error"='United.States..Widowed..Margin.of.Error',
"Divorced"='United.States..Divorced..Estimate',
"Divorced_margin.of.error"='United.States..Divorced..Margin.of.Error',
"Seperated"= "United.States..Separated..Estimate",
"Seperated_margin.of.error"="United.States..Separated..Margin.of.Error",
"Never_married"="United.States..Never.married..Estimate",
"Never_married_margin.of.error"="United.States..Never.married..Margin.of.Error")
males_data <- marriage_messy[4:9, ]
females_data<-marriage_messy[11:16, ]
race_data <- marriage_messy[19:28, ]
laborforce_data <- marriage_messy[30:33, ]
males_data_long <- pivot_longer(males_data, cols = c(Now_Married,Widowed, Divorced, Seperated, Never_married),
names_to = "Marital_Status", values_to = "Percentage")
females_data_long <- pivot_longer(females_data,cols = c(Now_Married,Widowed, Divorced, Seperated, Never_married),
names_to = "Marital_Status", values_to = "Percentage")
race_data_long <- pivot_longer(race_data, cols = c(Now_Married,Widowed, Divorced, Seperated, Never_married),
names_to = "Marital_Status", values_to = "Percentage")
laborforce_data_long <- pivot_longer(laborforce_data, cols = c(Now_Married,Widowed, Divorced, Seperated, Never_married),
names_to = "Marital_Status", values_to = "Percentage")
## Males data
males_data_long$Percentage <- as.numeric(gsub("%", "", males_data_long$Percentage))
Percentages_male <- males_data_long %>%
group_by(Demographics, Marital_Status) %>%
summarize(Percentages_male = max(Percentage, na.rm = TRUE), .groups = 'drop')
Males_percentage_rows <- Percentages_male %>%
group_by(Demographics) %>%
filter(Percentages_male == max(Percentages_male ))
print(Males_percentage_rows)
## # A tibble: 6 × 3
## # Groups: Demographics [6]
## Demographics Marital_Status Percentages_male
## <chr> <chr> <dbl>
## 1 15 to 19 years Never_married 98.9
## 2 20 to 34 years Never_married 71.3
## 3 35 to 44 years Now_Married 60
## 4 45 to 54 years Now_Married 66
## 5 55 to 64 years Now_Married 65.9
## 6 65 years and over Now_Married 68.5
## Plot
ggplot(males_data_long, aes(x = Demographics, y = Percentage, fill = Marital_Status)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Marital Status Among Males by Age Group",
x = "Age Group",
y = "Percentage") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_manual(values = c("Now_Married"="purple","Widowed" = "blue", "Divorced" = "red",
"Seperated" = "green", "Never_married" = "orange"),name = "Marital Status")
## Females data
females_data_long$Percentage <- as.numeric(gsub("%", "", females_data_long$Percentage))
Percentages_female <- females_data_long %>%
group_by(Demographics, Marital_Status) %>%
summarize(Percentages_female = max(Percentage, na.rm = TRUE), .groups = 'drop')
females_percentage_rows <- Percentages_female %>%
group_by(Demographics) %>%
filter(Percentages_female == max(Percentages_female))
print(females_percentage_rows)
## # A tibble: 6 × 3
## # Groups: Demographics [6]
## Demographics Marital_Status Percentages_female
## <chr> <chr> <dbl>
## 1 15 to 19 years Never_married 98.7
## 2 20 to 34 years Never_married 63.5
## 3 35 to 44 years Now_Married 61.4
## 4 45 to 54 years Now_Married 63
## 5 55 to 64 years Now_Married 60.2
## 6 65 years and over Now_Married 46.1
## Plot
ggplot(females_data_long, aes(x = Demographics, y = Percentage, fill = Marital_Status)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Marital Status Among Females by Age Group",
x = "Age Group",
y = "Percentage") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_manual(values = c("Now_Married"="lightblue","Widowed" = "black", "Divorced" = "maroon",
"Seperated" = "darkgreen", "Never_married" = "darkred"),name = "Marital Status")
race_data_long$Percentage <- as.numeric(gsub("%", "", race_data_long$Percentage))
Percentages_race <- race_data_long %>%
group_by(Demographics, Marital_Status) %>%
summarize(Percentages_race = max(Percentage, na.rm = TRUE), .groups = 'drop')
race_percentage_rows <- Percentages_race %>%
group_by(Demographics) %>%
filter(Percentages_race == max(Percentages_race))
print(race_percentage_rows)
## # A tibble: 11 × 3
## # Groups: Demographics [10]
## Demographics Marital_Status Percentages_race
## <chr> <chr> <dbl>
## 1 Hispanic or Latino origin (of any ra… Never_married 43.2
## 2 One race Now_Married 48.7
## 3 Two or more races Never_married 42.4
## 4 Two or more races Now_Married 42.4
## 5 White alone, not Hispanic or Latino Now_Married 52.6
## 6 American Indian and Alaska Native Never_married 45.3
## 7 Asian Now_Married 58.2
## 8 Black or African American Never_married 49.8
## 9 Native Hawaiian and Other Pacifi… Now_Married 43.2
## 10 Some other race Never_married 43.4
## 11 White Now_Married 52.1
laborforce_data_long$Percentage <- as.numeric(gsub("%", "", laborforce_data_long$Percentage))
Percentages_laborforce <- laborforce_data_long %>%
group_by(Demographics, Marital_Status) %>%
summarize(Percentages_laborforce = max(Percentage, na.rm = TRUE), .groups = 'drop')
laborforce_percentage_rows <- Percentages_laborforce %>%
group_by(Demographics) %>%
filter(Percentages_laborforce == max(Percentages_laborforce))
print(laborforce_percentage_rows)
## # A tibble: 3 × 3
## # Groups: Demographics [3]
## Demographics Marital_Status Percentages_laborforce
## <chr> <chr> <dbl>
## 1 Females 16 years and over Now_Married 47
## 2 Males 16 years and over Now_Married 50.6
## 3 In labor force Now_Married 53.2
## Plot
ggplot(laborforce_data_long, aes(x = Demographics, y = Percentage, fill = Marital_Status)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Labor Force Participation by Marital Status",
x = "Demographics",
y = "Percentage") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~Marital_Status, scales = "free_y")