remove() #Remove objects from the workspace
rm(list=ls()) #removes all objects from the current workspace (R memory)
data1 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment 5\\Mortality Data.csv")
str(data1)
## 'data.frame': 22 obs. of 7 variables:
## $ agegroup : chr "< 1 year" "1-4 years" "5-9 years" "10-14 years" ...
## $ hispanic_f: int 391 70 44 67 117 165 180 232 307 399 ...
## $ hispanic_m: int 489 103 42 82 281 528 572 540 670 738 ...
## $ black_f : int 217 42 28 18 44 77 116 173 228 293 ...
## $ black_m : int 304 50 28 24 134 238 305 307 428 413 ...
## $ white_f : int 281 61 39 47 113 170 227 354 487 621 ...
## $ white_m : int 307 88 38 72 282 441 614 734 870 960 ...
data2 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment 5\\Population Data.csv")
str(data2)
## 'data.frame': 19 obs. of 7 variables:
## $ agegroup1 : chr "< 1 year" "1-4 years" "5-9 years" "10-14 years" ...
## $ hispanic_female: int 93162 394290 500046 506509 483922 442320 436094 403262 402441 377867 ...
## $ hispanic_male : int 97001 411497 517487 527397 498493 468737 469751 441109 424263 384500 ...
## $ black_female : int 23409 99335 124359 129919 127690 131892 152464 139251 136082 120633 ...
## $ black_male : int 24171 102055 127882 134510 133870 138407 152817 131915 126652 110053 ...
## $ white_female : int 60417 263329 328060 334407 333213 340081 396599 401837 393314 357927 ...
## $ white_male : int 63139 276548 343865 350939 355229 365945 411770 409305 400772 362238 ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data1 <- data1 %>%
mutate(agegroup = as.character(agegroup)) # Ensure 'agegroup' is a character column
# Combining the last four rows
combined_row <- data1 %>%
slice((n()-3):n()) %>% # Select the last 4 rows
summarise(across(where(is.numeric), sum)) %>% # Sum only the numeric columns
mutate(agegroup = "85+ years") # Label the new group as "85+"
# Removing the last four rows and appending the new combined row
data1a <- data1 %>%
slice(1:(n()-4)) %>%
bind_rows(combined_row)
print(data1a)
## agegroup hispanic_f hispanic_m black_f black_m white_f white_m
## 1 < 1 year 391 489 217 304 281 307
## 2 1-4 years 70 103 42 50 61 88
## 3 5-9 years 44 42 28 28 39 38
## 4 10-14 years 67 82 18 24 47 72
## 5 15-19 years 117 281 44 134 113 282
## 6 20-24 years 165 528 77 238 170 441
## 7 25-29 years 180 572 116 305 227 614
## 8 30-34 years 232 540 173 307 354 734
## 9 35-39 years 307 670 228 428 487 870
## 10 40-44 years 399 738 293 413 621 960
## 11 45-49 years 537 1085 413 584 1030 1586
## 12 50-54 years 913 1513 581 838 1551 2460
## 13 55-59 years 1128 2074 884 1344 2906 4404
## 14 60-64 years 1379 2325 1187 1713 3739 6067
## 15 65-69 years 1664 2372 1267 1683 4644 6946
## 16 70-74 years 1923 2494 1236 1380 6083 8066
## 17 75-79 years 1987 2184 1166 1204 7112 8566
## 18 80-84 years 2380 2256 1250 961 8454 8688
## 19 85+ years 8769 5288 2728 1334 23921 15385
library(dplyr)
datafinal <- left_join(data1a, data2, by = c("agegroup" = "agegroup1"))
print(datafinal)
## agegroup hispanic_f hispanic_m black_f black_m white_f white_m
## 1 < 1 year 391 489 217 304 281 307
## 2 1-4 years 70 103 42 50 61 88
## 3 5-9 years 44 42 28 28 39 38
## 4 10-14 years 67 82 18 24 47 72
## 5 15-19 years 117 281 44 134 113 282
## 6 20-24 years 165 528 77 238 170 441
## 7 25-29 years 180 572 116 305 227 614
## 8 30-34 years 232 540 173 307 354 734
## 9 35-39 years 307 670 228 428 487 870
## 10 40-44 years 399 738 293 413 621 960
## 11 45-49 years 537 1085 413 584 1030 1586
## 12 50-54 years 913 1513 581 838 1551 2460
## 13 55-59 years 1128 2074 884 1344 2906 4404
## 14 60-64 years 1379 2325 1187 1713 3739 6067
## 15 65-69 years 1664 2372 1267 1683 4644 6946
## 16 70-74 years 1923 2494 1236 1380 6083 8066
## 17 75-79 years 1987 2184 1166 1204 7112 8566
## 18 80-84 years 2380 2256 1250 961 8454 8688
## 19 85+ years 8769 5288 2728 1334 23921 15385
## hispanic_female hispanic_male black_female black_male white_female
## 1 93162 97001 23409 24171 60417
## 2 394290 411497 99335 102055 263329
## 3 500046 517487 124359 127882 328060
## 4 506509 527397 129919 134510 334407
## 5 483922 498493 127690 133870 333213
## 6 442320 468737 131892 138407 340081
## 7 436094 469751 152464 152817 396599
## 8 403262 441109 139251 131915 401837
## 9 402441 424263 136082 126652 393314
## 10 377867 384500 120633 110053 357927
## 11 351784 346614 120551 109636 392050
## 12 298755 297548 113276 102376 396034
## 13 259589 253427 112103 98464 456356
## 14 210596 195428 97202 82216 440572
## 15 161077 141051 74198 57361 379659
## 16 117165 97476 49490 36422 312437
## 17 77517 58665 32033 21267 221710
## 18 55056 37619 20859 12199 152203
## 19 53650 30738 21441 9576 171669
## white_male
## 1 63139
## 2 276548
## 3 343865
## 4 350939
## 5 355229
## 6 365945
## 7 411770
## 8 409305
## 9 400772
## 10 362238
## 11 397515
## 12 396364
## 13 444492
## 14 417127
## 15 348195
## 16 280285
## 17 186821
## 18 114515
## 19 99244
library(dplyr)
datafinal1 <- datafinal %>%
mutate(total_deaths <- sum(hispanic_f+hispanic_m+black_f+black_m+white_f+white_m),
total_deaths_male <- sum(hispanic_m+black_m+white_m),
total_deaths_female <- sum(hispanic_f+black_f+white_f),totalpop <- sum(hispanic_female+hispanic_male+black_female+black_male+white_female+white_male),
totalpop_male <- sum(hispanic_male+black_male+white_male),
totalpop_female <- sum(hispanic_female+black_female+white_female))
print(datafinal1)
# Total deaths (sum of deaths across all age groups)
total_deaths <- sum(datafinal1$hispanic_f + datafinal1$hispanic_m + datafinal1$black_f + datafinal1$black_m+datafinal1$white_f+datafinal1$white_m)
total_deaths_male <- sum(datafinal1$hispanic_m + datafinal1$black_m+datafinal1$white_m)
total_deaths_female <- sum(datafinal1$hispanic_f + datafinal1$black_f+datafinal1$white_f)
# Total population (sum of population across all age groups)
total_population <- sum(datafinal1$hispanic_female + datafinal1$hispanic_male + datafinal1$black_female + datafinal1$black_male+datafinal1$white_female+datafinal1$white_male)
total_population_male <- sum(datafinal1$hispanic_male + datafinal1$black_male+datafinal1$white_male)
total_population_female <- sum(datafinal1$hispanic_female + datafinal1$black_female+datafinal1$white_female)
# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total <- (total_deaths / total_population) * 100000
cdr_male <- (total_deaths_male / total_population_male) * 100000
cdr_female <- (total_deaths_female / total_population_female) * 100000
# Output CDRs
print(paste("Total CDR: ", round(cdr_total, 2), "per 100,000"))
## [1] "Total CDR: 747.36 per 100,000"
print(paste("Male CDR: ", round(cdr_male, 2), "per 100,000"))
## [1] "Male CDR: 785.13 per 100,000"
print(paste("Female CDR: ", round(cdr_female, 2), "per 100,000"))
## [1] "Female CDR: 710 per 100,000"
# Total deaths (sum of deaths across all age groups)
total_deaths_hispan <- sum(datafinal1$hispanic_f + datafinal1$hispanic_m)
total_deaths_malehispan <- sum(datafinal1$hispanic_m)
total_deaths_femalehispan <- sum(datafinal1$hispanic_f)
# Total population (sum of population across all age groups)
total_population_hispan <- sum(datafinal1$hispanic_female + datafinal1$hispanic_male)
total_population_malehispan <- sum(datafinal1$hispanic_male)
total_population_femalehispan <- sum(datafinal1$hispanic_female)
# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total_hispan <- (total_deaths_hispan / total_population_hispan) * 100000
cdr_male_hispan <- (total_deaths_malehispan / total_population_malehispan) * 100000
cdr_female_hispan <- (total_deaths_femalehispan / total_population_femalehispan) * 100000
# Output CDRs
print(paste("Total CDR_hispan: ", round(cdr_total_hispan, 2), "per 100,000"))
## [1] "Total CDR_hispan: 426.43 per 100,000"
print(paste("Male CDR_hispan: ", round(cdr_male_hispan, 2), "per 100,000"))
## [1] "Male CDR_hispan: 449.85 per 100,000"
print(paste("Female CDR_hispan: ", round(cdr_female_hispan, 2), "per 100,000"))
## [1] "Female CDR_hispan: 402.69 per 100,000"
# Total deaths (sum of deaths across all age groups)
total_deaths_black <- sum(datafinal1$black_f + datafinal1$black_m)
total_deaths_maleblack <- sum(datafinal1$black_m)
total_deaths_femaleblack <- sum(datafinal1$black_f)
# Total population (sum of population across all age groups)
total_population_black <- sum(datafinal1$black_female + datafinal1$black_male)
total_population_maleblack <- sum(datafinal1$black_male)
total_population_femaleblack <- sum(datafinal1$black_female)
# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total_black <- (total_deaths_black / total_population_black) * 100000
cdr_male_black <- (total_deaths_maleblack / total_population_maleblack) * 100000
cdr_female_black <- (total_deaths_femaleblack / total_population_femaleblack) * 100000
# Output CDRs
print(paste("Total CDR_black: ", round(cdr_total_black, 2), "per 100,000"))
## [1] "Total CDR_black: 712.82 per 100,000"
print(paste("Male CDR_black: ", round(cdr_male_black, 2), "per 100,000"))
## [1] "Male CDR_black: 775.3 per 100,000"
print(paste("Female CDR_black: ", round(cdr_female_black, 2), "per 100,000"))
## [1] "Female CDR_black: 654.26 per 100,000"
# Total deaths (sum of deaths across all age groups)
total_deaths_white <- sum(datafinal1$white_f + datafinal1$white_m)
total_deaths_malewhite <- sum(datafinal1$white_m)
total_deaths_femalewhite <- sum(datafinal1$white_f)
# Total population (sum of population across all age groups)
total_population_white <- sum(datafinal1$white_female + datafinal1$white_male)
total_population_malewhite <- sum(datafinal1$white_male)
total_population_femalewhite <- sum(datafinal1$white_female)
# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total_white <- (total_deaths_white / total_population_white) * 100000
cdr_male_white <- (total_deaths_malewhite / total_population_malewhite) * 100000
cdr_female_white <- (total_deaths_femalewhite / total_population_femalewhite) * 100000
# Output CDRs
print(paste("Total CDR_white: ", round(cdr_total_white, 2), "per 100,000"))
## [1] "Total CDR_white: 1056.37 per 100,000"
print(paste("Male CDR_white: ", round(cdr_male_white, 2), "per 100,000"))
## [1] "Male CDR_white: 1105.09 per 100,000"
print(paste("Female CDR_white: ", round(cdr_female_white, 2), "per 100,000"))
## [1] "Female CDR_white: 1008.5 per 100,000"
# Load necessary libraries
library(ggplot2)
# Create a sample data frame for the crude death rates
data <- data.frame(
group = c("White Male", "Black Male", "Hispanic Male", "White Female",
"Black Female", "Hispanic Female", "White CDR",
"Black CDR", "Hispanic CDR", "Male CDR", "Female CDR", "Total CDR"),
cdr = c(1105, 775, 450, 1009, 654, 403, 1056, 713, 426, 785, 710, 747) # Example CDR values
)
# Create the plot using ggplot2
ggplot(data, aes(x = cdr, y = reorder(group, cdr), fill = group)) + # Reorder bars by CDR
geom_bar(stat = "identity", width = 0.5) + # Make bars thinner by adjusting width
geom_text(aes(label = cdr), hjust = -0.1, size = 3) + # Add CDR numbers with smaller size
labs(title = "Crude Death Rates by Sex & Race/Ethnicity, Texas 2018",
subtitle = "Data: Centers for Disease Control, 1999-2018",
x = "Crude Death Rate (per 100,000 population)",
y = "") + # Customize title and axis labels
theme_minimal() + # Use a clean theme
theme(legend.position = "none") + # Remove legend
coord_cartesian(xlim = c(0, 1200)) # Adjust x-axis limits to match your data
# Load necessary libraries
library(ggplot2)
# Create a sample data frame for the crude death rates, including total CDR
data <- data.frame(
group = c("White", "White", "White",
"Black", "Black", "Black",
"Hispanic", "Hispanic", "Hispanic",
"Total Population", "Total Population", "Total Population"), # Adding total population
subgroup = c("Male", "Female", "Total",
"Male", "Female", "Total",
"Male", "Female", "Total",
"Male", "Female", "Total"), # New male, female, total categories
cdr = c(1105, 1009, 1056,
775, 654, 713,
450, 403, 426,
785, 710, 747) # Example CDR values including totals
)
# Ensure "White" group appears first, then "Total"
data$group <- factor(data$group, levels = c("White", "Black", "Hispanic", "Total Population"))
# Define a custom color palette for Male, Female, and Total
custom_colors <- c("Male" = "#377eb8", "Female" = "#4daf4a", "Total" = "#ff7f00")
# Create the plot using ggplot2
ggplot(data, aes(x = group, y = cdr, fill = subgroup)) + # Group by 'group' (race), fill by 'subgroup' (male/female/total)
geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.4) + # Thinner bars with dodged subgroups
geom_text(aes(label = cdr), position = position_dodge(width = 0.7), vjust = -0.5, size = 3) + # Add CDR numbers on top of bars
scale_fill_manual(values = custom_colors, name = "Sex/Category") + # Apply custom colors and set legend for Male/Female/Total
labs(title = "Crude Death Rates by Sex & Race/Ethnicity, Texas 2018",
subtitle = "Data: Centers for Disease Control, 1999-2018",
x = "",
y = "Crude Death Rate (per 100,000 population)") + # Customize title and axis labels
theme_minimal() + # Use a clean theme
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) + # Keep x-axis labels straight
theme(legend.position = "top") + # Place legend at the top
theme(plot.subtitle = element_text(size = 8), # Reduce the font size of the subtitle (Data source)
axis.title.y = element_text(size = 10), # Adjust axis title font size if needed
plot.title = element_text(size = 14)) + # Keep title size larger
coord_cartesian(ylim = c(0, 1200)) # Adjust y-axis limits to fit the bars
### ASDR
datafinal2 <- datafinal1 %>%
mutate(
hispanic_f_rate = (hispanic_f / hispanic_female) * 100000,
hispanic_m_rate = (hispanic_m / hispanic_male) * 100000,
black_f_rate = (black_f / black_female) * 100000,
black_m_rate = (black_m / black_male)*100000,
white_f_rate = (white_f / white_female) * 100000,
white_m_rate = (white_m / white_male) * 100000) %>%
mutate(total_rate_h = (hispanic_f_rate + hispanic_m_rate) / 2,total_rate_b=(black_f_rate + black_m_rate) / 2,total_rate_w = (white_f_rate + white_m_rate) / 2)%>%
mutate(
hispanic_mf_ratio = hispanic_m_rate / hispanic_f_rate,black_mf_ratio = black_m_rate / black_f_rate,white_mf_ratio = white_m_rate / white_f_rate)
datafinal2 %>%
select(agegroup, hispanic_f_rate, hispanic_m_rate,black_f_rate,black_m_rate,white_m_rate,white_m_rate,total_rate_h,total_rate_b,total_rate_w,hispanic_mf_ratio,black_mf_ratio,white_mf_ratio)
## agegroup hispanic_f_rate hispanic_m_rate black_f_rate black_m_rate
## 1 < 1 year 419.69902 504.118514 926.99389 1257.70551
## 2 1-4 years 17.75343 25.030559 42.28117 48.99319
## 3 5-9 years 8.79919 8.116146 22.51546 21.89518
## 4 10-14 years 13.22780 15.548060 13.85479 17.84254
## 5 15-19 years 24.17745 56.369899 34.45845 100.09711
## 6 20-24 years 37.30331 112.643124 58.38110 171.95662
## 7 25-29 years 41.27550 121.766638 76.08353 199.58512
## 8 30-34 years 57.53084 122.418722 124.23609 232.72562
## 9 35-39 years 76.28447 157.920912 167.54604 337.93387
## 10 40-44 years 105.59271 191.937581 242.88545 375.27373
## 11 45-49 years 152.65049 313.028325 342.59359 532.67175
## 12 50-54 years 305.60158 508.489387 512.90653 818.55122
## 13 55-59 years 434.53305 818.381625 788.56052 1364.96588
## 14 60-64 years 654.80826 1189.696461 1221.16829 2083.53605
## 15 65-69 years 1033.04631 1681.661243 1707.59320 2934.04927
## 16 70-74 years 1641.27512 2558.578522 2497.47424 3788.91879
## 17 75-79 years 2563.30869 3722.833035 3639.99625 5661.35327
## 18 80-84 years 4322.87126 5996.969616 5992.61710 7877.69489
## 19 85+ years 16344.82759 17203.461513 12723.28716 13930.65998
## white_m_rate total_rate_h total_rate_b total_rate_w hispanic_mf_ratio
## 1 486.22880 461.908767 1092.34970 475.66484 1.2011429
## 2 31.82088 21.391995 45.63718 27.49291 1.4098999
## 3 11.05085 8.457668 22.20532 11.46946 0.9223742
## 4 20.51639 14.387930 15.84866 17.28556 1.1754078
## 5 79.38541 40.273674 67.27778 56.64883 2.3315072
## 6 120.50991 74.973217 115.16886 85.24900 3.0196549
## 7 149.11237 81.521071 137.83433 103.17451 2.9500945
## 8 179.32837 89.974779 178.48086 133.71190 2.1278801
## 9 217.08103 117.102693 252.73995 170.45034 2.0701580
## 10 265.01913 148.765145 309.07959 219.25910 1.8177162
## 11 398.97865 232.839408 437.63267 330.85012 2.0506212
## 12 620.64163 407.045483 665.72888 506.13734 1.6638965
## 13 990.79399 626.457338 1076.76320 813.78877 1.8833588
## 14 1454.47310 922.252360 1652.35217 1151.57128 1.8168623
## 15 1994.85920 1357.353775 2320.82123 1609.03106 1.6278663
## 16 2877.78511 2099.926823 3143.19651 2412.36881 1.5588968
## 17 4585.13765 3143.070864 4650.67476 3896.46581 1.4523545
## 18 7586.77902 5159.920437 6935.15599 6570.60153 1.3872654
## 19 15502.19661 16774.144550 13326.97357 14718.28516 1.0525325
## black_mf_ratio white_mf_ratio
## 1 1.3567571 1.0454265
## 2 1.1587473 1.3736655
## 3 0.9724512 0.9295747
## 4 1.2878249 1.4597496
## 5 2.9048636 2.3409072
## 6 2.9454159 2.4107724
## 7 2.6232368 2.6051901
## 8 1.8732529 2.0356151
## 9 2.0169612 1.7532035
## 10 1.5450647 1.5274960
## 11 1.5548211 1.5186367
## 12 1.5959072 1.5847530
## 13 1.7309589 1.5559352
## 14 1.7061826 1.7138276
## 15 1.7182367 1.6308489
## 16 1.5171002 1.4780972
## 17 1.5553184 1.4293741
## 18 1.3145667 1.3658984
## 19 1.0948947 1.1125148
library(ggplot2)
library(tidyr)
library(dplyr)
datafinal2$agegroup <- factor(datafinal2$agegroup, levels = c(
"< 1 year", "1-4 years", "5-9 years", "10-14 years", "15-19 years",
"20-24 years", "25-29 years", "30-34 years", "35-39 years", "40-44 years",
"45-49 years", "50-54 years", "55-59 years", "60-64 years", "65-69 years",
"70-74 years", "75-79 years", "80-84 years", "85+ years"
))
# Calculate Total ASDR for each age group as the average of male and female ASDR
datafinal_hisp <- datafinal2 %>%
mutate(
total_rate1 = (hispanic_f + hispanic_m) / (hispanic_female + hispanic_male) * 1000,
hispanic_f_rate = (hispanic_f / hispanic_female) * 1000,
hispanic_m_rate = (hispanic_m / hispanic_male) * 1000
)
# Remove any rows where agegroup is NA
datafinal_hisp <- datafinal_hisp %>%
filter(!is.na(agegroup)) # Filter out rows with NA in agegroup
# Reshape data to long format for plotting male and female ASDR as bars
data_long <- datafinal_hisp %>%
select(agegroup, hispanic_f_rate, hispanic_m_rate) %>%
pivot_longer(cols = c(hispanic_f_rate, hispanic_m_rate),
names_to = "Gender",
values_to = "ASDR") %>%
mutate(Gender = ifelse(Gender == "hispanic_f_rate", "Hispanic Female ASDR", "Hispanic Male ASDR"))
# Create the plot
ggplot() +
# Bars for male and female ASDR
geom_bar(data = data_long, aes(x = agegroup, y = ASDR, fill = Gender),
stat = "identity", position = "dodge", width = 0.6) +
# Line for total ASDR
geom_line(data = datafinal_hisp, aes(x = agegroup, y = total_rate1, group = 1, color = "Total ASDR by Age Group"), size = 1) +
# Logarithmic Y-axis with custom breaks
scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c(0.1, 1, 10, 100, 1000)) +
# Custom colors for the bars and line
scale_fill_manual(values = c("Hispanic Female ASDR" = "orange", "Hispanic Male ASDR" = "gray")) +
scale_color_manual(values = c("Total ASDR by Age Group" = "blue")) +
# Custom labels
labs(
title = "Hispanic Age-Specific Death Rates by Gender, Texas 2018",
x = "Age Group",y = NULL,
fill = "",
color = ""
) +
# Clean theme with grid lines and minimalistic style
theme_minimal() +
# Adjustments to match the style
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom", # Place the legend at the bottom
legend.title = element_blank() # Remove legend title
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
library(ggplot2)
library(tidyr)
library(dplyr)
# Calculate Total ASDR for each age group as the average of male and female ASDR
datafinal_black <- datafinal2 %>%
mutate(
total_rate2 = (black_f + black_m) / (black_female + black_male) * 1000,
black_f_rate = (black_f / black_female) * 1000,
black_m_rate = (black_m / black_male) * 1000
)
datafinal_black <- datafinal_black%>%
filter(!is.na(agegroup))
# Reshape data to long format for plotting male and female ASDR as bars
data_long1 <- datafinal_black %>%
select(agegroup, black_f_rate, black_m_rate) %>%
pivot_longer(cols = c(black_f_rate, black_m_rate),
names_to = "Gender",
values_to = "ASDR") %>%
mutate(Gender = ifelse(Gender == "black_f_rate", "Black Female ASDR", "Black Male ASDR"))
# Create the plot
ggplot() +
# Bars for male and female ASDR
geom_bar(data = data_long1, aes(x = agegroup, y = ASDR, fill = Gender),
stat = "identity", position = "dodge", width = 0.6) +
# Line for total ASDR
geom_line(data = datafinal_black, aes(x = agegroup, y = total_rate2, group = 1, color = "Total ASDR by Age Group"), size = 1) +
# Logarithmic Y-axis with custom breaks
scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c(0.1, 1, 10, 100, 1000)) +
# Custom colors for the bars and line
scale_fill_manual(values = c("Black Female ASDR" = "orange", "Black Male ASDR" = "gray")) +
scale_color_manual(values = c("Total ASDR by Age Group" = "blue")) +
# Custom labels
labs(
title = "Black Age-Specific Death Rates by Gender, Texas 2018",
x = "Age Group",y = NULL,
fill = "",
color = ""
) +
# Clean theme with grid lines and minimalistic style
theme_minimal() +
# Adjustments to match the style
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom", # Place the legend at the bottom
legend.title = element_blank() # Remove legend title
)
library(ggplot2)
library(tidyr)
library(dplyr)
# Calculate Total ASDR for each age group as the average of male and female ASDR
datafinal_white <- datafinal2 %>%
mutate(
total_rate3 = (white_f + white_m) / (white_female + white_male) * 1000,
white_f_rate = (white_f / white_female) * 1000,
white_m_rate = (white_m / white_male) * 1000
)
datafinal_white <- datafinal_white%>%
filter(!is.na(agegroup))
# Reshape data to long format for plotting male and female ASDR as bars
data_long2 <- datafinal_white %>%
select(agegroup, white_f_rate, white_m_rate) %>%
pivot_longer(cols = c(white_f_rate, white_m_rate),
names_to = "Gender",
values_to = "ASDR") %>%
mutate(Gender = ifelse(Gender == "white_f_rate", "White Female ASDR", "White Male ASDR"))
# Create the plot
ggplot() +
# Bars for male and female ASDR
geom_bar(data = data_long2, aes(x = agegroup, y = ASDR, fill = Gender),
stat = "identity", position = "dodge", width = 0.6) +
# Line for total ASDR
geom_line(data = datafinal_white, aes(x = agegroup, y = total_rate3, group = 1, color = "Total ASDR by Age Group"), size = 1) +
# Logarithmic Y-axis with custom breaks
scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c(0.1, 1, 10, 100, 1000)) +
# Custom colors for the bars and line
scale_fill_manual(values = c("White Female ASDR" = "orange", "White Male ASDR" = "gray")) +
scale_color_manual(values = c("Total ASDR by Age Group" = "blue")) +
# Custom labels
labs(
title = "White Age-Specific Death Rates by Gender, Texas 2018",
x = "Age Group",y = NULL,
fill = "",
color = ""
) +
# Clean theme with grid lines and minimalistic style
theme_minimal() +
# Adjustments to match the style
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom", # Place the legend at the bottom
legend.title = element_blank() # Remove legend title
)
library(ggplot2)
library(tidyr)
library(dplyr)
# Ensure data is clean and free of NA values in 'agegroup'
datafinal2 <- datafinal2 %>%
filter(!is.na(agegroup))
# Reshape the data to long format for easier plotting
data_long_ratio <- datafinal2 %>%
select(agegroup, hispanic_mf_ratio, black_mf_ratio, white_mf_ratio) %>%
pivot_longer(cols = c(hispanic_mf_ratio, black_mf_ratio, white_mf_ratio),
names_to = "Ethnic_Group",
values_to = "Ratio") %>%
# Rename ethnic groups in the legend
mutate(Ethnic_Group = recode(Ethnic_Group,
"hispanic_mf_ratio" = "Hispanic",
"black_mf_ratio" = "Black",
"white_mf_ratio" = "White"))
# Create the plot
ggplot(data_long_ratio, aes(x = agegroup, y = Ratio, color = Ethnic_Group, group = Ethnic_Group)) +
geom_line(size = 1) + # Line plot
geom_point(size = 2) + # Add points to highlight the exact ratio at each age group
scale_y_continuous(labels = scales::number_format(accuracy = 0.01)) + # Adjust Y axis format
labs(
title = "Male-to-Female Age-Specific Death Rate Ratios by Race/Ethnicity",
x = "Age Group",
y = "Male-to-Female Death Rate Ratio",
color = "Race/Ethnic Group" # Updated legend title
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels for better readability
library(ggplot2)
library(tidyr)
library(dplyr)
# Reshape the data to long format for easier plotting
data_long_ratio1 <- datafinal2 %>%
select(agegroup, hispanic_mf_ratio, black_mf_ratio, white_mf_ratio) %>%
pivot_longer(cols = c(hispanic_mf_ratio, black_mf_ratio, white_mf_ratio),
names_to = "Ethnic_Group",
values_to = "Ratio") %>%
# Update the names for the ethnic groups
mutate(Ethnic_Group = recode(Ethnic_Group,
"hispanic_mf_ratio" = "Hispanic",
"black_mf_ratio" = "Black",
"white_mf_ratio" = "White"))
# Create the plot with less space between x-axis labels and the x-axis
ggplot(data_long_ratio1, aes(x = agegroup, y = Ratio, fill = Ethnic_Group)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.6) + # Adjust dodge width and bar width
labs(
title = "Male-to-Female Age-Specific Death Rate Ratios by Race/Ethnicity",
x = NULL,
y = "Male-to-Female Death Rate Ratio"
) +
scale_fill_manual(values = c("Hispanic" = "green", "Black" = "red", "White" = "blue")) + # Custom colors
scale_y_continuous(labels = scales::number_format(accuracy = 0.01)) + # Y-axis label format
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.2), # Rotate x-axis labels and lessen the gap with vjust
panel.grid.major.x = element_blank(), # Remove grid lines between age groups
legend.title = element_blank() # Remove legend title
)
### ASDR
datafinal2 <- datafinal2 %>%
mutate(total_rate = (hispanic_f_rate + hispanic_m_rate) / 2)
# Load necessary libraries
library(ggplot2)
library(tidyr)
library(dplyr)
# Reshape the data to long format using pivot_longer()
data_longa <- datafinal2 %>%
pivot_longer(cols = c(hispanic_f_rate, hispanic_m_rate),
names_to = "Gender",
values_to = "ASDR") %>%
mutate(Gender = ifelse(Gender == "hispanic_f_rate", "Female", "Male"))
# Create the plot
ggplot() +
# Bars for male and female ASDR
geom_bar(data = data_longa, aes(x = agegroup, y = ASDR, fill = Gender),
stat = "identity", position = "dodge", width = 0.7) +
# Line for total ASDR
geom_line(data = datafinal2, aes(x = agegroup, y = total_rate, group = 1, color = "Total"), size = 1) +
# Logarithmic Y-axis
scale_y_log10() +
# Labels for the plot
labs(
title = "Hispanic Age-Specific Death Rates by Gender, Texas 2018",
x = "Age Group",
y = "ASDR (per 100,000 population)"
) +
# Custom colors for bars
scale_fill_manual(values = c("Female" = "orange", "Male" = "purple")) +
# Ensure the total ASDR line is blue
scale_color_manual(values = c("Total" = "red")) +
# Clean theme
theme_minimal() +
# Rotate x-axis labels for better readability
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# Remove legend title and adjust the position
theme(
legend.position = "bottom",
legend.title = element_blank()
)
datafinal2 <- datafinal2 %>%
mutate(total_rate1 = (black_f_rate + black_m_rate) / 2)
# Load necessary libraries
library(ggplot2)
library(tidyr)
library(dplyr)
# Reshape the data to long format using pivot_longer()
data_longb <- datafinal2 %>%
pivot_longer(cols = c(black_f_rate, black_m_rate),
names_to = "Gender",
values_to = "ASDR") %>%
mutate(Gender = ifelse(Gender == "black_f_rate", "Female", "Male"))
# Create the plot
ggplot() +
# Bars for male and female ASDR
geom_bar(data = data_longb, aes(x = agegroup, y = ASDR, fill = Gender),
stat = "identity", position = "dodge", width = 0.7) +
# Line for total ASDR
geom_line(data = datafinal2, aes(x = agegroup, y = total_rate1, group = 1, color = "Total"), size = 1) +
# Logarithmic Y-axis
scale_y_log10() +
# Labels for the plot
labs(
title = "Black Age-Specific Death Rates by Gender, Texas 2018",
x = "Age Group",
y = "ASDR (per 100,000 population)"
) +
# Clean theme
theme_minimal() +
# Rotate x-axis labels for better readability
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# Ensure the total ASDR line is blue
scale_color_manual(values = c("Total" = "blue")) +
# Remove legend title and adjust the position
theme(
legend.position = "bottom",
legend.title = element_blank()
)
datafinal2 <- datafinal2 %>%
mutate(total_rate2 = (white_f_rate + white_m_rate) / 2)
# Load necessary libraries
library(ggplot2)
library(tidyr)
library(dplyr)
# Reshape the data to long format using pivot_longer()
data_longc <- datafinal2 %>%
pivot_longer(cols = c(white_f_rate, white_m_rate),
names_to = "Gender",
values_to = "ASDR") %>%
mutate(Gender = ifelse(Gender == "white_f_rate", "Female", "Male"))
# Create the plot
ggplot() +
# Bars for male and female ASDR
geom_bar(data = data_longc, aes(x = agegroup, y = ASDR, fill = Gender),
stat = "identity", position = "dodge", width = 0.7) +
# Line for total ASDR
geom_line(data = datafinal2, aes(x = agegroup, y = total_rate2, group = 1, color = "Total"), size = 1) +
# Logarithmic Y-axis
scale_y_log10() +
# Labels for the plot
labs(
title = "White Age-Specific Death Rates by Gender, Texas 2018",
x = "Age Group",
y = "ASDR (per 100,000 population)"
) +
# Custom colors for bars based on the provided colors
scale_fill_manual(values = c("Female" = "#4e773f", # Green
"Male" = "#002f56")) + # Dark Blue
# Ensure the total ASDR line is blue
scale_color_manual(values = c("Total" = "red")) +
# Clean theme
theme_minimal() +
# Rotate x-axis labels for better readability
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# Remove legend title and adjust the position
theme(
legend.position = "bottom",
legend.title = element_blank()
)