remove()  #Remove objects from the workspace
 rm(list=ls())  #removes all objects from the current workspace (R memory)
data1 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment 5\\Mortality Data.csv")
str(data1)
## 'data.frame':    22 obs. of  7 variables:
##  $ agegroup  : chr  "< 1 year" "1-4 years" "5-9 years" "10-14 years" ...
##  $ hispanic_f: int  391 70 44 67 117 165 180 232 307 399 ...
##  $ hispanic_m: int  489 103 42 82 281 528 572 540 670 738 ...
##  $ black_f   : int  217 42 28 18 44 77 116 173 228 293 ...
##  $ black_m   : int  304 50 28 24 134 238 305 307 428 413 ...
##  $ white_f   : int  281 61 39 47 113 170 227 354 487 621 ...
##  $ white_m   : int  307 88 38 72 282 441 614 734 870 960 ...
data2 <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\DEM\\Assignment 5\\Population Data.csv")
str(data2)
## 'data.frame':    19 obs. of  7 variables:
##  $ agegroup1      : chr  "< 1 year" "1-4 years" "5-9 years" "10-14 years" ...
##  $ hispanic_female: int  93162 394290 500046 506509 483922 442320 436094 403262 402441 377867 ...
##  $ hispanic_male  : int  97001 411497 517487 527397 498493 468737 469751 441109 424263 384500 ...
##  $ black_female   : int  23409 99335 124359 129919 127690 131892 152464 139251 136082 120633 ...
##  $ black_male     : int  24171 102055 127882 134510 133870 138407 152817 131915 126652 110053 ...
##  $ white_female   : int  60417 263329 328060 334407 333213 340081 396599 401837 393314 357927 ...
##  $ white_male     : int  63139 276548 343865 350939 355229 365945 411770 409305 400772 362238 ...
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data1 <- data1 %>%
  mutate(agegroup = as.character(agegroup)) # Ensure 'agegroup' is a character column

# Combining the last four rows
combined_row <- data1 %>%
  slice((n()-3):n()) %>%    # Select the last 4 rows
  summarise(across(where(is.numeric), sum)) %>%  # Sum only the numeric columns
  mutate(agegroup = "85+ years")  # Label the new group as "85+"

# Removing the last four rows and appending the new combined row
data1a <- data1 %>%
  slice(1:(n()-4)) %>%  
  bind_rows(combined_row) 
print(data1a)
##        agegroup hispanic_f hispanic_m black_f black_m white_f white_m
## 1      < 1 year        391        489     217     304     281     307
## 2     1-4 years         70        103      42      50      61      88
## 3     5-9 years         44         42      28      28      39      38
## 4   10-14 years         67         82      18      24      47      72
## 5   15-19 years        117        281      44     134     113     282
## 6   20-24 years        165        528      77     238     170     441
## 7   25-29 years        180        572     116     305     227     614
## 8   30-34 years        232        540     173     307     354     734
## 9   35-39 years        307        670     228     428     487     870
## 10  40-44 years        399        738     293     413     621     960
## 11  45-49 years        537       1085     413     584    1030    1586
## 12  50-54 years        913       1513     581     838    1551    2460
## 13  55-59 years       1128       2074     884    1344    2906    4404
## 14 60-64 years        1379       2325    1187    1713    3739    6067
## 15  65-69 years       1664       2372    1267    1683    4644    6946
## 16  70-74 years       1923       2494    1236    1380    6083    8066
## 17  75-79 years       1987       2184    1166    1204    7112    8566
## 18  80-84 years       2380       2256    1250     961    8454    8688
## 19    85+ years       8769       5288    2728    1334   23921   15385
library(dplyr)
datafinal <- left_join(data1a, data2, by = c("agegroup" = "agegroup1"))
print(datafinal)
##        agegroup hispanic_f hispanic_m black_f black_m white_f white_m
## 1      < 1 year        391        489     217     304     281     307
## 2     1-4 years         70        103      42      50      61      88
## 3     5-9 years         44         42      28      28      39      38
## 4   10-14 years         67         82      18      24      47      72
## 5   15-19 years        117        281      44     134     113     282
## 6   20-24 years        165        528      77     238     170     441
## 7   25-29 years        180        572     116     305     227     614
## 8   30-34 years        232        540     173     307     354     734
## 9   35-39 years        307        670     228     428     487     870
## 10  40-44 years        399        738     293     413     621     960
## 11  45-49 years        537       1085     413     584    1030    1586
## 12  50-54 years        913       1513     581     838    1551    2460
## 13  55-59 years       1128       2074     884    1344    2906    4404
## 14 60-64 years        1379       2325    1187    1713    3739    6067
## 15  65-69 years       1664       2372    1267    1683    4644    6946
## 16  70-74 years       1923       2494    1236    1380    6083    8066
## 17  75-79 years       1987       2184    1166    1204    7112    8566
## 18  80-84 years       2380       2256    1250     961    8454    8688
## 19    85+ years       8769       5288    2728    1334   23921   15385
##    hispanic_female hispanic_male black_female black_male white_female
## 1            93162         97001        23409      24171        60417
## 2           394290        411497        99335     102055       263329
## 3           500046        517487       124359     127882       328060
## 4           506509        527397       129919     134510       334407
## 5           483922        498493       127690     133870       333213
## 6           442320        468737       131892     138407       340081
## 7           436094        469751       152464     152817       396599
## 8           403262        441109       139251     131915       401837
## 9           402441        424263       136082     126652       393314
## 10          377867        384500       120633     110053       357927
## 11          351784        346614       120551     109636       392050
## 12          298755        297548       113276     102376       396034
## 13          259589        253427       112103      98464       456356
## 14          210596        195428        97202      82216       440572
## 15          161077        141051        74198      57361       379659
## 16          117165         97476        49490      36422       312437
## 17           77517         58665        32033      21267       221710
## 18           55056         37619        20859      12199       152203
## 19           53650         30738        21441       9576       171669
##    white_male
## 1       63139
## 2      276548
## 3      343865
## 4      350939
## 5      355229
## 6      365945
## 7      411770
## 8      409305
## 9      400772
## 10     362238
## 11     397515
## 12     396364
## 13     444492
## 14     417127
## 15     348195
## 16     280285
## 17     186821
## 18     114515
## 19      99244
library(dplyr)
datafinal1 <- datafinal %>%
mutate(total_deaths <- sum(hispanic_f+hispanic_m+black_f+black_m+white_f+white_m),
total_deaths_male <- sum(hispanic_m+black_m+white_m),
total_deaths_female <- sum(hispanic_f+black_f+white_f),totalpop <- sum(hispanic_female+hispanic_male+black_female+black_male+white_female+white_male),
totalpop_male <- sum(hispanic_male+black_male+white_male),
totalpop_female <- sum(hispanic_female+black_female+white_female))
print(datafinal1)

CDR

# Total deaths (sum of deaths across all age groups)
total_deaths <- sum(datafinal1$hispanic_f + datafinal1$hispanic_m + datafinal1$black_f + datafinal1$black_m+datafinal1$white_f+datafinal1$white_m)
total_deaths_male <- sum(datafinal1$hispanic_m + datafinal1$black_m+datafinal1$white_m)
total_deaths_female <- sum(datafinal1$hispanic_f + datafinal1$black_f+datafinal1$white_f)

# Total population (sum of population across all age groups)
total_population <- sum(datafinal1$hispanic_female + datafinal1$hispanic_male + datafinal1$black_female + datafinal1$black_male+datafinal1$white_female+datafinal1$white_male)
total_population_male <- sum(datafinal1$hispanic_male + datafinal1$black_male+datafinal1$white_male)
total_population_female <- sum(datafinal1$hispanic_female + datafinal1$black_female+datafinal1$white_female)

# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total <- (total_deaths / total_population) * 100000
cdr_male <- (total_deaths_male / total_population_male) * 100000
cdr_female <- (total_deaths_female / total_population_female) * 100000

# Output CDRs
print(paste("Total CDR: ", round(cdr_total, 2), "per 100,000"))
## [1] "Total CDR:  747.36 per 100,000"
print(paste("Male CDR: ", round(cdr_male, 2), "per 100,000"))
## [1] "Male CDR:  785.13 per 100,000"
print(paste("Female CDR: ", round(cdr_female, 2), "per 100,000"))
## [1] "Female CDR:  710 per 100,000"

hispanics

# Total deaths (sum of deaths across all age groups)
total_deaths_hispan <- sum(datafinal1$hispanic_f + datafinal1$hispanic_m)
total_deaths_malehispan <- sum(datafinal1$hispanic_m)
total_deaths_femalehispan <- sum(datafinal1$hispanic_f)

# Total population (sum of population across all age groups)
total_population_hispan <- sum(datafinal1$hispanic_female + datafinal1$hispanic_male)
total_population_malehispan <- sum(datafinal1$hispanic_male)
total_population_femalehispan <- sum(datafinal1$hispanic_female)

# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total_hispan <- (total_deaths_hispan / total_population_hispan) * 100000
cdr_male_hispan <- (total_deaths_malehispan / total_population_malehispan) * 100000
cdr_female_hispan <- (total_deaths_femalehispan / total_population_femalehispan) * 100000

# Output CDRs
print(paste("Total CDR_hispan: ", round(cdr_total_hispan, 2), "per 100,000"))
## [1] "Total CDR_hispan:  426.43 per 100,000"
print(paste("Male CDR_hispan: ", round(cdr_male_hispan, 2), "per 100,000"))
## [1] "Male CDR_hispan:  449.85 per 100,000"
print(paste("Female CDR_hispan: ", round(cdr_female_hispan, 2), "per 100,000"))
## [1] "Female CDR_hispan:  402.69 per 100,000"

blacks

# Total deaths (sum of deaths across all age groups)
total_deaths_black <- sum(datafinal1$black_f + datafinal1$black_m)
total_deaths_maleblack <- sum(datafinal1$black_m)
total_deaths_femaleblack <- sum(datafinal1$black_f)

# Total population (sum of population across all age groups)
total_population_black <- sum(datafinal1$black_female + datafinal1$black_male)
total_population_maleblack <- sum(datafinal1$black_male)
total_population_femaleblack <- sum(datafinal1$black_female)

# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total_black <- (total_deaths_black / total_population_black) * 100000
cdr_male_black <- (total_deaths_maleblack / total_population_maleblack) * 100000
cdr_female_black <- (total_deaths_femaleblack / total_population_femaleblack) * 100000

# Output CDRs
print(paste("Total CDR_black: ", round(cdr_total_black, 2), "per 100,000"))
## [1] "Total CDR_black:  712.82 per 100,000"
print(paste("Male CDR_black: ", round(cdr_male_black, 2), "per 100,000"))
## [1] "Male CDR_black:  775.3 per 100,000"
print(paste("Female CDR_black: ", round(cdr_female_black, 2), "per 100,000"))
## [1] "Female CDR_black:  654.26 per 100,000"

whites

# Total deaths (sum of deaths across all age groups)
total_deaths_white <- sum(datafinal1$white_f + datafinal1$white_m)
total_deaths_malewhite <- sum(datafinal1$white_m)
total_deaths_femalewhite <- sum(datafinal1$white_f)

# Total population (sum of population across all age groups)
total_population_white <- sum(datafinal1$white_female + datafinal1$white_male)
total_population_malewhite <- sum(datafinal1$white_male)
total_population_femalewhite <- sum(datafinal1$white_female)

# Calculate Crude Death Rates (CDR) for total, males, and females
cdr_total_white <- (total_deaths_white / total_population_white) * 100000
cdr_male_white <- (total_deaths_malewhite / total_population_malewhite) * 100000
cdr_female_white <- (total_deaths_femalewhite / total_population_femalewhite) * 100000

# Output CDRs
print(paste("Total CDR_white: ", round(cdr_total_white, 2), "per 100,000"))
## [1] "Total CDR_white:  1056.37 per 100,000"
print(paste("Male CDR_white: ", round(cdr_male_white, 2), "per 100,000"))
## [1] "Male CDR_white:  1105.09 per 100,000"
print(paste("Female CDR_white: ", round(cdr_female_white, 2), "per 100,000"))
## [1] "Female CDR_white:  1008.5 per 100,000"
# Load necessary libraries
library(ggplot2)

# Create a sample data frame for the crude death rates
data <- data.frame(
  group = c("White Male", "Black Male", "Hispanic Male", "White Female", 
            "Black Female", "Hispanic Female", "White CDR", 
            "Black CDR", "Hispanic CDR", "Male CDR", "Female CDR", "Total CDR"),
  cdr = c(1105, 775, 450, 1009, 654, 403, 1056, 713, 426, 785, 710, 747)  # Example CDR values
)

# Create the plot using ggplot2
ggplot(data, aes(x = cdr, y = reorder(group, cdr), fill = group)) +  # Reorder bars by CDR
  geom_bar(stat = "identity", width = 0.5) +  # Make bars thinner by adjusting width
  geom_text(aes(label = cdr), hjust = -0.1, size = 3) +  # Add CDR numbers with smaller size
  labs(title = "Crude Death Rates by Sex & Race/Ethnicity, Texas 2018", 
       subtitle = "Data: Centers for Disease Control, 1999-2018", 
       x = "Crude Death Rate (per 100,000 population)", 
       y = "") +  # Customize title and axis labels
  theme_minimal() +  # Use a clean theme
  theme(legend.position = "none") +  # Remove legend
  coord_cartesian(xlim = c(0, 1200))  # Adjust x-axis limits to match your data

# Load necessary libraries
library(ggplot2)

# Create a sample data frame for the crude death rates, including total CDR
data <- data.frame(
  group = c("White", "White", "White", 
            "Black", "Black", "Black", 
            "Hispanic", "Hispanic", "Hispanic", 
            "Total Population", "Total Population", "Total Population"),  # Adding total population
  subgroup = c("Male", "Female", "Total", 
               "Male", "Female", "Total", 
               "Male", "Female", "Total", 
               "Male", "Female", "Total"),  # New male, female, total categories
  cdr = c(1105, 1009, 1056, 
          775, 654, 713, 
          450, 403, 426, 
          785, 710, 747)  # Example CDR values including totals
)

# Ensure "White" group appears first, then "Total"
data$group <- factor(data$group, levels = c("White", "Black", "Hispanic", "Total Population"))

# Define a custom color palette for Male, Female, and Total
custom_colors <- c("Male" = "#377eb8", "Female" = "#4daf4a", "Total" = "#ff7f00")

# Create the plot using ggplot2
ggplot(data, aes(x = group, y = cdr, fill = subgroup)) +  # Group by 'group' (race), fill by 'subgroup' (male/female/total)
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.4) +  # Thinner bars with dodged subgroups
  geom_text(aes(label = cdr), position = position_dodge(width = 0.7), vjust = -0.5, size = 3) +  # Add CDR numbers on top of bars
  scale_fill_manual(values = custom_colors, name = "Sex/Category") +  # Apply custom colors and set legend for Male/Female/Total
  labs(title = "Crude Death Rates by Sex & Race/Ethnicity, Texas 2018", 
       subtitle = "Data: Centers for Disease Control, 1999-2018", 
       x = "", 
       y = "Crude Death Rate (per 100,000 population)") +  # Customize title and axis labels
  theme_minimal() +  # Use a clean theme
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +  # Keep x-axis labels straight
  theme(legend.position = "top") +  # Place legend at the top
  theme(plot.subtitle = element_text(size = 8),  # Reduce the font size of the subtitle (Data source)
        axis.title.y = element_text(size = 10),  # Adjust axis title font size if needed
        plot.title = element_text(size = 14)) +  # Keep title size larger
  coord_cartesian(ylim = c(0, 1200))  # Adjust y-axis limits to fit the bars

### ASDR

datafinal2 <- datafinal1 %>%
  mutate(
    hispanic_f_rate = (hispanic_f / hispanic_female) * 100000,  
    hispanic_m_rate = (hispanic_m / hispanic_male) * 100000, 
    black_f_rate = (black_f / black_female) * 100000,  
    black_m_rate = (black_m / black_male)*100000,
    white_f_rate = (white_f / white_female) * 100000,  
    white_m_rate = (white_m / white_male) * 100000)  %>%
  mutate(total_rate_h = (hispanic_f_rate + hispanic_m_rate) / 2,total_rate_b=(black_f_rate + black_m_rate) / 2,total_rate_w = (white_f_rate + white_m_rate) / 2)%>%
mutate(
    hispanic_mf_ratio = hispanic_m_rate / hispanic_f_rate,black_mf_ratio = black_m_rate / black_f_rate,white_mf_ratio = white_m_rate / white_f_rate)


datafinal2 %>%
  select(agegroup, hispanic_f_rate, hispanic_m_rate,black_f_rate,black_m_rate,white_m_rate,white_m_rate,total_rate_h,total_rate_b,total_rate_w,hispanic_mf_ratio,black_mf_ratio,white_mf_ratio)
##        agegroup hispanic_f_rate hispanic_m_rate black_f_rate black_m_rate
## 1      < 1 year       419.69902      504.118514    926.99389   1257.70551
## 2     1-4 years        17.75343       25.030559     42.28117     48.99319
## 3     5-9 years         8.79919        8.116146     22.51546     21.89518
## 4   10-14 years        13.22780       15.548060     13.85479     17.84254
## 5   15-19 years        24.17745       56.369899     34.45845    100.09711
## 6   20-24 years        37.30331      112.643124     58.38110    171.95662
## 7   25-29 years        41.27550      121.766638     76.08353    199.58512
## 8   30-34 years        57.53084      122.418722    124.23609    232.72562
## 9   35-39 years        76.28447      157.920912    167.54604    337.93387
## 10  40-44 years       105.59271      191.937581    242.88545    375.27373
## 11  45-49 years       152.65049      313.028325    342.59359    532.67175
## 12  50-54 years       305.60158      508.489387    512.90653    818.55122
## 13  55-59 years       434.53305      818.381625    788.56052   1364.96588
## 14 60-64 years        654.80826     1189.696461   1221.16829   2083.53605
## 15  65-69 years      1033.04631     1681.661243   1707.59320   2934.04927
## 16  70-74 years      1641.27512     2558.578522   2497.47424   3788.91879
## 17  75-79 years      2563.30869     3722.833035   3639.99625   5661.35327
## 18  80-84 years      4322.87126     5996.969616   5992.61710   7877.69489
## 19    85+ years     16344.82759    17203.461513  12723.28716  13930.65998
##    white_m_rate total_rate_h total_rate_b total_rate_w hispanic_mf_ratio
## 1     486.22880   461.908767   1092.34970    475.66484         1.2011429
## 2      31.82088    21.391995     45.63718     27.49291         1.4098999
## 3      11.05085     8.457668     22.20532     11.46946         0.9223742
## 4      20.51639    14.387930     15.84866     17.28556         1.1754078
## 5      79.38541    40.273674     67.27778     56.64883         2.3315072
## 6     120.50991    74.973217    115.16886     85.24900         3.0196549
## 7     149.11237    81.521071    137.83433    103.17451         2.9500945
## 8     179.32837    89.974779    178.48086    133.71190         2.1278801
## 9     217.08103   117.102693    252.73995    170.45034         2.0701580
## 10    265.01913   148.765145    309.07959    219.25910         1.8177162
## 11    398.97865   232.839408    437.63267    330.85012         2.0506212
## 12    620.64163   407.045483    665.72888    506.13734         1.6638965
## 13    990.79399   626.457338   1076.76320    813.78877         1.8833588
## 14   1454.47310   922.252360   1652.35217   1151.57128         1.8168623
## 15   1994.85920  1357.353775   2320.82123   1609.03106         1.6278663
## 16   2877.78511  2099.926823   3143.19651   2412.36881         1.5588968
## 17   4585.13765  3143.070864   4650.67476   3896.46581         1.4523545
## 18   7586.77902  5159.920437   6935.15599   6570.60153         1.3872654
## 19  15502.19661 16774.144550  13326.97357  14718.28516         1.0525325
##    black_mf_ratio white_mf_ratio
## 1       1.3567571      1.0454265
## 2       1.1587473      1.3736655
## 3       0.9724512      0.9295747
## 4       1.2878249      1.4597496
## 5       2.9048636      2.3409072
## 6       2.9454159      2.4107724
## 7       2.6232368      2.6051901
## 8       1.8732529      2.0356151
## 9       2.0169612      1.7532035
## 10      1.5450647      1.5274960
## 11      1.5548211      1.5186367
## 12      1.5959072      1.5847530
## 13      1.7309589      1.5559352
## 14      1.7061826      1.7138276
## 15      1.7182367      1.6308489
## 16      1.5171002      1.4780972
## 17      1.5553184      1.4293741
## 18      1.3145667      1.3658984
## 19      1.0948947      1.1125148
library(ggplot2)
library(tidyr)
library(dplyr)
datafinal2$agegroup <- factor(datafinal2$agegroup, levels = c(
  "< 1 year", "1-4 years", "5-9 years", "10-14 years", "15-19 years",
  "20-24 years", "25-29 years", "30-34 years", "35-39 years", "40-44 years",
  "45-49 years", "50-54 years", "55-59 years", "60-64 years", "65-69 years",
  "70-74 years", "75-79 years", "80-84 years", "85+ years"
))


# Calculate Total ASDR for each age group as the average of male and female ASDR
datafinal_hisp <- datafinal2 %>%
  mutate(
    total_rate1 = (hispanic_f + hispanic_m) / (hispanic_female + hispanic_male) * 1000,
    hispanic_f_rate = (hispanic_f / hispanic_female) * 1000,
    hispanic_m_rate = (hispanic_m / hispanic_male) * 1000
  )


# Remove any rows where agegroup is NA
datafinal_hisp <- datafinal_hisp %>%
  filter(!is.na(agegroup))  # Filter out rows with NA in agegroup

# Reshape data to long format for plotting male and female ASDR as bars
data_long <- datafinal_hisp %>%
  select(agegroup, hispanic_f_rate, hispanic_m_rate) %>%
  pivot_longer(cols = c(hispanic_f_rate, hispanic_m_rate),
               names_to = "Gender",
               values_to = "ASDR") %>%
  mutate(Gender = ifelse(Gender == "hispanic_f_rate", "Hispanic Female ASDR", "Hispanic Male ASDR"))

# Create the plot
ggplot() +
  # Bars for male and female ASDR
  geom_bar(data = data_long, aes(x = agegroup, y = ASDR, fill = Gender), 
           stat = "identity", position = "dodge", width = 0.6) +
  
  # Line for total ASDR
  geom_line(data = datafinal_hisp, aes(x = agegroup, y = total_rate1, group = 1, color = "Total ASDR by Age Group"), size = 1) +
  
  # Logarithmic Y-axis with custom breaks
  scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c(0.1, 1, 10, 100, 1000)) +
  
  # Custom colors for the bars and line
  scale_fill_manual(values = c("Hispanic Female ASDR" = "orange", "Hispanic Male ASDR" = "gray")) +
  scale_color_manual(values = c("Total ASDR by Age Group" = "blue")) +
  
  # Custom labels
  labs(
    title = "Hispanic Age-Specific Death Rates by Gender, Texas 2018",
    x = "Age Group",y = NULL,
    fill = "",
    color = ""
  ) +
  
  # Clean theme with grid lines and minimalistic style
  theme_minimal() +
  
  # Adjustments to match the style
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",  # Place the legend at the bottom
    legend.title = element_blank()  # Remove legend title
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

blacks

library(ggplot2)
library(tidyr)
library(dplyr)

# Calculate Total ASDR for each age group as the average of male and female ASDR
datafinal_black <- datafinal2 %>%
  mutate(
    total_rate2 = (black_f + black_m) / (black_female + black_male) * 1000,
    black_f_rate = (black_f / black_female) * 1000,
    black_m_rate = (black_m / black_male) * 1000
  )

datafinal_black <- datafinal_black%>%
  filter(!is.na(agegroup))  

# Reshape data to long format for plotting male and female ASDR as bars
data_long1 <- datafinal_black %>%
  select(agegroup, black_f_rate, black_m_rate) %>%
  pivot_longer(cols = c(black_f_rate, black_m_rate),
               names_to = "Gender",
               values_to = "ASDR") %>%
  mutate(Gender = ifelse(Gender == "black_f_rate", "Black Female ASDR", "Black Male ASDR"))

# Create the plot
ggplot() +
  # Bars for male and female ASDR
  geom_bar(data = data_long1, aes(x = agegroup, y = ASDR, fill = Gender), 
           stat = "identity", position = "dodge", width = 0.6) +
  
  # Line for total ASDR
  geom_line(data = datafinal_black, aes(x = agegroup, y = total_rate2, group = 1, color = "Total ASDR by Age Group"), size = 1) +
  
  # Logarithmic Y-axis with custom breaks
  scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c(0.1, 1, 10, 100, 1000)) +
  
  # Custom colors for the bars and line
  scale_fill_manual(values = c("Black Female ASDR" = "orange", "Black Male ASDR" = "gray")) +
  scale_color_manual(values = c("Total ASDR by Age Group" = "blue")) +
  
  # Custom labels
  labs(
    title = "Black Age-Specific Death Rates by Gender, Texas 2018",
    x = "Age Group",y = NULL,
    fill = "",
    color = ""
  ) +
  
  # Clean theme with grid lines and minimalistic style
  theme_minimal() +
  
  # Adjustments to match the style
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",  # Place the legend at the bottom
    legend.title = element_blank()  # Remove legend title
  )

whites

library(ggplot2)
library(tidyr)
library(dplyr)

# Calculate Total ASDR for each age group as the average of male and female ASDR
datafinal_white <- datafinal2 %>%
  mutate(
    total_rate3 = (white_f + white_m) / (white_female + white_male) * 1000,
    white_f_rate = (white_f / white_female) * 1000,
    white_m_rate = (white_m / white_male) * 1000
  )

datafinal_white <- datafinal_white%>%
  filter(!is.na(agegroup))  

# Reshape data to long format for plotting male and female ASDR as bars
data_long2 <- datafinal_white %>%
  select(agegroup, white_f_rate, white_m_rate) %>%
  pivot_longer(cols = c(white_f_rate, white_m_rate),
               names_to = "Gender",
               values_to = "ASDR") %>%
  mutate(Gender = ifelse(Gender == "white_f_rate", "White Female ASDR", "White Male ASDR"))

# Create the plot
ggplot() +
  # Bars for male and female ASDR
  geom_bar(data = data_long2, aes(x = agegroup, y = ASDR, fill = Gender), 
           stat = "identity", position = "dodge", width = 0.6) +
  
  # Line for total ASDR
  geom_line(data = datafinal_white, aes(x = agegroup, y = total_rate3, group = 1, color = "Total ASDR by Age Group"), size = 1) +
  
  # Logarithmic Y-axis with custom breaks
  scale_y_log10(breaks = c(0.1, 1, 10, 100, 1000), labels = c(0.1, 1, 10, 100, 1000)) +
  
  # Custom colors for the bars and line
  scale_fill_manual(values = c("White Female ASDR" = "orange", "White Male ASDR" = "gray")) +
  scale_color_manual(values = c("Total ASDR by Age Group" = "blue")) +
  
  # Custom labels
  labs(
    title = "White Age-Specific Death Rates by Gender, Texas 2018",
    x = "Age Group",y = NULL,
    fill = "",
    color = ""
  ) +
  
  # Clean theme with grid lines and minimalistic style
  theme_minimal() +
  
  # Adjustments to match the style
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",  # Place the legend at the bottom
    legend.title = element_blank()  # Remove legend title
  )

Male to female age specific death rate ratios

library(ggplot2)
library(tidyr)
library(dplyr)

# Ensure data is clean and free of NA values in 'agegroup'
datafinal2 <- datafinal2 %>%
  filter(!is.na(agegroup))  

# Reshape the data to long format for easier plotting
data_long_ratio <- datafinal2 %>%
  select(agegroup, hispanic_mf_ratio, black_mf_ratio, white_mf_ratio) %>%
  pivot_longer(cols = c(hispanic_mf_ratio, black_mf_ratio, white_mf_ratio),
               names_to = "Ethnic_Group",
               values_to = "Ratio") %>%
  # Rename ethnic groups in the legend
  mutate(Ethnic_Group = recode(Ethnic_Group,
                               "hispanic_mf_ratio" = "Hispanic",
                               "black_mf_ratio" = "Black",
                               "white_mf_ratio" = "White"))

# Create the plot
ggplot(data_long_ratio, aes(x = agegroup, y = Ratio, color = Ethnic_Group, group = Ethnic_Group)) +
  geom_line(size = 1) +  # Line plot
  geom_point(size = 2) +  # Add points to highlight the exact ratio at each age group
  scale_y_continuous(labels = scales::number_format(accuracy = 0.01)) +  # Adjust Y axis format
  labs(
    title = "Male-to-Female Age-Specific Death Rate Ratios by Race/Ethnicity",
    x = "Age Group",
    y = "Male-to-Female Death Rate Ratio",
    color = "Race/Ethnic Group"  # Updated legend title
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels for better readability

library(ggplot2)
library(tidyr)
library(dplyr)

# Reshape the data to long format for easier plotting
data_long_ratio1 <- datafinal2 %>%
  select(agegroup, hispanic_mf_ratio, black_mf_ratio, white_mf_ratio) %>%
  pivot_longer(cols = c(hispanic_mf_ratio, black_mf_ratio, white_mf_ratio),
               names_to = "Ethnic_Group",
               values_to = "Ratio") %>%
  # Update the names for the ethnic groups
  mutate(Ethnic_Group = recode(Ethnic_Group,
                               "hispanic_mf_ratio" = "Hispanic",
                               "black_mf_ratio" = "Black",
                               "white_mf_ratio" = "White"))

# Create the plot with less space between x-axis labels and the x-axis
ggplot(data_long_ratio1, aes(x = agegroup, y = Ratio, fill = Ethnic_Group)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.6) +  # Adjust dodge width and bar width
  labs(
    title = "Male-to-Female Age-Specific Death Rate Ratios by Race/Ethnicity",
    x = NULL,
    y = "Male-to-Female Death Rate Ratio"
  ) +
  scale_fill_manual(values = c("Hispanic" = "green", "Black" = "red", "White" = "blue")) +  # Custom colors
  scale_y_continuous(labels = scales::number_format(accuracy = 0.01)) +  # Y-axis label format
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.2),  # Rotate x-axis labels and lessen the gap with vjust
    panel.grid.major.x = element_blank(),  # Remove grid lines between age groups
    legend.title = element_blank()  # Remove legend title
  )

### ASDR

datafinal2 <- datafinal2 %>%
  mutate(total_rate = (hispanic_f_rate + hispanic_m_rate) / 2)

# Load necessary libraries
library(ggplot2)
library(tidyr)
library(dplyr)

# Reshape the data to long format using pivot_longer()
data_longa <- datafinal2 %>%
  pivot_longer(cols = c(hispanic_f_rate, hispanic_m_rate), 
               names_to = "Gender", 
               values_to = "ASDR") %>%
  mutate(Gender = ifelse(Gender == "hispanic_f_rate", "Female", "Male"))

# Create the plot
ggplot() +
  # Bars for male and female ASDR
  geom_bar(data = data_longa, aes(x = agegroup, y = ASDR, fill = Gender), 
           stat = "identity", position = "dodge", width = 0.7) +
  
  # Line for total ASDR
  geom_line(data = datafinal2, aes(x = agegroup, y = total_rate, group = 1, color = "Total"), size = 1) +
  
  # Logarithmic Y-axis
  scale_y_log10() +
  
  # Labels for the plot
  labs(
    title = "Hispanic Age-Specific Death Rates by Gender, Texas 2018",
    x = "Age Group",
    y = "ASDR (per 100,000 population)"
  ) +
  
  # Custom colors for bars
  scale_fill_manual(values = c("Female" = "orange", "Male" = "purple")) +
  
  # Ensure the total ASDR line is blue
  scale_color_manual(values = c("Total" = "red")) +
  
  # Clean theme
  theme_minimal() +
  
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  
  # Remove legend title and adjust the position
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )

datafinal2 <- datafinal2 %>%
  mutate(total_rate1 = (black_f_rate + black_m_rate) / 2)

# Load necessary libraries
library(ggplot2)
library(tidyr)
library(dplyr)

# Reshape the data to long format using pivot_longer()
data_longb <- datafinal2 %>%
  pivot_longer(cols = c(black_f_rate, black_m_rate), 
               names_to = "Gender", 
               values_to = "ASDR") %>%
  mutate(Gender = ifelse(Gender == "black_f_rate", "Female", "Male"))

# Create the plot
ggplot() +
  # Bars for male and female ASDR
  geom_bar(data = data_longb, aes(x = agegroup, y = ASDR, fill = Gender), 
           stat = "identity", position = "dodge", width = 0.7) +
  
  # Line for total ASDR
  geom_line(data = datafinal2, aes(x = agegroup, y = total_rate1, group = 1, color = "Total"), size = 1) +
  
  # Logarithmic Y-axis
  scale_y_log10() +
  
  # Labels for the plot
  labs(
    title = "Black Age-Specific Death Rates by Gender, Texas 2018",
    x = "Age Group",
    y = "ASDR (per 100,000 population)"
  ) +
  
  # Clean theme
  theme_minimal() +
  
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  
  # Ensure the total ASDR line is blue
  scale_color_manual(values = c("Total" = "blue")) +
  
  # Remove legend title and adjust the position
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )

datafinal2 <- datafinal2 %>%
  mutate(total_rate2 = (white_f_rate + white_m_rate) / 2)

# Load necessary libraries
library(ggplot2)
library(tidyr)
library(dplyr)

# Reshape the data to long format using pivot_longer()
data_longc <- datafinal2 %>%
  pivot_longer(cols = c(white_f_rate, white_m_rate), 
               names_to = "Gender", 
               values_to = "ASDR") %>%
  mutate(Gender = ifelse(Gender == "white_f_rate", "Female", "Male"))

# Create the plot
ggplot() +
  # Bars for male and female ASDR
  geom_bar(data = data_longc, aes(x = agegroup, y = ASDR, fill = Gender), 
           stat = "identity", position = "dodge", width = 0.7) +
  
  # Line for total ASDR
  geom_line(data = datafinal2, aes(x = agegroup, y = total_rate2, group = 1, color = "Total"), size = 1) +
  
  # Logarithmic Y-axis
  scale_y_log10() +
  
  # Labels for the plot
  labs(
    title = "White Age-Specific Death Rates by Gender, Texas 2018",
    x = "Age Group",
    y = "ASDR (per 100,000 population)"
  ) +
  
  # Custom colors for bars based on the provided colors
  scale_fill_manual(values = c("Female" = "#4e773f",  # Green
                               "Male" = "#002f56")) +  # Dark Blue
  
  # Ensure the total ASDR line is blue
  scale_color_manual(values = c("Total" = "red")) +
  
  # Clean theme
  theme_minimal() +
  
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  
  # Remove legend title and adjust the position
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )