# Remove every object from the global environment before the analysis starts.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the workspace of anyone who runs the notebook interactively -- consider
# restarting the R session instead.
rm(list=ls())

# Packages loaded for the spatial data handling, wrangling and mapping below.
library(tmap)
library(sf)
library(ggplot2)
library(ggmap)
library(ggthemes)
library(dplyr)

# Address of the zipped GADM shapefile archive (file name carries the code BGR).
url <- "https://geodata.ucdavis.edu/gadm/gadm4.0/shp/gadm40_BGR_shp.zip"

Cleared the environment for better organization, loaded the libraries necessary to create the maps, and stored the URL of the shapefile archive.


# Fetch the shapefile archive only when it is not already on disk, so re-running
# the notebook does not download it again. mode = "wb" requests a binary
# transfer explicitly, which a zip archive needs to stay intact on Windows.
if (!file.exists("./bul.zip")) {
  download.file(url, "./bul.zip", mode = "wb")
}
trying URL 'https://geodata.ucdavis.edu/gadm/gadm4.0/shp/gadm40_BGR_shp.zip'
Content type 'application/zip' length 1602712 bytes (1.5 MB)
==================================================
downloaded 1.5 MB

Downloaded the files.

# Extract the archive into the "bul" directory (relative to the working directory).
unzip(zipfile = "./bul.zip", exdir = "bul")

Unzipped the files.

# Read the gadm40_BGR_2 shapefile from the extracted folder with sf.
bul <- st_read(dsn = "./bul/gadm40_BGR_2.shp")
Reading layer `gadm40_BGR_2' from data source 
  `/Users/sadieriggs/Downloads/DIDA 325/send 2/bul/gadm40_BGR_2.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 22.34375 ymin: 41.23481 xmax: 28.60903 ymax: 44.21268
Geodetic CRS:  WGS 84

Read the files.

# Base map: polygons filled by their NAME_1 value, with borders drawn on top.
map <- tm_shape(bul) +
  tm_fill("NAME_1") +
  tm_borders()

Generating a base map to get an idea of what it should look like.


# Draw the tmap object stored in `map`.
print(map)

Printed the base map.

# Load the two CSV inputs: the codes table and the mortality table.
codes <- read.csv(file = "simplecodes.csv")
mortalities <- read.csv(
  file = "Bulgaria_Mortality_Rates_Municipalities_Mortality.csv"
)

# Attach the columns of `codes` to the shapefile features, matching on ID_2.
# all.x = TRUE keeps features that have no matching row in `codes`.
newcodes_data <- merge(x = bul, y = codes, by = "ID_2", all.x = TRUE)

I read the .csv files containing the data for Bulgaria into data frames. I then merged the shapefile data with the data set of region codes, matching the two on the `ID_2` column.

# Replace two alternative region spellings, then build a lower-case join key
# from the corrected names. All other names pass through unchanged.
mortalities <- mortalities %>%
  mutate(
    Region.en = case_when(
      Region.en == "Sofia (capital)" ~ "Grad Sofiya",
      Region.en == "Kurdzhali" ~ "Kardzhali",
      TRUE ~ Region.en
    ),
    key = tolower(Region.en)
  )

Fixed the alternative spelling issue that comes from disparities in the names of locations.


# Join the mortality table onto the coded shapefile data by the lower-case key.
# all.y = TRUE keeps every mortality row, including ones with no matching feature.
merged_data <- merge(
  x = newcodes_data,
  y = mortalities,
  by = "key",
  all.y = TRUE
)

#merged_data <- merge(bul, mortalities, by.x = "NAME_1", by.y = "Region.en", all.x = TRUE)

# Distinct values from each table, kept for inspection.
mort_list <- unique(mortalities$Region.en)
bul_list <- unique(bul$NAME_2)
# NOTE(review): confirm that merged_data really has a `region` column; `$`
# yields NULL for a column that does not exist.
merged_list <- unique(merged_data$region)

Merged the data containing the codes for regions and the mortality data, then looked at all the unique values present in the dataframes.

# Working copy of the merged data.
merged_data2 <- merged_data

# Inspect the extremes of the 2019 column to spot outliers.
max(merged_data2[["X2019"]], na.rm = TRUE)
[1] 15635
min(merged_data2[["X2019"]], na.rm = TRUE)
[1] 7
# Rows whose 2019 value exceeds 4000.
high_outliers <- filter(merged_data2, X2019 > 4000)

Created a new data frame that will later be altered, and looked at the minimum and maximum values to pinpoint outliers so they could be removed; rows with a 2019 value above 4000 are collected in `high_outliers`.



# Reference map: all features in grey underneath, merged rows coloured by X2019.
# NOTE(review): merged_data2 is not filtered by Sex here, so if it holds several
# rows per feature they are drawn on top of each other -- confirm this is intended.
ggplot() +
  geom_sf(data = bul, fill = "gray80") +
  geom_sf(data = merged_data2, mapping = aes(fill = X2019)) +
  labs(title = "Adult Mortality Rates in Bulgaria", fill = "Mortality Rates") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

Plotted a reference map that does not separate the mortality rates by gender.


# Keep only the rows whose NAME_1.x is "Kardzhali".
missing <- filter(merged_data2, NAME_1.x == "Kardzhali")

Creates a new data frame containing only the data for the region called Kardzhali. I was unfortunately missing data for this area, noted by the grey areas on the map.

For the following graphs: I filtered the data so that the data for female and male are put into separate data frames. I plotted the graphs for 2019, 2020, and 2021 by gender for every year, changing the color of the graphs to create more distinction. I also transformed the data into a logarithmic scale to show differences through the scale, since the mortality differences between regions were not very apparent.

library(ggplot2)
library(viridis)

# Split the merged data by sex once. Both data frames are used again after the
# maps, so their names are kept.
filtered_data_female <- merged_data2 %>%
  filter(Sex == "Female")
filtered_data_male <- merged_data2 %>%
  filter(Sex == "Male")

#' Draw one log-scaled mortality map for a given sex and year.
#'
#' @param data sf data frame already filtered to a single sex.
#' @param sex Label used in the plot title, e.g. "Female".
#' @param year Year to plot; the values are read from column `X<year>`.
#' @param base sf layer drawn in grey underneath, so features without data
#'   remain visible.
#' @return A ggplot object.
plot_mortality_map <- function(data, sex, year, base = bul) {
  year_col <- paste0("X", year)
  # Fail early with a clear message instead of an obscure ggplot error.
  stopifnot(year_col %in% names(data))

  ggplot() +
    geom_sf(data = base, fill = "gray80") +
    geom_sf(data = data, mapping = aes(fill = .data[[year_col]])) +
    scale_fill_gradientn(
      colours = rev(magma(6)),
      name = "Mortality Rates",
      na.value = "grey100",
      trans = "log"
    ) +
    theme_minimal() +
    labs(
      fill = "Mortality Rates",
      title = sprintf("Adult %s Mortality Rates in Bulgaria (%s)", sex, year)
    ) +
    theme(plot.title = element_text(hjust = 0.5))
}

# One map per sex and year: female then male, for 2019, 2020 and 2021.
plot_mortality_map(filtered_data_female, "Female", 2019)
plot_mortality_map(filtered_data_male, "Male", 2019)
plot_mortality_map(filtered_data_female, "Female", 2020)
plot_mortality_map(filtered_data_male, "Male", 2020)
plot_mortality_map(filtered_data_female, "Female", 2021)
plot_mortality_map(filtered_data_male, "Male", 2021)

This was the start of an idea I had to create a bar graph that took the mean of the mortality rates in 2019, 2020, and 2021 across both genders, in order to show a visual comparison between them. This would have been carried out had there been extra time to work on the project.

# Append the overall mean of each year's column (NAs ignored) to every row of
# the male and female data frames.
filtered_data_male <- mutate(
  filtered_data_male,
  mean_male_2019 = mean(X2019, na.rm = TRUE),
  mean_male_2020 = mean(X2020, na.rm = TRUE),
  mean_male_2021 = mean(X2021, na.rm = TRUE)
)
filtered_data_female <- mutate(
  filtered_data_female,
  mean_female_2019 = mean(X2019, na.rm = TRUE),
  mean_female_2020 = mean(X2020, na.rm = TRUE),
  mean_female_2021 = mean(X2021, na.rm = TRUE)
)
---
title: "Mortality Rates in Bulgaria from 2019-2021 by Gender"
output: html_notebook
---

```{r}
# Remove every object from the global environment before the analysis starts.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the workspace of anyone who runs the notebook interactively -- consider
# restarting the R session instead.
rm(list=ls())

# Packages loaded for the spatial data handling, wrangling and mapping below.
library(tmap)
library(sf)
library(ggplot2)
library(ggmap)
library(ggthemes)
library(dplyr)

# Address of the zipped GADM shapefile archive (file name carries the code BGR).
url <- "https://geodata.ucdavis.edu/gadm/gadm4.0/shp/gadm40_BGR_shp.zip"
```
Cleared the environment for better organization, loaded the libraries necessary to create the maps, and stored the URL of the shapefile archive.


```{r}

# Fetch the shapefile archive only when it is not already on disk, so re-running
# the notebook does not download it again. mode = "wb" requests a binary
# transfer explicitly, which a zip archive needs to stay intact on Windows.
if (!file.exists("./bul.zip")) {
  download.file(url, "./bul.zip", mode = "wb")
}


```
Downloaded the files.

```{r}
# Extract the archive into the "bul" directory (relative to the working directory).
unzip(zipfile = "./bul.zip", exdir = "bul")


```
Unzipped the files.


```{r}
# Read the gadm40_BGR_2 shapefile from the extracted folder with sf.
bul <- st_read(dsn = "./bul/gadm40_BGR_2.shp")

```
Read the files.

```{r}
# Base map: polygons filled by their NAME_1 value, with borders drawn on top.
map <- tm_shape(bul) +
  tm_fill("NAME_1") +
  tm_borders()

```
Generating a base map to get an idea of what it should look like.


```{r}

# Draw the tmap object stored in `map`.
print(map)
```
Printed the base map.


```{r}
# Load the two CSV inputs: the codes table and the mortality table.
codes <- read.csv(file = "simplecodes.csv")
mortalities <- read.csv(
  file = "Bulgaria_Mortality_Rates_Municipalities_Mortality.csv"
)

# Attach the columns of `codes` to the shapefile features, matching on ID_2.
# all.x = TRUE keeps features that have no matching row in `codes`.
newcodes_data <- merge(x = bul, y = codes, by = "ID_2", all.x = TRUE)

```
I read the .csv files containing the data for Bulgaria into data frames. I then
merged the shapefile data with the data set of region codes, matching the two on the `ID_2` column.


```{r}
# Replace two alternative region spellings, then build a lower-case join key
# from the corrected names. All other names pass through unchanged.
mortalities <- mortalities %>%
  mutate(
    Region.en = case_when(
      Region.en == "Sofia (capital)" ~ "Grad Sofiya",
      Region.en == "Kurdzhali" ~ "Kardzhali",
      TRUE ~ Region.en
    ),
    key = tolower(Region.en)
  )
```
Fixed the alternative spelling issue that comes from disparities in the names of locations.



```{r}

# Join the mortality table onto the coded shapefile data by the lower-case key.
# all.y = TRUE keeps every mortality row, including ones with no matching feature.
merged_data <- merge(
  x = newcodes_data,
  y = mortalities,
  by = "key",
  all.y = TRUE
)

#merged_data <- merge(bul, mortalities, by.x = "NAME_1", by.y = "Region.en", all.x = TRUE)

# Distinct values from each table, kept for inspection.
mort_list <- unique(mortalities$Region.en)
bul_list <- unique(bul$NAME_2)
# NOTE(review): confirm that merged_data really has a `region` column; `$`
# yields NULL for a column that does not exist.
merged_list <- unique(merged_data$region)

```
Merged the data containing the codes for regions and the mortality data, then looked at all the unique values present in the dataframes.



```{r}
# Working copy of the merged data.
merged_data2 <- merged_data

# Inspect the extremes of the 2019 column to spot outliers.
max(merged_data2[["X2019"]], na.rm = TRUE)
min(merged_data2[["X2019"]], na.rm = TRUE)

# Rows whose 2019 value exceeds 4000.
high_outliers <- filter(merged_data2, X2019 > 4000)
```
Created a new data frame that will later be altered, and looked at the minimum and maximum values to pinpoint outliers so they could be removed; rows with a 2019 value above 4000 are collected in `high_outliers`.


```{r}
# Reference map: all features in grey underneath, merged rows coloured by X2019.
# NOTE(review): merged_data2 is not filtered by Sex here, so if it holds several
# rows per feature they are drawn on top of each other -- confirm this is intended.
ggplot() +
  geom_sf(data = bul, fill = "gray80") +
  geom_sf(data = merged_data2, mapping = aes(fill = X2019)) +
  labs(title = "Adult Mortality Rates in Bulgaria", fill = "Mortality Rates") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
```
Plotted a reference map that does not separate the mortality rates by gender.

```{r}
# Keep only the rows whose NAME_1.x is "Kardzhali".
missing <- filter(merged_data2, NAME_1.x == "Kardzhali")
```
Creates a new data frame containing only the data for the region called Kardzhali. I was unfortunately missing data for this area, noted by the grey areas on the map.




For the following graphs:
I filtered the data so that the data for female and male are put into separate data frames. I plotted the graphs for 2019, 2020, and 2021 by gender for every year, changing the color of the graphs to create more distinction. I also transformed the data into a logarithmic scale to show differences through the scale, since the mortality differences between regions were not very apparent.
```{r}
library(ggplot2)
library(viridis)

# Split the merged data by sex once. Both data frames are used again after the
# maps, so their names are kept.
filtered_data_female <- merged_data2 %>%
  filter(Sex == "Female")
filtered_data_male <- merged_data2 %>%
  filter(Sex == "Male")

#' Draw one log-scaled mortality map for a given sex and year.
#'
#' @param data sf data frame already filtered to a single sex.
#' @param sex Label used in the plot title, e.g. "Female".
#' @param year Year to plot; the values are read from column `X<year>`.
#' @param base sf layer drawn in grey underneath, so features without data
#'   remain visible.
#' @return A ggplot object.
plot_mortality_map <- function(data, sex, year, base = bul) {
  year_col <- paste0("X", year)
  # Fail early with a clear message instead of an obscure ggplot error.
  stopifnot(year_col %in% names(data))

  ggplot() +
    geom_sf(data = base, fill = "gray80") +
    geom_sf(data = data, mapping = aes(fill = .data[[year_col]])) +
    scale_fill_gradientn(
      colours = rev(magma(6)),
      name = "Mortality Rates",
      na.value = "grey100",
      trans = "log"
    ) +
    theme_minimal() +
    labs(
      fill = "Mortality Rates",
      title = sprintf("Adult %s Mortality Rates in Bulgaria (%s)", sex, year)
    ) +
    theme(plot.title = element_text(hjust = 0.5))
}

# One map per sex and year: female then male, for 2019, 2020 and 2021.
plot_mortality_map(filtered_data_female, "Female", 2019)
plot_mortality_map(filtered_data_male, "Male", 2019)
plot_mortality_map(filtered_data_female, "Female", 2020)
plot_mortality_map(filtered_data_male, "Male", 2020)
plot_mortality_map(filtered_data_female, "Female", 2021)
plot_mortality_map(filtered_data_male, "Male", 2021)
```


This was the start of an idea I had to create a bar graph that took the mean of the mortality rates in 2019, 2020, and 2021 across both genders, in order to show a visual comparison between them.
This would have been carried out had there been extra time to work on the project.
```{r}
# Append the overall mean of each year's column (NAs ignored) to every row of
# the male and female data frames.
filtered_data_male <- mutate(
  filtered_data_male,
  mean_male_2019 = mean(X2019, na.rm = TRUE),
  mean_male_2020 = mean(X2020, na.rm = TRUE),
  mean_male_2021 = mean(X2021, na.rm = TRUE)
)
filtered_data_female <- mutate(
  filtered_data_female,
  mean_female_2019 = mean(X2019, na.rm = TRUE),
  mean_female_2020 = mean(X2020, na.rm = TRUE),
  mean_female_2021 = mean(X2021, na.rm = TRUE)
)
```





