# Start from an empty workspace.
# NOTE(review): rm(list = ls()) only removes objects; it does not detach
# packages or reset options, so restarting R gives a cleaner slate.
rm(list = ls())

# Mapping and data-wrangling packages used throughout.
library(tmap)
library(sf)
library(ggplot2)
library(ggmap)
library(ggthemes)
library(dplyr)

# GADM 4.0 shapefile archive for Bulgaria (BGR).
url <- "https://geodata.ucdavis.edu/gadm/gadm4.0/shp/gadm40_BGR_shp.zip"
Cleared the environment for better organization and then loaded
the libraries necessary to create the maps.
# Download the GADM archive only when it is not already on disk, so that
# re-running the analysis does not fetch the same file again.
# mode = "wb" makes the binary transfer explicit so the zip is not corrupted.
# NOTE: delete ./bul.zip to force a fresh download (e.g. after a failed one).
if (!file.exists("./bul.zip")) {
  download.file(url, destfile = "./bul.zip", mode = "wb")
}
trying URL 'https://geodata.ucdavis.edu/gadm/gadm4.0/shp/gadm40_BGR_shp.zip'
Content type 'application/zip' length 1602712 bytes (1.5 MB)
==================================================
downloaded 1.5 MB
Downloaded the files.
# Extract the archive's contents into the ./bul directory.
unzip(zipfile = "./bul.zip", exdir = "bul")
Unzipped the files.
# Load the gadm40_BGR_2 layer as an sf object.
bul <- sf::st_read(dsn = "./bul/gadm40_BGR_2.shp")
Reading layer `gadm40_BGR_2' from data source
`/Users/sadieriggs/Downloads/DIDA 325/send 2/bul/gadm40_BGR_2.shp'
using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 22.34375 ymin: 41.23481 xmax: 28.60903 ymax: 44.21268
Geodetic CRS: WGS 84
Read the files.
# Base map: polygons filled by the NAME_1 attribute, with borders on top.
map <- tm_shape(bul) +
  tm_fill("NAME_1") +
  tm_borders()
Generating a base map to get an idea of what it should look like.
# Render the tmap object stored in `map`.
print(map)

Printed the base map.
# Region-code lookup table and mortality table, both read from the
# working directory.
codes <- read.csv(file = "simplecodes.csv")
mortalities <- read.csv(
  file = "Bulgaria_Mortality_Rates_Municipalities_Mortality.csv"
)

# Attach the codes to the boundary polygons by ID_2; all.x = TRUE keeps
# every polygon even when it has no matching row in `codes`.
newcodes_data <- merge(x = bul, y = codes, by = "ID_2", all.x = TRUE)
I read the .csv files containing the data for Bulgaria into data
frames. I then merged the map data with the data set that contains the
codes for the different regions, matching on `ID_2`.
# Harmonise two region spellings, then derive a lower-case join key.
# NOTE(review): presumably the replacement spellings are the ones used on the
# map/code side of the join -- confirm against `codes`.
mortalities <- mortalities %>%
  mutate(
    Region.en = case_when(
      Region.en == "Sofia (capital)" ~ "Grad Sofiya",
      Region.en == "Kurdzhali" ~ "Kardzhali",
      TRUE ~ Region.en
    ),
    key = tolower(Region.en)
  )
Fixed the alternative spellings that come from disparities in the names
of locations, and added a lowercase `key` column to merge on.
# Join the mortality rows to the coded polygons on `key`; all.y = TRUE keeps
# mortality rows that have no matching polygon.
merged_data <- merge(
  x = newcodes_data,
  y = mortalities,
  by = "key",
  all.y = TRUE
)

# Earlier alternative, kept for reference:
# merged_data <- merge(bul, mortalities, by.x = "NAME_1", by.y = "Region.en", all.x = TRUE)

# Unique values from each table, for inspection.
bul_list <- unique(bul$NAME_2)
mort_list <- unique(mortalities$Region.en)
# NOTE(review): assumes a `region` column comes from one of the CSV files;
# if it does not exist this is NULL -- confirm.
merged_list <- unique(merged_data$region)
Merged the data containing the codes for regions and the mortality
data, then looked at all the unique values present in the
dataframes.
# Working copy, so merged_data itself is left untouched.
merged_data2 <- merged_data
# Extremes of the 2019 column, ignoring missing values.
max(merged_data2$X2019, na.rm = TRUE)
[1] 15635
min(merged_data2$X2019, na.rm = TRUE)
[1] 7
# Rows whose 2019 value exceeds 4000, set aside for inspection.
high_outliers <- filter(merged_data2, X2019 > 4000)
Created a copy of the data frame that can be altered later, and looked at
the minimum and maximum 2019 values to pinpoint outliers so they can be
removed; rows with values above 4000 are stored in `high_outliers`.
# Reference map of the 2019 column over a grey base layer.
# NOTE(review): merged_data2 has not been split by Sex here, so several rows
# may share one polygon and be drawn on top of each other -- confirm which
# value ends up visible.
ggplot() +
  geom_sf(data = bul, fill = "gray80") +
  geom_sf(data = merged_data2, mapping = aes(fill = X2019)) +
  theme_minimal() +
  labs(
    fill = "Mortality Rates",
    title = "Adult Mortality Rates in Bulgaria"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))

Plotted a reference map that does not separate the mortality rates by
gender.
# Rows whose NAME_1.x is Kardzhali, pulled out for inspection.
missing <- filter(merged_data2, NAME_1.x == "Kardzhali")
Creates a new data frame containing only the data for the region
called Kardzhali. I was unfortunately missing data for this area, noted
by the grey areas on the map.
For the following graphs: I filtered the data so that the female and
male data are in separate data frames. For each of 2019, 2020, and 2021,
I plotted one map per gender, changing the colors of the maps to create
more distinction. I also used a logarithmic color scale, since the
mortality differences between regions were not very apparent on a
linear scale.
library(ggplot2)
library(viridis)

# Split the merged data by sex once; both frames are reused below.
filtered_data_female <- merged_data2 %>%
  filter(Sex == "Female")
filtered_data_male <- merged_data2 %>%
  filter(Sex == "Male")

# Draw one year's values for one sex on top of a grey base map.
#
# sex_data:  sf data frame already filtered to a single sex
# sex_label: word inserted in the title ("Female" or "Male")
# year:      year to plot; the fill column is "X<year>" (e.g. X2019)
# base:      sf object drawn in grey underneath the data layer
#
# Returns the ggplot object, which prints when evaluated at top level.
plot_mortality_map <- function(sex_data, sex_label, year, base = bul) {
  year_col <- paste0("X", year)
  ggplot() +
    geom_sf(data = base, fill = "gray80") +
    # .data[[...]] selects the fill column by its name held in year_col.
    geom_sf(data = sex_data, mapping = aes(fill = .data[[year_col]])) +
    # Log colour scale so differences between low values remain visible.
    scale_fill_gradientn(
      colours = rev(magma(6)),
      name = "Mortality Rates",
      na.value = "grey100",
      trans = "log"
    ) +
    theme_minimal() +
    labs(
      title = paste0(
        "Adult ", sex_label, " Mortality Rates in Bulgaria (", year, ")"
      )
    ) +
    # Centre the title.
    theme(plot.title = element_text(hjust = 0.5))
}

plot_mortality_map(filtered_data_female, "Female", 2019)

plot_mortality_map(filtered_data_male, "Male", 2019)

plot_mortality_map(filtered_data_female, "Female", 2020)

plot_mortality_map(filtered_data_male, "Male", 2020)

plot_mortality_map(filtered_data_female, "Female", 2021)

plot_mortality_map(filtered_data_male, "Male", 2021)

This was the start of an idea I had to create a bar graph of the mean
mortality rates in 2019, 2020, and 2021 for both genders, in order to
show a visual comparison between them. This would have been carried out
had there been extra time to work on the project.
# Add the 2019-2021 means as extra columns on each per-sex frame, as
# groundwork for a comparison chart. Assuming the frames are ungrouped,
# every row receives the same value.
filtered_data_male <- mutate(
  filtered_data_male,
  mean_male_2019 = mean(X2019, na.rm = TRUE),
  mean_male_2020 = mean(X2020, na.rm = TRUE),
  mean_male_2021 = mean(X2021, na.rm = TRUE)
)
filtered_data_female <- mutate(
  filtered_data_female,
  mean_female_2019 = mean(X2019, na.rm = TRUE),
  mean_female_2020 = mean(X2020, na.rm = TRUE),
  mean_female_2021 = mean(X2021, na.rm = TRUE)
)
---
title: "Mortality Rates in Bulgaria from 2019-2021 by Gender"
output: html_notebook
---

```{r}
# Start from an empty workspace.
# NOTE(review): rm(list = ls()) only removes objects; it does not detach
# packages or reset options, so restarting R gives a cleaner slate.
rm(list = ls())

# Mapping and data-wrangling packages used throughout the notebook.
library(tmap)
library(sf)
library(ggplot2)
library(ggmap)
library(ggthemes)
library(dplyr)

# GADM 4.0 shapefile archive for Bulgaria (BGR).
url <- "https://geodata.ucdavis.edu/gadm/gadm4.0/shp/gadm40_BGR_shp.zip"
```
Cleared the environment for better organization and then loaded the libraries necessary to create the maps.


```{r}

# Download the GADM archive only when it is not already on disk, so that
# re-running the notebook does not fetch the same file again.
# mode = "wb" makes the binary transfer explicit so the zip is not corrupted.
# NOTE: delete ./bul.zip to force a fresh download (e.g. after a failed one).
if (!file.exists("./bul.zip")) {
  download.file(url, destfile = "./bul.zip", mode = "wb")
}


```
Downloaded the files.

```{r}
# Extract the archive's contents into the ./bul directory.
unzip(zipfile = "./bul.zip", exdir = "bul")


```
Unzipped the files.


```{r}
# Load the gadm40_BGR_2 layer as an sf object.
bul <- sf::st_read(dsn = "./bul/gadm40_BGR_2.shp")

```
Read the files.

```{r}
# Base map: polygons filled by the NAME_1 attribute, with borders on top.
map <- tm_shape(bul) +
  tm_fill("NAME_1") +
  tm_borders()

```
Generating a base map to get an idea of what it should look like.


```{r}

# Render the tmap object stored in `map`.
print(map)
```
Printed the base map.


```{r}
# Region-code lookup table and mortality table, both read from the
# working directory.
codes <- read.csv(file = "simplecodes.csv")
mortalities <- read.csv(
  file = "Bulgaria_Mortality_Rates_Municipalities_Mortality.csv"
)

# Attach the codes to the boundary polygons by ID_2; all.x = TRUE keeps
# every polygon even when it has no matching row in `codes`.
newcodes_data <- merge(x = bul, y = codes, by = "ID_2", all.x = TRUE)

```
I read the .csv files containing the data for Bulgaria into data frames. I then
merged the map data with the data set that contains the codes for the different regions, matching on `ID_2`.


```{r}
# Harmonise two region spellings, then derive a lower-case join key.
# NOTE(review): presumably the replacement spellings are the ones used on the
# map/code side of the join -- confirm against `codes`.
mortalities <- mortalities %>%
  mutate(
    Region.en = case_when(
      Region.en == "Sofia (capital)" ~ "Grad Sofiya",
      Region.en == "Kurdzhali" ~ "Kardzhali",
      TRUE ~ Region.en
    ),
    key = tolower(Region.en)
  )
```
Fixed the alternative spellings that come from disparities in the names of locations, and added a lowercase `key` column to merge on.



```{r}

# Join the mortality rows to the coded polygons on `key`; all.y = TRUE keeps
# mortality rows that have no matching polygon.
merged_data <- merge(
  x = newcodes_data,
  y = mortalities,
  by = "key",
  all.y = TRUE
)

# Earlier alternative, kept for reference:
# merged_data <- merge(bul, mortalities, by.x = "NAME_1", by.y = "Region.en", all.x = TRUE)

# Unique values from each table, for inspection.
bul_list <- unique(bul$NAME_2)
mort_list <- unique(mortalities$Region.en)
# NOTE(review): assumes a `region` column comes from one of the CSV files;
# if it does not exist this is NULL -- confirm.
merged_list <- unique(merged_data$region)

```
Merged the data containing the codes for regions and the mortality data, then looked at all the unique values present in the dataframes.



```{r}
# Working copy, so merged_data itself is left untouched.
merged_data2 <- merged_data

# Extremes of the 2019 column, ignoring missing values.
max(merged_data2$X2019, na.rm = TRUE)
min(merged_data2$X2019, na.rm = TRUE)

# Rows whose 2019 value exceeds 4000, set aside for inspection.
high_outliers <- filter(merged_data2, X2019 > 4000)
```
Created a copy of the data frame that can be altered later, and looked at the minimum and maximum 2019 values to pinpoint outliers so they can be removed; rows with values above 4000 are stored in `high_outliers`.


```{r}
# Reference map of the 2019 column over a grey base layer.
# NOTE(review): merged_data2 has not been split by Sex here, so several rows
# may share one polygon and be drawn on top of each other -- confirm which
# value ends up visible.
ggplot() +
  geom_sf(data = bul, fill = "gray80") +
  geom_sf(data = merged_data2, mapping = aes(fill = X2019)) +
  theme_minimal() +
  labs(
    fill = "Mortality Rates",
    title = "Adult Mortality Rates in Bulgaria"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
```
Plotted a reference map that does not separate the mortality rates by gender.

```{r}
# Rows whose NAME_1.x is Kardzhali, pulled out for inspection.
missing <- filter(merged_data2, NAME_1.x == "Kardzhali")
```
Creates a new data frame containing only the data for the region called Kardzhali. I was unfortunately missing data for this area, noted by the grey areas on the map.




For the following graphs:
I filtered the data so that the female and male data are in separate data frames. For each of 2019, 2020, and 2021, I plotted one map per gender, changing the colors of the maps to create more distinction. I also used a logarithmic color scale, since the mortality differences between regions were not very apparent on a linear scale.
```{r}
library(ggplot2)
library(viridis)

# Split the merged data by sex once; both frames are reused in later chunks.
filtered_data_female <- merged_data2 %>%
  filter(Sex == "Female")
filtered_data_male <- merged_data2 %>%
  filter(Sex == "Male")

# Draw one year's values for one sex on top of a grey base map.
#
# sex_data:  sf data frame already filtered to a single sex
# sex_label: word inserted in the title ("Female" or "Male")
# year:      year to plot; the fill column is "X<year>" (e.g. X2019)
# base:      sf object drawn in grey underneath the data layer
#
# Returns the ggplot object, which prints when it is the value of a chunk.
plot_mortality_map <- function(sex_data, sex_label, year, base = bul) {
  year_col <- paste0("X", year)
  ggplot() +
    geom_sf(data = base, fill = "gray80") +
    # .data[[...]] selects the fill column by its name held in year_col.
    geom_sf(data = sex_data, mapping = aes(fill = .data[[year_col]])) +
    # Log colour scale so differences between low values remain visible.
    scale_fill_gradientn(
      colours = rev(magma(6)),
      name = "Mortality Rates",
      na.value = "grey100",
      trans = "log"
    ) +
    theme_minimal() +
    labs(
      title = paste0(
        "Adult ", sex_label, " Mortality Rates in Bulgaria (", year, ")"
      )
    ) +
    # Centre the title.
    theme(plot.title = element_text(hjust = 0.5))
}

plot_mortality_map(filtered_data_female, "Female", 2019)
```

```{r}
plot_mortality_map(filtered_data_male, "Male", 2019)
```

```{r}
plot_mortality_map(filtered_data_female, "Female", 2020)
```

```{r}
plot_mortality_map(filtered_data_male, "Male", 2020)
```

```{r}
plot_mortality_map(filtered_data_female, "Female", 2021)
```

```{r}
plot_mortality_map(filtered_data_male, "Male", 2021)
```


This was the start of an idea I had to create a bar graph of the mean mortality rates in 2019, 2020, and 2021 for both genders, in order to show a visual comparison between them.
This would have been carried out had there been extra time to work on the project.
```{r}
# Add the 2019-2021 means as extra columns on each per-sex frame, as
# groundwork for a comparison chart. Assuming the frames are ungrouped,
# every row receives the same value.
filtered_data_male <- mutate(
  filtered_data_male,
  mean_male_2019 = mean(X2019, na.rm = TRUE),
  mean_male_2020 = mean(X2020, na.rm = TRUE),
  mean_male_2021 = mean(X2021, na.rm = TRUE)
)
filtered_data_female <- mutate(
  filtered_data_female,
  mean_female_2019 = mean(X2019, na.rm = TRUE),
  mean_female_2020 = mean(X2020, na.rm = TRUE),
  mean_female_2021 = mean(X2021, na.rm = TRUE)
)
```





