Install packages, load libraries, and set the working directory.
# NOTE(review): no library() calls are visible in this section, yet the code
# below uses %>%, group_by(), ggplot(), ggplotly() and melt(). Confirm the
# packages providing them are attached before this point.

# The relative CSV paths below are resolved against this directory.
setwd("/Users/tessaschneider/Desktop/Final Data Analysis")

# Refugee-status export: skip the 2 lines above the header row, keep strings as
# character, and read empty cells and "*" as NA.
df <- read.csv(
  "unhcr_popstats_refugee-status.csv",
  skip = 2,
  stringsAsFactors = FALSE,
  na.strings = c("", "*")
)

# Persons-of-concern export: same settings, but 3 lines precede the header row.
df2 <- read.csv(
  "unhcr_popstats_export_persons_of_concern_all_data.csv",
  skip = 3,
  stringsAsFactors = FALSE,
  na.strings = c("", "*")
)
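Convert Total.Population to numeric before merging the datasets. (After the merge, data from before 2000 drop out, since the merge keeps only rows whose year, country of asylum, and origin appear in both datasets.)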
# Replace any literal "*" in Total.Population with "2.5", then coerce the
# column to numeric. df2 was read with "*" listed among the NA strings, so in
# practice no cell should still contain "*" here and the replacement is a
# safeguard only.
df2$Total.Population <- as.numeric(
  replace(df2$Total.Population, df2$Total.Population %in% "*", "2.5")
)

# Inner join of the two exports on year, asylum country and origin; rows
# without a match in both data frames are dropped.
merged_data <- merge(
  x = df2,
  y = df,
  by = c("Year", "Country...territory.of.asylum.residence", "Origin")
)

# Number of merged rows per year.
table(merged_data[["Year"]])
##
## 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
## 4301 4683 4988 5473 5668 5778 5806 6064 6120 6207 7080 7209
## 2012 2013 2014 2015 2016
## 7537 8332 9191 10071 9495
# Total.Population summed per asylum country and year.
# na.rm = TRUE skips rows whose Total.Population is NA, so a single missing
# value no longer turns the whole country-year total into NA.
# The result stays grouped by asylum country (summarise() drops only the last
# grouping level).
destination_country_total <- merged_data %>%
  group_by(Country...territory.of.asylum.residence, Year) %>%
  summarise(Total = sum(Total.Population, na.rm = TRUE))
# The 20 asylum countries with the largest totals summed over all years
# (ties at the cut-off are all kept by top_n()).
top_destcountries <- destination_country_total %>%
  group_by(Country...territory.of.asylum.residence) %>%
  summarise(Total = sum(Total, na.rm = TRUE)) %>%
  # Rank on Total explicitly instead of relying on top_n()'s default of
  # "whatever the last column happens to be".
  top_n(20, Total)

# Character vector of those country names, used to filter the yearly totals.
top_destcountries2 <- as.character(
  top_destcountries$Country...territory.of.asylum.residence
)
# Yearly totals for the top asylum countries only: one line chart per country,
# laid out four panels per row, with the visible y range set to 0 - 3 million
# (coord_cartesian zooms without dropping data).
plot1 <- ggplot(
  data = filter(
    destination_country_total,
    Country...territory.of.asylum.residence %in% top_destcountries2
  ),
  mapping = aes(x = Year, y = Total)
) +
  geom_line() +
  coord_cartesian(ylim = c(0, 3e6)) +
  facet_wrap(~ Country...territory.of.asylum.residence, ncol = 4)

# Render the ggplot as an interactive plotly widget.
ggplotly(plot1)
# Total.Population summed per country of origin and year.
# na.rm = TRUE skips rows whose Total.Population is NA, so a single missing
# value no longer turns the whole origin-year total into NA.
# The result stays grouped by Origin (summarise() drops only the last grouping
# level).
origin_country_total <- merged_data %>%
  group_by(Origin, Year) %>%
  summarise(Total = sum(Total.Population, na.rm = TRUE))
# The 20 countries of origin with the largest totals summed over all years
# (ties at the cut-off are all kept by top_n()).
top_origcountries <- origin_country_total %>%
  group_by(Origin) %>%
  summarise(Total = sum(Total, na.rm = TRUE)) %>%
  # Rank on Total explicitly instead of relying on top_n()'s default of
  # "whatever the last column happens to be".
  top_n(20, Total)

# Character vector of those origin names, used to filter the yearly totals.
top_origcountries2 <- as.character(top_origcountries$Origin)
# Yearly totals for the top countries of origin only: one line chart per
# origin, laid out four panels per row, with the visible y range set to
# 0 - 10 million (coord_cartesian zooms without dropping data).
plot2 <- ggplot(
  data = filter(origin_country_total, Origin %in% top_origcountries2),
  mapping = aes(x = Year, y = Total)
) +
  geom_line() +
  coord_cartesian(ylim = c(0, 1e7)) +
  facet_wrap(~ Origin, ncol = 4)

# Render the ggplot as an interactive plotly widget.
ggplotly(plot2)

# Number of merged rows per year.
table(merged_data[["Year"]])
##
## 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
## 4301 4683 4988 5473 5668 5778 5806 6064 6120 6207 7080 7209
## 2012 2013 2014 2015 2016
## 7537 8332 9191 10071 9495
By “People of Concern”: subset to only the PoC category counts by year, and change the value from character to integer.
# Total.Population summed per year, ignoring NA rows. aggregate() returns one
# row per year, sorted by Year, with the sum in column `x`.
Year_Pop <- aggregate(
  merged_data$Total.Population,
  by = list(Year = merged_data$Year),
  FUN = sum,
  na.rm = TRUE
)

# Year-over-year percent change: 100 * (x[t] - x[t-1]) / x[t-1].
# The first year has no predecessor, so its rate is NA. Aligning by row
# position (instead of testing Year > 2000) keeps this correct whatever the
# first year in the data is.
Year_Pop$rate <- c(
  NA_real_,
  100 * diff(Year_Pop$x) / head(Year_Pop$x, -1)
)
# Line chart of the year-over-year percent change held in Year_Pop$rate.
plot3 <- ggplot(data = Year_Pop, mapping = aes(x = Year, y = rate)) +
  geom_line() +
  ggtitle("Percent Change in People of Concern", subtitle = "(2000 - 2016)") +
  xlab("Year") +
  ylab("Percent Change")

# Render the ggplot as an interactive plotly widget.
ggplotly(plot3)
# Keep Year (column 1) plus columns 4-10 of the merged data.
# NOTE(review): selection is positional; presumably columns 4-10 are the seven
# population-category counts (the str() output below shows 7 levels) - confirm
# against names(merged_data).
PoC_count <- merged_data[c(1, 4:10)]

# Reshape to long format: one row per (Year, category) with the count in
# `value`. The argument is spelled out as id.vars rather than relying on
# partial matching of `id`.
PoC_count <- melt(PoC_count, id.vars = "Year")
str(PoC_count)
## 'data.frame': 798021 obs. of 3 variables:
## $ Year : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ variable: Factor w/ 7 levels "Refugees..incl..refugee.like.situations.",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : int NA NA 9 507 2 5 NA 1 5 20 ...

# Ensure `value` is integer. The str() output above already reports it as int,
# so this is expected to leave the data unchanged.
PoC_count$value <- as.integer(PoC_count$value)
Starting in 2013, the number of refugees increased dramatically, and with it the number of pending asylum-seeker cases also increased.
# Stacked bar chart of the population counts per year, one fill colour per
# category.
plot4 <- ggplot(PoC_count, aes(x = Year, y = value)) +
  # na.rm belongs on the layer: inside aes() it is treated as an aesthetic
  # mapping and does not remove missing values.
  geom_bar(aes(fill = variable), stat = "identity", na.rm = TRUE) +
  # The axis title promises millions, so print the raw counts as millions.
  scale_y_continuous(labels = function(v) v / 1e6) +
  labs(
    title = "UNHCR Population Statistics Database",
    subtitle = "(2000 - 2016)",
    x = "Year",
    y = "Number of People (Millions)"
  )

# Render the ggplot as an interactive plotly widget.
ggplotly(plot4)