## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
Load data into R
# Fetch the UN data CSV straight from GitHub. Text columns are kept as
# character so they can be converted deliberately later on.
refugeedata <- read.csv(
  file = "https://raw.githubusercontent.com/ntlrs/data606finalproject/master/UN%20Data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
Clean Data
# Keep only the first three columns and give them short, consistent names:
# country of origin, year, and refugee count.
refugee <- refugeedata[, 1:3]
names(refugee) <- c("CoO", "Year", "Refugees")

# Country of origin is a grouping variable, so store it as a factor.
refugee[["CoO"]] <- as.factor(refugee[["CoO"]])

# Preview the first rows of the cleaned data.
head(refugee)
## CoO Year Refugees
## 1 Iraq 2016 1
## 2 Islamic Rep. of Iran 2016 33
## 3 Pakistan 2016 59737
## 4 China 2016 11
## 5 Dem. Rep. of the Congo 2016 3
## 6 Egypt 2016 3
Which countries have had the biggest increase in refugees since 1975?
The UN has data on refugee migration from 1975 to 2016.
This is an observational study looking at data from 1975 to 2016 on people from all countries who have been forced to leave their country or territory of origin.
The UN data on refugees can be found here: http://data.un.org/Data.aspx?d=UNHCR&f=indID%3AType-Ref#UNHCR
The response variable is a numerical value that represents the number of refugees.
The explanatory variable will be calculated from the years and the number of refugees.
# Per-column overview: level counts for CoO, type of Year, and the
# distribution (including missing values) of Refugees.
summary(object = refugee)
## CoO Year Refugees
## Various : 2347 Length:96065 Min. : 1
## Somalia : 2243 Class :character 1st Qu.: 3
## Iraq : 2091 Mode :character Median : 14
## Dem. Rep. of the Congo: 2061 Mean : 4947
## Sudan : 1995 3rd Qu.: 129
## Ethiopia : 1937 Max. :3272290
## (Other) :83391 NA's :202
# All records whose country of origin is Iraq.
refugeeiraq <- subset(refugee, CoO == "Iraq")

# Iraq has many rows per year and the rows are not in chronological order, so
# a line drawn through the raw rows zig-zags. Collapse to one total per year
# first. Year is stored as text, so convert it to integer for a numeric axis;
# aggregate() returns the groups sorted by year and drops rows with NA.
iraq_by_year <- aggregate(
  Refugees ~ Year,
  data = transform(refugeeiraq, Year = as.integer(Year)),
  FUN = sum
)
plot(
  iraq_by_year$Year,
  iraq_by_year$Refugees,
  type = "l",
  xlab = "Year",
  ylab = "Refugees"
)

# Total over all Iraq records; na.rm guards against the missing counts that
# exist elsewhere in the Refugees column.
sum(refugeeiraq$Refugees, na.rm = TRUE)
## [1] 26456503
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) for the
# refugee counts.
refugee_counts <- refugee[["Refugees"]]
describe(refugee_counts)
## vars n mean sd median trimmed mad min max range
## X1 1 95863 4947.1 61660.01 14 108.18 19.27 1 3272290 3272289
## skew kurtosis se
## X1 30.24 1182.49 199.15