Loading the packages needed for the assignment

library(tidyverse)
library(broom)
library(mosaic)
library(sjmisc)
library(readr)
library(dplyr)
library(ggplot2)

Extracting the data from SocialExplorer into a CSV file. The data set contains European statistics from 2014.

# Read the CSV export into the `eurostat` tibble; the first row of the file
# supplies the column names.
library(readr)
eurostat <- read_csv(
  file = "/Users/lasha/Desktop/Queens College/712 Advanced Analytics/R11454080_SL040.csv",
  col_names = TRUE
)

# Preview the first and the last rows of the imported data.
head(eurostat)
tail(eurostat)

Changing the class of variables to numeric

# Inspect the class of every column as imported.
lapply(eurostat, class)

# Change the class of columns 8 through 37 from "character" to "numeric".
# as.numeric() parses the text of each column into numbers. data.matrix() must
# not be used here: it first converts character columns to factors and then
# returns the integer level codes instead of the values themselves.
id <- 8:37
eurostat[id] <- lapply(eurostat[id], as.numeric)

# Confirm the column classes after the conversion.
sapply(eurostat, class)

Applying the dplyr functions (dropping, renaming, creating, summarizing variables)

# Dropping variables
eurostat <- eurostat %>%
  select(-Country, -Region, -World, -`Ind. Country`, -`Name of Area`)
eurostat

# Renaming the variables. The renamed copy is only printed; `eurostat` itself
# keeps its original column names, which the statements below still use.
eurostat %>%
  rename(
    country = `Qualifying Name`,
    total_population = `Total Population`,
    total_emigrants = `Total Emigrants`,
    total_immigrants = `Total Immigrants`
  )

# Generating/creating a new variable (column) called net_migration.
# Net migration is immigrants minus emigrants, so a positive value means more
# people arrived than left. The result is only printed, not stored.
eurostat %>%
  mutate(
    net_migration =
      as.numeric(`Total Immigrants`) - as.numeric(`Total Emigrants`)
  )

# Creating a smaller data set from "eurostat": rows with at least 200,000
# emigrants. as.numeric() keeps the comparison numeric even when the column
# holds text; comparing text with a number would compare them as strings.
hiemigr <- eurostat %>%
  filter(as.numeric(`Total Emigrants`) >= 200000)
hiemigr
head(hiemigr)

# Show the dimensions (rows, columns) of the data
dim(eurostat)
## [1]  8 37
dim(hiemigr)
## [1]  5 37

# Selecting all columns except "FIPS" in hiemigr
head(select(hiemigr, -FIPS))

# Summarising: check the column class, then average the emigrant counts
class(eurostat$`Total Emigrants`)
hiemigr %>%
  summarise(avg_emigr = mean(as.numeric(`Total Emigrants`)))

Creating bar charts about migration in Europe using ggplot2

# Hard-coded emigrant counts, one value per entry of `country` below.
total_emigrants <- c(324221, 400430, 319086, 136328, 36621, 29308, 268299)
# Immigrant and population figures are taken from eurostat, skipping its
# first row.
total_immigrants <- as.numeric(eurostat$`Total Immigrants`[-1])
total_population <- as.numeric(eurostat$`Total Population`[-1])
country <- c(
  "Germany", "Spain", "United Kingdom", "Italy", "Lithuania", "Norway",
  "Poland"
)
migr <- data.frame(country, total_emigrants, total_immigrants)
migr

# Population gets its own data frame so that the plot reads every aesthetic
# from its `data` argument instead of silently falling back to a global
# variable. `migr` stays limited to the two migration series melted below.
pop_data <- data.frame(country, total_population)

# Bar chart of total population per country, using library(ggplot2)
ggplot(data = pop_data) +
  geom_col(aes(x = country, y = total_population), fill = "blue") +
  labs(
    x = "Country",
    y = "Number of People",
    title = "2014 Total Population of EU Countries"
  )

# Reshape to long format (one row per country and series) for grouped bars.
library(reshape2)
dfm <- melt(migr, id.vars = "country")

# Grouped bar chart of emigrants and immigrants per country.
# expand_limits() guarantees the y axis spans at least 0 to 500000 but lets it
# grow when a value is larger; ylim() would instead remove every bar whose
# value lies outside the range.
ggplot(dfm, aes(x = country, y = value)) +
  geom_col(aes(fill = variable), position = "dodge") +
  expand_limits(y = c(0, 500000)) +
  labs(
    x = "Country",
    y = "Number of People",
    title = "2014 Migration in EU Countries"
  )