library(ggplot2)
library(tidyr)
library(gridExtra)
## Loading required package: grid
# Read the three wide CSV inputs from a fixed folder and reshape each one to
# long format with one row per (first column, year) pair and the value in `n`.
# NOTE(review): setwd() with a hard-coded absolute path ties the script to one
# machine; kept here because later code may rely on the working directory.
setwd("C:/Shmuel/Nanodegree/Explorer_Data_R")

# NOTE(review): gather() is superseded by tidyr::pivot_longer(); the column
# ranges below are positional and assume the exact width of each CSV.

# --- Children aged 0-4 indicator ---
per_chil <- read.csv(file = "indicator_total0-4percen.csv")
Per_chil <- gather(per_chil, key = "year", value = "n", 2:22)
# Drop the "X" from the year labels (presumably added by read.csv to numeric
# headers) so the year can be stored as a number.
Per_chil[["year"]] <- as.numeric(
  gsub("X", "", Per_chil[["year"]], fixed = TRUE)
)

# --- Life expectancy at birth ---
life_exp <- read.csv(file = "life_expectancy_at_birth.csv")
Life_exp <- gather(life_exp, key = "year", value = "n", 2:206)
Life_exp[["year"]] <- as.numeric(
  gsub("X", "", Life_exp[["year"]], fixed = TRUE)
)
# Copy the first column into a column called `Total` so that this table can be
# matched against the `Total` column used for the other tables.
Life_exp[["Total"]] <- Life_exp[[1L]]

# --- GDP ---
gdp <- read.csv(file = "GDPC.csv")
GDP <- gather(gdp, key = "year", value = "n", 2:53)
GDP[["year"]] <- as.numeric(
  gsub("X", "", GDP[["year"]], fixed = TRUE)
)
# Same first-column copy as for Life_exp.
GDP[["Total"]] <- GDP[[1L]]
# Build `total`: the three long tables restricted to the years and `Total`
# values they all share, merged side by side.

# Distinct years in each table. levels(factor(.)) yields the sorted unique
# values as character labels.
A1 <- levels(factor(Per_chil$year))
A2 <- levels(factor(Life_exp$year))
A3 <- levels(factor(GDP$year))

# Distinct `Total` values (the country identifier, per the original comment)
# in each table.
B1 <- levels(factor(Per_chil$Total))
B2 <- levels(factor(Life_exp$Total))
B3 <- levels(factor(GDP$Total))

# Years and countries present in all three tables.
AT <- intersect(intersect(A1, A3), A2)
BT <- intersect(intersect(B1, B3), B2)

# Keep only the shared years/countries. For the life-expectancy and GDP tables
# only columns 2 to 4 are kept, which drops their original first column.
keep_chil <- Per_chil$Total %in% BT & Per_chil$year %in% AT
keep_life <- Life_exp$Total %in% BT & Life_exp$year %in% AT
keep_gdp <- GDP$Total %in% BT & GDP$year %in% AT
Per_chil_A <- Per_chil[keep_chil, , drop = FALSE]
Life_exp_A <- Life_exp[keep_life, 2:4, drop = FALSE]
GDP_A <- GDP[keep_gdp, 2:4, drop = FALSE]

# Join on (year, Total). After the first merge the two value columns are
# suffixed n.x / n.y; the GDP value then arrives as plain `n`.
total <- merge(x = Per_chil_A, y = Life_exp_A, by = c("year", "Total"))
total <- merge(x = total, y = GDP_A, by = c("year", "Total"))

# Readable aliases for the three value columns.
total[["chil"]] <- total[["n.x"]]
total[["Life"]] <- total[["n.y"]]
total[["GDP"]] <- total[["n"]]

# Drop rows whose GDP value (`n`) is missing. Rows with a missing `chil` or
# `Life` value are NOT removed by this step.
total <- total[!is.na(total$n), , drop = FALSE]
# Box plot of the percentage of children aged 0-4 (`chil`) for each year in
# `total`, with the per-year mean overlaid as a cross (shape 4).
ggplot(data = total, mapping = aes(x = factor(year), y = chil)) +
  geom_boxplot() +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 4) +
  xlab("Years") +
  ylab("Percent children [%]") +
  ggtitle("Percent of children age 0-4, years 1960-2010")
The percentage of children aged 0-4 across all countries, both the median (-) and the mean (x), decreases over the years.
# Box plot of `GDP` for each year in `total`, drawn on a log-transformed
# y axis, with the per-year mean overlaid as a cross (shape 4).
ggplot(data = total, mapping = aes(x = factor(year), y = GDP)) +
  geom_boxplot() +
  # coord_trans() only transforms the coordinate system, so the box statistics
  # and the mean are computed from the untransformed GDP values.
  coord_trans(y = "log") +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 4) +
  xlab("Years") +
  ylab("log GDP [$]") +
  ggtitle("Total log GDP $, years 1960-2010")
# Box plot of life expectancy (`Life`) for each year in `total`, with the
# per-year mean overlaid as a cross (shape 4).
ggplot(data = total, mapping = aes(x = factor(year), y = Life)) +
  geom_boxplot() +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 4) +
  xlab("Years") +
  ylab("life expectancy[Years]") +
  ggtitle("Life expectancy, years 1960-2010")
# Scatter plot of GDP against life expectancy, one point per row of `total`,
# with the y axis drawn on a log-transformed coordinate system.
ggplot(data = total, mapping = aes(x = Life, y = GDP)) +
  geom_point(size = 2) +
  coord_trans(y = "log")

# Pearson correlation test between the untransformed GDP and life expectancy.
# The arguments are written as total$... so the printed "data:" line keeps
# naming both columns in full.
cor.test(x = total$GDP, y = total$Life, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: total$GDP and total$Life
## t = 25.5536, df = 1362, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5322725 0.6040920
## sample estimates:
## cor
## 0.5692673