library(dplyr)
library(tidyr)
library(ggplot2)
# load data
# Download the file from GitHub and save it as "Aggregates.csv" in the current
# working directory. The download method is left at its default so the script
# also runs outside Windows (the "wininet" method is only available there).
download <- download.file(
  "https://raw.githubusercontent.com/kelloggjohnd/Data606/master/Aggregates.csv",
  destfile = "Aggregates.csv"
)
# download.file() returns 0 on success; stop early instead of reading a
# missing or partial file further down.
if (download != 0) {
  stop("Download of Aggregates.csv failed with status ", download, call. = FALSE)
}
# Read the downloaded CSV and reduce it to one row per group/series with
# numeric yearly values.
# The file marks missing values with "..". Declaring that at read time avoids
# calling na_if() with a character value on every column, which fails in newer
# dplyr releases for any column that was not parsed as character.
year_cols <- paste0("YR", 2008:2017)
agg_raw <- read.csv(
  file = "Aggregates.csv", header = TRUE, sep = ",",
  na.strings = c("NA", ".."), stringsAsFactors = FALSE
)
names(agg_raw) <- c(
  "Name", "Country.Code", "Series.Name", "Series.Code", year_cols
)
# Keep only the group name, the series code and the yearly values.
agg_raw <- agg_raw[, c("Name", "Series.Code", year_cols)]
agg_raw <- agg_raw %>%
  mutate(
    # The export series code is labelled "Export"; any other code is "Import".
    Code = ifelse(Series.Code == "BX.GSR.CMCP.ZS", "Export", "Import"),
    Name = as.factor(Name),
    Code = as.factor(Code)
  ) %>%
  select(-Series.Code)
# Force the yearly columns to numeric even if read.csv() inferred another type.
agg_raw[year_cols] <- lapply(agg_raw[year_cols], as.numeric)
# Build the analysis table from the first 94 rows of agg_raw: add the
# per-row mean of the yearly columns (ignoring NAs) as xMEAN, then drop the
# "Not classified" group.
Agg_data <- agg_raw[seq_len(94), ] %>%
  mutate(xMEAN = rowMeans(select(., starts_with("YR")), na.rm = TRUE)) %>%
  filter(Name != "Not classified")

# Separate tables for the export and the import series.
Agg_export <- filter(Agg_data, Code == "Export")
Agg_import <- filter(Agg_data, Code == "Import")

# Preview the first rows of the combined table.
head(Agg_data)
From 2008 to 2017, were a country’s computer, communications and other technical services (imports and exports) affected by geographical area and/or prosperity?
The observations are pre-defined aggregate groups. The dataset contains 47 groups based on geographical area, political situation, and monetary status.
Data is generated from The World Bank databank World Development Indicators.
World Development Indicators (WDI) is the primary World Bank collection of development indicators, compiled from officially recognized international sources. It presents the most current and accurate global development data available, and includes national, regional and global estimates. [Note: Even though Global Development Finance (GDF) is no longer listed in the WDI database name, all external debt and financial flows data continue to be included in WDI. The GDF publication has been renamed International Debt Statistics (IDS), and has its own separate database as well.]
This is an observational study.
The data can be found at https://databank.worldbank.org/source/world-development-indicators. I have focused on two indicators: Computer, communications and other services (% of commercial service exports) and Computer, communications and other services (% of commercial service imports). Countries with blank values for all 10 years will be removed from the dataset.
Import and export of Technical services have been affected over the last 10 years (2008 to 2017). Data from 2018 was incomplete; analysis had to move back one year.
The independent variables include the geographical position (qualitative) and a country’s financial status (quantitative).
# Per-column summary of the combined table (import and export rows together).
summary(Agg_data)
## Name YR2008
## Arab World : 2 Min. :11.49
## Caribbean small states : 2 1st Qu.:34.66
## Central Europe and the Baltics : 2 Median :38.14
## Early-demographic dividend : 2 Mean :38.23
## East Asia & Pacific : 2 3rd Qu.:42.19
## East Asia & Pacific (excluding high income): 2 Max. :69.37
## (Other) :80 NA's :5
## YR2009 YR2010 YR2011 YR2012
## Min. :10.94 Min. :12.20 Min. :14.17 Min. :14.71
## 1st Qu.:35.67 1st Qu.:32.60 1st Qu.:34.48 1st Qu.:34.49
## Median :40.89 Median :37.81 Median :40.05 Median :38.97
## Mean :39.75 Mean :37.16 Mean :39.16 Mean :38.76
## 3rd Qu.:44.57 3rd Qu.:42.85 3rd Qu.:44.17 3rd Qu.:44.72
## Max. :68.58 Max. :65.16 Max. :62.00 Max. :66.13
## NA's :5 NA's :5 NA's :1 NA's :1
## YR2013 YR2014 YR2015 YR2016
## Min. :16.04 Min. :13.86 Min. :11.93 Min. :11.55
## 1st Qu.:33.81 1st Qu.:32.90 1st Qu.:31.59 1st Qu.:31.48
## Median :38.66 Median :38.55 Median :38.78 Median :38.29
## Mean :38.78 Mean :38.65 Mean :38.31 Mean :38.27
## 3rd Qu.:44.31 3rd Qu.:44.77 3rd Qu.:44.66 3rd Qu.:45.33
## Max. :67.09 Max. :65.72 Max. :67.01 Max. :66.86
## NA's :1
## YR2017 Code xMEAN
## Min. :10.62 Export:46 Min. :13.20
## 1st Qu.:31.17 Import:46 1st Qu.:32.71
## Median :36.97 Median :38.17
## Mean :37.86 Mean :38.33
## 3rd Qu.:45.38 3rd Qu.:43.65
## Max. :62.14 Max. :66.01
## NA's :6
# Per-column summary of the import rows only.
summary(Agg_import)
## Name YR2008
## Arab World : 1 Min. :22.32
## Caribbean small states : 1 1st Qu.:35.43
## Central Europe and the Baltics : 1 Median :38.00
## Early-demographic dividend : 1 Mean :37.82
## East Asia & Pacific : 1 3rd Qu.:42.27
## East Asia & Pacific (excluding high income): 1 Max. :46.91
## (Other) :40 NA's :2
## YR2009 YR2010 YR2011 YR2012
## Min. :26.54 Min. :18.08 Min. :24.53 Min. :26.61
## 1st Qu.:36.62 1st Qu.:32.67 1st Qu.:35.77 1st Qu.:34.69
## Median :41.71 Median :38.57 Median :38.69 Median :37.54
## Mean :40.22 Mean :37.20 Mean :39.12 Mean :38.55
## 3rd Qu.:45.01 3rd Qu.:42.10 3rd Qu.:44.17 3rd Qu.:43.29
## Max. :49.81 Max. :48.57 Max. :48.91 Max. :50.44
## NA's :2 NA's :2
## YR2013 YR2014 YR2015 YR2016
## Min. :26.09 Min. :23.31 Min. :24.85 Min. :25.89
## 1st Qu.:34.51 1st Qu.:33.08 1st Qu.:32.67 1st Qu.:32.96
## Median :37.28 Median :38.55 Median :38.46 Median :37.79
## Mean :38.59 Mean :38.61 Mean :38.18 Mean :38.12
## 3rd Qu.:43.51 3rd Qu.:44.17 3rd Qu.:42.46 3rd Qu.:43.21
## Max. :50.77 Max. :51.93 Max. :55.49 Max. :53.54
##
## YR2017 Code xMEAN
## Min. :21.59 Export: 0 Min. :26.04
## 1st Qu.:32.31 Import:46 1st Qu.:34.33
## Median :35.57 Median :37.67
## Mean :37.29 Mean :38.34
## 3rd Qu.:41.60 3rd Qu.:42.35
## Max. :53.86 Max. :50.95
## NA's :1
# Per-column summary of the export rows only.
summary(Agg_export)
## Name YR2008
## Arab World : 1 Min. :11.49
## Caribbean small states : 1 1st Qu.:33.96
## Central Europe and the Baltics : 1 Median :38.82
## Early-demographic dividend : 1 Mean :38.65
## East Asia & Pacific : 1 3rd Qu.:42.05
## East Asia & Pacific (excluding high income): 1 Max. :69.37
## (Other) :40 NA's :3
## YR2009 YR2010 YR2011 YR2012
## Min. :10.94 Min. :12.20 Min. :14.17 Min. :14.71
## 1st Qu.:33.61 1st Qu.:32.02 1st Qu.:34.04 1st Qu.:34.04
## Median :40.24 Median :37.18 Median :40.65 Median :39.38
## Mean :39.26 Mean :37.11 Mean :39.21 Mean :38.97
## 3rd Qu.:44.27 3rd Qu.:43.48 3rd Qu.:44.17 3rd Qu.:44.81
## Max. :68.58 Max. :65.16 Max. :62.00 Max. :66.13
## NA's :3 NA's :3 NA's :1 NA's :1
## YR2013 YR2014 YR2015 YR2016
## Min. :16.04 Min. :13.86 Min. :11.93 Min. :11.55
## 1st Qu.:32.86 1st Qu.:32.11 1st Qu.:30.37 1st Qu.:30.01
## Median :39.45 Median :39.00 Median :39.39 Median :38.47
## Mean :38.97 Mean :38.69 Mean :38.44 Mean :38.42
## 3rd Qu.:44.61 3rd Qu.:45.29 3rd Qu.:44.91 3rd Qu.:46.10
## Max. :67.09 Max. :65.72 Max. :67.01 Max. :66.86
## NA's :1
## YR2017 Code xMEAN
## Min. :10.62 Export:46 Min. :13.20
## 1st Qu.:29.81 Import: 0 1st Qu.:32.17
## Median :40.16 Median :38.82
## Mean :38.49 Mean :38.32
## 3rd Qu.:46.67 3rd Qu.:44.55
## Max. :62.14 Max. :66.01
## NA's :5
# Histogram of the per-group mean (xMEAN) for imports, scaled to density so the
# kernel density estimate can be overlaid on the same axis.
# after_stat(density) replaces the deprecated ..density.. notation, and the
# extra unfilled geom_density() layer was dropped because the filled density
# layer below already draws the same curve.
ggplot(Agg_import, aes(x = xMEAN)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 3, fill = "gray", color = "black"
  ) +
  geom_density(alpha = .2, fill = "Red")
The import data does not show a pronounced skew and appears to follow a roughly normal curve.
# Normal Q-Q plot of the import means, with the reference line added.
ggplot(data = Agg_import, mapping = aes(sample = xMEAN)) +
  geom_qq() +
  geom_qq_line()
The data stays fairly close to the Q-Q reference line.
# Bar chart of the mean value (xMEAN) for each import group, one bar per Name.
# The legend is hidden because the x axis already carries the group names.
ggplot(Agg_import, aes(x = Name, y = xMEAN, fill = Name)) +
  geom_col() +
  scale_fill_hue(l = 50) +
  labs(
    title = "Rate across Countries",
    x = "Aggrate Name",
    y = "Average Rate"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 55, hjust = 1, face = "bold")
  )
Looking at the average rate by group, there seem to be a few interesting outliers.
The export side of the data does not fully follow a normal curve. There will be interesting outliers in the data.
# Histogram of the per-group mean (xMEAN) for exports, scaled to density so the
# kernel density estimate can be overlaid on the same axis.
# after_stat(density) replaces the deprecated ..density.. notation, and the
# extra unfilled geom_density() layer was dropped because the filled density
# layer below already draws the same curve.
ggplot(Agg_export, aes(x = xMEAN)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 3, fill = "gray", color = "black"
  ) +
  geom_density(alpha = .2, fill = "Red")
# Normal Q-Q plot of the export means, with the reference line added.
ggplot(data = Agg_export, mapping = aes(sample = xMEAN)) +
  geom_qq() +
  geom_qq_line()
# Bar chart of the mean value (xMEAN) for each export group, one bar per Name.
# The legend is hidden because the x axis already carries the group names.
ggplot(Agg_export, aes(x = Name, y = xMEAN, fill = Name)) +
  geom_col() +
  scale_fill_hue(l = 50) +
  labs(
    title = "Rate across Countries",
    x = "Aggrate Name",
    y = "Average Rate"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 55, hjust = 1, face = "bold")
  )
At first appearance, the export side of the data shows more dynamic shifts across regions.