#Check the version
#version
#Update if needed
#update.packages()
#Install package
#install.packages("RODBC")
#install.packages('dplyr')
#install.packages('tidyr')
#Use library
library(RODBC)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#List all your ODBC connections
#odbcDataSources(type = c("all", "user", "system"))
#Create connection - Note if you leave uid and pwd blank it works with your existing Windows credentials
Local <- odbcConnect("data", uid = "", pwd = "")
#Query a database (select statement)
MetadataCountries <- sqlQuery(Local, "SELECT * FROM WWI.dbo.MetadataCountries")
TradeDataForTopCountries <- sqlQuery(Local, "SELECT * FROM WWI.dbo.TradeDataForTopCountries")
PivotedData <- sqlQuery(Local, "SELECT * FROM WWI.dbo.PivotedTradeDataForTopCounties")
#View data
head(TradeDataForTopCountries)
## Country Name Country Code Indicator Name
## 1 Indonesia IDN Exports of goods and services (current US$)
## 2 Indonesia IDN Exports of goods and services (current US$)
## 3 Indonesia IDN Exports of goods and services (current US$)
## 4 Indonesia IDN Exports of goods and services (current US$)
## 5 Indonesia IDN Exports of goods and services (current US$)
## 6 Indonesia IDN Exports of goods and services (current US$)
## Indicator Code Years Value
## 1 NE.EXP.GNFS.CD 1967 549939021
## 2 NE.EXP.GNFS.CD 1968 850449461
## 3 NE.EXP.GNFS.CD 1969 831624098
## 4 NE.EXP.GNFS.CD 1970 1298928727
## 5 NE.EXP.GNFS.CD 1971 1481077568
## 6 NE.EXP.GNFS.CD 1972 2030161417
head(PivotedData)
## Country Name Country Code Indicator Name
## 1 Brazil BRA Exports of goods and services (current US$)
## 2 Brazil BRA Exports of goods and services (current US$)
## 3 Brazil BRA Exports of goods and services (current US$)
## 4 Brazil BRA Exports of goods and services (current US$)
## 5 Brazil BRA Exports of goods and services (current US$)
## 6 Brazil BRA Exports of goods and services (current US$)
## Years BX.KLT.DINV.CD.WD FI.RES.TOTL.CD NE.IMP.GNFS.CD NE.EXP.GNFS.CD
## 1 1960 NA NA NA 1070544629
## 2 1961 NA NA NA 1109152823
## 3 1962 NA NA NA 770900520
## 4 1963 NA NA NA 2080325262
## 5 1964 NA NA NA 1354466135
## 6 1965 NA NA NA 1685625122
## NE.TRD.GNFS.ZS NY.GDP.PCAP.KD.ZG
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
head(MetadataCountries)
## Country Code Region IncomeGroup
## 1 ABW Latin America & Caribbean High income
## 2 AFG South Asia Low income
## 3 AGO Sub-Saharan Africa Upper middle income
## 4 ALB Europe & Central Asia Upper middle income
## 5 AND Europe & Central Asia High income
## 6 ARB <NA> <NA>
## SpecialNotes
## 1 SNA data for 2000-2011 are updated from official government statistics; 1994-1999 from UN databases. Base year has changed from 1995 to 2000.
## 2 Fiscal year end: March 20; reporting period for national accounts data: FY (from 2013 are CY). National accounts data are sourced from the IMF and differ from the Central Statistics Organization numbers due to exclusion of the opium economy.
## 3 April 2013 database update: Based on IMF data, national accounts data were revised for 2000 onward; the base year changed to 2002.
## 4 <NA>
## 5 The base year has changed to 2000. Price valuation is in basic prices.
## 6 Arab World aggregate. Arab World is composed of members of the League of Arab States.
## TableName
## 1 Aruba
## 2 Afghanistan
## 3 Angola
## 4 Albania
## 5 Andorra
## 6 Arab World
#Check the structure of the data
str(TradeDataForTopCountries)
## 'data.frame': 1558 obs. of 6 variables:
## $ Country Name : Factor w/ 5 levels "Brazil","China",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Country Code : Factor w/ 5 levels "BRA","CHN","IDN",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Indicator Name: Factor w/ 6 levels "Exports of goods and services (current US$)",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Indicator Code: Factor w/ 6 levels "BX.KLT.DINV.CD.WD",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Years : int 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 ...
## $ Value : num 5.50e+08 8.50e+08 8.32e+08 1.30e+09 1.48e+09 ...
str(PivotedData)
## 'data.frame': 1558 obs. of 10 variables:
## $ Country Name : Factor w/ 5 levels "Brazil","China",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Country Code : Factor w/ 5 levels "BRA","CHN","IDN",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Indicator Name : Factor w/ 6 levels "Exports of goods and services (current US$)",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Years : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ BX.KLT.DINV.CD.WD: num NA NA NA NA NA NA NA NA NA NA ...
## $ FI.RES.TOTL.CD : num NA NA NA NA NA NA NA NA NA NA ...
## $ NE.IMP.GNFS.CD : num NA NA NA NA NA NA NA NA NA NA ...
## $ NE.EXP.GNFS.CD : num 1.07e+09 1.11e+09 7.71e+08 2.08e+09 1.35e+09 ...
## $ NE.TRD.GNFS.ZS : num NA NA NA NA NA NA NA NA NA NA ...
## $ NY.GDP.PCAP.KD.ZG: num NA NA NA NA NA NA NA NA NA NA ...
str(MetadataCountries)
## 'data.frame': 263 obs. of 5 variables:
## $ Country Code: Factor w/ 263 levels "ABW","AFG","AGO",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Region : Factor w/ 7 levels "East Asia & Pacific",..: 3 6 7 2 2 NA 4 3 2 1 ...
## $ IncomeGroup : Factor w/ 4 levels "High income",..: 1 2 4 4 1 NA 1 4 3 4 ...
## $ SpecialNotes: Factor w/ 184 levels "2013 national accounts estimates are based on IMF reports. Based on official government statistics, value added is measured at "| __truncated__,..: 157 92 32 NA 165 37 28 38 NA NA ...
## $ TableName : Factor w/ 263 levels "Afghanistan",..: 11 1 6 2 5 8 249 9 10 4 ...
#Averge Imports
AvgImports <- sqlQuery(Local, "select distinct [Country Name],
avg([NE.IMP.GNFS.CD]) over (partition by [Country Name]) as AverageImports,
IncomeGroup
from WWI.dbo.PivotedTradeDataForTopCounties as T
inner join WWI.dbo.MetadataCountries as C
on t.[Country Code]= C.[Country Code]
order by AverageImports desc")
#Average Exports
AvgExports<- sqlQuery(Local, "select distinct [Country Name],
avg([NE.EXP.GNFS.CD]) over (partition by [Country Name]) as AverageExports,
IncomeGroup
from WWI.dbo.PivotedTradeDataForTopCounties as T
inner join WWI.dbo.MetadataCountries as C
on t.[Country Code]= C.[Country Code]
order by AverageExports desc")
#Average Reserves
AvgReserves<- sqlQuery(Local, "select distinct [Country Name],
avg([FI.RES.TOTL.CD]) over (partition by [Country Name]) as AverageReserves,
IncomeGroup
from WWI.dbo.PivotedTradeDataForTopCounties as T
inner join WWI.dbo.MetadataCountries as C
on t.[Country Code]= C.[Country Code]
order by AverageReserves desc")
# get the min and max of the GDP per capita
GDPComparison<- sqlQuery(Local, "select distinct [Country Name],
min([NE.TRD.GNFS.ZS]) over (partition by [Country Name]) as MinGDPperCapita,
max([NE.TRD.GNFS.ZS]) over (partition by [Country Name]) as MaxGDPperCapita,
IncomeGroup
from WWI.dbo.PivotedTradeDataForTopCounties as T
inner join WWI.dbo.MetadataCountries as C on t.[Country Code]= C.[Country Code]
order by MaxGDPperCapita desc")
AvgExports
## Country Name AverageExports IncomeGroup
## 1 United States 696685832143 High income
## 2 China 394786871658 Upper middle income
## 3 India 79398935340 Lower middle income
## 4 Brazil 68122061014 Upper middle income
## 5 Indonesia 60911119843 Lower middle income
AvgImports
## Country Name AverageImports IncomeGroup
## 1 United States 886916771429 High income
## 2 China 344100100492 Upper middle income
## 3 India 93853766130 Lower middle income
## 4 Brazil 70970074605 Upper middle income
## 5 Indonesia 56181673425 Lower middle income
AvgReserves
## Country Name AverageReserves IncomeGroup
## 1 China 804915634040 Upper middle income
## 2 United States 154768981114 High income
## 3 India 67003324224 Lower middle income
## 4 Brazil 63378183034 Upper middle income
## 5 Indonesia 22941856355 Lower middle income
GDPComparison
## Country Name MinGDPperCapita MaxGDPperCapita IncomeGroup
## 1 Indonesia 10.922909 96.18619 Lower middle income
## 2 China 4.982690 64.76946 Upper middle income
## 3 India 7.529721 55.75262 Lower middle income
## 4 United States 8.929522 30.88516 High income
## 5 Brazil 9.057734 29.67825 Upper middle income
#Finding the correlation between the reserves and the export and reserves and import
Reserves <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'FI.RES.TOTL.CD', 6]
Import <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'NE.IMP.GNFS.CD', 6]
Export <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'NE.EXP.GNFS.CD', 6]
GDPPercapita <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'NE.TRD.GNFS.ZS', 6]
Reserves[264:272] <- NA
cor.test(Reserves, Export)
##
## Pearson's product-moment correlation
##
## data: Reserves and Export
## t = 1.0626, df = 261, p-value = 0.2889
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05576721 0.18511928
## sample estimates:
## cor
## 0.06563219
cor.test(Reserves, Import)
##
## Pearson's product-moment correlation
##
## data: Reserves and Import
## t = 0.79643, df = 261, p-value = 0.4265
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07214866 0.16918679
## sample estimates:
## cor
## 0.04923772
cor.test(Import, Export)
##
## Pearson's product-moment correlation
##
## data: Import and Export
## t = 73.033, df = 270, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9691292 0.9807466
## sample estimates:
## cor
## 0.9756119
Import[273:279] <- NA
Export[273:279] <- NA
Reserves[273:279] <- NA
cor.test(Import, GDPPercapita)
##
## Pearson's product-moment correlation
##
## data: Import and GDPPercapita
## t = 2.51, df = 270, p-value = 0.01266
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03265575 0.26517771
## sample estimates:
## cor
## 0.1510048
cor.test(GDPPercapita, Export)
##
## Pearson's product-moment correlation
##
## data: GDPPercapita and Export
## t = 3.6513, df = 270, p-value = 0.0003133
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1005799 0.3274090
## sample estimates:
## cor
## 0.2169205
cor.test(GDPPercapita, Reserves)
##
## Pearson's product-moment correlation
##
## data: GDPPercapita and Reserves
## t = 1.4245, df = 261, p-value = 0.1555
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03347675 0.20659734
## sample estimates:
## cor
## 0.0878356
#Best practice - don't leave the connection open and ensures you get the latest data
odbcCloseAll()
PivotedDatainR <- TradeDataForTopCountries %>% spread(`Indicator Code`, Value)
write.csv(PivotedDatainR, 'newfile.csv')