#Check the version
#version

#Update if needed
#update.packages()

#Install package
#install.packages("RODBC")
#install.packages('dplyr')
#install.packages('tidyr')


#Use library
library(RODBC)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#List all your ODBC connections
#odbcDataSources(type = c("all", "user", "system"))

#Create connection - Note if you leave uid and pwd blank it works with your existing Windows credentials
Local <- odbcConnect("data", uid = "", pwd = "")

#Query a database (select statement)
MetadataCountries <- sqlQuery(Local, "SELECT * FROM WWI.dbo.MetadataCountries")
TradeDataForTopCountries  <- sqlQuery(Local, "SELECT * FROM WWI.dbo.TradeDataForTopCountries")
PivotedData <- sqlQuery(Local, "SELECT * FROM WWI.dbo.PivotedTradeDataForTopCounties")

#View data
head(TradeDataForTopCountries)
##   Country Name Country Code                              Indicator Name
## 1    Indonesia          IDN Exports of goods and services (current US$)
## 2    Indonesia          IDN Exports of goods and services (current US$)
## 3    Indonesia          IDN Exports of goods and services (current US$)
## 4    Indonesia          IDN Exports of goods and services (current US$)
## 5    Indonesia          IDN Exports of goods and services (current US$)
## 6    Indonesia          IDN Exports of goods and services (current US$)
##   Indicator Code Years      Value
## 1 NE.EXP.GNFS.CD  1967  549939021
## 2 NE.EXP.GNFS.CD  1968  850449461
## 3 NE.EXP.GNFS.CD  1969  831624098
## 4 NE.EXP.GNFS.CD  1970 1298928727
## 5 NE.EXP.GNFS.CD  1971 1481077568
## 6 NE.EXP.GNFS.CD  1972 2030161417
head(PivotedData)
##   Country Name Country Code                              Indicator Name
## 1       Brazil          BRA Exports of goods and services (current US$)
## 2       Brazil          BRA Exports of goods and services (current US$)
## 3       Brazil          BRA Exports of goods and services (current US$)
## 4       Brazil          BRA Exports of goods and services (current US$)
## 5       Brazil          BRA Exports of goods and services (current US$)
## 6       Brazil          BRA Exports of goods and services (current US$)
##   Years BX.KLT.DINV.CD.WD FI.RES.TOTL.CD NE.IMP.GNFS.CD NE.EXP.GNFS.CD
## 1  1960                NA             NA             NA     1070544629
## 2  1961                NA             NA             NA     1109152823
## 3  1962                NA             NA             NA      770900520
## 4  1963                NA             NA             NA     2080325262
## 5  1964                NA             NA             NA     1354466135
## 6  1965                NA             NA             NA     1685625122
##   NE.TRD.GNFS.ZS NY.GDP.PCAP.KD.ZG
## 1             NA                NA
## 2             NA                NA
## 3             NA                NA
## 4             NA                NA
## 5             NA                NA
## 6             NA                NA
head(MetadataCountries)
##   Country Code                    Region         IncomeGroup
## 1          ABW Latin America & Caribbean         High income
## 2          AFG                South Asia          Low income
## 3          AGO        Sub-Saharan Africa Upper middle income
## 4          ALB     Europe & Central Asia Upper middle income
## 5          AND     Europe & Central Asia         High income
## 6          ARB                      <NA>                <NA>
##                                                                                                                                                                                                                                        SpecialNotes
## 1                                                                                                     SNA data for 2000-2011 are updated from official government statistics; 1994-1999 from UN databases. Base year has changed from 1995 to 2000.
## 2 Fiscal year end: March 20; reporting period for national accounts data: FY (from 2013 are CY). National accounts data are sourced from the IMF and differ from the Central Statistics Organization numbers due to exclusion of the opium economy.
## 3                                                                                                                April 2013 database update: Based on IMF data, national accounts data were revised for 2000 onward; the base year changed to 2002.
## 4                                                                                                                                                                                                                                              <NA>
## 5                                                                                                                                                                            The base year has changed to 2000. Price valuation is in basic prices.
## 6                                                                                                                                                             Arab World aggregate. Arab World is composed of members of the League of Arab States.
##     TableName
## 1       Aruba
## 2 Afghanistan
## 3      Angola
## 4     Albania
## 5     Andorra
## 6  Arab World
#Check the structure of the data
str(TradeDataForTopCountries)
## 'data.frame':    1558 obs. of  6 variables:
##  $ Country Name  : Factor w/ 5 levels "Brazil","China",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Country Code  : Factor w/ 5 levels "BRA","CHN","IDN",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Indicator Name: Factor w/ 6 levels "Exports of goods and services (current US$)",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Indicator Code: Factor w/ 6 levels "BX.KLT.DINV.CD.WD",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Years         : int  1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 ...
##  $ Value         : num  5.50e+08 8.50e+08 8.32e+08 1.30e+09 1.48e+09 ...
str(PivotedData)
## 'data.frame':    1558 obs. of  10 variables:
##  $ Country Name     : Factor w/ 5 levels "Brazil","China",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Country Code     : Factor w/ 5 levels "BRA","CHN","IDN",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Indicator Name   : Factor w/ 6 levels "Exports of goods and services (current US$)",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Years            : int  1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
##  $ BX.KLT.DINV.CD.WD: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ FI.RES.TOTL.CD   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ NE.IMP.GNFS.CD   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ NE.EXP.GNFS.CD   : num  1.07e+09 1.11e+09 7.71e+08 2.08e+09 1.35e+09 ...
##  $ NE.TRD.GNFS.ZS   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ NY.GDP.PCAP.KD.ZG: num  NA NA NA NA NA NA NA NA NA NA ...
str(MetadataCountries)
## 'data.frame':    263 obs. of  5 variables:
##  $ Country Code: Factor w/ 263 levels "ABW","AFG","AGO",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Region      : Factor w/ 7 levels "East Asia & Pacific",..: 3 6 7 2 2 NA 4 3 2 1 ...
##  $ IncomeGroup : Factor w/ 4 levels "High income",..: 1 2 4 4 1 NA 1 4 3 4 ...
##  $ SpecialNotes: Factor w/ 184 levels "2013 national accounts estimates are based on IMF reports. Based on official government statistics, value added is measured at "| __truncated__,..: 157 92 32 NA 165 37 28 38 NA NA ...
##  $ TableName   : Factor w/ 263 levels "Afghanistan",..: 11 1 6 2 5 8 249 9 10 4 ...
#Averge Imports
AvgImports <- sqlQuery(Local, "select distinct [Country Name],
                                avg([NE.IMP.GNFS.CD]) over (partition by [Country Name])  as AverageImports, 
                                  IncomeGroup 
                              from WWI.dbo.PivotedTradeDataForTopCounties as T
                              inner join WWI.dbo.MetadataCountries as C
                              on t.[Country Code]= C.[Country Code]
                              order by AverageImports desc")      

#Average Exports

AvgExports<- sqlQuery(Local, "select distinct [Country Name],
                                 avg([NE.EXP.GNFS.CD]) over (partition by [Country Name])  as AverageExports, 
                                 IncomeGroup 
                              from WWI.dbo.PivotedTradeDataForTopCounties as T
                              inner join WWI.dbo.MetadataCountries as C 
                                on t.[Country Code]= C.[Country Code]
                              order by AverageExports desc") 

#Average Reserves

AvgReserves<- sqlQuery(Local, "select distinct [Country Name],
                                   avg([FI.RES.TOTL.CD]) over (partition by [Country Name])  as AverageReserves, 
                                   IncomeGroup 
                              from WWI.dbo.PivotedTradeDataForTopCounties as T
                              inner join WWI.dbo.MetadataCountries as C 
                                on t.[Country Code]= C.[Country Code]
                              order by AverageReserves desc") 


# get the min and max of the GDP per capita 

GDPComparison<- sqlQuery(Local, "select distinct [Country Name],
                                      min([NE.TRD.GNFS.ZS]) over (partition by [Country Name])  as MinGDPperCapita, 
                                      max([NE.TRD.GNFS.ZS]) over (partition by [Country Name])  as MaxGDPperCapita,
                                      IncomeGroup 
                              from WWI.dbo.PivotedTradeDataForTopCounties as T
                              inner join WWI.dbo.MetadataCountries as C on t.[Country Code]= C.[Country Code]
                              order by MaxGDPperCapita desc") 

AvgExports
##    Country Name AverageExports         IncomeGroup
## 1 United States   696685832143         High income
## 2         China   394786871658 Upper middle income
## 3         India    79398935340 Lower middle income
## 4        Brazil    68122061014 Upper middle income
## 5     Indonesia    60911119843 Lower middle income
AvgImports
##    Country Name AverageImports         IncomeGroup
## 1 United States   886916771429         High income
## 2         China   344100100492 Upper middle income
## 3         India    93853766130 Lower middle income
## 4        Brazil    70970074605 Upper middle income
## 5     Indonesia    56181673425 Lower middle income
AvgReserves
##    Country Name AverageReserves         IncomeGroup
## 1         China    804915634040 Upper middle income
## 2 United States    154768981114         High income
## 3         India     67003324224 Lower middle income
## 4        Brazil     63378183034 Upper middle income
## 5     Indonesia     22941856355 Lower middle income
GDPComparison
##    Country Name MinGDPperCapita MaxGDPperCapita         IncomeGroup
## 1     Indonesia       10.922909        96.18619 Lower middle income
## 2         China        4.982690        64.76946 Upper middle income
## 3         India        7.529721        55.75262 Lower middle income
## 4 United States        8.929522        30.88516         High income
## 5        Brazil        9.057734        29.67825 Upper middle income
#Finding the correlation between the reserves and the export and reserves and import

Reserves <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'FI.RES.TOTL.CD', 6]

Import <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'NE.IMP.GNFS.CD', 6]

Export <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'NE.EXP.GNFS.CD', 6]

GDPPercapita <- TradeDataForTopCountries[TradeDataForTopCountries$`Indicator Code` == 'NE.TRD.GNFS.ZS', 6]
Reserves[264:272] <- NA
cor.test(Reserves, Export)
## 
##  Pearson's product-moment correlation
## 
## data:  Reserves and Export
## t = 1.0626, df = 261, p-value = 0.2889
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05576721  0.18511928
## sample estimates:
##        cor 
## 0.06563219
cor.test(Reserves, Import)
## 
##  Pearson's product-moment correlation
## 
## data:  Reserves and Import
## t = 0.79643, df = 261, p-value = 0.4265
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07214866  0.16918679
## sample estimates:
##        cor 
## 0.04923772
cor.test(Import, Export)
## 
##  Pearson's product-moment correlation
## 
## data:  Import and Export
## t = 73.033, df = 270, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9691292 0.9807466
## sample estimates:
##       cor 
## 0.9756119
Import[273:279] <- NA
Export[273:279] <- NA
Reserves[273:279] <- NA
cor.test(Import, GDPPercapita)
## 
##  Pearson's product-moment correlation
## 
## data:  Import and GDPPercapita
## t = 2.51, df = 270, p-value = 0.01266
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03265575 0.26517771
## sample estimates:
##       cor 
## 0.1510048
cor.test(GDPPercapita, Export)
## 
##  Pearson's product-moment correlation
## 
## data:  GDPPercapita and Export
## t = 3.6513, df = 270, p-value = 0.0003133
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1005799 0.3274090
## sample estimates:
##       cor 
## 0.2169205
cor.test(GDPPercapita, Reserves)
## 
##  Pearson's product-moment correlation
## 
## data:  GDPPercapita and Reserves
## t = 1.4245, df = 261, p-value = 0.1555
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03347675  0.20659734
## sample estimates:
##       cor 
## 0.0878356
#Best practice - don't leave the connection open and ensures you get the latest data
odbcCloseAll()

PivotedDatainR <- TradeDataForTopCountries %>% spread(`Indicator Code`, Value)

write.csv(PivotedDatainR, 'newfile.csv')