IS 607 Project 2

Load libraries

# Attach the packages the analysis relies on. Each call must be its own
# statement; several calls on one line without a separator do not parse.
library(tidyr)
library(dplyr)
library(stringr)

Load dataset 1 MLB

# Read the tab-separated World Series table (Year, Results, MVP) from a
# pinned GitHub revision. read.delim() uses sep = "\t" and otherwise the
# same defaults as read.csv().
MLB <- read.delim(
  "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/6fcd4ca76df1116376365dff961eaba3a85df4fe/MLB.txt",
  header = TRUE
)
# Print the complete table (wrapped in a list) to inspect the raw rows.
list(MLB)
## [[1]]
##     Year                                   Results                 MVP
## 1   2015                          Royals 4, Mets 1      Salvador Perez
## 2   2014                        Giants 4, Royals 3   Madison Bumgarner
## 3   2013                    Red Sox 4, Cardinals 2         David Ortiz
## 4   2012                        Giants 4, Tigers 0      Pablo Sandoval
## 5   2011                    Cardinals 4, Rangers 3        David Freese
## 6   2010                       Giants 4, Rangers 1      Edgar Renteria
## 7   2009                     Yankees 4, Phillies 2       Hideki Matsui
## 8   2008               Philadelphia 4, Tampa Bay 1         Cole Hamels
## 9   2007                      Boston 4, Colorado 0         Mike Lowell
## 10  2006                    St. Louis 4, Detroit 1      David Eckstein
## 11  2005               Chi. White Sox 4, Houston 0        Jermaine Dye
## 12  2004                     Boston 4, St. Louis 0       Manny Ramirez
## 13  2003                   Florida 4, NY Yankees 2        Josh Beckett
## 14  2002                Anaheim 4, San Francisco 3          Troy Glaus
## 15  2001                   Arizona 4, NY Yankees 3   Schilling/Johnson
## 16  2000                   NY Yankees 4, NY Mets 1         Derek Jeter
## 17                                                                    
## 18  Year                                   Results                 MVP
## 19  1999                   NY Yankees 4, Atlanta 0      Mariano Rivera
## 20  1998                 NY Yankees 4, San Diego 0       Scott Brosius
## 21  1997                    Florida 4, Cleveland 3     Livan Hernandez
## 22  1996                   NY Yankees 4, Atlanta 2      John Wetteland
## 23  1995                    Atlanta 4, Cleveland 2         Tom Glavine
## 24  1994                                  Not Held                 N/A
## 25  1993                 Toronto 4, Philadelphia 2        Paul Molitor
## 26  1992                      Toronto 4, Atlanta 2         Pat Borders
## 27  1991                    Minnesota 4, Atlanta 3         Jack Morris
## 28  1990                   Cincinnati 4, Oakland 0           Jose Rijo
## 29  Year                                   Results                 MVP
## 30  1989                Oakland 4, San Francisco 0        Dave Stewart
## 31  1988                  Los Angeles 4, Oakland 1      Orel Hershiser
## 32  1987                  Minnesota 4, St. Louis 3         Frank Viola
## 33  1986                       NY Mets 4, Boston 3          Ray Knight
## 34  1985                Kansas City 4, St. Louis 3     Bret Saberhagen
## 35  1984                    Detroit 4, San Diego 1       Alan Trammell
## 36  1983               Baltimore 4, Philadelphia 1        Rick Dempsey
## 37  1982                  St. Louis 4, Milwaukee 3      Darrell Porter
## 38  1981               Los Angeles 4, NY Yankees 2 Guerrero/Cey/Yeager
## 39  1980             Philadelphia 4, Kansas City 2        Mike Schmidt
## 40                                                                    
## 41  Year                                   Results                 MVP
## 42  1979                 Pittsburgh 4, Baltimore 3     Willie Stargell
## 43  1978               NY Yankees 4, Los Angeles 2          Bucky Dent
## 44  1977               NY Yankees 4, Los Angeles 2      Reggie Jackson
## 45  1976                Cincinnati 4, NY Yankees 0        Johnny Bench
## 46  1975                    Cincinnati 4, Boston 3           Pete Rose
## 47  1974                  Oakland 4, Los Angeles 1      Rollie Fingers
## 48  1973                      Oakland 4, NY Mets 3      Reggie Jackson
## 49  1972                   Oakland 4, Cincinnati 3         Gene Tenace
## 50  1971                 Pittsburgh 4, Baltimore 3    Roberto Clemente
## 51  1970                 Baltimore 4, Cincinnati 1     Brooks Robinson
## 52  Year                                   Results                 MVP
## 53  1969                    NY Mets 4, Baltimore 1      Donn Clendenon
## 54  1968                    Detroit 4, St. Louis 3       Mickey Lolich
## 55  1967                     St. Louis 4, Boston 3          Bob Gibson
## 56  1966                Baltimore 4, Los Angeles 0      Frank Robinson
## 57  1965                Los Angeles 4, Minnesota 3        Sandy Koufax
## 58  1964                 St. Louis 4, NY Yankees 3          Bob Gibson
## 59  1963               Los Angeles 4, NY Yankees 0        Sandy Koufax
## 60  1962             NY Yankees 4, San Francisco 3         Ralph Terry
## 61  1961                NY Yankees 4, Cincinnati 1         Whitey Ford
## 62  1960                Pittsburgh 4, NY Yankees 3    Bobby Richardson
## 63                                                                    
## 64  Year                                   Results                 MVP
## 65  1959        Los Angeles 4, Chicago White Sox 2        Larry Sherry
## 66  1958               NY Yankees 4, Mil. Braves 3          Bob Turley
## 67  1957               Mil. Braves 4, NY Yankees 3        Lew Burdette
## 68  1956                  NY Yankees 4, Brooklyn 3          Don Larsen
## 69  1955                  Brooklyn 4, NY Yankees 3       Johnny Podres
## 70  1954                  NY Giants 4, Cleveland 0                  --
## 71  1953                  NY Yankees 4, Brooklyn 2                  --
## 72  1952                  NY Yankees 4, Brooklyn 3                  --
## 73  1951                 NY Yankees 4, NY Giants 2                  --
## 74  1950              NY Yankees 4, Philadelphia 0                  --
## 75  Year                                   Results                 MVP
## 76  1949                  NY Yankees 4, Brooklyn 1                  --
## 77  1948              Cleveland 4, Boston Braves 2                  --
## 78  1947                  NY Yankees 4, Brooklyn 3                  --
## 79  1946             St. Louis 4, Boston Red Sox 3                  --
## 80  1945                 Detroit 4, Chicago Cubs 3                  --
## 81  1944 St. Louis Cardinals 4, St. Louis Browns 2                  --
## 82  1943       NY Yankees 4, St. Louis Cardinals 1                  --
## 83  1942       St. Louis Cardinals 4, NY Yankees 1                  --
## 84  1941                  NY Yankees 4, Brooklyn 1                  --
## 85  1940                   Cincinnati 4, Detroit 3                  --
## 86                                                                    
## 87  Year                                   Results                 MVP
## 88  1939                NY Yankees 4, Cincinnati 0                  --
## 89  1938              NY Yankees 4, Chicago Cubs 0                  --
## 90  1937                 NY Yankees 4, NY Giants 1                  --
## 91  1936                 NY Yankees 4, NY Giants 2                  --
## 92  1935                 Detroit 4, Chicago Cubs 2                  --
## 93  1934          St. Louis Cardinals 4, Detroit 3                  --
## 94  1933                 NY Giants 4, Washington 1                  --
## 95  1932              NY Yankees 4, Chicago Cubs 0                  --
## 96  1931 St. Louis Cardinals 4, Philadelphia A's 3                  --
## 97  1930 Philadelphia A's 4, St. Louis Cardinals 2                  --
## 98  Year                                   Results                 MVP
## 99  1929        Philadelphia A's 4, Chicago Cubs 1                  --
## 100 1928       NY Yankees 4, St. Louis Cardinals 0                  --
## 101 1927                NY Yankees 4, Pittsburgh 0                  --
## 102 1926       St. Louis Cardinals 4, NY Yankees 3                  --
## 103 1925                Pittsburgh 4, Washington 3                  --
## 104 1924                 Washington 4, NY Giants 3                  --
## 105 1923                 NY Yankees 4, NY Giants 2                  --
## 106 1922       NY Giants 4, NY Yankees 0 (one tie)                  --
## 107 1921                 NY Giants 5, NY Yankees 3                  --
## 108 1920                   Cleveland 5, Brooklyn 2                  --
## 109                                                                   
## 110 Year                                   Results                 MVP
## 111 1919         Cincinnati 5, Chicago White Sox 3                  --
## 112 1918          Boston Red Sox 4, Chicago Cubs 2                  --
## 113 1917          Chicago White Sox 4, NY Giants 2                  --
## 114 1916              Boston Red Sox 4, Brooklyn 1                  --
## 115 1915 Boston Red Sox 4, Philadelphia Phillies 1                  --
## 116 1914       Boston Braves 4, Philadelphia A's 0                  --
## 117 1913           Philadelphia A's 4, NY Giants 1                  --
## 118 1912   Boston Red Sox 4, NY Giants 3 (one tie)                  --
## 119 1911           Philadelphia A's 4, NY Giants 2                  --
## 120 1910        Philadelphia A's 4, Chicago Cubs 1                  --
## 121 Year                                   Results                 MVP
## 122 1909                   Pittsburgh 4, Detroit 3                  --
## 123 1908                 Chicago Cubs 4, Detroit 1                  --
## 124 1907       Chicago Cubs 4, Detroit 0 (one tie)                  --
## 125 1906       Chicago White Sox 4, Chicago Cubs 2                  --
## 126 1905           NY Giants 4, Philadelphia A's 1                  --
## 127 1904                                  Not Held                 N/A
## 128 1903            Boston Red Sox 5, Pittsburgh 3                  --
# Build the working data frame from the raw import; MLB itself is left as read.
df2 <- data.frame(MLB)

Remove the blank rows, the rows that repeat the header, and the years in which the World Series was not played

# Drop the non-data rows by position (positions refer to the table printed
# above).
df2 <- df2[-c(
  17, 40, 63, 86, 109,                        # blank separator rows
  18, 29, 41, 52, 64, 75, 87, 98, 110, 121,   # repeated "Year Results MVP" header rows
  24, 127                                     # "Not Held" seasons (1994 and 1904)
), ]
# Preview the cleaned table.
head(df2)
##   Year                Results               MVP
## 1 2015       Royals 4, Mets 1    Salvador Perez
## 2 2014     Giants 4, Royals 3 Madison Bumgarner
## 3 2013 Red Sox 4, Cardinals 2       David Ortiz
## 4 2012     Giants 4, Tigers 0    Pablo Sandoval
## 5 2011 Cardinals 4, Rangers 3      David Freese
## 6 2010    Giants 4, Rangers 1    Edgar Renteria

Separate the Results column into Winner and Loser columns. Across the history of the World Series from 1903 to 2015, there have been 35 distinct winning teams and 42 distinct losing teams.

###mutate(df2, Results = gsub(pattern = "\\s|\\d+", replacement = "", x = Results)) %>% separate(col = "Results", into = c("Winner", "Loser"), sep = ",") %>% summarize(w = n_distinct(Winner), l = n_distinct(Loser))

Load dataset 2 Trade

# Read the comma-separated trade table from a pinned GitHub revision.
# "?" entries become NA and text columns stay character, so the
# comma-formatted figures are kept as strings at this point.
Trade <- read.csv(
  "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/05f302dbab2956d365ffbf80ddc5ed7bf2dc6566/Trade.csv",
  na.strings = "?",
  stringsAsFactors = FALSE
)
# Preview the first rows.
head(Trade)
##   year CTY_CODE CTYNAME    BJAN    BFEB    BMAR    BAPR    BMAY    BJUN
## 1 2009     3510  Brazil     511     447     345     518     419     464
## 2 2009     1220  Canada  -1,791  -1,893  -1,384  -1,092    -816  -1,854
## 3 2009     5700   China -20,362 -19,006 -20,373 -18,426 -18,407 -17,570
## 4 2009     4279  France    -717    -304    -834    -387    -882    -381
## 5 2009     4280 Germany  -2,283  -2,356  -1,888  -1,977  -1,599  -2,366
## 6 2009     5330   India    -527    -537    -463    -338    -244    -252
##      BJUL    BAUG    BSEP    BOCT    BNOV    BDEC     BQ1     BQ2     BQ3
## 1     440     482     557     754     320     770   1,303   1,400   1,479
## 2  -2,323  -1,516  -1,787  -2,647  -2,023  -2,465  -5,068  -3,762  -5,625
## 3 -18,261 -18,077 -18,413 -19,272 -18,985 -19,726 -59,740 -54,404 -54,751
## 4    -863    -760    -731    -540    -754    -589  -1,855  -1,650  -2,355
## 5  -2,731  -2,269  -2,428  -2,713  -3,179  -2,402  -6,527  -5,941  -7,429
## 6    -257     -22    -417    -430    -613    -624  -1,527    -834    -696
##       BQ4   IJAN   IFEB   IMAR   IAPR   IMAY   IJUN   IJUL   IAUG   ISEP
## 1   1,844  1,799  1,663  1,611  1,421  1,605  1,612  1,682  1,613  1,690
## 2  -7,135 17,863 18,477 16,899 16,677 16,368 17,452 19,832 19,451 19,788
## 3 -57,983 24,892 23,906 25,560 23,916 23,831 23,379 23,726 23,774 24,629
## 4  -1,883  3,021  2,863  2,915  2,679  2,750  2,874  2,919  2,717  2,860
## 5  -8,294  6,089  6,246  5,402  5,284  5,165  5,527  6,170  5,726  6,019
## 6  -1,667  1,841  1,802  1,694  1,699  1,557  1,595  1,728  1,571  1,952
##     IOCT   INOV   IDEC    IQ1    IQ2    IQ3    IQ4   EJAN   EFEB   EMAR
## 1  1,611  1,984  1,778  5,073  4,638  4,986  5,372  2,310  2,110  1,956
## 2 20,607 21,059 21,774 53,240 50,497 59,071 63,440 16,073 16,584 15,515
## 3 25,585 25,958 27,218 74,358 71,125 72,130 78,761  4,530  4,900  5,188
## 4  2,773  2,980  2,886  8,799  8,303  8,495  8,639  2,304  2,559  2,081
## 5  6,481  6,862  6,527 17,737 15,977 17,914 19,870  3,806  3,890  3,515
## 6  1,802  1,896  2,029  5,337  4,850  5,252  5,727  1,314  1,265  1,231
##     EAPR   EMAY   EJUN   EJUL   EAUG   ESEP   EOCT   ENOV   EDEC    EQ1
## 1  1,939  2,024  2,076  2,123  2,095  2,247  2,364  2,304  2,548  6,376
## 2 15,585 15,552 15,598 17,510 17,935 18,002 17,960 19,036 19,309 48,171
## 3  5,490  5,424  5,808  5,465  5,698  6,216  6,313  6,972  7,493 14,618
## 4  2,292  1,868  2,493  2,056  1,956  2,128  2,233  2,226  2,297  6,944
## 5  3,308  3,566  3,161  3,439  3,456  3,590  3,768  3,682  4,125 11,210
## 6  1,361  1,312  1,344  1,471  1,550  1,535  1,372  1,283  1,404  3,810
##      EQ2    EQ3    EQ4
## 1  6,039  6,464  7,216
## 2 46,735 53,446 56,305
## 3 16,722 17,379 20,778
## 4  6,653  6,140  6,755
## 5 10,036 10,486 11,575
## 6  4,017  4,556  4,059

Interested in the relationship of Korea exports and China imports

# Keep only the "Korea, South" rows, with the country name, the year and the
# twelve monthly columns EJAN through EDEC (used as exports below).
KoreaEx <- subset(
  Trade,
  CTYNAME == "Korea, South",
  select = c(CTYNAME, year, EJAN:EDEC)
)
head(KoreaEx)
##         CTYNAME year  EJAN  EFEB  EMAR  EAPR  EMAY  EJUN  EJUL  EAUG  ESEP
## 9  Korea, South 2009 1,807 2,074 1,835 2,024 2,474 2,295 2,372 2,642 2,711
## 27 Korea, South 2010 2,977 3,073 3,350 3,157 3,291 3,230 3,372 3,146 3,161
## 45 Korea, South 2011 3,507 3,146 3,520 3,861 3,881 3,490 3,626 3,785 3,569
## 63 Korea, South 2012 3,472 4,103 3,700 3,666 3,494 3,589 3,516 3,132 3,585
## 81 Korea, South 2013 3,389 3,521 3,413 3,162 3,270 3,446 3,470 3,439 3,164
## 99 Korea, South 2014 3,712 3,617 3,830 3,811 3,683 3,767 3,670 3,801 3,644
##     EOCT  ENOV  EDEC
## 9  2,842 2,797 2,739
## 27 3,309 3,384 3,370
## 45 3,471 3,820 3,785
## 63 3,512 3,195 3,318
## 81 3,582 4,040 3,789
## 99 3,604 3,680 3,654
# Keep only the "China" rows, with the country name, the year and the twelve
# monthly columns IJAN through IDEC (used as imports below).
ChinaIm <- subset(
  Trade,
  CTYNAME == "China",
  select = c(CTYNAME, year, IJAN:IDEC)
)
head(ChinaIm)
##    CTYNAME year   IJAN   IFEB   IMAR   IAPR   IMAY   IJUN   IJUL   IAUG
## 3    China 2009 24,892 23,906 25,560 23,916 23,831 23,379 23,726 23,774
## 21   China 2010 27,758 27,869 28,639 28,210 30,070 31,916 31,129 32,138
## 39   China 2011 32,425 33,802 32,159 32,010 33,039 33,167 33,420 33,500
## 57   China 2012 34,310 33,794 37,254 35,548 34,546 35,257 35,683 34,595
## 75   China 2013 38,829 38,012 32,893 34,556 36,298 35,929 36,178 37,177
## 93   China 2014 37,564 37,969 37,515 38,060 38,220 38,932 37,919 39,000
##      ISEP   IOCT   INOV   IDEC
## 3  24,629 25,585 25,958 27,218
## 21 31,716 31,552 31,972 31,985
## 39 33,168 34,265 34,094 34,322
## 57 35,611 35,479 36,623 36,919
## 75 37,068 37,133 37,533 38,828
## 93 40,223 40,306 40,388 40,659

Clean up the column names

# Give both tables the same readable headers: Country, Year, then Jan..Dec.
# month.abb is base R's constant vector "Jan", "Feb", ..., "Dec".
colnames(KoreaEx) <- colnames(ChinaIm) <- c("Country", "Year", month.abb)

Gather monthly trade data

# The two gather() calls below are commented out, so both tables are still in
# wide form (one column per month) when they are printed here.
# NOTE(review): as written they pass Country and Year as the key/value names,
# which are already column names; confirm the intended names (e.g. Month and
# Exports/Imports) before re-enabling.
###KoreaEx = KoreaEx %>% gather(Country, Year, Jan:Dec)
###ChinaIm = ChinaIm %>% gather(Country, Year, Jan:Dec)
head(KoreaEx)
##         Country Year   Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep
## 9  Korea, South 2009 1,807 2,074 1,835 2,024 2,474 2,295 2,372 2,642 2,711
## 27 Korea, South 2010 2,977 3,073 3,350 3,157 3,291 3,230 3,372 3,146 3,161
## 45 Korea, South 2011 3,507 3,146 3,520 3,861 3,881 3,490 3,626 3,785 3,569
## 63 Korea, South 2012 3,472 4,103 3,700 3,666 3,494 3,589 3,516 3,132 3,585
## 81 Korea, South 2013 3,389 3,521 3,413 3,162 3,270 3,446 3,470 3,439 3,164
## 99 Korea, South 2014 3,712 3,617 3,830 3,811 3,683 3,767 3,670 3,801 3,644
##      Oct   Nov   Dec
## 9  2,842 2,797 2,739
## 27 3,309 3,384 3,370
## 45 3,471 3,820 3,785
## 63 3,512 3,195 3,318
## 81 3,582 4,040 3,789
## 99 3,604 3,680 3,654
# Preview the first rows of the China table with its renamed columns.
head(ChinaIm)
##    Country Year    Jan    Feb    Mar    Apr    May    Jun    Jul    Aug
## 3    China 2009 24,892 23,906 25,560 23,916 23,831 23,379 23,726 23,774
## 21   China 2010 27,758 27,869 28,639 28,210 30,070 31,916 31,129 32,138
## 39   China 2011 32,425 33,802 32,159 32,010 33,039 33,167 33,420 33,500
## 57   China 2012 34,310 33,794 37,254 35,548 34,546 35,257 35,683 34,595
## 75   China 2013 38,829 38,012 32,893 34,556 36,298 35,929 36,178 37,177
## 93   China 2014 37,564 37,969 37,515 38,060 38,220 38,932 37,919 39,000
##       Sep    Oct    Nov    Dec
## 3  24,629 25,585 25,958 27,218
## 21 31,716 31,552 31,972 31,985
## 39 33,168 34,265 34,094 34,322
## 57 35,611 35,479 36,623 36,919
## 75 37,068 37,133 37,533 38,828
## 93 40,223 40,306 40,388 40,659

Again rename the columns

# Reshape each table from wide (one column per month) to long (one row per
# country/year/month), giving the columns Country, Year, Month and
# Exports/Imports. Assigning only four names to the 14 existing columns left
# ten columns named NA and put the January and February figures under
# "Month" and "Exports"/"Imports".
KoreaEx <- gather(KoreaEx, key = "Month", value = "Exports", Jan:Dec)
ChinaIm <- gather(ChinaIm, key = "Month", value = "Imports", Jan:Dec)
# NOTE(review): the printed output captured below predates this reshape;
# re-run the document to refresh it.
head(KoreaEx)
##         Country Year Month Exports    NA    NA    NA    NA    NA    NA
## 9  Korea, South 2009 1,807   2,074 1,835 2,024 2,474 2,295 2,372 2,642
## 27 Korea, South 2010 2,977   3,073 3,350 3,157 3,291 3,230 3,372 3,146
## 45 Korea, South 2011 3,507   3,146 3,520 3,861 3,881 3,490 3,626 3,785
## 63 Korea, South 2012 3,472   4,103 3,700 3,666 3,494 3,589 3,516 3,132
## 81 Korea, South 2013 3,389   3,521 3,413 3,162 3,270 3,446 3,470 3,439
## 99 Korea, South 2014 3,712   3,617 3,830 3,811 3,683 3,767 3,670 3,801
##       NA    NA    NA    NA
## 9  2,711 2,842 2,797 2,739
## 27 3,161 3,309 3,384 3,370
## 45 3,569 3,471 3,820 3,785
## 63 3,585 3,512 3,195 3,318
## 81 3,164 3,582 4,040 3,789
## 99 3,644 3,604 3,680 3,654
# Preview the first rows of the China table after the rename step.
head(ChinaIm)
##    Country Year  Month Imports     NA     NA     NA     NA     NA     NA
## 3    China 2009 24,892  23,906 25,560 23,916 23,831 23,379 23,726 23,774
## 21   China 2010 27,758  27,869 28,639 28,210 30,070 31,916 31,129 32,138
## 39   China 2011 32,425  33,802 32,159 32,010 33,039 33,167 33,420 33,500
## 57   China 2012 34,310  33,794 37,254 35,548 34,546 35,257 35,683 34,595
## 75   China 2013 38,829  38,012 32,893 34,556 36,298 35,929 36,178 37,177
## 93   China 2014 37,564  37,969 37,515 38,060 38,220 38,932 37,919 39,000
##        NA     NA     NA     NA
## 3  24,629 25,585 25,958 27,218
## 21 31,716 31,552 31,972 31,985
## 39 33,168 34,265 34,094 34,322
## 57 35,611 35,479 36,623 36,919
## 75 37,068 37,133 37,533 38,828
## 93 40,223 40,306 40,388 40,659

Remove the commas from Exports and make sure Year and Exports are numeric values. Repeat the process for Imports.

# Coerce Year and Exports to numeric, stripping the thousands separators
# ("," characters) from the text first.
KoreaEx[["Year"]] <- as.numeric(gsub(",", "", KoreaEx[["Year"]]))
KoreaEx[["Exports"]] <- as.numeric(gsub(",", "", KoreaEx[["Exports"]]))
head(KoreaEx)
##         Country Year Month Exports    NA    NA    NA    NA    NA    NA
## 9  Korea, South 2009 1,807    2074 1,835 2,024 2,474 2,295 2,372 2,642
## 27 Korea, South 2010 2,977    3073 3,350 3,157 3,291 3,230 3,372 3,146
## 45 Korea, South 2011 3,507    3146 3,520 3,861 3,881 3,490 3,626 3,785
## 63 Korea, South 2012 3,472    4103 3,700 3,666 3,494 3,589 3,516 3,132
## 81 Korea, South 2013 3,389    3521 3,413 3,162 3,270 3,446 3,470 3,439
## 99 Korea, South 2014 3,712    3617 3,830 3,811 3,683 3,767 3,670 3,801
##       NA    NA    NA    NA
## 9  2,711 2,842 2,797 2,739
## 27 3,161 3,309 3,384 3,370
## 45 3,569 3,471 3,820 3,785
## 63 3,585 3,512 3,195 3,318
## 81 3,164 3,582 4,040 3,789
## 99 3,644 3,604 3,680 3,654
# Same clean-up for the China table: strip "," and coerce Year and Imports
# to numeric.
ChinaIm[["Year"]] <- as.numeric(gsub(",", "", ChinaIm[["Year"]]))
ChinaIm[["Imports"]] <- as.numeric(gsub(",", "", ChinaIm[["Imports"]]))
# Print both tables side by side for comparison, KoreaEx first.
head(KoreaEx)
##         Country Year Month Exports    NA    NA    NA    NA    NA    NA
## 9  Korea, South 2009 1,807    2074 1,835 2,024 2,474 2,295 2,372 2,642
## 27 Korea, South 2010 2,977    3073 3,350 3,157 3,291 3,230 3,372 3,146
## 45 Korea, South 2011 3,507    3146 3,520 3,861 3,881 3,490 3,626 3,785
## 63 Korea, South 2012 3,472    4103 3,700 3,666 3,494 3,589 3,516 3,132
## 81 Korea, South 2013 3,389    3521 3,413 3,162 3,270 3,446 3,470 3,439
## 99 Korea, South 2014 3,712    3617 3,830 3,811 3,683 3,767 3,670 3,801
##       NA    NA    NA    NA
## 9  2,711 2,842 2,797 2,739
## 27 3,161 3,309 3,384 3,370
## 45 3,569 3,471 3,820 3,785
## 63 3,585 3,512 3,195 3,318
## 81 3,164 3,582 4,040 3,789
## 99 3,644 3,604 3,680 3,654
# Preview the China table after the numeric conversion.
head(ChinaIm)
##    Country Year  Month Imports     NA     NA     NA     NA     NA     NA
## 3    China 2009 24,892   23906 25,560 23,916 23,831 23,379 23,726 23,774
## 21   China 2010 27,758   27869 28,639 28,210 30,070 31,916 31,129 32,138
## 39   China 2011 32,425   33802 32,159 32,010 33,039 33,167 33,420 33,500
## 57   China 2012 34,310   33794 37,254 35,548 34,546 35,257 35,683 34,595
## 75   China 2013 38,829   38012 32,893 34,556 36,298 35,929 36,178 37,177
## 93   China 2014 37,564   37969 37,515 38,060 38,220 38,932 37,919 39,000
##        NA     NA     NA     NA
## 3  24,629 25,585 25,958 27,218
## 21 31,716 31,552 31,972 31,985
## 39 33,168 34,265 34,094 34,322
## 57 35,611 35,479 36,623 36,919
## 75 37,068 37,133 37,533 38,828
## 93 40,223 40,306 40,388 40,659

Analyze Imports and Exports to see if there is any divergence. For example, in each January between 2009 and 2014 Korea exports grew, while imports for one of its largest trading partners (China) grew each January from 2009 to 2013 but contracted in January 2014. This only scratches the surface; we can look in greater detail to find much more interesting anomalies in these data.

###summarise(KoreaEx, Exports = sum(Exports))
###summarise(ChinaIm, Imports = sum(Imports))

###summarise(KoreaEx, Exports = mean(Exports))
###summarise(ChinaIm, Imports = mean(Imports))

###KoreaEx %>% select(Month, Year, Exports) %>% head
###ChinaIm %>% select(Month, Year, Imports) %>% head

Load dataset 3 NBA

# Read the tab-separated table of wins per season (one column per team) from
# a pinned GitHub revision; empty cells and "NA" are both read as missing.
# read.delim() uses sep = "\t" and otherwise the same defaults as read.csv().
NBA <- read.delim(
  "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/7b51fddb868d151b14cfb69732898d0c49517fcf/NBA.txt",
  header = TRUE,
  na.strings = c("", "NA")
)
# Preview the first rows.
head(NBA)
##   Rk  Season  Lg ATL BOS BRK CHI CHO CLE DAL DEN DET GSW HOU IND LAC LAL
## 1  1 2015-16 NBA  37  39  18  32  37  46  33  28  34  59  33  35  42  14
## 2  2 2014-15 NBA  60  40  38  50  33  53  50  30  32  67  56  38  56  21
## 3  3 2013-14 NBA  38  25  44  48  43  33  49  36  29  51  54  56  57  27
## 4  4 2012-13 NBA  44  41  49  45  21  24  41  57  29  47  45  49  56  45
## 5  5 2011-12 NBA  40  39  22  50   7  21  36  38  25  23  34  42  40  41
## 6  6 2010-11 NBA  44  56  24  62  34  19  57  50  30  36  43  37  32  57
##   MEM MIA MIL MIN NOP NYK OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
## 1  39  38  28  21  24  27  44  28   9  17  35  25  56  44  30  30
## 2  55  37  41  16  45  17  45  25  18  39  51  29  55  49  38  46
## 3  50  54  15  40  34  37  59  23  19  48  54  28  62  48  25  44
## 4  56  66  38  31  27  54  60  20  34  25  33  28  58  34  43  29
## 5  41  46  31  26  21  36  47  37  35  33  28  22  50  23  36  20
## 6  46  58  35  17  46  42  55  52  41  40  48  24  61  22  39  23
# Build the working data frame from the raw import; later steps modify df1
# and leave NBA as read.
df1 <- data.frame(NBA)

Remove the rows with repeating headers and the current season, which is not complete. Alternatively, we could annualize the current season's results, but we assume we do not know how many games of the 82-game season have been played so far.

# Drop rows by position: row 1 is the 2015-16 season shown in the preview
# above; rows 21, 42 and 63 are presumably the repeated header rows
# (not visible in the preview; verify against the raw file).
df1 <- df1[-c(
  1,            # 2015-16 season
  21, 42, 63    # remaining rows removed by position
), ]
head(df1)
##   Rk  Season  Lg ATL BOS BRK CHI CHO CLE DAL DEN DET GSW HOU IND LAC LAL
## 2  2 2014-15 NBA  60  40  38  50  33  53  50  30  32  67  56  38  56  21
## 3  3 2013-14 NBA  38  25  44  48  43  33  49  36  29  51  54  56  57  27
## 4  4 2012-13 NBA  44  41  49  45  21  24  41  57  29  47  45  49  56  45
## 5  5 2011-12 NBA  40  39  22  50   7  21  36  38  25  23  34  42  40  41
## 6  6 2010-11 NBA  44  56  24  62  34  19  57  50  30  36  43  37  32  57
## 7  7 2009-10 NBA  53  50  12  41  44  61  55  53  27  26  42  32  29  57
##   MEM MIA MIL MIN NOP NYK OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
## 2  55  37  41  16  45  17  45  25  18  39  51  29  55  49  38  46
## 3  50  54  15  40  34  37  59  23  19  48  54  28  62  48  25  44
## 4  56  66  38  31  27  54  60  20  34  25  33  28  58  34  43  29
## 5  41  46  31  26  21  36  47  37  35  33  28  22  50  23  36  20
## 6  46  58  35  17  46  42  55  52  41  40  48  24  61  22  39  23
## 7  40  47  46  15  37  29  50  59  27  54  50  25  50  40  53  26

Also remove the Rk and Lg columns

# Discard the rank (Rk) and league (Lg) columns in a single step, leaving
# Season plus one wins column per team.
df1 <- subset(df1, select = -c(Rk, Lg))
head(df1)
##    Season ATL BOS BRK CHI CHO CLE DAL DEN DET GSW HOU IND LAC LAL MEM MIA
## 2 2014-15  60  40  38  50  33  53  50  30  32  67  56  38  56  21  55  37
## 3 2013-14  38  25  44  48  43  33  49  36  29  51  54  56  57  27  50  54
## 4 2012-13  44  41  49  45  21  24  41  57  29  47  45  49  56  45  56  66
## 5 2011-12  40  39  22  50   7  21  36  38  25  23  34  42  40  41  41  46
## 6 2010-11  44  56  24  62  34  19  57  50  30  36  43  37  32  57  46  58
## 7 2009-10  53  50  12  41  44  61  55  53  27  26  42  32  29  57  40  47
##   MIL MIN NOP NYK OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
## 2  41  16  45  17  45  25  18  39  51  29  55  49  38  46
## 3  15  40  34  37  59  23  19  48  54  28  62  48  25  44
## 4  38  31  27  54  60  20  34  25  33  28  58  34  43  29
## 5  31  26  21  36  47  37  35  33  28  22  50  23  36  20
## 6  35  17  46  42  55  52  41  40  48  24  61  22  39  23
## 7  46  15  37  29  50  59  27  54  50  25  50  40  53  26

Gather the Wins for each team

###NBAwins <- gather(df1, "Team", "Wins", 2:31, na.rm = TRUE)
###colnames(NBAwins) <- c("Season", "Team", "Wins")
###NBAwins$Wins = c(as.numeric(NBAwins$Wins))
###head(NBAwins)

Summarise results. There have been 54,000 NBA games played and the average win total is ~40 games. The NY Knicks have played in 66 seasons and their ups and downs can be isolated below.

###summarise(NBAwins, Wins = sum(Wins))
###summarise(NBAwins, Wins = mean(Wins))
###filter(NBAwins, Team == "NYK")