# Load the packages this script relies on. Each library() call must sit on its
# own line (or be separated by ";"): several calls on one line with nothing
# between them are a parse error.
library(tidyr)
library(dplyr)
library(stringr)

# Read the tab-separated World Series table (columns Year, Results, MVP) from
# a commit-pinned URL, then print it wrapped in a list so every row is shown
# under a single [[1]] element.
MLB <- read.csv("https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/6fcd4ca76df1116376365dff961eaba3a85df4fe/MLB.txt", header=TRUE, sep = "\t")
list(MLB)
## [[1]]
## Year Results MVP
## 1 2015 Royals 4, Mets 1 Salvador Perez
## 2 2014 Giants 4, Royals 3 Madison Bumgarner
## 3 2013 Red Sox 4, Cardinals 2 David Ortiz
## 4 2012 Giants 4, Tigers 0 Pablo Sandoval
## 5 2011 Cardinals 4, Rangers 3 David Freese
## 6 2010 Giants 4, Rangers 1 Edgar Renteria
## 7 2009 Yankees 4, Phillies 2 Hideki Matsui
## 8 2008 Philadelphia 4, Tampa Bay 1 Cole Hamels
## 9 2007 Boston 4, Colorado 0 Mike Lowell
## 10 2006 St. Louis 4, Detroit 1 David Eckstein
## 11 2005 Chi. White Sox 4, Houston 0 Jermaine Dye
## 12 2004 Boston 4, St. Louis 0 Manny Ramirez
## 13 2003 Florida 4, NY Yankees 2 Josh Beckett
## 14 2002 Anaheim 4, San Francisco 3 Troy Glaus
## 15 2001 Arizona 4, NY Yankees 3 Schilling/Johnson
## 16 2000 NY Yankees 4, NY Mets 1 Derek Jeter
## 17
## 18 Year Results MVP
## 19 1999 NY Yankees 4, Atlanta 0 Mariano Rivera
## 20 1998 NY Yankees 4, San Diego 0 Scott Brosius
## 21 1997 Florida 4, Cleveland 3 Livan Hernandez
## 22 1996 NY Yankees 4, Atlanta 2 John Wetteland
## 23 1995 Atlanta 4, Cleveland 2 Tom Glavine
## 24 1994 Not Held N/A
## 25 1993 Toronto 4, Philadelphia 2 Paul Molitor
## 26 1992 Toronto 4, Atlanta 2 Pat Borders
## 27 1991 Minnesota 4, Atlanta 3 Jack Morris
## 28 1990 Cincinnati 4, Oakland 0 Jose Rijo
## 29 Year Results MVP
## 30 1989 Oakland 4, San Francisco 0 Dave Stewart
## 31 1988 Los Angeles 4, Oakland 1 Orel Hershiser
## 32 1987 Minnesota 4, St. Louis 3 Frank Viola
## 33 1986 NY Mets 4, Boston 3 Ray Knight
## 34 1985 Kansas City 4, St. Louis 3 Bret Saberhagen
## 35 1984 Detroit 4, San Diego 1 Alan Trammell
## 36 1983 Baltimore 4, Philadelphia 1 Rick Dempsey
## 37 1982 St. Louis 4, Milwaukee 3 Darrell Porter
## 38 1981 Los Angeles 4, NY Yankees 2 Guerrero/Cey/Yeager
## 39 1980 Philadelphia 4, Kansas City 2 Mike Schmidt
## 40
## 41 Year Results MVP
## 42 1979 Pittsburgh 4, Baltimore 3 Willie Stargell
## 43 1978 NY Yankees 4, Los Angeles 2 Bucky Dent
## 44 1977 NY Yankees 4, Los Angeles 2 Reggie Jackson
## 45 1976 Cincinnati 4, NY Yankees 0 Johnny Bench
## 46 1975 Cincinnati 4, Boston 3 Pete Rose
## 47 1974 Oakland 4, Los Angeles 1 Rollie Fingers
## 48 1973 Oakland 4, NY Mets 3 Reggie Jackson
## 49 1972 Oakland 4, Cincinnati 3 Gene Tenace
## 50 1971 Pittsburgh 4, Baltimore 3 Roberto Clemente
## 51 1970 Baltimore 4, Cincinnati 1 Brooks Robinson
## 52 Year Results MVP
## 53 1969 NY Mets 4, Baltimore 1 Donn Clendenon
## 54 1968 Detroit 4, St. Louis 3 Mickey Lolich
## 55 1967 St. Louis 4, Boston 3 Bob Gibson
## 56 1966 Baltimore 4, Los Angeles 0 Frank Robinson
## 57 1965 Los Angeles 4, Minnesota 3 Sandy Koufax
## 58 1964 St. Louis 4, NY Yankees 3 Bob Gibson
## 59 1963 Los Angeles 4, NY Yankees 0 Sandy Koufax
## 60 1962 NY Yankees 4, San Francisco 3 Ralph Terry
## 61 1961 NY Yankees 4, Cincinnati 1 Whitey Ford
## 62 1960 Pittsburgh 4, NY Yankees 3 Bobby Richardson
## 63
## 64 Year Results MVP
## 65 1959 Los Angeles 4, Chicago White Sox 2 Larry Sherry
## 66 1958 NY Yankees 4, Mil. Braves 3 Bob Turley
## 67 1957 Mil. Braves 4, NY Yankees 3 Lew Burdette
## 68 1956 NY Yankees 4, Brooklyn 3 Don Larsen
## 69 1955 Brooklyn 4, NY Yankees 3 Johnny Podres
## 70 1954 NY Giants 4, Cleveland 0 --
## 71 1953 NY Yankees 4, Brooklyn 2 --
## 72 1952 NY Yankees 4, Brooklyn 3 --
## 73 1951 NY Yankees 4, NY Giants 2 --
## 74 1950 NY Yankees 4, Philadelphia 0 --
## 75 Year Results MVP
## 76 1949 NY Yankees 4, Brooklyn 1 --
## 77 1948 Cleveland 4, Boston Braves 2 --
## 78 1947 NY Yankees 4, Brooklyn 3 --
## 79 1946 St. Louis 4, Boston Red Sox 3 --
## 80 1945 Detroit 4, Chicago Cubs 3 --
## 81 1944 St. Louis Cardinals 4, St. Louis Browns 2 --
## 82 1943 NY Yankees 4, St. Louis Cardinals 1 --
## 83 1942 St. Louis Cardinals 4, NY Yankees 1 --
## 84 1941 NY Yankees 4, Brooklyn 1 --
## 85 1940 Cincinnati 4, Detroit 3 --
## 86
## 87 Year Results MVP
## 88 1939 NY Yankees 4, Cincinnati 0 --
## 89 1938 NY Yankees 4, Chicago Cubs 0 --
## 90 1937 NY Yankees 4, NY Giants 1 --
## 91 1936 NY Yankees 4, NY Giants 2 --
## 92 1935 Detroit 4, Chicago Cubs 2 --
## 93 1934 St. Louis Cardinals 4, Detroit 3 --
## 94 1933 NY Giants 4, Washington 1 --
## 95 1932 NY Yankees 4, Chicago Cubs 0 --
## 96 1931 St. Louis Cardinals 4, Philadelphia A's 3 --
## 97 1930 Philadelphia A's 4, St. Louis Cardinals 2 --
## 98 Year Results MVP
## 99 1929 Philadelphia A's 4, Chicago Cubs 1 --
## 100 1928 NY Yankees 4, St. Louis Cardinals 0 --
## 101 1927 NY Yankees 4, Pittsburgh 0 --
## 102 1926 St. Louis Cardinals 4, NY Yankees 3 --
## 103 1925 Pittsburgh 4, Washington 3 --
## 104 1924 Washington 4, NY Giants 3 --
## 105 1923 NY Yankees 4, NY Giants 2 --
## 106 1922 NY Giants 4, NY Yankees 0 (one tie) --
## 107 1921 NY Giants 5, NY Yankees 3 --
## 108 1920 Cleveland 5, Brooklyn 2 --
## 109
## 110 Year Results MVP
## 111 1919 Cincinnati 5, Chicago White Sox 3 --
## 112 1918 Boston Red Sox 4, Chicago Cubs 2 --
## 113 1917 Chicago White Sox 4, NY Giants 2 --
## 114 1916 Boston Red Sox 4, Brooklyn 1 --
## 115 1915 Boston Red Sox 4, Philadelphia Phillies 1 --
## 116 1914 Boston Braves 4, Philadelphia A's 0 --
## 117 1913 Philadelphia A's 4, NY Giants 1 --
## 118 1912 Boston Red Sox 4, NY Giants 3 (one tie) --
## 119 1911 Philadelphia A's 4, NY Giants 2 --
## 120 1910 Philadelphia A's 4, Chicago Cubs 1 --
## 121 Year Results MVP
## 122 1909 Pittsburgh 4, Detroit 3 --
## 123 1908 Chicago Cubs 4, Detroit 1 --
## 124 1907 Chicago Cubs 4, Detroit 0 (one tie) --
## 125 1906 Chicago White Sox 4, Chicago Cubs 2 --
## 126 1905 NY Giants 4, Philadelphia A's 1 --
## 127 1904 Not Held N/A
## 128 1903 Boston Red Sox 5, Pittsburgh 3 --
# Copy the raw table and drop the rows that do not describe a played series.
# In the full listing printed earlier in this file these positions hold blank
# separator rows, repeated "Year Results MVP" header rows, and the two
# "Not Held" seasons (1994 at row 24, 1904 at row 127).
df2 <- data.frame(MLB)[
  -c(17, 18, 24, 29, 40, 41, 52, 63, 64, 75, 86, 87, 98, 109, 110, 121, 127),
]
head(df2)
## Year Results MVP
## 1 2015 Royals 4, Mets 1 Salvador Perez
## 2 2014 Giants 4, Royals 3 Madison Bumgarner
## 3 2013 Red Sox 4, Cardinals 2 David Ortiz
## 4 2012 Giants 4, Tigers 0 Pablo Sandoval
## 5 2011 Cardinals 4, Rangers 3 David Freese
## 6 2010 Giants 4, Rangers 1 Edgar Renteria
###mutate(df2, Results = gsub(pattern = "\\s|\\d+", replacement = "", x = Results)) %>% separate(col = "Results", into = c("Winner", "Loser"), sep = ",") %>% summarize(w = n_distinct(Winner), l = n_distinct(Loser))
# Fetch the comma-separated trade table from a commit-pinned URL. Cells that
# contain "?" are read as NA, and text columns stay character instead of being
# turned into factors. read.csv() already defaults to a header row and a comma
# separator, so those arguments are not repeated.
Trade <- read.csv(
  file = "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/05f302dbab2956d365ffbf80ddc5ed7bf2dc6566/Trade.csv",
  na.strings = "?",
  stringsAsFactors = FALSE
)
head(Trade)
## year CTY_CODE CTYNAME BJAN BFEB BMAR BAPR BMAY BJUN
## 1 2009 3510 Brazil 511 447 345 518 419 464
## 2 2009 1220 Canada -1,791 -1,893 -1,384 -1,092 -816 -1,854
## 3 2009 5700 China -20,362 -19,006 -20,373 -18,426 -18,407 -17,570
## 4 2009 4279 France -717 -304 -834 -387 -882 -381
## 5 2009 4280 Germany -2,283 -2,356 -1,888 -1,977 -1,599 -2,366
## 6 2009 5330 India -527 -537 -463 -338 -244 -252
## BJUL BAUG BSEP BOCT BNOV BDEC BQ1 BQ2 BQ3
## 1 440 482 557 754 320 770 1,303 1,400 1,479
## 2 -2,323 -1,516 -1,787 -2,647 -2,023 -2,465 -5,068 -3,762 -5,625
## 3 -18,261 -18,077 -18,413 -19,272 -18,985 -19,726 -59,740 -54,404 -54,751
## 4 -863 -760 -731 -540 -754 -589 -1,855 -1,650 -2,355
## 5 -2,731 -2,269 -2,428 -2,713 -3,179 -2,402 -6,527 -5,941 -7,429
## 6 -257 -22 -417 -430 -613 -624 -1,527 -834 -696
## BQ4 IJAN IFEB IMAR IAPR IMAY IJUN IJUL IAUG ISEP
## 1 1,844 1,799 1,663 1,611 1,421 1,605 1,612 1,682 1,613 1,690
## 2 -7,135 17,863 18,477 16,899 16,677 16,368 17,452 19,832 19,451 19,788
## 3 -57,983 24,892 23,906 25,560 23,916 23,831 23,379 23,726 23,774 24,629
## 4 -1,883 3,021 2,863 2,915 2,679 2,750 2,874 2,919 2,717 2,860
## 5 -8,294 6,089 6,246 5,402 5,284 5,165 5,527 6,170 5,726 6,019
## 6 -1,667 1,841 1,802 1,694 1,699 1,557 1,595 1,728 1,571 1,952
## IOCT INOV IDEC IQ1 IQ2 IQ3 IQ4 EJAN EFEB EMAR
## 1 1,611 1,984 1,778 5,073 4,638 4,986 5,372 2,310 2,110 1,956
## 2 20,607 21,059 21,774 53,240 50,497 59,071 63,440 16,073 16,584 15,515
## 3 25,585 25,958 27,218 74,358 71,125 72,130 78,761 4,530 4,900 5,188
## 4 2,773 2,980 2,886 8,799 8,303 8,495 8,639 2,304 2,559 2,081
## 5 6,481 6,862 6,527 17,737 15,977 17,914 19,870 3,806 3,890 3,515
## 6 1,802 1,896 2,029 5,337 4,850 5,252 5,727 1,314 1,265 1,231
## EAPR EMAY EJUN EJUL EAUG ESEP EOCT ENOV EDEC EQ1
## 1 1,939 2,024 2,076 2,123 2,095 2,247 2,364 2,304 2,548 6,376
## 2 15,585 15,552 15,598 17,510 17,935 18,002 17,960 19,036 19,309 48,171
## 3 5,490 5,424 5,808 5,465 5,698 6,216 6,313 6,972 7,493 14,618
## 4 2,292 1,868 2,493 2,056 1,956 2,128 2,233 2,226 2,297 6,944
## 5 3,308 3,566 3,161 3,439 3,456 3,590 3,768 3,682 4,125 11,210
## 6 1,361 1,312 1,344 1,471 1,550 1,535 1,372 1,283 1,404 3,810
## EQ2 EQ3 EQ4
## 1 6,039 6,464 7,216
## 2 46,735 53,446 56,305
## 3 16,722 17,379 20,778
## 4 6,653 6,140 6,755
## 5 10,036 10,486 11,575
## 6 4,017 4,556 4,059
# Extract the South Korea rows, keeping the country name, the year, and the
# twelve monthly export columns EJAN ... EDEC (built here from month.abb).
# which() ignores NA comparisons, and the original row names are preserved.
KoreaEx <- Trade[
  which(Trade$CTYNAME == "Korea, South"),
  c("CTYNAME", "year", paste0("E", toupper(month.abb))),
  drop = FALSE
]
head(KoreaEx)
## CTYNAME year EJAN EFEB EMAR EAPR EMAY EJUN EJUL EAUG ESEP
## 9 Korea, South 2009 1,807 2,074 1,835 2,024 2,474 2,295 2,372 2,642 2,711
## 27 Korea, South 2010 2,977 3,073 3,350 3,157 3,291 3,230 3,372 3,146 3,161
## 45 Korea, South 2011 3,507 3,146 3,520 3,861 3,881 3,490 3,626 3,785 3,569
## 63 Korea, South 2012 3,472 4,103 3,700 3,666 3,494 3,589 3,516 3,132 3,585
## 81 Korea, South 2013 3,389 3,521 3,413 3,162 3,270 3,446 3,470 3,439 3,164
## 99 Korea, South 2014 3,712 3,617 3,830 3,811 3,683 3,767 3,670 3,801 3,644
## EOCT ENOV EDEC
## 9 2,842 2,797 2,739
## 27 3,309 3,384 3,370
## 45 3,471 3,820 3,785
## 63 3,512 3,195 3,318
## 81 3,582 4,040 3,789
## 99 3,604 3,680 3,654
# Extract the China rows, keeping the country name, the year, and the twelve
# monthly import columns IJAN ... IDEC (built here from month.abb).
# which() ignores NA comparisons, and the original row names are preserved.
ChinaIm <- Trade[
  which(Trade$CTYNAME == "China"),
  c("CTYNAME", "year", paste0("I", toupper(month.abb))),
  drop = FALSE
]
head(ChinaIm)
## CTYNAME year IJAN IFEB IMAR IAPR IMAY IJUN IJUL IAUG
## 3 China 2009 24,892 23,906 25,560 23,916 23,831 23,379 23,726 23,774
## 21 China 2010 27,758 27,869 28,639 28,210 30,070 31,916 31,129 32,138
## 39 China 2011 32,425 33,802 32,159 32,010 33,039 33,167 33,420 33,500
## 57 China 2012 34,310 33,794 37,254 35,548 34,546 35,257 35,683 34,595
## 75 China 2013 38,829 38,012 32,893 34,556 36,298 35,929 36,178 37,177
## 93 China 2014 37,564 37,969 37,515 38,060 38,220 38,932 37,919 39,000
## ISEP IOCT INOV IDEC
## 3 24,629 25,585 25,958 27,218
## 21 31,716 31,552 31,972 31,985
## 39 33,168 34,265 34,094 34,322
## 57 35,611 35,479 36,623 36,919
## 75 37,068 37,133 37,533 38,828
## 93 40,223 40,306 40,388 40,659
# Give both extracts the same 14 headers: Country, Year, then the twelve
# three-letter English month abbreviations (month.abb is "Jan" ... "Dec").
names(KoreaEx) <- c("Country", "Year", month.abb)
names(ChinaIm) <- c("Country", "Year", month.abb)
# Disabled reshape attempts, kept as found. Note that gather()'s first two
# arguments after the data are the new key and value column names, so these
# calls would reuse the existing names Country and Year.
###KoreaEx = KoreaEx %>% gather(Country, Year, Jan:Dec)
###ChinaIm = ChinaIm %>% gather(Country, Year, Jan:Dec)
head(KoreaEx)
## Country Year Jan Feb Mar Apr May Jun Jul Aug Sep
## 9 Korea, South 2009 1,807 2,074 1,835 2,024 2,474 2,295 2,372 2,642 2,711
## 27 Korea, South 2010 2,977 3,073 3,350 3,157 3,291 3,230 3,372 3,146 3,161
## 45 Korea, South 2011 3,507 3,146 3,520 3,861 3,881 3,490 3,626 3,785 3,569
## 63 Korea, South 2012 3,472 4,103 3,700 3,666 3,494 3,589 3,516 3,132 3,585
## 81 Korea, South 2013 3,389 3,521 3,413 3,162 3,270 3,446 3,470 3,439 3,164
## 99 Korea, South 2014 3,712 3,617 3,830 3,811 3,683 3,767 3,670 3,801 3,644
## Oct Nov Dec
## 9 2,842 2,797 2,739
## 27 3,309 3,384 3,370
## 45 3,471 3,820 3,785
## 63 3,512 3,195 3,318
## 81 3,582 4,040 3,789
## 99 3,604 3,680 3,654
head(ChinaIm)
## Country Year Jan Feb Mar Apr May Jun Jul Aug
## 3 China 2009 24,892 23,906 25,560 23,916 23,831 23,379 23,726 23,774
## 21 China 2010 27,758 27,869 28,639 28,210 30,070 31,916 31,129 32,138
## 39 China 2011 32,425 33,802 32,159 32,010 33,039 33,167 33,420 33,500
## 57 China 2012 34,310 33,794 37,254 35,548 34,546 35,257 35,683 34,595
## 75 China 2013 38,829 38,012 32,893 34,556 36,298 35,929 36,178 37,177
## 93 China 2014 37,564 37,969 37,515 38,060 38,220 38,932 37,919 39,000
## Sep Oct Nov Dec
## 3 24,629 25,585 25,958 27,218
## 21 31,716 31,552 31,972 31,985
## 39 33,168 34,265 34,094 34,322
## 57 35,611 35,479 36,623 36,919
## 75 37,068 37,133 37,533 38,828
## 93 40,223 40,306 40,388 40,659
# Reshape both extracts from one column per month (wide) to one row per
# Country / Year / Month (long), then turn the amounts into numbers.
#
# Why this is a reshape and not a rename: at this point each frame still has
# 14 columns (Country, Year, Jan ... Dec). Assigning just four column names to
# it leaves the other ten columns named NA and makes "Exports" / "Imports"
# refer to the February column only. Stacking Jan..Dec into a Month label
# column and one value column gives the four columns the rest of the script
# expects.
#
# The `##` listings in this section are the expected first rows, worked out
# from the wide tables printed earlier in this file; they are not a captured
# console run.

# pivot_longer() returns a tibble; as.data.frame() keeps the objects plain
# data frames like every other table in this script.
KoreaEx <- as.data.frame(
  pivot_longer(
    KoreaEx,
    cols = Jan:Dec,
    names_to = "Month",
    values_to = "Exports"
  )
)
ChinaIm <- as.data.frame(
  pivot_longer(
    ChinaIm,
    cols = Jan:Dec,
    names_to = "Month",
    values_to = "Imports"
  )
)
head(KoreaEx)
## Country Year Month Exports
## 1 Korea, South 2009 Jan 1,807
## 2 Korea, South 2009 Feb 2,074
## 3 Korea, South 2009 Mar 1,835
## 4 Korea, South 2009 Apr 2,024
## 5 Korea, South 2009 May 2,474
## 6 Korea, South 2009 Jun 2,295
head(ChinaIm)
## Country Year Month Imports
## 1 China 2009 Jan 24,892
## 2 China 2009 Feb 23,906
## 3 China 2009 Mar 25,560
## 4 China 2009 Apr 23,916
## 5 China 2009 May 23,831
## 6 China 2009 Jun 23,379

# The amounts were read as text because they carry thousands separators;
# strip the commas before converting. Year goes through the same conversion
# so that it ends up as a plain numeric column.
KoreaEx$Exports <- as.numeric(gsub(",", "", KoreaEx$Exports))
KoreaEx$Year <- as.numeric(gsub(",", "", KoreaEx$Year))
head(KoreaEx)
## Country Year Month Exports
## 1 Korea, South 2009 Jan 1807
## 2 Korea, South 2009 Feb 2074
## 3 Korea, South 2009 Mar 1835
## 4 Korea, South 2009 Apr 2024
## 5 Korea, South 2009 May 2474
## 6 Korea, South 2009 Jun 2295
ChinaIm$Imports <- as.numeric(gsub(",", "", ChinaIm$Imports))
ChinaIm$Year <- as.numeric(gsub(",", "", ChinaIm$Year))
head(ChinaIm)
## Country Year Month Imports
## 1 China 2009 Jan 24892
## 2 China 2009 Feb 23906
## 3 China 2009 Mar 25560
## 4 China 2009 Apr 23916
## 5 China 2009 May 23831
## 6 China 2009 Jun 23379
###summarise(KoreaEx, Exports = sum(Exports))
###summarise(ChinaIm, Imports = sum(Imports))
###summarise(KoreaEx, Exports = mean(Exports))
###summarise(ChinaIm, Imports = mean(Imports))
###KoreaEx %>% select(Month, Year, Exports) %>% head
###ChinaIm %>% select(Month, Year, Imports) %>% head
# Fetch the tab-separated season-by-team table (one column per team code) from
# a commit-pinned URL. read.delim() is read.csv() with a tab separator and a
# header row by default. Empty cells and the literal "NA" are both read as
# missing values.
NBA <- read.delim(
  "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/7b51fddb868d151b14cfb69732898d0c49517fcf/NBA.txt",
  na.strings = c("", "NA")
)
head(NBA)
## Rk Season Lg ATL BOS BRK CHI CHO CLE DAL DEN DET GSW HOU IND LAC LAL
## 1 1 2015-16 NBA 37 39 18 32 37 46 33 28 34 59 33 35 42 14
## 2 2 2014-15 NBA 60 40 38 50 33 53 50 30 32 67 56 38 56 21
## 3 3 2013-14 NBA 38 25 44 48 43 33 49 36 29 51 54 56 57 27
## 4 4 2012-13 NBA 44 41 49 45 21 24 41 57 29 47 45 49 56 45
## 5 5 2011-12 NBA 40 39 22 50 7 21 36 38 25 23 34 42 40 41
## 6 6 2010-11 NBA 44 56 24 62 34 19 57 50 30 36 43 37 32 57
## MEM MIA MIL MIN NOP NYK OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
## 1 39 38 28 21 24 27 44 28 9 17 35 25 56 44 30 30
## 2 55 37 41 16 45 17 45 25 18 39 51 29 55 49 38 46
## 3 50 54 15 40 34 37 59 23 19 48 54 28 62 48 25 44
## 4 56 66 38 31 27 54 60 20 34 25 33 28 58 34 43 29
## 5 41 46 31 26 21 36 47 37 35 33 28 22 50 23 36 20
## 6 46 58 35 17 46 42 55 52 41 40 48 24 61 22 39 23
# Work on a copy of the raw table with rows 1, 21, 42 and 63 removed. Row 1 is
# the 2015-16 season. Rows 21, 42 and 63 are not shown anywhere in this file
# (presumably repeated header lines; TODO confirm against the raw data).
df1 <- data.frame(NBA)[-c(1, 21, 42, 63), ]
head(df1)
## Rk Season Lg ATL BOS BRK CHI CHO CLE DAL DEN DET GSW HOU IND LAC LAL
## 2 2 2014-15 NBA 60 40 38 50 33 53 50 30 32 67 56 38 56 21
## 3 3 2013-14 NBA 38 25 44 48 43 33 49 36 29 51 54 56 57 27
## 4 4 2012-13 NBA 44 41 49 45 21 24 41 57 29 47 45 49 56 45
## 5 5 2011-12 NBA 40 39 22 50 7 21 36 38 25 23 34 42 40 41
## 6 6 2010-11 NBA 44 56 24 62 34 19 57 50 30 36 43 37 32 57
## 7 7 2009-10 NBA 53 50 12 41 44 61 55 53 27 26 42 32 29 57
## MEM MIA MIL MIN NOP NYK OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
## 2 55 37 41 16 45 17 45 25 18 39 51 29 55 49 38 46
## 3 50 54 15 40 34 37 59 23 19 48 54 28 62 48 25 44
## 4 56 66 38 31 27 54 60 20 34 25 33 28 58 34 43 29
## 5 41 46 31 26 21 36 47 37 35 33 28 22 50 23 36 20
## 6 46 58 35 17 46 42 55 52 41 40 48 24 61 22 39 23
## 7 40 47 46 15 37 29 50 59 27 54 50 25 50 40 53 26
# Remove the Rk and Lg columns in a single step, leaving Season followed by
# the per-team columns.
df1 <- subset(df1, select = -c(Rk, Lg))
head(df1)
## Season ATL BOS BRK CHI CHO CLE DAL DEN DET GSW HOU IND LAC LAL MEM MIA
## 2 2014-15 60 40 38 50 33 53 50 30 32 67 56 38 56 21 55 37
## 3 2013-14 38 25 44 48 43 33 49 36 29 51 54 56 57 27 50 54
## 4 2012-13 44 41 49 45 21 24 41 57 29 47 45 49 56 45 56 66
## 5 2011-12 40 39 22 50 7 21 36 38 25 23 34 42 40 41 41 46
## 6 2010-11 44 56 24 62 34 19 57 50 30 36 43 37 32 57 46 58
## 7 2009-10 53 50 12 41 44 61 55 53 27 26 42 32 29 57 40 47
## MIL MIN NOP NYK OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
## 2 41 16 45 17 45 25 18 39 51 29 55 49 38 46
## 3 15 40 34 37 59 23 19 48 54 28 62 48 25 44
## 4 38 31 27 54 60 20 34 25 33 28 58 34 43 29
## 5 31 26 21 36 47 37 35 33 28 22 50 23 36 20
## 6 35 17 46 42 55 52 41 40 48 24 61 22 39 23
## 7 46 15 37 29 50 59 27 54 50 25 50 40 53 26
###NBAwins <- gather(df1, "Team", "Wins", 2:31, na.rm = TRUE)
###colnames(NBAwins) <- c("Season", "Team", "Wins")
###NBAwins$Wins = c(as.numeric(NBAwins$Wins))
###head(NBAwins)
###summarise(NBAwins, Wins = sum(Wins))
###summarise(NBAwins, Wins = mean(Wins))
###filter(NBAwins, Team == "NYK")