Please resist the urge to spend your time manually entering data into a spreadsheet. No matter the format, there is almost certainly a way to get your data into R. Here are a few useful techniques:

I. Downloading a .csv and importing it into R.

  1. Download the .csv file (Example)
  2. Upload it into R
  3. Use read.csv()

II. Importing an .xls or .xlsx file

  1. Save it as a .csv file
  2. Upload it into R
  3. Use read.csv()

III. Pasting data into Excel and separating into columns

  1. Copy the data (Example)
  2. Paste it into Excel (you may need to use “Paste Special”)
  3. If necessary, use “Data/Text to Columns”
  4. Save it as a .csv

Note: An alternative in this case is to read the text file into R directly from its URL:

# Read the table straight from its URL; header = TRUE takes the column names
# from the first line of the file.
rayfair <- read.table(
  file = "https://fairmodel.econ.yale.edu/vote2012/pres.txt",
  header = TRUE
)
head(x = rayfair)
##   YEAR     VP  I DPER   DUR WAR       G     P  Z
## 1 1916 51.682  1    1  0.00   0   2.229 4.252  3
## 2 1920 36.148  1    0  1.00   1 -11.463 0.000  0
## 3 1924 41.737 -1   -1  0.00   0  -3.872 5.161 10
## 4 1928 41.244 -1    0 -1.00   0   4.623 0.183  7
## 5 1932 59.149 -1   -1 -1.25   0 -14.586 7.164  4
## 6 1936 62.226  1    1  0.00   0  11.836 2.475  9

IV. Scraping a Website

# rvest supplies the scraping helpers; dplyr is attached alongside it.
library(rvest)
library(dplyr)

# Download and parse the 2016 per-game statistics page.
html_bbref <- read_html("http://www.basketball-reference.com/leagues/NBA_2016_per_game.html")

# Pick the first <table> node on the page, then convert it to a data frame.
temp <- html_nodes(html_bbref, "table")[[1]]
temp <- html_table(temp)

head(temp)
##   Rk        Player Pos Age  Tm  G GS   MP  FG  FGA  FG%  3P 3PA  3P%  2P
## 1  1    Quincy Acy  PF  25 SAC 59 29 14.8 2.0  3.6 .556 0.3 0.8 .388 1.7
## 2  2  Jordan Adams  SG  21 MEM  2  0  7.5 1.0  3.0 .333 0.0 0.5 .000 1.0
## 3  3  Steven Adams   C  22 OKC 80 80 25.2 3.3  5.3 .613 0.0 0.0      3.3
## 4  4 Arron Afflalo  SG  30 NYK 71 57 33.4 5.0 11.3 .443 1.3 3.4 .382 3.7
## 5  5 Alexis Ajinca   C  27 NOP 59 17 14.6 2.5  5.3 .476 0.0 0.0 .000 2.5
## 6  6  Cole Aldrich   C  27 LAC 60  5 13.3 2.2  3.8 .596 0.0 0.0      2.2
##   2PA  2P% eFG%  FT FTA  FT% ORB DRB TRB AST STL BLK TOV  PF  PTS
## 1 2.8 .606 .600 0.8 1.2 .735 1.1 2.1 3.2 0.5 0.5 0.4 0.5 1.7  5.2
## 2 2.5 .400 .333 1.5 2.5 .600 0.0 1.0 1.0 1.5 1.5 0.0 1.0 1.0  3.5
## 3 5.3 .613 .613 1.4 2.5 .582 2.7 3.9 6.7 0.8 0.5 1.1 1.1 2.8  8.0
## 4 7.9 .469 .500 1.5 1.8 .840 0.3 3.4 3.7 2.0 0.4 0.1 1.2 2.0 12.8
## 5 5.3 .478 .476 0.9 1.1 .839 1.3 3.3 4.6 0.5 0.3 0.6 0.9 2.3  6.0
## 6 3.8 .596 .596 1.0 1.4 .714 1.4 3.4 4.8 0.8 0.8 1.1 1.1 2.3  5.5

V. Scraping a Series of Websites

# Scrape the per-game table for every season from 2000 to 2015 and stack the
# results underneath the 2016 table already held in `temp`.
#
# Fixes relative to the earlier version of this loop:
#   * each season's page is now actually downloaded with read_html(url);
#     previously `url` was built but never used, so the 2016 page stored in
#     `html_bbref` was re-parsed on every pass and bbdata ended up holding
#     17 copies of the same table;
#   * the URL now has the underscore between the year and "per_game",
#     matching the NBA_2016_per_game.html address used for the single-page
#     scrape;
#   * tables are collected in a preallocated list and bound once, instead of
#     growing bbdata with rbind() inside the loop.
bbdata <- temp

seasons <- 2000:2015
season_tables <- vector("list", length(seasons))

for (i in seq_along(seasons)) {
  year <- seasons[i]
  url <- paste0(
    "http://www.basketball-reference.com/leagues/NBA_",
    year,
    "_per_game.html"
  )

  # Same extraction as for the single page: first <table> node -> data frame.
  temp <- read_html(url) %>%
    html_nodes("table") %>%
    .[[1]] %>%
    html_table()

  season_tables[[i]] <- temp
}

# 2016 first, then 2000-2015 in order: the same row order the loop produced.
bbdata <- do.call(rbind, c(list(bbdata), season_tables))

# The total depends on the live pages, so it will differ from any figure
# recorded from the old loop (which counted the 2016 table 17 times).
nrow(bbdata)
## [1] 10217

VI. Using an API

# jsonlite provides fromJSON() for turning JSON into R objects.
library(jsonlite)

# Fetch the JSON feed from its URL and parse it in a single step.
primary_markets <- fromJSON(
  txt = "http://table-cache1.predictwise.com/latest/group_23.json"
)

# Display the nested structure of the parsed result.
str(object = primary_markets)
## List of 3
##  $ id    : int 23
##  $ name  : chr "GOP Primaries"
##  $ tables:'data.frame':  19 obs. of  10 variables:
##   ..$ id                : chr [1:19] "1698" "1699" "1700" "1701" ...
##   ..$ name              : chr [1:19] "Connecticut Primary - DEM" "Delaware Primary - DEM" "Maryland Primary - DEM" "Pennsylvania Primary - DEM" ...
##   ..$ notes             : chr [1:19] "" "" "" "" ...
##   ..$ timestamp         : chr [1:19] "04-27-2016 11:56PM" "04-27-2016 11:56PM" "04-27-2016 11:56PM" "04-28-2016 3:12PM" ...
##   ..$ suppress_timestamp: chr [1:19] "0" "0" "0" "0" ...
##   ..$ header            :List of 19
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   .. ..$ : chr [1:7] "Outcome" "PredictWise" "Derived Betfair Price" "Betfair Back" ...
##   ..$ table             :List of 19
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "0 %" "0 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "0 %" "0 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "0 %" "0 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "0 %" "0 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "0 %" "0 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "0 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "0 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "0 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "0 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "0 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "74 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "32 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "96 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "86 %" ...
##   .. ..$ : chr [1:3, 1:7] "Donald Trump" "Ted Cruz" "John Kasich" "74 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "70 %" "30 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "53 %" "47 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "85 %" "15 %" ...
##   .. ..$ : chr [1:2, 1:7] "Hillary Clinton" "Bernie Sanders" "18 %" "82 %" ...
##   ..$ default_sort      : chr [1:19] "2" "2" "2" "2" ...
##   ..$ default_sort_dir  : chr [1:19] "desc" "desc" "desc" "desc" ...
##   ..$ shade_cols        :List of 19
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
##   .. ..$ : chr "1"
# Name of the 11th market in the `tables` data frame.
primary_markets[["tables"]][["name"]][11]
## [1] "Indiana Primary - GOP"
# Column headers of the 11th market; single-bracket indexing returns them
# wrapped in a one-element list.
primary_markets[["tables"]][["header"]][11]
## [[1]]
## [1] "Outcome"               "PredictWise"           "Derived Betfair Price"
## [4] "Betfair Back"          "Betfair Lay"           "Pollster"             
## [7] "Derived PredictIt"
# Data matrix of the 11th market; single-bracket indexing returns it wrapped
# in a one-element list.
primary_markets[["tables"]][["table"]][11]
## [[1]]
##      [,1]           [,2]   [,3] [,4] [,5] [,6] [,7]     
## [1,] "Donald Trump" "74 %" NA   NA   NA   NA   "$ 0.755"
## [2,] "Ted Cruz"     "26 %" NA   NA   NA   NA   "$ 0.265"
## [3,] "John Kasich"  "0 %"  NA   NA   NA   NA   "$ 0.005"