library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
b=sqldf("select avg(mpg),cyl,gear,avg(hp) from mtcars where cyl=4 group by gear order by gear")
## Loading required package: tcltk
b
##   avg(mpg) cyl gear avg(hp)
## 1   21.500   4    3      97
## 2   26.925   4    4      76
## 3   28.200   4    5     102
diamonds=read.table(
  "BigDiamonds.csv",
  header = T,
  sep = ",",
  nrows =100)
str(diamonds)
## 'data.frame':    100 obs. of  12 variables:
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
##  $ color       : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
##  $ clarity     : Factor w/ 7 levels "I1","I2","SI1",..: 1 1 2 1 1 4 4 1 4 3 ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : Factor w/ 4 levels "EGL","EGL USA",..: 3 3 3 3 1 3 3 3 4 3 ...
##  $ measurements: Factor w/ 100 levels "2.32 x 3.88 x 4.04",..: 59 70 99 29 96 17 12 55 46 38 ...
##  $ price       : logi  NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
library(data.table)
library(readr)
library(microbenchmark)
microbenchmark(
fread("BigDiamonds.csv"),
read_csv("BigDiamonds.csv"),
read.csv("BigDiamonds.csv"),times=1)
## Warning: 597311 problems parsing 'BigDiamonds.csv'. See problems(...) for
## more details.
## 
Read 30.1% of 598024 rows
Read 40.1% of 598024 rows
Read 50.2% of 598024 rows
Read 68.6% of 598024 rows
Read 92.0% of 598024 rows
Read 598024 rows and 12 (of 12) columns from 0.044 GB file in 00:00:07
## Unit: seconds
##                         expr       min        lq      mean    median
##     fread("BigDiamonds.csv")  6.831168  6.831168  6.831168  6.831168
##  read_csv("BigDiamonds.csv")  4.382196  4.382196  4.382196  4.382196
##  read.csv("BigDiamonds.csv") 18.757744 18.757744 18.757744 18.757744
##         uq       max neval
##   6.831168  6.831168     1
##   4.382196  4.382196     1
##  18.757744 18.757744     1
library(readxl)
newdata=read_excel("ie_data.xls",sheet = "Data",skip = 7)
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00 
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00
## Warning in xls_cols(path, sheet, col_names = col_names, col_types =
## col_types, : Expecting numeric in [1736, 2] got `July 2015 P is July 9
## close`
## Warning in xls_cols(path, sheet, col_names = col_names, col_types =
## col_types, : Expecting numeric in [1736, 5] got `June July 2015 CPI
## estimated`
## Warning in xls_cols(path, sheet, col_names = col_names, col_types =
## col_types, : Expecting numeric in [1736, 7] got `July 2015 long rate is
## July 6 value`
head(newdata,30)
##       Date    P      D      E      CPI Fraction Rate GS10    Price
## 1  1871.01 4.44 0.2600 0.4000 12.46406 1871.042  5.320000 85.03410
## 2  1871.02 4.50 0.2600 0.4000 12.84464 1871.125  5.323333 83.62964
## 3  1871.03 4.61 0.2600 0.4000 13.03497 1871.208  5.326667 84.42295
## 4  1871.04 4.74 0.2600 0.4000 12.55923 1871.292  5.330000 90.09178
## 5  1871.05 4.86 0.2600 0.4000 12.27381 1871.375  5.333333 94.52061
## 6  1871.06 4.82 0.2600 0.4000 12.08348 1871.458  5.336667 95.21923
## 7  1871.07 4.73 0.2600 0.4000 12.08348 1871.542  5.340000 93.44128
## 8  1871.08 4.79 0.2600 0.4000 11.89323 1871.625  5.343333 96.14027
## 9  1871.09 4.84 0.2600 0.4000 12.17865 1871.708  5.346667 94.86719
## 10 1871.10 4.59 0.2600 0.4000 12.36890 1871.792  5.350000 88.58322
## 11 1871.11 4.64 0.2600 0.4000 12.36890 1871.875  5.353333 89.54818
## 12 1871.12 4.74 0.2600 0.4000 12.65439 1871.958  5.356667 89.41426
## 13 1872.01 4.86 0.2633 0.4025 12.65439 1872.042  5.360000 91.67791
## 14 1872.02 4.88 0.2667 0.4050 12.65439 1872.125  5.378333 92.05518
## 15 1872.03 5.04 0.2700 0.4075 12.84464 1872.208  5.396667 93.66520
## 16 1872.04 5.18 0.2733 0.4100 13.13014 1872.292  5.415000 94.17382
## 17 1872.05 5.18 0.2767 0.4125 13.13014 1872.375  5.433333 94.17382
## 18 1872.06 5.13 0.2800 0.4150 13.03497 1872.458  5.451667 93.94571
## 19 1872.07 5.10 0.2833 0.4175 12.84464 1872.542  5.470000 94.78026
## 20 1872.08 5.04 0.2867 0.4200 12.93981 1872.625  5.488333 92.97634
## 21 1872.09 4.95 0.2900 0.4225 13.03497 1872.708  5.506667 90.64937
## 22 1872.10 4.97 0.2933 0.4250 12.74948 1872.792  5.525000 93.05372
## 23 1872.11 4.95 0.2967 0.4275 13.13014 1872.875  5.543333 89.99236
## 24 1872.12 5.07 0.3000 0.4300 12.93981 1872.958  5.561667 93.52977
## 25 1873.01 5.11 0.3025 0.4325 12.93981 1873.042  5.580000 94.26768
## 26 1873.02 5.15 0.3050 0.4350 13.22522 1873.125  5.570833 92.95526
## 27 1873.03 5.11 0.3075 0.4375 13.22522 1873.208  5.561667 92.23328
## 28 1873.04 5.04 0.3100 0.4400 13.22522 1873.292  5.552500 90.96981
## 29 1873.05 5.05 0.3125 0.4425 12.93981 1873.375  5.543333 93.16082
## 30 1873.06 4.98 0.3150 0.4450 12.55923 1873.458  5.534167 94.65339
##    Dividend Earnings CAPE
## 1  4.979474 7.660729   NA
## 2  4.831935 7.433746   NA
## 3  4.761381 7.325202   NA
## 4  4.941743 7.602682   NA
## 5  5.056658 7.779474   NA
## 6  5.136307 7.902011   NA
## 7  5.136307 7.902011   NA
## 8  5.218470 8.028415   NA
## 9  5.096171 7.840264   NA
## 10 5.017786 7.719670   NA
## 11 5.017786 7.719670   NA
## 12 4.904579 7.545507   NA
## 13 4.966830 7.592666   NA
## 14 5.030967 7.639826   NA
## 15 5.017778 7.573129   NA
## 16 4.968669 7.453913   NA
## 17 5.030482 7.499363   NA
## 18 5.127641 7.599897   NA
## 19 5.264951 7.758972   NA
## 20 5.288952 7.748028   NA
## 21 5.310771 7.737244   NA
## 22 5.491480 7.957310   NA
## 23 5.394087 7.772067   NA
## 24 5.534306 7.932505   NA
## 25 5.580425 7.978625   NA
## 26 5.505117 7.851561   NA
## 27 5.550241 7.896685   NA
## 28 5.595365 7.941809   NA
## 29 5.764902 8.163101   NA
## 30 5.987112 8.457983   NA
newdata2=newdata[1:1730,]
newdata3=sqldf("select * from newdata limit 1729")
newdata4=sqldf("select   * from newdata limit 1728")


str(newdata3)
## 'data.frame':    1729 obs. of  11 variables:
##  $ Date     : num  1871 1871 1871 1871 1871 ...
##  $ P        : num  4.44 4.5 4.61 4.74 4.86 4.82 4.73 4.79 4.84 4.59 ...
##  $ D        : num  0.26 0.26 0.26 0.26 0.26 0.26 0.26 0.26 0.26 0.26 ...
##  $ E        : num  0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 ...
##  $ CPI      : num  12.5 12.8 13 12.6 12.3 ...
##  $ Fraction : num  1871 1871 1871 1871 1871 ...
##  $ Rate GS10: num  5.32 5.32 5.33 5.33 5.33 ...
##  $ Price    : num  85 83.6 84.4 90.1 94.5 ...
##  $ Dividend : num  4.98 4.83 4.76 4.94 5.06 ...
##  $ Earnings : num  7.66 7.43 7.33 7.6 7.78 ...
##  $ CAPE     : chr  "NA" "NA" "NA" "NA" ...