library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
b=sqldf("select avg(mpg),cyl,gear,avg(hp) from mtcars where cyl=4 group by gear order by gear")
## Loading required package: tcltk
b
## avg(mpg) cyl gear avg(hp)
## 1 21.500 4 3 97
## 2 26.925 4 4 76
## 3 28.200 4 5 102
diamonds=read.table(
"BigDiamonds.csv",
header = T,
sep = ",",
nrows =100)
str(diamonds)
## 'data.frame': 100 obs. of 12 variables:
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
## $ color : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
## $ clarity : Factor w/ 7 levels "I1","I2","SI1",..: 1 1 2 1 1 4 4 1 4 3 ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : Factor w/ 4 levels "EGL","EGL USA",..: 3 3 3 3 1 3 3 3 4 3 ...
## $ measurements: Factor w/ 100 levels "2.32 x 3.88 x 4.04",..: 59 70 99 29 96 17 12 55 46 38 ...
## $ price : logi NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
library(data.table)
library(readr)
library(microbenchmark)
microbenchmark(
fread("BigDiamonds.csv"),
read_csv("BigDiamonds.csv"),
read.csv("BigDiamonds.csv"),times=1)
## Warning: 597311 problems parsing 'BigDiamonds.csv'. See problems(...) for
## more details.
##
Read 30.1% of 598024 rows
Read 40.1% of 598024 rows
Read 50.2% of 598024 rows
Read 68.6% of 598024 rows
Read 92.0% of 598024 rows
Read 598024 rows and 12 (of 12) columns from 0.044 GB file in 00:00:07
## Unit: seconds
## expr min lq mean median
## fread("BigDiamonds.csv") 6.831168 6.831168 6.831168 6.831168
## read_csv("BigDiamonds.csv") 4.382196 4.382196 4.382196 4.382196
## read.csv("BigDiamonds.csv") 18.757744 18.757744 18.757744 18.757744
## uq max neval
## 6.831168 6.831168 1
## 4.382196 4.382196 1
## 18.757744 18.757744 1
library(readxl)
newdata=read_excel("ie_data.xls",sheet = "Data",skip = 7)
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0f 03 00 00 00 00 00 00 00 00 00 00 5f 52 65 67 72 65 73 73 69 6f 6e 5f 49 6e 74 1e 01 00
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 00 00 00 00 00 00 00 06 3b 00 00 09 06 14 06 00 00 0b 00
## DEFINEDNAME: 00 00 00 0d 0b 00 00 00 00 00 00 00 00 00 00 50 72 69 6e 74 5f 41 72 65 61 5f 4d 49 3b 00 00 09 06 14 06 00 00 0b 00
## Warning in xls_cols(path, sheet, col_names = col_names, col_types =
## col_types, : Expecting numeric in [1736, 2] got `July 2015 P is July 9
## close`
## Warning in xls_cols(path, sheet, col_names = col_names, col_types =
## col_types, : Expecting numeric in [1736, 5] got `June July 2015 CPI
## estimated`
## Warning in xls_cols(path, sheet, col_names = col_names, col_types =
## col_types, : Expecting numeric in [1736, 7] got `July 2015 long rate is
## July 6 value`
head(newdata,30)
## Date P D E CPI Fraction Rate GS10 Price
## 1 1871.01 4.44 0.2600 0.4000 12.46406 1871.042 5.320000 85.03410
## 2 1871.02 4.50 0.2600 0.4000 12.84464 1871.125 5.323333 83.62964
## 3 1871.03 4.61 0.2600 0.4000 13.03497 1871.208 5.326667 84.42295
## 4 1871.04 4.74 0.2600 0.4000 12.55923 1871.292 5.330000 90.09178
## 5 1871.05 4.86 0.2600 0.4000 12.27381 1871.375 5.333333 94.52061
## 6 1871.06 4.82 0.2600 0.4000 12.08348 1871.458 5.336667 95.21923
## 7 1871.07 4.73 0.2600 0.4000 12.08348 1871.542 5.340000 93.44128
## 8 1871.08 4.79 0.2600 0.4000 11.89323 1871.625 5.343333 96.14027
## 9 1871.09 4.84 0.2600 0.4000 12.17865 1871.708 5.346667 94.86719
## 10 1871.10 4.59 0.2600 0.4000 12.36890 1871.792 5.350000 88.58322
## 11 1871.11 4.64 0.2600 0.4000 12.36890 1871.875 5.353333 89.54818
## 12 1871.12 4.74 0.2600 0.4000 12.65439 1871.958 5.356667 89.41426
## 13 1872.01 4.86 0.2633 0.4025 12.65439 1872.042 5.360000 91.67791
## 14 1872.02 4.88 0.2667 0.4050 12.65439 1872.125 5.378333 92.05518
## 15 1872.03 5.04 0.2700 0.4075 12.84464 1872.208 5.396667 93.66520
## 16 1872.04 5.18 0.2733 0.4100 13.13014 1872.292 5.415000 94.17382
## 17 1872.05 5.18 0.2767 0.4125 13.13014 1872.375 5.433333 94.17382
## 18 1872.06 5.13 0.2800 0.4150 13.03497 1872.458 5.451667 93.94571
## 19 1872.07 5.10 0.2833 0.4175 12.84464 1872.542 5.470000 94.78026
## 20 1872.08 5.04 0.2867 0.4200 12.93981 1872.625 5.488333 92.97634
## 21 1872.09 4.95 0.2900 0.4225 13.03497 1872.708 5.506667 90.64937
## 22 1872.10 4.97 0.2933 0.4250 12.74948 1872.792 5.525000 93.05372
## 23 1872.11 4.95 0.2967 0.4275 13.13014 1872.875 5.543333 89.99236
## 24 1872.12 5.07 0.3000 0.4300 12.93981 1872.958 5.561667 93.52977
## 25 1873.01 5.11 0.3025 0.4325 12.93981 1873.042 5.580000 94.26768
## 26 1873.02 5.15 0.3050 0.4350 13.22522 1873.125 5.570833 92.95526
## 27 1873.03 5.11 0.3075 0.4375 13.22522 1873.208 5.561667 92.23328
## 28 1873.04 5.04 0.3100 0.4400 13.22522 1873.292 5.552500 90.96981
## 29 1873.05 5.05 0.3125 0.4425 12.93981 1873.375 5.543333 93.16082
## 30 1873.06 4.98 0.3150 0.4450 12.55923 1873.458 5.534167 94.65339
## Dividend Earnings CAPE
## 1 4.979474 7.660729 NA
## 2 4.831935 7.433746 NA
## 3 4.761381 7.325202 NA
## 4 4.941743 7.602682 NA
## 5 5.056658 7.779474 NA
## 6 5.136307 7.902011 NA
## 7 5.136307 7.902011 NA
## 8 5.218470 8.028415 NA
## 9 5.096171 7.840264 NA
## 10 5.017786 7.719670 NA
## 11 5.017786 7.719670 NA
## 12 4.904579 7.545507 NA
## 13 4.966830 7.592666 NA
## 14 5.030967 7.639826 NA
## 15 5.017778 7.573129 NA
## 16 4.968669 7.453913 NA
## 17 5.030482 7.499363 NA
## 18 5.127641 7.599897 NA
## 19 5.264951 7.758972 NA
## 20 5.288952 7.748028 NA
## 21 5.310771 7.737244 NA
## 22 5.491480 7.957310 NA
## 23 5.394087 7.772067 NA
## 24 5.534306 7.932505 NA
## 25 5.580425 7.978625 NA
## 26 5.505117 7.851561 NA
## 27 5.550241 7.896685 NA
## 28 5.595365 7.941809 NA
## 29 5.764902 8.163101 NA
## 30 5.987112 8.457983 NA
newdata2=newdata[1:1730,]
newdata3=sqldf("select * from newdata limit 1729")
newdata4=sqldf("select * from newdata limit 1728")
str(newdata3)
## 'data.frame': 1729 obs. of 11 variables:
## $ Date : num 1871 1871 1871 1871 1871 ...
## $ P : num 4.44 4.5 4.61 4.74 4.86 4.82 4.73 4.79 4.84 4.59 ...
## $ D : num 0.26 0.26 0.26 0.26 0.26 0.26 0.26 0.26 0.26 0.26 ...
## $ E : num 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 ...
## $ CPI : num 12.5 12.8 13 12.6 12.3 ...
## $ Fraction : num 1871 1871 1871 1871 1871 ...
## $ Rate GS10: num 5.32 5.32 5.33 5.33 5.33 ...
## $ Price : num 85 83.6 84.4 90.1 94.5 ...
## $ Dividend : num 4.98 4.83 4.76 4.94 5.06 ...
## $ Earnings : num 7.66 7.43 7.33 7.6 7.78 ...
## $ CAPE : chr "NA" "NA" "NA" "NA" ...