library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
data(mtcars)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
sqldf("select * from mtcars where mpg<15 order by mpg")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
## 2 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
## 3 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
## 4 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
## 5 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
sqldf("select mpg,
case
when mpg >15 then 'gas guzzler'
else 'fuel economy'
end typeofcar
from mtcars")
## mpg typeofcar
## 1 21.0 gas guzzler
## 2 21.0 gas guzzler
## 3 22.8 gas guzzler
## 4 21.4 gas guzzler
## 5 18.7 gas guzzler
## 6 18.1 gas guzzler
## 7 14.3 fuel economy
## 8 24.4 gas guzzler
## 9 22.8 gas guzzler
## 10 19.2 gas guzzler
## 11 17.8 gas guzzler
## 12 16.4 gas guzzler
## 13 17.3 gas guzzler
## 14 15.2 gas guzzler
## 15 10.4 fuel economy
## 16 10.4 fuel economy
## 17 14.7 fuel economy
## 18 32.4 gas guzzler
## 19 30.4 gas guzzler
## 20 33.9 gas guzzler
## 21 21.5 gas guzzler
## 22 15.5 gas guzzler
## 23 15.2 gas guzzler
## 24 13.3 fuel economy
## 25 19.2 gas guzzler
## 26 27.3 gas guzzler
## 27 26.0 gas guzzler
## 28 30.4 gas guzzler
## 29 15.8 gas guzzler
## 30 19.7 gas guzzler
## 31 15.0 fuel economy
## 32 21.4 gas guzzler
sqldf("select avg(mpg) from mtcars")
## avg(mpg)
## 1 20.09062
sqldf("select * from mtcars
where mpg >
(select avg(mpg) from mtcars)")
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 6 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 7 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 8 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 9 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 10 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 11 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 12 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 13 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 14 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
sqldf("select * from mtcars
where mpg >
(select median(mpg) from mtcars)")
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 6 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 7 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 8 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 9 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 10 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 11 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 12 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 13 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 14 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## 15 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
sqldf("select * from mtcars
where mpg >
(select median(mpg) from mtcars) order by mpg desc")
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 2 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 3 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 4 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 5 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 6 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 7 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 8 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 11 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 12 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## 13 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 14 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 15 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
sqldf("select * from mtcars limit 5")
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
sqldf("select avg(mpg),cyl from mtcars group by cyl")
## avg(mpg) cyl
## 1 26.66364 4
## 2 19.74286 6
## 3 15.10000 8
sqldf("select distinct(cyl) from mtcars ")
## cyl
## 1 6
## 2 4
## 3 8
sqldf("select count(cyl),cyl from mtcars group by cyl")
## count(cyl) cyl
## 1 11 4
## 2 7 6
## 3 14 8
table(mtcars$cyl)
##
## 4 6 8
## 11 7 14
rm(list=ls())
df1=NULL
df1$var1=sample(100,20,F)
df1$var2=sample(LETTERS,20,T)
df1=as.data.frame(df1)
df1
## var1 var2
## 1 24 M
## 2 31 Z
## 3 51 C
## 4 72 Q
## 5 94 U
## 6 98 O
## 7 83 C
## 8 91 H
## 9 13 O
## 10 52 G
## 11 36 T
## 12 79 C
## 13 5 B
## 14 97 C
## 15 60 I
## 16 47 A
## 17 20 R
## 18 10 C
## 19 67 G
## 20 8 T
df2=NULL
df2$var1=sample(100,60,F)
df2$var2=sample(LETTERS,60,T)
df2=as.data.frame(df2)
df2
## var1 var2
## 1 98 Z
## 2 14 A
## 3 60 C
## 4 41 C
## 5 54 A
## 6 66 R
## 7 72 G
## 8 91 P
## 9 86 L
## 10 90 J
## 11 26 D
## 12 58 F
## 13 62 C
## 14 81 U
## 15 93 V
## 16 53 U
## 17 39 V
## 18 80 P
## 19 23 T
## 20 61 R
## 21 38 H
## 22 1 D
## 23 99 V
## 24 3 E
## 25 78 U
## 26 27 R
## 27 85 F
## 28 67 U
## 29 36 P
## 30 7 H
## 31 71 C
## 32 33 I
## 33 11 A
## 34 29 S
## 35 59 Q
## 36 68 Y
## 37 34 Q
## 38 48 B
## 39 51 I
## 40 30 T
## 41 82 F
## 42 6 R
## 43 9 T
## 44 17 Y
## 45 44 L
## 46 69 W
## 47 4 T
## 48 31 R
## 49 87 R
## 50 25 N
## 51 42 K
## 52 83 X
## 53 8 T
## 54 13 S
## 55 70 L
## 56 47 V
## 57 100 O
## 58 97 E
## 59 10 J
## 60 52 O
library(sqldf)
#Left Join
#A=DF1 B=DF2
#Values in B corresponding to values in A
sqldf('select df1.*, df2.* from
df1 left join df2 on df1.var1=df2.var1')
## var1 var2 var1 var2
## 1 24 M NA <NA>
## 2 31 Z 31 R
## 3 51 C 51 I
## 4 72 Q 72 G
## 5 94 U NA <NA>
## 6 98 O 98 Z
## 7 83 C 83 X
## 8 91 H 91 P
## 9 13 O 13 S
## 10 52 G 52 O
## 11 36 T 36 P
## 12 79 C NA <NA>
## 13 5 B NA <NA>
## 14 97 C 97 E
## 15 60 I 60 C
## 16 47 A 47 V
## 17 20 R NA <NA>
## 18 10 C 10 J
## 19 67 G 67 U
## 20 8 T 8 T
#Left Outer
merge(x = df1, y = df2, by = "var1", all.x = TRUE)
## var1 var2.x var2.y
## 1 5 B <NA>
## 2 8 T T
## 3 10 C J
## 4 13 O S
## 5 20 R <NA>
## 6 24 M <NA>
## 7 31 Z R
## 8 36 T P
## 9 47 A V
## 10 51 C I
## 11 52 G O
## 12 60 I C
## 13 67 G U
## 14 72 Q G
## 15 79 C <NA>
## 16 83 C X
## 17 91 H P
## 18 94 U <NA>
## 19 97 C E
## 20 98 O Z
#Values only in A and not in B
sqldf('select df1.*, df2.* from
df1 left join df2 on df1.var1=df2.var1
where df2.var1 is NULL')
## var1 var2 var1 var2
## 1 24 M NA <NA>
## 2 94 U NA <NA>
## 3 79 C NA <NA>
## 4 5 B NA <NA>
## 5 20 R NA <NA>
#Trying Right Join- not supported so interchange the datasets
#A=DF1 B=DF2
#Values in B corresponding to values in A
sqldf('select df1.*, df2.* from
df2 left join df1 on df1.var1=df2.var1')
## var1 var2 var1 var2
## 1 98 O 98 Z
## 2 NA <NA> 14 A
## 3 60 I 60 C
## 4 NA <NA> 41 C
## 5 NA <NA> 54 A
## 6 NA <NA> 66 R
## 7 72 Q 72 G
## 8 91 H 91 P
## 9 NA <NA> 86 L
## 10 NA <NA> 90 J
## 11 NA <NA> 26 D
## 12 NA <NA> 58 F
## 13 NA <NA> 62 C
## 14 NA <NA> 81 U
## 15 NA <NA> 93 V
## 16 NA <NA> 53 U
## 17 NA <NA> 39 V
## 18 NA <NA> 80 P
## 19 NA <NA> 23 T
## 20 NA <NA> 61 R
## 21 NA <NA> 38 H
## 22 NA <NA> 1 D
## 23 NA <NA> 99 V
## 24 NA <NA> 3 E
## 25 NA <NA> 78 U
## 26 NA <NA> 27 R
## 27 NA <NA> 85 F
## 28 67 G 67 U
## 29 36 T 36 P
## 30 NA <NA> 7 H
## 31 NA <NA> 71 C
## 32 NA <NA> 33 I
## 33 NA <NA> 11 A
## 34 NA <NA> 29 S
## 35 NA <NA> 59 Q
## 36 NA <NA> 68 Y
## 37 NA <NA> 34 Q
## 38 NA <NA> 48 B
## 39 51 C 51 I
## 40 NA <NA> 30 T
## 41 NA <NA> 82 F
## 42 NA <NA> 6 R
## 43 NA <NA> 9 T
## 44 NA <NA> 17 Y
## 45 NA <NA> 44 L
## 46 NA <NA> 69 W
## 47 NA <NA> 4 T
## 48 31 Z 31 R
## 49 NA <NA> 87 R
## 50 NA <NA> 25 N
## 51 NA <NA> 42 K
## 52 83 C 83 X
## 53 8 T 8 T
## 54 13 O 13 S
## 55 NA <NA> 70 L
## 56 47 A 47 V
## 57 NA <NA> 100 O
## 58 97 C 97 E
## 59 10 C 10 J
## 60 52 G 52 O
#inner join
sqldf('select df1.*, df2.* from
df1 inner join df2 on df1.var1=df2.var1')
## var1 var2 var1 var2
## 1 31 Z 31 R
## 2 51 C 51 I
## 3 72 Q 72 G
## 4 98 O 98 Z
## 5 83 C 83 X
## 6 91 H 91 P
## 7 13 O 13 S
## 8 52 G 52 O
## 9 36 T 36 P
## 10 97 C 97 E
## 11 60 I 60 C
## 12 47 A 47 V
## 13 10 C 10 J
## 14 67 G 67 U
## 15 8 T 8 T
merge(df1,df2,by="var1")
## var1 var2.x var2.y
## 1 8 T T
## 2 10 C J
## 3 13 O S
## 4 31 Z R
## 5 36 T P
## 6 47 A V
## 7 51 C I
## 8 52 G O
## 9 60 I C
## 10 67 G U
## 11 72 Q G
## 12 83 C X
## 13 91 H P
## 14 97 C E
## 15 98 O Z
#trying outerjoin
merge(x = df1, y = df2, by = "var1", all = TRUE)
## var1 var2.x var2.y
## 1 1 <NA> D
## 2 3 <NA> E
## 3 4 <NA> T
## 4 5 B <NA>
## 5 6 <NA> R
## 6 7 <NA> H
## 7 8 T T
## 8 9 <NA> T
## 9 10 C J
## 10 11 <NA> A
## 11 13 O S
## 12 14 <NA> A
## 13 17 <NA> Y
## 14 20 R <NA>
## 15 23 <NA> T
## 16 24 M <NA>
## 17 25 <NA> N
## 18 26 <NA> D
## 19 27 <NA> R
## 20 29 <NA> S
## 21 30 <NA> T
## 22 31 Z R
## 23 33 <NA> I
## 24 34 <NA> Q
## 25 36 T P
## 26 38 <NA> H
## 27 39 <NA> V
## 28 41 <NA> C
## 29 42 <NA> K
## 30 44 <NA> L
## 31 47 A V
## 32 48 <NA> B
## 33 51 C I
## 34 52 G O
## 35 53 <NA> U
## 36 54 <NA> A
## 37 58 <NA> F
## 38 59 <NA> Q
## 39 60 I C
## 40 61 <NA> R
## 41 62 <NA> C
## 42 66 <NA> R
## 43 67 G U
## 44 68 <NA> Y
## 45 69 <NA> W
## 46 70 <NA> L
## 47 71 <NA> C
## 48 72 Q G
## 49 78 <NA> U
## 50 79 C <NA>
## 51 80 <NA> P
## 52 81 <NA> U
## 53 82 <NA> F
## 54 83 C X
## 55 85 <NA> F
## 56 86 <NA> L
## 57 87 <NA> R
## 58 90 <NA> J
## 59 91 H P
## 60 93 <NA> V
## 61 94 U <NA>
## 62 97 C E
## 63 98 O Z
## 64 99 <NA> V
## 65 100 <NA> O
#cbind(df1,df2)