library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
sqldf("select * from mtcars where mpg<15 order by mpg")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
##    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1 10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## 2 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## 3 13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## 4 14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## 5 14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
sqldf("select mpg,
       case
       when mpg >15 then 'gas guzzler'
       else 'fuel economy'
      end typeofcar
      from mtcars")
##     mpg    typeofcar
## 1  21.0  gas guzzler
## 2  21.0  gas guzzler
## 3  22.8  gas guzzler
## 4  21.4  gas guzzler
## 5  18.7  gas guzzler
## 6  18.1  gas guzzler
## 7  14.3 fuel economy
## 8  24.4  gas guzzler
## 9  22.8  gas guzzler
## 10 19.2  gas guzzler
## 11 17.8  gas guzzler
## 12 16.4  gas guzzler
## 13 17.3  gas guzzler
## 14 15.2  gas guzzler
## 15 10.4 fuel economy
## 16 10.4 fuel economy
## 17 14.7 fuel economy
## 18 32.4  gas guzzler
## 19 30.4  gas guzzler
## 20 33.9  gas guzzler
## 21 21.5  gas guzzler
## 22 15.5  gas guzzler
## 23 15.2  gas guzzler
## 24 13.3 fuel economy
## 25 19.2  gas guzzler
## 26 27.3  gas guzzler
## 27 26.0  gas guzzler
## 28 30.4  gas guzzler
## 29 15.8  gas guzzler
## 30 19.7  gas guzzler
## 31 15.0 fuel economy
## 32 21.4  gas guzzler
sqldf("select avg(mpg) from mtcars")
##   avg(mpg)
## 1 20.09062
sqldf("select * from mtcars
where mpg >
      (select avg(mpg) from mtcars)")
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 6  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 7  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 8  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 9  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 10 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 11 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 12 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 13 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 14 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
sqldf("select * from mtcars
where mpg >
      (select median(mpg) from mtcars)")
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 6  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 7  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 8  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 9  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 10 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 11 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 12 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 13 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 14 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 15 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
sqldf("select * from mtcars
where mpg >
      (select median(mpg) from mtcars) order by mpg desc")
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 2  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 3  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 4  30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 5  27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 6  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 7  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 8  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 11 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 12 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 13 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 14 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 15 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
sqldf("select * from mtcars limit 5")
##    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
sqldf("select avg(mpg),cyl from mtcars group by cyl")
##   avg(mpg) cyl
## 1 26.66364   4
## 2 19.74286   6
## 3 15.10000   8
sqldf("select distinct(cyl) from mtcars ")
##   cyl
## 1   6
## 2   4
## 3   8
sqldf("select count(cyl),cyl from mtcars group by cyl")
##   count(cyl) cyl
## 1         11   4
## 2          7   6
## 3         14   8
table(mtcars$cyl)
## 
##  4  6  8 
## 11  7 14
rm(list=ls())

df1=NULL
df1$var1=sample(100,20,F)
df1$var2=sample(LETTERS,20,T)
df1=as.data.frame(df1)
df1
##    var1 var2
## 1    24    M
## 2    31    Z
## 3    51    C
## 4    72    Q
## 5    94    U
## 6    98    O
## 7    83    C
## 8    91    H
## 9    13    O
## 10   52    G
## 11   36    T
## 12   79    C
## 13    5    B
## 14   97    C
## 15   60    I
## 16   47    A
## 17   20    R
## 18   10    C
## 19   67    G
## 20    8    T
df2=NULL
df2$var1=sample(100,60,F)
df2$var2=sample(LETTERS,60,T)
df2=as.data.frame(df2)
df2
##    var1 var2
## 1    98    Z
## 2    14    A
## 3    60    C
## 4    41    C
## 5    54    A
## 6    66    R
## 7    72    G
## 8    91    P
## 9    86    L
## 10   90    J
## 11   26    D
## 12   58    F
## 13   62    C
## 14   81    U
## 15   93    V
## 16   53    U
## 17   39    V
## 18   80    P
## 19   23    T
## 20   61    R
## 21   38    H
## 22    1    D
## 23   99    V
## 24    3    E
## 25   78    U
## 26   27    R
## 27   85    F
## 28   67    U
## 29   36    P
## 30    7    H
## 31   71    C
## 32   33    I
## 33   11    A
## 34   29    S
## 35   59    Q
## 36   68    Y
## 37   34    Q
## 38   48    B
## 39   51    I
## 40   30    T
## 41   82    F
## 42    6    R
## 43    9    T
## 44   17    Y
## 45   44    L
## 46   69    W
## 47    4    T
## 48   31    R
## 49   87    R
## 50   25    N
## 51   42    K
## 52   83    X
## 53    8    T
## 54   13    S
## 55   70    L
## 56   47    V
## 57  100    O
## 58   97    E
## 59   10    J
## 60   52    O
library(sqldf)

#Left Join
#A=DF1 B=DF2
#Values in B corresponding to values in A
sqldf('select df1.*, df2.* from 
      df1 left join df2  on df1.var1=df2.var1')
##    var1 var2 var1 var2
## 1    24    M   NA <NA>
## 2    31    Z   31    R
## 3    51    C   51    I
## 4    72    Q   72    G
## 5    94    U   NA <NA>
## 6    98    O   98    Z
## 7    83    C   83    X
## 8    91    H   91    P
## 9    13    O   13    S
## 10   52    G   52    O
## 11   36    T   36    P
## 12   79    C   NA <NA>
## 13    5    B   NA <NA>
## 14   97    C   97    E
## 15   60    I   60    C
## 16   47    A   47    V
## 17   20    R   NA <NA>
## 18   10    C   10    J
## 19   67    G   67    U
## 20    8    T    8    T
#Left Outer
merge(x = df1, y = df2, by = "var1", all.x = TRUE)
##    var1 var2.x var2.y
## 1     5      B   <NA>
## 2     8      T      T
## 3    10      C      J
## 4    13      O      S
## 5    20      R   <NA>
## 6    24      M   <NA>
## 7    31      Z      R
## 8    36      T      P
## 9    47      A      V
## 10   51      C      I
## 11   52      G      O
## 12   60      I      C
## 13   67      G      U
## 14   72      Q      G
## 15   79      C   <NA>
## 16   83      C      X
## 17   91      H      P
## 18   94      U   <NA>
## 19   97      C      E
## 20   98      O      Z
#Values only in A and not in B

sqldf('select df1.*, df2.* from 
      df1 left join df2  on df1.var1=df2.var1
      where df2.var1 is NULL')
##   var1 var2 var1 var2
## 1   24    M   NA <NA>
## 2   94    U   NA <NA>
## 3   79    C   NA <NA>
## 4    5    B   NA <NA>
## 5   20    R   NA <NA>
#Trying Right Join- not supported so interchange the datasets
#A=DF1 B=DF2
#Values in B corresponding to values in A
sqldf('select df1.*, df2.* from 
      df2 left join df1  on df1.var1=df2.var1')
##    var1 var2 var1 var2
## 1    98    O   98    Z
## 2    NA <NA>   14    A
## 3    60    I   60    C
## 4    NA <NA>   41    C
## 5    NA <NA>   54    A
## 6    NA <NA>   66    R
## 7    72    Q   72    G
## 8    91    H   91    P
## 9    NA <NA>   86    L
## 10   NA <NA>   90    J
## 11   NA <NA>   26    D
## 12   NA <NA>   58    F
## 13   NA <NA>   62    C
## 14   NA <NA>   81    U
## 15   NA <NA>   93    V
## 16   NA <NA>   53    U
## 17   NA <NA>   39    V
## 18   NA <NA>   80    P
## 19   NA <NA>   23    T
## 20   NA <NA>   61    R
## 21   NA <NA>   38    H
## 22   NA <NA>    1    D
## 23   NA <NA>   99    V
## 24   NA <NA>    3    E
## 25   NA <NA>   78    U
## 26   NA <NA>   27    R
## 27   NA <NA>   85    F
## 28   67    G   67    U
## 29   36    T   36    P
## 30   NA <NA>    7    H
## 31   NA <NA>   71    C
## 32   NA <NA>   33    I
## 33   NA <NA>   11    A
## 34   NA <NA>   29    S
## 35   NA <NA>   59    Q
## 36   NA <NA>   68    Y
## 37   NA <NA>   34    Q
## 38   NA <NA>   48    B
## 39   51    C   51    I
## 40   NA <NA>   30    T
## 41   NA <NA>   82    F
## 42   NA <NA>    6    R
## 43   NA <NA>    9    T
## 44   NA <NA>   17    Y
## 45   NA <NA>   44    L
## 46   NA <NA>   69    W
## 47   NA <NA>    4    T
## 48   31    Z   31    R
## 49   NA <NA>   87    R
## 50   NA <NA>   25    N
## 51   NA <NA>   42    K
## 52   83    C   83    X
## 53    8    T    8    T
## 54   13    O   13    S
## 55   NA <NA>   70    L
## 56   47    A   47    V
## 57   NA <NA>  100    O
## 58   97    C   97    E
## 59   10    C   10    J
## 60   52    G   52    O
#inner join
sqldf('select df1.*, df2.* from 
      df1 inner join df2  on df1.var1=df2.var1')
##    var1 var2 var1 var2
## 1    31    Z   31    R
## 2    51    C   51    I
## 3    72    Q   72    G
## 4    98    O   98    Z
## 5    83    C   83    X
## 6    91    H   91    P
## 7    13    O   13    S
## 8    52    G   52    O
## 9    36    T   36    P
## 10   97    C   97    E
## 11   60    I   60    C
## 12   47    A   47    V
## 13   10    C   10    J
## 14   67    G   67    U
## 15    8    T    8    T
merge(df1,df2,by="var1")
##    var1 var2.x var2.y
## 1     8      T      T
## 2    10      C      J
## 3    13      O      S
## 4    31      Z      R
## 5    36      T      P
## 6    47      A      V
## 7    51      C      I
## 8    52      G      O
## 9    60      I      C
## 10   67      G      U
## 11   72      Q      G
## 12   83      C      X
## 13   91      H      P
## 14   97      C      E
## 15   98      O      Z
#trying outerjoin
merge(x = df1, y = df2, by = "var1", all = TRUE)
##    var1 var2.x var2.y
## 1     1   <NA>      D
## 2     3   <NA>      E
## 3     4   <NA>      T
## 4     5      B   <NA>
## 5     6   <NA>      R
## 6     7   <NA>      H
## 7     8      T      T
## 8     9   <NA>      T
## 9    10      C      J
## 10   11   <NA>      A
## 11   13      O      S
## 12   14   <NA>      A
## 13   17   <NA>      Y
## 14   20      R   <NA>
## 15   23   <NA>      T
## 16   24      M   <NA>
## 17   25   <NA>      N
## 18   26   <NA>      D
## 19   27   <NA>      R
## 20   29   <NA>      S
## 21   30   <NA>      T
## 22   31      Z      R
## 23   33   <NA>      I
## 24   34   <NA>      Q
## 25   36      T      P
## 26   38   <NA>      H
## 27   39   <NA>      V
## 28   41   <NA>      C
## 29   42   <NA>      K
## 30   44   <NA>      L
## 31   47      A      V
## 32   48   <NA>      B
## 33   51      C      I
## 34   52      G      O
## 35   53   <NA>      U
## 36   54   <NA>      A
## 37   58   <NA>      F
## 38   59   <NA>      Q
## 39   60      I      C
## 40   61   <NA>      R
## 41   62   <NA>      C
## 42   66   <NA>      R
## 43   67      G      U
## 44   68   <NA>      Y
## 45   69   <NA>      W
## 46   70   <NA>      L
## 47   71   <NA>      C
## 48   72      Q      G
## 49   78   <NA>      U
## 50   79      C   <NA>
## 51   80   <NA>      P
## 52   81   <NA>      U
## 53   82   <NA>      F
## 54   83      C      X
## 55   85   <NA>      F
## 56   86   <NA>      L
## 57   87   <NA>      R
## 58   90   <NA>      J
## 59   91      H      P
## 60   93   <NA>      V
## 61   94      U   <NA>
## 62   97      C      E
## 63   98      O      Z
## 64   99   <NA>      V
## 65  100   <NA>      O
#cbind(df1,df2)