library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
sqldf("select * from iris limit 2") ##first 2 vlues
## Loading required package: tcltk
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
sqldf("select count(*) from iris ")
## count(*)
## 1 150
sqldf("select Species,count(*) from iris group by Species")
## Species count(*)
## 1 setosa 50
## 2 versicolor 50
## 3 virginica 50
set.seed(42)
df1=data.frame(id=1:10,class=rep(c("case","ctrl"),5))
df1
## id class
## 1 1 case
## 2 2 ctrl
## 3 3 case
## 4 4 ctrl
## 5 5 case
## 6 6 ctrl
## 7 7 case
## 8 8 ctrl
## 9 9 case
## 10 10 ctrl
df2=data.frame(id=1:10,cov=round(runif(10)*10,1))
df2
## id cov
## 1 1 9.1
## 2 2 9.4
## 3 3 2.9
## 4 4 8.3
## 5 5 6.4
## 6 6 5.2
## 7 7 7.4
## 8 8 1.3
## 9 9 6.6
## 10 10 7.1
sqldf("select * from df1 join df2 on df1.id=df2.id") ###inner join
## id class id cov
## 1 1 case 1 9.1
## 2 2 ctrl 2 9.4
## 3 3 case 3 2.9
## 4 4 ctrl 4 8.3
## 5 5 case 5 6.4
## 6 6 ctrl 6 5.2
## 7 7 case 7 7.4
## 8 8 ctrl 8 1.3
## 9 9 case 9 6.6
## 10 10 ctrl 10 7.1
sqldf("select * from df1 join df2 on df1.id=df2.id where class='case'")
## id class id cov
## 1 1 case 1 9.1
## 2 3 case 3 2.9
## 3 5 case 5 6.4
## 4 7 case 7 7.4
## 5 9 case 9 6.6
sqldf("select df1.id,df2.cov as covariate from df1 join df2 on df1.id=df2.id where class='case' and cov>3 order by cov")
## id covariate
## 1 5 6.4
## 2 9 6.6
## 3 7 7.4
## 4 1 9.1
## starting httpd help server ... done
## [1] 0.8068949
##
## Call:
## lm(formula = iris$Sepal.Length ~ iris$Sepal.Width)
##
## Coefficients:
## (Intercept) iris$Sepal.Width
## 6.5262 -0.2234
##
## Call:
## lm(formula = iris$Sepal.Length ~ iris$Sepal.Width)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5561 -0.6333 -0.1120 0.5579 2.2226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5262 0.4789 13.63 <2e-16 ***
## iris$Sepal.Width -0.2234 0.1551 -1.44 0.152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8251 on 148 degrees of freedom
## Multiple R-squared: 0.01382, Adjusted R-squared: 0.007159
## F-statistic: 2.074 on 1 and 148 DF, p-value: 0.1519
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.