Steph Locke (@stefflocke)
March 22nd, 2014
# Connection courtesy of Redgate bit.ly/1jrl7V6
# Activate the ODBC package for use
library("RODBC")
# Build a connection to the DB for reuse
azure <- odbcDriverConnect(
"Driver={SQL Server Native Client 11.0};
Server=mhknbn2kdz.database.windows.net;
Database=AdventureWorks2012;
Uid=sqlfamily;
Pwd=sqlf@m1ly;")
# Store simple SELECT statement results
person <- sqlQuery(azure,
"SELECT TOP 10 * FROM [Person].[Person]")
# SQL statement used by some SSRS reports
sales <- sqlQuery(azure, "
SELECT
soh.OrderDate,
t.Name as Territory,
cr.countryregioncode as Code,
cr.Name as Region,
sum(TotalDue) as Value,
count(*) as Volume
FROM [Sales].[SalesOrderHeader] soh
INNER JOIN [Sales].[SalesTerritory] t
on t.territoryid=soh.territoryid
INNER JOIN [Person].[CountryRegion] cr
on cr.countryregioncode=t.countryregioncode
WHERE soh.OrderDate < '2008-07-01'
GROUP BY soh.OrderDate,
t.Name ,
cr.Name,
cr.countryregioncode")
library("ggplot2")
#using the most basic method
qplot(OrderDate, Value, colour=Region, data=sales)
Use data.table - feels like SQL and is REALLY fast
DT[ where, select | update, group by] [ having ] [ order by ] [ ]…[ ]
library("data.table")
library("zoo")
library("scales")
# summarise data
sales.sum <- data.table( sales )[,
list( Value = sum(Value), Volume = sum(Volume)),
by = list( YearQtr = as.yearqtr(OrderDate),
Region, Code)]
p<-ggplot( data = sales.sum, aes( x = YearQtr, y = Value, group = Region,
colour = Region )) + geom_line() +
scale_x_yearqtr()
p
p <- p + theme_minimal() + theme( legend.position = "bottom" ) +
labs( x = "Order Quarter", y = "", title = "Regional orders over time" ) +
scale_y_continuous( labels = dollar ) + scale_x_yearqtr( format = "%YQ%q" )
p
library("ggthemes")
p<- p+facet_wrap( ~Region, ncol = 3) +
theme_few()+
theme( legend.position = "none", axis.text.x =
element_text( angle = 45, hjust = 1 ))
p
p + aes(y=Volume) + scale_y_continuous()
Insert or update
sqlSave(azure, sales.sum, salessumary)
Can do xlsx but I prefer keeping things in CSV
write.csv(sales, "sales.csv")
knit2pdf(salesanalysis.Rnw)
knit2html(salesanalysis.Rmd)
RODBC
for a single layer of accessdata.table
my go-to package for all data manipulation
zoo
adds some excellent date/time functionalityggplot2
my go-to charting package
xtable
tables in documentationknitr
documentationshiny
interactive reportstestthat
unit testingdevtools
general package dev