Explore The Data
# Load text file into local variable called 'data'
data = read.delim(file = 'purchases.txt', header = FALSE, sep = '\t', dec = '.')
# Display what has been loaded
head(data)
# FOR EACH COLUMN prints min, quadrants, median, mean, max
summary(data)
V1 V2 V3
Min. : 10 Min. : 5.00 2013-12-31: 864
1st Qu.: 57720 1st Qu.: 25.00 2006-12-31: 584
Median :102440 Median : 30.00 2012-12-31: 583
Mean :108935 Mean : 62.34 2011-12-31: 510
3rd Qu.:160525 3rd Qu.: 60.00 2008-12-31: 503
Max. :264200 Max. :4500.00 2014-12-31: 485
(Other) :47714
Use SQL and visualize the Data
# To explore the data using simple SQL statements
# we'll use sqldf package which allows us to use a
# SQL database like an R dataframe & a dataframe like a SQL DB
library(sqldf)
# Number of purchases per year
## SELECT sql statement which is used to fetch the data from a database
## and returns results in the form of a data table
# see how many purchases were made each year from data set 'data':
# 'SELECT each year_of_purchase and COUNT each instance of year_of_purchase
x = sqldf("SELECT year_of_purchase, COUNT(year_of_purchase) AS 'counter' FROM data GROUP BY 1 ORDER BY 1")
# now plot the counted number of purchases by year as a bar plot
barplot(x$counter, names.arg = x$year_of_purchase)

# Average purchase amount per year
x = sqldf("SELECT year_of_purchase, AVG(purchase_amount) AS 'avg_amount' FROM data GROUP BY 1 ORDER BY 1")
barplot(x$avg_amount, names.arg = x$year_of_purchase)

# Total purchase amounts per year
x = sqldf("SELECT year_of_purchase, SUM(purchase_amount) AS 'sum_amount' FROM data GROUP BY 1 ORDER BY 1")
barplot(x$sum_amount, names.arg = x$year_of_purchase)

# All in one
x = sqldf("SELECT year_of_purchase,
COUNT(year_of_purchase) AS 'counter',
AVG(purchase_amount) AS 'avg_amount',
SUM(purchase_amount) AS 'sum_amount'
FROM data GROUP BY 1 ORDER BY 1")
print(x)
LS0tCnRpdGxlOiAiV2VlayAxIE5vdGVib29rIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyBFeHBsb3JlIFRoZSBEYXRhCgpgYGB7ciBleHBsb3JlLWRhdGF9CiMgTG9hZCB0ZXh0IGZpbGUgaW50byBsb2NhbCB2YXJpYWJsZSBjYWxsZWQgJ2RhdGEnCmRhdGEgPSByZWFkLmRlbGltKGZpbGUgPSAncHVyY2hhc2VzLnR4dCcsIGhlYWRlciA9IEZBTFNFLCBzZXAgPSAnXHQnLCBkZWMgPSAnLicpCgojIERpc3BsYXkgd2hhdCBoYXMgYmVlbiBsb2FkZWQKaGVhZChkYXRhKQojIEZPUiBFQUNIIENPTFVNTiBwcmludHMgbWluLCBxdWFkcmFudHMsIG1lZGlhbiwgbWVhbiwgbWF4CnN1bW1hcnkoZGF0YSkKYGBgCgoKIyMgVHJhbnNmb3JtIFRoZSBEYXRhCgpgYGB7ciB0cmFuc2Zvcm0tZGF0YX0KIyBBZGQgaGVhZGVycyBhbmQgaW50ZXJwcmV0IHRoZSBsYXN0IGNvbHVtbiBhcyBhIGRhdGUsIGV4dHJhY3QgeWVhciBvZiBwdXJjaGFzZQojIHJlbmFtZSBjb2x1bW5zIHRvIGh1bWFuIHVuZGVyc3RhbmRhYmxlIHJlcHJlc2VudGF0aW9ucwpjb2xuYW1lcyhkYXRhKSA9IGMoJ2N1c3RvbWVyX2lkJywgJ3B1cmNoYXNlX2Ftb3VudCcsICdkYXRlX29mX3B1cmNoYXNlJykKIyB0ZWxsIFIgdGhpcyBjb2x1bW4gaXMgYSBkYXRlIHZhcmlhYmxlCmRhdGEkZGF0ZV9vZl9wdXJjaGFzZSA9IGFzLkRhdGUoZGF0YSRkYXRlX29mX3B1cmNoYXNlLCAiJVktJW0tJWQiKQojIGNyZWF0ZSBhIG5ldyBjb2x1bW4gdGhhdCBjb250YWlucyB0aGUgeWVhciBvZiBwdXJjaGFzZQpkYXRhJHllYXJfb2ZfcHVyY2hhc2UgPSBhcy5udW1lcmljKGZvcm1hdChkYXRhJGRhdGVfb2ZfcHVyY2hhc2UsICIlWSIpKQpgYGAKCmBgYHtyIGRpc3BsYXktdHJhbnNmb3JtfQojIERpc3BsYXkgdGhlIGRhdGEgc2V0IGFmdGVyIHRyYW5zZm9ybWF0aW9uCmhlYWQoZGF0YSkKc3VtbWFyeShkYXRhKQpgYGAKCgojIyBVc2UgU1FMIGFuZCB2aXN1YWxpemUgdGhlIERhdGEKCmBgYHtyIHVzaW5nLXNxbCwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KIyBUbyBleHBsb3JlIHRoZSBkYXRhIHVzaW5nIHNpbXBsZSBTUUwgc3RhdGVtZW50cwojIHdlJ2xsIHVzZSBzcWxkZiBwYWNrYWdlIHdoaWNoIGFsbG93cyB1cyB0byB1c2UgYQojIFNRTCBkYXRhYmFzZSBsaWtlIGFuIFIgZGF0YWZyYW1lICYgYSBkYXRhZnJhbWUgbGlrZSBhIFNRTCBEQgpsaWJyYXJ5KHNxbGRmKQoKCiMgTnVtYmVyIG9mIHB1cmNoYXNlcyBwZXIgeWVhcgojIyBTRUxFQ1Qgc3FsIHN0YXRlbWVudCB3aGljaCBpcyB1c2VkIHRvIGZldGNoIHRoZSBkYXRhIGZyb20gYSBkYXRhYmFzZQojIyBhbmQgcmV0dXJucyByZXN1bHRzIGluIHRoZSBmb3JtIG9mIGEgZGF0YSB0YWJsZQojIHNlZSBob3cgbWFueSBwdXJjaGFzZXMgd2VyZSBtYWRlIGVhY2ggeWVhciBmcm9tIGRhdGEgc2V0ICdkYXRhJzoKIyAnU0VMRUNUIGVhY2ggeWVhcl9vZl9wdXJjaGFzZSBhbmQgQ09VTlQgZWFjaCBpbnN0YW5jZSBvZiB5ZWFyX29mX3B1cmNoYXNlCnggPSBzcWxkZigiU0VMRUNUIHllYXJfb2ZfcHVyY2hhc2UsIENPVU5UKHllYXJfb2ZfcHVyY2hhc2UpIEFTICdjb3VudGVyJyBGUk9NIGRhdGEgR1JPVVAgQlkgMSBPUkRFUiBCWSAxIikKCiMgbm93IHBsb3QgdGhlIGNvdW50ZWQgbnVtYmVyIG9mIHB1cmNoYXNlcyBieSB5ZWFyIGFzIGEgYmFyIHBsb3QKYmFycGxvdCh4JGNvdW50ZXIsIG5hbWVzLmFyZyA9IHgkeWVhcl9vZl9wdXJjaGFzZSkKCiMgQXZlcmFnZSBwdXJjaGFzZSBhbW91bnQgcGVyIHllYXIKeCA9IHNxbGRmKCJTRUxFQ1QgeWVhcl9vZl9wdXJjaGFzZSwgQVZHKHB1cmNoYXNlX2Ftb3VudCkgQVMgJ2F2Z19hbW91bnQnIEZST00gZGF0YSBHUk9VUCBCWSAxIE9SREVSIEJZIDEiKQpiYXJwbG90KHgkYXZnX2Ftb3VudCwgbmFtZXMuYXJnID0geCR5ZWFyX29mX3B1cmNoYXNlKQoKIyBUb3RhbCBwdXJjaGFzZSBhbW91bnRzIHBlciB5ZWFyCnggPSBzcWxkZigiU0VMRUNUIHllYXJfb2ZfcHVyY2hhc2UsIFNVTShwdXJjaGFzZV9hbW91bnQpIEFTICdzdW1fYW1vdW50JyBGUk9NIGRhdGEgR1JPVVAgQlkgMSBPUkRFUiBCWSAxIikKYmFycGxvdCh4JHN1bV9hbW91bnQsIG5hbWVzLmFyZyA9IHgkeWVhcl9vZl9wdXJjaGFzZSkKCiMgQWxsIGluIG9uZQp4ID0gc3FsZGYoIlNFTEVDVCB5ZWFyX29mX3B1cmNoYXNlLAogICAgICAgICAgICAgICAgICBDT1VOVCh5ZWFyX29mX3B1cmNoYXNlKSBBUyAnY291bnRlcicsCiAgICAgICAgICAgICAgICAgIEFWRyhwdXJjaGFzZV9hbW91bnQpIEFTICdhdmdfYW1vdW50JywKICAgICAgICAgICAgICAgICAgU1VNKHB1cmNoYXNlX2Ftb3VudCkgQVMgJ3N1bV9hbW91bnQnCiAgICAgICAgICAgRlJPTSBkYXRhIEdST1VQIEJZIDEgT1JERVIgQlkgMSIpCnByaW50KHgpCmBgYA==