Data Preparation

# Load the family expense transactions for analysis.
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects yet does not give a clean session (attached
# packages and options survive). Restart R for a truly fresh start instead.
# NOTE(review): the path below is machine-specific - confirm before running
# this on another computer.
vmeEXP <- read.csv(
  "C:/Users/vmwangon/Desktop/HU/ANLY 512/Project/Family_ExpenseTransactions.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Open the documentation page for read.csv
help("read.csv")
## starting httpd help server ... done
# Compact overview of the loaded data frame: column names, types, first values
str(vmeEXP)
## 'data.frame':    555 obs. of  13 variables:
##  $ date        : chr  "11/24/2017" "11/23/2017" "11/22/2017" "11/22/2017" ...
##  $ amount      : num  17.82 14 1.49 157.5 42.07 ...
##  $ balance     : num  58.4 76.2 90.2 91.7 249.2 ...
##  $ amount_type : chr  "D" "D" "D" "D" ...
##  $ balance_type: chr  "C" "C" "C" "C" ...
##  $ category    : chr  "Groceries" "PersonalCare" "DiningOut" "Children" ...
##  $ description : chr  "POS Withdrawal KROGER" "Card Withdrawal - 9087 GREAT CLIPS AT GA" "Card Withdrawal - MARKET@WORK 2067379149" "Card Withdrawal - WEST BLOOMFIELD SCHOOL" ...
##  $ memo        : chr  "2905 UNION LAKE RD. COMMERCE TWP MI" "7164 Orchard Lake Rd W BLOOMFIELD MI" "1217 SW 7TH ST STE 100 2067379149 WA" "5810 COMMERCE RD 248-8656457 MI" ...
##  $ notes       : logi  NA NA NA NA NA NA ...
##  $ Acct_type   : chr  "C" "C" "C" "C" ...
##  $ Day         : int  24 23 22 22 22 21 21 21 20 20 ...
##  $ Month       : chr  "Nov" "Nov" "Nov" "Nov" ...
##  $ Year        : int  2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## Warning: package 'ggplot2' was built under R version 3.2.5
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Which household items were bought most frequently before and after my family joined me?

part 1: 2017

# Data type conversion: treat category, month and year as categorical values
vmeEXP$category <- factor(vmeEXP$category)
vmeEXP$Month <- factor(vmeEXP$Month)
vmeEXP$Year <- factor(vmeEXP$Year)

# Keep only the 2017 transactions. The comparison must be `==`: writing
# `Year = '2017'` hands subset() a named argument that it ignores, so every
# row would be returned regardless of year.
vmeEXP_2017 <- subset(vmeEXP, Year == "2017")

# Transaction amounts per month, one point per row, coloured by category
p1 <- ggplot(
  vmeEXP_2017,
  aes(x = Month, y = amount, colour = category, group = Month)
) +
  geom_point() +
  ggtitle("The  Most Purchased Items 2017")
p1

    The October 2017 ATM transaction has the highest amount (> 2000).

part 2: 2016

# Transaction amounts per month for 2016, one point per row, coloured by
# category. The filter must use `==`: `Year = '2016'` is an ignored named
# argument to subset(), which would leave all years in the plot.
p2 <- ggplot(
  subset(vmeEXP, Year == "2016"),
  aes(x = Month, y = amount, colour = category, group = Month)
) +
  geom_point() +
  ggtitle("The  Most Purchased Items 2016")
p2

The October 2016 ATM transaction has the highest amount (> 2000).

What is the general increase in monthly expenses in 2017?

Part 1: Monthly highest expense, 2017

# Monthly spending for 2017: transaction amounts stacked per month, filled by
# category. The filter must use `==`: `Year = '2017'` is an ignored named
# argument to subset(), which would leave all years in the plot.
p3 <- ggplot(
  subset(vmeEXP, Year == "2017"),
  aes(x = Month, y = amount, fill = category)
) +
  geom_bar(stat = "identity") +
  ggtitle("Monthy highest Expense 2017")
p3

August and October show the highest expenses, while March, April and May have the lowest. In March, April and May I was living alone, without my family.

Part 2: Monthly highest expense, 2016

# Monthly spending for 2016: transaction amounts stacked per month, filled by
# category. The filter must use `==`: `Year = '2016'` is an ignored named
# argument to subset(), which would leave all years in the plot.
p4 <- ggplot(
  subset(vmeEXP, Year == "2016"),
  aes(x = Month, y = amount, fill = category)
) +
  geom_bar(stat = "identity") +
  ggtitle("Monthy highest Expense 2016")
p4

# Look up documentation for `multiplot`. As the output below shows, none is
# found in the attached packages; a `multiplot` function is defined later in
# this file instead.
help("multiplot")
## No documentation for 'multiplot' in specified packages and libraries:
## you could try '??multiplot'

How does spending compare before and after my family joined me (2016 vs. 2017)?

# Year-over-year comparison: transaction amounts stacked for each Year value
p5 <- ggplot(data = vmeEXP, mapping = aes(x = Year, y = amount)) +
  geom_bar(stat = "identity", colour = "#FF9999") +
  ggtitle("Compare  Buying Habits  between 2016 and 2017")
p5

# Total amount per (category, Year) combination
catSumary <- group_by(vmeEXP, category, Year)
catSumary1 <- summarise(catSumary, Total = sum(amount))

What are the most-purchased items?

# Histogram of the per-category totals (bin width 50), filled by category,
# with one facet row per Year and the legend placed below the plot.
# The summary table is passed directly: the earlier subset() call had no
# condition and therefore filtered nothing.
p7 <- ggplot(catSumary1, aes(x = Total, fill = category)) +
  geom_histogram(colour = "black", binwidth = 50) +
  facet_grid(Year ~ .) +
  ggtitle("highest items bought") +
  theme(legend.position = "bottom")
p7

My spending increased in 2017 because of children's expenses and clothing expenses.

# Total amount per (Month, Year) combination
monthSumary <- group_by(vmeEXP, Month, Year)
monthSumary1 <- summarise(monthSumary, Total = sum(amount))

Which month has the highest spending?

# Histogram of the per-month totals (bin width 50), filled by Month, with one
# facet row per Year and the legend placed below the plot.
# The summary table is passed directly: the earlier subset() call had no
# condition and therefore filtered nothing.
p8 <- ggplot(monthSumary1, aes(x = Total, fill = Month)) +
  geom_histogram(colour = "black", binwidth = 50) +
  facet_grid(Year ~ .) +
  ggtitle("highest month spending") +
  theme(legend.position = "bottom")
p8

In September, October and November 2017 my spending was higher than in 2016, when only one month had expenses of approximately 6000.

Dashboard: Personal Buying Habits Analysis for Period Ending Nov-2017

# Multiple plot function
#
# Draws several plot objects on one page, arranged on a grid layout.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - ...:      plot objects to draw
# - plotlist: optional list of further plot objects, appended after those in ...
# - file:     not used by the function body; kept so existing calls still work
# - cols:     Number of columns in layout
# - layout:   A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# When no plots are supplied the function draws nothing and returns NULL
# invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  # Nothing to draw: leave before a page is opened. Without this guard the
  # sequences below would count 1, 0 and index a plot that does not exist.
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # nrow: number of rows needed, calculated from the number of columns
    rows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * rows), ncol = cols, nrow = rows)
  }

  if (numPlots == 1) {
    # A single plot needs no layout; print it on its own
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the row/col positions of the layout cells that contain plot i
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
# Dashboard page 1: p1 and p2 drawn in a single column
multiplot(plotlist = list(p1, p2), cols = 1)

# Dashboard page 2: p3 and p4 drawn in a single column
multiplot(plotlist = list(p3, p4), cols = 1)