# Attach the packages the rest of this script relies on: ggplot() and the
# geom_*/facet_* layers come from ggplot2; %>%, group_by() and summarise()
# come from dplyr.
library(ggplot2)
library(dplyr)

# Loading file for analysis.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running the script on another computer.
# See ?read.csv for the meaning of the import arguments.
vmeEXP <- read.csv(
  "C:/Users/vmwangon/Desktop/HU/ANLY 512/Project/Family_ExpenseTransactions.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Data structure: print the column names, types and first values
str(vmeEXP)
## 'data.frame': 555 obs. of 13 variables:
## $ date : chr "11/24/2017" "11/23/2017" "11/22/2017" "11/22/2017" ...
## $ amount : num 17.82 14 1.49 157.5 42.07 ...
## $ balance : num 58.4 76.2 90.2 91.7 249.2 ...
## $ amount_type : chr "D" "D" "D" "D" ...
## $ balance_type: chr "C" "C" "C" "C" ...
## $ category : chr "Groceries" "PersonalCare" "DiningOut" "Children" ...
## $ description : chr "POS Withdrawal KROGER" "Card Withdrawal - 9087 GREAT CLIPS AT GA" "Card Withdrawal - MARKET@WORK 2067379149" "Card Withdrawal - WEST BLOOMFIELD SCHOOL" ...
## $ memo : chr "2905 UNION LAKE RD. COMMERCE TWP MI" "7164 Orchard Lake Rd W BLOOMFIELD MI" "1217 SW 7TH ST STE 100 2067379149 WA" "5810 COMMERCE RD 248-8656457 MI" ...
## $ notes : logi NA NA NA NA NA NA ...
## $ Acct_type : chr "C" "C" "C" "C" ...
## $ Day : int 24 23 22 22 22 21 21 21 20 20 ...
## $ Month : chr "Nov" "Nov" "Nov" "Nov" ...
## $ Year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## Warning: package 'ggplot2' was built under R version 3.2.5
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Data type conversion: treat category, Month and Year as categorical values
vmeEXP$category <- factor(vmeEXP$category)
vmeEXP$Month <- factor(vmeEXP$Month)
vmeEXP$Year <- factor(vmeEXP$Year)

# Keep only the 2017 transactions. The condition must be written with `==`:
# `Year = '2017'` is passed to subset() as an unused named argument, so no
# filtering condition is supplied and every row is returned.
vmeEXP_2017 <- subset(vmeEXP, Year == "2017")

# Individual transaction amounts per month for 2017, coloured by category
p1 <- ggplot(
  vmeEXP_2017,
  aes(x = Month, y = amount, colour = category, group = Month)
) +
  geom_point() +
  ggtitle("The Most Purchased Items 2017")
p1
# The October 2017 ATM transaction has the highest amount (> 2000).
# Individual transaction amounts per month for 2016, coloured by category.
# The filter uses `==`; with `Year = '2016'` subset() receives no condition
# and returns every row, so the plot would not be limited to 2016.
p2 <- ggplot(
  subset(vmeEXP, Year == "2016"),
  aes(x = Month, y = amount, colour = category, group = Month)
) +
  geom_point() +
  ggtitle("The Most Purchased Items 2016")
p2
# The October 2016 ATM transaction has the highest amount (> 2000).
# Stacked bars of the 2017 transaction amounts per month, filled by category.
# The filter uses `==`; with `Year = '2017'` subset() receives no condition
# and returns every row, so the bars would include all years.
p3 <- ggplot(
  subset(vmeEXP, Year == "2017"),
  aes(x = Month, y = amount, fill = category)
) +
  geom_bar(stat = "identity") +
  ggtitle("Monthy highest Expense 2017")
p3
# August and October show the highest expenses, while March, April and May have the lowest. In March, April and May I was living alone, without my family.
# Stacked bars of the 2016 transaction amounts per month, filled by category.
# The filter uses `==`; with `Year = '2016'` subset() receives no condition
# and returns every row, so the bars would include all years.
p4 <- ggplot(
  subset(vmeEXP, Year == "2016"),
  aes(x = Month, y = amount, fill = category)
) +
  geom_bar(stat = "identity") +
  ggtitle("Monthy highest Expense 2016")
p4

# multiplot() is a function defined further down in this file, not a
# documented package function, so this lookup reports that no help page exists.
help("multiplot")
## No documentation for 'multiplot' in specified packages and libraries:
## you could try '??multiplot'
# Bar per year built from every transaction amount in the data set, so the
# spending of the years present in vmeEXP can be compared side by side.
p5 <- ggplot(data = vmeEXP, mapping = aes(x = Year, y = amount)) +
  geom_bar(colour = "#FF9999", stat = "identity") +
  ggtitle("Compare Buying Habits between 2016 and 2017")
p5
# Total amount for every category/year combination
catSumary <- group_by(vmeEXP, category, Year)
catSumary1 <- summarise(catSumary, Total = sum(amount))

# Histogram of those totals (bins of width 50), one facet row per year,
# with the category legend placed underneath the panels
p7 <- ggplot(data = catSumary1, mapping = aes(x = Total, fill = category)) +
  geom_histogram(binwidth = 50, colour = "black") +
  facet_grid(Year ~ .) +
  ggtitle("highest items bought") +
  theme(legend.position = "bottom")
p7
# There is an increase in my spending in 2017 due to children's and clothing expenses.
# Total amount for every month/year combination
monthSumary <- group_by(vmeEXP, Month, Year)
monthSumary1 <- summarise(monthSumary, Total = sum(amount))

# Histogram of those totals (bins of width 50), one facet row per year,
# with the month legend placed underneath the panels
p8 <- ggplot(data = monthSumary1, mapping = aes(x = Total, fill = Month)) +
  geom_histogram(binwidth = 50, colour = "black") +
  facet_grid(Year ~ .) +
  ggtitle("highest month spending") +
  theme(legend.position = "bottom")
p8
# In September, October and November 2017 I had higher spending compared to 2016; only one month has an expense of approximately 6000.
# Multiple plot function
#
# Draws several plot objects on a single page using a grid layout.
#
# Arguments:
# - ...:      plot objects (e.g. ggplot objects) to draw.
# - plotlist: optional list of further plot objects; they are appended after
#             the objects passed in `...`.
# - file:     not used by this function; kept in the signature so existing
#             calls that supply it keep working.
# - cols:     number of columns in the layout.
# - layout:   a matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# Returns: invisibly NULL when no plot or several plots are supplied; for a
# single plot, whatever print() returns for that plot.
#
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return before a page is opened or a plot is indexed.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Number of rows needed, calculated from the number of columns
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    # A single plot needs no layout; print it directly.
    print(plots[[1]])
  } else {
    # Set up the page. grid functions are called through the namespace so the
    # function does not alter the caller's search path.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Make each plot, in the correct location
    for (i in seq_len(num_plots)) {
      # Row/column positions of the layout cells that contain plot i
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = grid::viewport(
          layout.pos.row = cell$row,
          layout.pos.col = cell$col
        )
      )
    }
    invisible(NULL)
  }
}
# Stack the two scatter plots, then the two bar charts, each pair on its own
# single-column page.
multiplot(plotlist = list(p1, p2), cols = 1)
multiplot(plotlist = list(p3, p4), cols = 1)