#Name: Tanjil Azad
#StudentID: 2012193630
#Final Project
# Load the data ----
# The path is relative, so the file is looked up in the working directory.
Data <- read.csv("Fraud Directory.csv")

# First and last ten rows.
head(Data, 10)
tail(Data, 10)

# Selecting columns by name with `[` keeps a data frame.
Data["amount"]
Data[c("amount", "nameOrig")]

# Summary of every column, then of a single column.
summary(Data)
summary(Data$amount)

# Open the data viewer and show the structure of the data frame.
View(Data)
str(Data)

# `$` extracts one column as a vector.
Data$amount

# First three columns. This previously indexed `data` (lower case), which is
# the utils::data() function and not this data frame, so the subset failed.
Data3 <- Data[, 1:3]

# First nine columns, selected by position.
Data[1:9]
# Summaries and tables ----
summary(Data$nameOrig)
mean(Data$oldbalanceOrg)
sd(Data$newbalanceOrig)
median(Data$newbalanceOrig)

# Summarise categorical data ----
# `type` is the column this script treats as categorical (it is used for
# facets, shape and fill further down). Tabulating the continuous `amount`
# column gives one cell per distinct amount, which is not a useful summary.
x <- table(Data$type)
x
# Cross-tabulate transaction type against the fraud indicator.
table(Data$type, Data$isFraud)

# Boxplots ----
boxplot(Data$newbalanceOrig, col = "blue")
# One box per transaction type. Grouping by the continuous `oldbalanceDest`
# column would create one group per distinct balance.
boxplot(amount ~ type, data = Data, col = "blue")

# Histograms ----
hist(Data$newbalanceOrig, breaks = 10, col = "red")
# Data visualisation with ggplot2 ----
library("ggplot2")

# Base plot object reused below: amount on x, transaction type on y.
p <- ggplot(data = Data, mapping = aes(x = amount, y = type))

# Default points, then larger red points.
p + geom_point()
p + geom_point(colour = "red", size = 3)

# The layers below override the base x/y mapping and plot amount against
# newbalanceDest, encoding `type` through transparency or point shape.
p + geom_point(
  mapping = aes(x = newbalanceDest, y = amount, alpha = type),
  colour = "blue"
)
p + geom_point(
  mapping = aes(x = newbalanceDest, y = amount, shape = type),
  colour = "green"
)

# Scatterplot with the categorical variable on alpha ----
p + geom_point(
  mapping = aes(x = newbalanceDest, y = amount, alpha = type),
  colour = "red"
)

# Facets: one panel per transaction type, laid out in two rows ----
p +
  geom_point(
    mapping = aes(x = newbalanceDest, y = amount, alpha = type),
    colour = "cyan"
  ) +
  facet_wrap(~type, nrow = 2)
# Bar charts ----
# Base object maps `type` to x; every bar layer below replaces x.
f <- ggplot(data = Data, mapping = aes(x = type))

# Counts per isFraud / isFlaggedFraud value, stacked by transaction type.
f + geom_bar(mapping = aes(x = isFraud, fill = type))
f + geom_bar(mapping = aes(x = isFlaggedFraud, fill = type))

# Counts per isFraud value in a single colour.
f + geom_bar(mapping = aes(x = isFraud), fill = "blue")

# Boxplots of amount by type, built on the base plot `p` ----
p + geom_boxplot() + coord_flip()
p + geom_boxplot(notch = TRUE, colour = "red")
p + geom_boxplot(varwidth = TRUE)
p + geom_boxplot(fill = "blue", colour = "magenta")

# Hide the boxplot's own outlier markers and overlay jittered raw points.
p +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2)
# Histogram of newbalanceDest with a frequency polygon drawn on top ----
q <- ggplot(data = Data, mapping = aes(x = newbalanceDest))
q +
  geom_histogram() +
  geom_freqpoly()
library(ggplot2)
library(plotly)
# Scatterplot ----
ggplot(Data, aes(x = newbalanceDest, y = amount)) +
  geom_point()

# The same plot, kept as an object so layers can be added to it.
d <- ggplot(Data, aes(x = newbalanceDest, y = amount))
d + geom_point()

# Adding a regression line ----
d +
  geom_point() +
  geom_smooth(method = "lm")

# X and Y axis ----
r <- ggplot(Data, aes(x = newbalanceDest, y = amount)) +
  geom_point() +
  geom_smooth(method = "lm")
r

# Delete the points outside the limits ----
# xlim()/ylim() drop observations outside the range before the smoother is
# fitted. The x range matches the 0 to 1,000,000 window used for
# newbalanceDest in the later plots; the earlier upper limit of 0.1 removed
# practically every observation.
r +
  xlim(c(0, 1000000)) +
  ylim(c(0, 1000000))

# `r` itself was not modified above; print it again for comparison.
r
# Change colour and size of points ----
# Steel-blue points with a red linear fit. coord_cartesian() zooms the view
# to 0-1,000,000 on both axes.
j <- ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount)) +
  geom_point(colour = "steelblue", size = 2) +
  geom_smooth(method = "lm", colour = "red") +
  coord_cartesian(xlim = c(0, 1000000), ylim = c(0, 1000000)) +
  labs(
    title = "New balance Dest Vs Amount",
    subtitle = "From Data dataset",
    y = "Amount",
    x = "New balance Dest",
    caption = "Dataset of Payment"
  )
j

# Interactive version of the same plot.
ggplotly(j)
# Colour to reflect categories ----
jj <- ggplot(Data, aes(x = newbalanceDest, y = amount)) +
  geom_point(aes(col = type), size = 2) + # Colour varies with transaction type.
  geom_smooth(method = "lm", col = "red", size = 0.5) +
  coord_cartesian(xlim = c(0, 1000000), ylim = c(0, 1000000)) +
  labs(
    title = "New balance Dest Vs Amount",
    subtitle = "From Data dataset",
    y = "Amount",
    x = "New balance Dest",
    caption = "Dataset of Payment"
  )
jj
ggplotly(jj)

# Besides colour, size, shape, stroke (thickness of boundary) and fill
# (fill colour) can be used to discriminate groupings.

# Remove the legend. The documented value is lower-case "none"; "None" is not
# one of the accepted positions.
jj + theme(legend.position = "none")
jj + scale_colour_brewer(palette = "Set1") + theme(legend.position = "none")
# Base plot ----
# Rebuilds `jj` with smaller points (size 1), coloured by transaction type,
# plus a thin red linear fit; the view is zoomed to 0-1,000,000 on both axes.
jj <- ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount)) +
  geom_point(mapping = aes(colour = type), size = 1) +
  geom_smooth(method = "lm", colour = "red", size = 0.5) +
  coord_cartesian(xlim = c(0, 1000000), ylim = c(0, 1000000)) +
  labs(
    title = "New balance Dest Vs Amount",
    subtitle = "From Data dataset",
    y = "Amount",
    x = "New balance Dest",
    caption = "Dataset of Payment"
  )
jj

# Interactive version of the base plot.
ggplotly(jj)
# Change breaks ----
# `jj` is shown over 0 to 1,000,000 on x, so the breaks are spread over that
# window. The earlier breaks ran from 0 to 0.1 and all fell at the origin.
jj +
  scale_x_continuous(breaks = seq(0, 1000000, 100000))

# Change labels ----
# seq(0, 1000000, 100000) has 11 values, one per label in letters[1:11].
jj +
  scale_x_continuous(breaks = seq(0, 1000000, 100000), labels = letters[1:11])

# Change axis texts ----
# Both axes are labelled in thousands ("200K"); a percent format does not fit
# a balance axis.
jj +
  scale_x_continuous(
    breaks = seq(0, 1000000, 100000),
    labels = function(x) {
      paste0(x / 1000, "K")
    }
  ) +
  scale_y_continuous(
    breaks = seq(0, 1000000, 200000),
    labels = function(x) {
      paste0(x / 1000, "K")
    }
  )
# Animate ----
library(tidyverse)
library(gganimate)

# Show the data frame.
Data

# Animated scatter of newbalanceDest against amount, coloured by type.
payment <- ggplot(
  data = Data,
  mapping = aes(x = amount, y = newbalanceDest, colour = type)
) +
  geom_point() +
  labs(title = "Payment: {frame_time}", x = "Amount", y = "New Balance Dest") +
  # gganimate layers start here.
  # NOTE(review): `Time` is not referenced anywhere else in this script;
  # confirm that Data really has a column with this exact name.
  transition_time(Time) +
  ease_aes("linear") +
  theme_minimal()
payment
# You can also embed plots, for example:
# Note that the echo = FALSE parameter was added to the
# code chunk to prevent printing of the R code that generated the
# plot.