Final Project Publish

#Name: Tanjil Azad
#StudentID: 2012193630

#Final Project

# Load the data set from the working directory.
Data <- read.csv("Fraud Directory.csv")

# Preview the first and last ten rows.
head(Data, n = 10)
tail(Data, n = 10)

# Select columns by name; both forms return a data frame.
Data["amount"]
Data[, c("amount", "nameOrig")]

# Summary of every column.
summary(Data)

# Summary of the `amount` column alone.
summary(Data[["amount"]])

# Inspect the data frame: viewer, structure, one column, and column subsets.
# View() opens a spreadsheet-style viewer, so it is only called in an
# interactive session; it serves no purpose when the script is run in batch.
if (interactive()) {
  View(Data)
}
str(Data)
Data$amount
# Keep the first three columns. This must index `Data`; the lower-case `data`
# is the utils function, and subsetting it fails with
# "object of type 'closure' is not subsettable".
Data3 <- Data[, 1:3]
# First nine columns, returned as a data frame.
Data[1:9]


# Summaries of individual columns
summary(Data$nameOrig)
mean(Data$oldbalanceOrg)
sd(Data$newbalanceOrig)
median(Data$newbalanceOrig)


# Summarize categorical data
# `amount` is treated as a numeric measure everywhere else in this script
# (summary, axes, regression), so tabulating it yields one cell per distinct
# value. `type` is the column the script uses as a grouping variable (fill,
# shape, facets), so it is the one tabulated here.
x <- table(Data$type)
x
# Cross-tabulation of `type` against the `isFraud` column.
table(Data$type, Data$isFraud)


# Boxplot of the `newbalanceOrig` column.
boxplot(Data$newbalanceOrig,
        col = "blue")

# Boxplot of `amount` split by group. The right-hand side of the formula is
# the grouping variable: a continuous column such as `oldbalanceDest` would
# produce one box per distinct value, so the grouping column `type` is used.
boxplot(amount ~ type, data = Data,
        col = "blue")

# Histogram of `newbalanceOrig`; `breaks = 10` is a suggested number of bins.
hist(Data$newbalanceOrig, breaks = 10, col = "red")

# Data visualization with ggplot2

library("ggplot2")


# Base plot object: `amount` on x and `type` on y. The layers below either
# inherit this mapping or replace it with their own.
p <- ggplot(data = Data, mapping = aes(x = amount, y = type))

# Points with the inherited mapping, default and then custom appearance.
p + geom_point()

p + geom_point(size = 3, colour = "red")

# Layer-level mapping: newbalanceDest against amount, with `type` driving
# transparency and then point shape.
p + geom_point(aes(x = newbalanceDest, y = amount, alpha = type),
               colour = "blue")

p + geom_point(aes(x = newbalanceDest, y = amount, shape = type),
               colour = "green")

# Scatterplots for categorical

p + geom_point(aes(x = newbalanceDest, y = amount, alpha = type),
               colour = "red")

# Facets: one panel per `type`, arranged in two rows.

p +
  geom_point(aes(x = newbalanceDest, y = amount, alpha = type),
             colour = "cyan") +
  facet_wrap(~ type, nrow = 2)

#barchart

# Base plot object for the bar charts; each layer below supplies its own x.
f <- ggplot(data = Data, mapping = aes(x = type))

# Counts per `isFraud` value, bars filled by `type`.
f + geom_bar(aes(x = isFraud, fill = type))

# Counts per `isFlaggedFraud` value, bars filled by `type`.
f + geom_bar(aes(x = isFlaggedFraud, fill = type))

# Counts per `isFraud` value with a single fixed fill colour.
f + geom_bar(aes(x = isFraud), fill = "blue")

# Boxplot variants built on `p` (amount against type).

# Default boxplot with the coordinate axes swapped.
p + geom_boxplot() + coord_flip()

# Notched boxes drawn in red.
p + geom_boxplot(colour = "red", notch = TRUE)

# Box width varies with group size.
p + geom_boxplot(varwidth = TRUE)

# Blue fill with magenta outlines.
p + geom_boxplot(colour = "magenta", fill = "blue")

# Hide the outlier markers and overlay jittered points instead.
p +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2)

#histograms
# Histogram of `newbalanceDest` with a frequency polygon drawn on top.
q <- ggplot(data = Data, mapping = aes(x = newbalanceDest))

q + geom_histogram() + geom_freqpoly()


library(ggplot2)
library(plotly)

#Scatterplot

# Scatterplot of amount against newbalanceDest, built in one expression.
ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount)) + geom_point()

# The same plot, with the base object stored so layers can be added to it.
d <- ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount))

d + geom_point()

# Adding a regression line

d + geom_point() + geom_smooth(method = "lm")


#X and Y axis

# Scatterplot of amount against newbalanceDest with a linear-model fit.
r <- ggplot(Data, aes(x = newbalanceDest, y = amount)) +
  geom_point() +
  geom_smooth(method = "lm")
r

# Delete the points outside the limits.
# The x range matches the 0..1,000,000 window used for `newbalanceDest` in the
# later plots of this script; an upper limit of 0.1 removes nearly every
# observation and leaves an empty panel.
r +
  xlim(c(0, 1000000)) +
  ylim(c(0, 1000000))
# The expression above is only printed, not assigned, so `r` is still the
# unrestricted plot.
r

#Change color and size of points

# Styled scatterplot: steel-blue points, red linear fit, both axes zoomed to
# 0..1,000,000 via coord_cartesian(), plus titles and axis labels.
j <- ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount)) +
  geom_point(colour = "steelblue", size = 2) +
  geom_smooth(method = "lm", colour = "red") +
  coord_cartesian(xlim = c(0, 1000000), ylim = c(0, 1000000)) +
  labs(
    title = "New balance Dest Vs Amount",
    subtitle = "From Data dataset",
    x = "New balance Dest",
    y = "Amount",
    caption = "Dataset of Payment"
  )
j
# Interactive version of the same plot.
ggplotly(j)


#Color to reflect categories

# Scatterplot with point colour mapped to the `type` column, a thin red linear
# fit, and both axes zoomed to 0..1,000,000.
jj <- ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount)) +
  geom_point(aes(colour = type), size = 2) +
  geom_smooth(method = "lm", colour = "red", size = 0.5) +
  coord_cartesian(xlim = c(0, 1000000), ylim = c(0, 1000000)) +
  labs(
    title = "New balance Dest Vs Amount",
    subtitle = "From Data dataset",
    x = "New balance Dest",
    y = "Amount",
    caption = "Dataset of Payment"
  )
jj
# Interactive version of the same plot.
ggplotly(jj)


# Not just color: size, shape, stroke (thickness of boundary) and fill (fill color) can also be used to discriminate groupings.

# Hide the legend. The documented value for legend.position is the lower-case
# string "none"; "None" is not one of the accepted positions.
jj + theme(legend.position = "none")

# Same plot with the Set1 brewer palette for the point colours, legend hidden.
jj + scale_colour_brewer(palette = "Set1") + theme(legend.position = "none")
# Base plot for the axis examples: points coloured by the `type` column
# (size 1), a thin red linear fit, and both axes zoomed to 0..1,000,000.
jj <- ggplot(data = Data, mapping = aes(x = newbalanceDest, y = amount)) +
  geom_point(aes(colour = type), size = 1) +
  geom_smooth(method = "lm", colour = "red", size = 0.5) +
  coord_cartesian(xlim = c(0, 1000000), ylim = c(0, 1000000)) +
  labs(
    title = "New balance Dest Vs Amount",
    subtitle = "From Data dataset",
    x = "New balance Dest",
    y = "Amount",
    caption = "Dataset of Payment"
  )
jj
# Interactive version of the same plot.
ggplotly(jj)


# Change breaks
# `jj` shows the x axis over 0..1,000,000, so the breaks are placed across
# that range. Breaks confined to 0..0.1 would all collapse onto the origin.
jj +
  scale_x_continuous(breaks = seq(0, 1000000, 100000))

# Change labels
# `labels` must supply one entry per break: 11 breaks, so letters a..k.
jj +
  scale_x_continuous(breaks = seq(0, 1000000, 100000), labels = letters[1:11])

# Change axis texts
# Both axes share the 0..1,000,000 range, so both are labelled in thousands.
jj +
  scale_x_continuous(breaks = seq(0, 1000000, 200000),
                     labels = function(x) paste0(x / 1000, "K")) +
  scale_y_continuous(breaks = seq(0, 1000000, 200000),
                     labels = function(x) paste0(x / 1000, "K"))
#Animate

library(tidyverse)
library(gganimate)

# Print the data frame.
Data

# Animated scatterplot of newbalanceDest against amount, coloured by `type`.
# transition_time() steps the animation through the values of `Time`, and
# `{frame_time}` in the title is replaced with the value of the current frame.
# NOTE(review): `Time` is not referenced anywhere else in this script;
# confirm that the data set actually has a column with that name.
payment <- ggplot(Data, aes(x = amount, y = newbalanceDest, colour = type)) +
  geom_point() +
  labs(
    title = "Payment: {frame_time}",
    x = "Amount",
    y = "New Balance Dest"
  ) +
  # gganimate layers
  transition_time(Time) +
  ease_aes("linear") +
  theme_minimal()

# Printing the object renders the animation.
payment

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.