library(readxl)
## Warning: package 'readxl' was built under R version 3.4.4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the "Complaints_Allegations" worksheet of the CCRB workbook into `hw`
# and print a per-column summary of the loaded data.
# NOTE(review): the workbook path is an absolute, user-specific location;
# confirm it exists on the machine that knits this report.
ccrb_workbook <- "C:/Users/Sachin/Desktop/HU/512 kevin/problem 5/ccrb_datatransparencyinitiative.xlsx"
hw <- read_excel(ccrb_workbook, sheet = "Complaints_Allegations")
summary(hw)
##    DateStamp          UniqueComplaintId   Close Year   Received Year 
##  Min.   :2016-11-29   Min.   :    1     Min.   :2006   Min.   :1999  
##  1st Qu.:2016-11-29   1st Qu.:17356     1st Qu.:2008   1st Qu.:2007  
##  Median :2016-11-29   Median :34794     Median :2010   Median :2009  
##  Mean   :2016-11-29   Mean   :34778     Mean   :2010   Mean   :2010  
##  3rd Qu.:2016-11-29   3rd Qu.:52204     3rd Qu.:2013   3rd Qu.:2012  
##  Max.   :2016-11-29   Max.   :69492     Max.   :2016   Max.   :2016  
##  Borough of Occurrence Is Full Investigation Complaint Has Video Evidence
##  Length:204397         Mode :logical         Mode :logical               
##  Class :character      FALSE:107084          FALSE:195530                
##  Mode  :character      TRUE :97313           TRUE :8867                  
##                                                                          
##                                                                          
##                                                                          
##  Complaint Filed Mode Complaint Filed Place
##  Length:204397        Length:204397        
##  Class :character     Class :character     
##  Mode  :character     Mode  :character     
##                                            
##                                            
##                                            
##  Complaint Contains Stop & Frisk Allegations Incident Location 
##  Mode :logical                               Length:204397     
##  FALSE:119856                                Class :character  
##  TRUE :84541                                 Mode  :character  
##                                                                
##                                                                
##                                                                
##  Incident Year  Encounter Outcome  Reason For Initial Contact
##  Min.   :1999   Length:204397      Length:204397             
##  1st Qu.:2007   Class :character   Class :character          
##  Median :2009   Mode  :character   Mode  :character          
##  Mean   :2010                                                
##  3rd Qu.:2012                                                
##  Max.   :2016                                                
##  Allegation FADO Type Allegation Description
##  Length:204397        Length:204397         
##  Class :character     Class :character      
##  Mode  :character     Mode  :character      
##                                             
##                                             
## 
  1. Location by Borough of occurrence
# Pie chart of complaints per borough of occurrence.
# `hw` has more rows than distinct UniqueComplaintId values, so it is first
# reduced to one row per complaint/borough pair. Tabulating the raw rows
# would count a complaint once for every allegation row it has, unlike the
# other per-complaint charts in this report.
borough_per_complaint <- unique(hw[c("UniqueComplaintId", "Borough of Occurrence")])
hw1 <- table(borough_per_complaint[["Borough of Occurrence"]])

# Slice labels: borough name on the first line, complaint count on the second.
l1 <- paste0(names(hw1), "\n", hw1)

# pie() is called for its plotting side effect only.
pie(hw1, labels = l1, col = rainbow(length(l1)), main = "Location by Borough")

  2. Yearly complaints received
# Set knitr's default chunk option so chunk source code is echoed.
knitr::opts_chunk$set(echo = TRUE)

# One row per (complaint id, received year) pair. data.frame() rewrites the
# column name "Received Year" as Received.Year, which aes() refers to below.
hw2 <- hw[, c("UniqueComplaintId", "Received Year")] %>%
  unique() %>%
  data.frame()

# Bar chart: number of deduplicated complaints for each received year.
ggplot(data = hw2, mapping = aes(x = Received.Year)) +
  geom_bar() +
  labs(
    title = "Yearly complaints received",
    x = "Year",
    y = "Number of Complaints"
  )

  3. Places where complaints were filed
library(magrittr)
# Bar chart of complaints by the place where they were filed.
# Reduce `hw` to one row per complaint/place pair so a complaint with several
# allegation rows is counted once. data.frame() rewrites the column name
# "Complaint Filed Place" as Complaint.Filed.Place.
hw3 <- unique(hw[c("UniqueComplaintId", "Complaint Filed Place")])
hw3 <- data.frame(hw3)

# The category (place) goes on the bar axis and the count on the value axis;
# coord_flip() draws the bars horizontally so place names stay readable.
ggplot(hw3, aes(x = Complaint.Filed.Place)) +
  geom_bar() +
  coord_flip() +
  labs(
    title = "Places where complaints filed",
    x = "Place of Complaint Filed",
    y = "Number of Complaints"
  )

  4. Mode of complaints
# Complaints per received year, broken down by how each complaint was filed.
# The filing mode is kept alongside the year so the chart actually shows the
# "mode" named in its title. Rows are deduplicated so each complaint is
# counted once per year/mode combination.
hw4 <- unique(hw[c("UniqueComplaintId", "Received Year", "Complaint Filed Mode")])
hw4 <- data.frame(hw4)

# geom_bar() counts rows per x value by default. It replaces
# geom_histogram(stat = "count"), which passes binning parameters that the
# count stat does not accept and therefore emits a warning.
ggplot(hw4, aes(x = Received.Year, fill = Complaint.Filed.Mode)) +
  geom_bar() +
  coord_flip() +
  labs(
    title = "Mode of Complaints",
    x = "Received Year",
    y = "Count",
    fill = "Complaint Filed Mode"
  )

Note that the `echo = TRUE` option is set for the code chunks, so the R code that generated each plot is printed along with the plot.

  5. Type of Allegations
# Pie chart of allegation rows by FADO type.
# Every row of `hw` is tabulated by its allegation type, and the table is
# converted to a two-column data frame (Type, Freq) for ggplot.
# The label vector that used to be built here was never used, so it is gone.
hw5 <- data.frame(table(hw$`Allegation FADO Type`))
colnames(hw5) <- c("Type", "Freq")

# A single stacked bar drawn in polar coordinates renders as a pie chart.
ggplot(hw5, aes(x = "", y = Freq, fill = Type)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  theme_minimal() +
  labs(
    fill = "Type",
    x = NULL,
    y = NULL,
    title = "Types of Allegations"
  ) +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))

# Complaints with and without video evidence.
# Deduplicate on complaint id, incident year and the video-evidence flag.
# data.frame() makes the column names syntactic (spaces become dots).
hw6 <- hw[, c("UniqueComplaintId", "Incident Year", "Complaint Has Video Evidence")] %>%
  unique() %>%
  data.frame()

# One bar per flag value, filled by the same flag. The legend sits on the
# right and has no title.
ggplot(
  hw6,
  aes(x = Complaint.Has.Video.Evidence, fill = Complaint.Has.Video.Evidence)
) +
  geom_bar() +
  theme(legend.position = "right", legend.title = element_blank()) +
  labs(title = "Video Evidence", x = "Complaints", y = "Number")

7. Complaint Filed Mode

# Number of complaints for each filing mode.
# Keep one row per (complaint id, filing mode) pair. data.frame() rewrites
# the column name "Complaint Filed Mode" as Complaint.Filed.Mode.
filed_mode_cols <- c("UniqueComplaintId", "Complaint Filed Mode")
hw7 <- data.frame(unique(hw[filed_mode_cols]))

# One bar per filing mode, filled by the same variable.
ggplot(hw7, aes(x = Complaint.Filed.Mode, fill = Complaint.Filed.Mode)) +
  geom_bar(position = "stack") +
  labs(title = "Complaint Filed Mode")

8. Frequent incident types

# Number of complaints that contain each allegation description.
# A complaint is counted once per distinct description it contains.
hw8 <- hw[, c("UniqueComplaintId", "Allegation Description")] %>%
  unique() %>%
  data.frame()

# Full-width bars; x-axis labels are rotated 90 degrees and right-aligned.
ggplot(hw8, aes(x = Allegation.Description)) +
  geom_bar(width = 1, position = position_dodge(width = 1)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Frequent Incidents Types")

9. Outcome

# Number of complaints for each encounter outcome.
# Keep one row per (complaint id, outcome) pair. data.frame() rewrites the
# column name "Encounter Outcome" as Encounter.Outcome.
hw9 <- data.frame(unique(hw[, c("UniqueComplaintId", "Encounter Outcome")]))

ggplot(hw9, aes(x = Encounter.Outcome)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Graph 9: Cases by Encounter Outcome",
    x = "Encounter Outcome",
    y = "Number of Cases"
  )

10. Trends

# Trend in the number of complaints received per year.
# Keep one row per (complaint id, received year) pair. data.frame() rewrites
# the column name "Received Year" as Received.Year.
hw10 <- unique(hw[c("UniqueComplaintId", "Received Year")])
hw10 <- data.frame(hw10)

# stat = "count" sets the line height to the number of rows (deduplicated
# complaints) in each year. Mapping y to UniqueComplaintId would draw the raw
# id values rather than a yearly total.
ggplot(hw10, aes(x = Received.Year, group = 1)) +
  geom_line(stat = "count", size = 1) +
  labs(
    title = "Trends",
    x = "Received Year",
    y = "Number of Complaints"
  )