Data Manipulation in R (Base R / dplyr)

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the state-level crime data. stringsAsFactors = TRUE is spelled out
# because R >= 4.0.0 no longer converts strings to factors by default, and
# the per-level counts printed by summary() below only appear when State,
# Type.of.Crime and Crime are factors.
crime.by.state <- read.csv("CrimeStatebyState.csv", stringsAsFactors = TRUE)
# Column names of the data frame.
names(crime.by.state)
## [1] "State"         "Type.of.Crime" "Crime"         "Year"         
## [5] "Count"
# Number of rows and number of columns.
dim(crime.by.state)
## [1] 16422     5
# Per-column summary: level counts for factors, quantiles for numbers.
summary(crime.by.state)
##         State              Type.of.Crime 
##  Alabama   :  322   Property Crime:7038  
##  Alaska    :  322   Violent Crime :9384  
##  Arizona   :  322                        
##  Arkansas  :  322                        
##  California:  322                        
##  Colorado  :  322                        
##  (Other)   :14490                        
##                                   Crime           Year     
##  Aggravated assault                  :2346   Min.   :1960  
##  Burglary                            :2346   1st Qu.:1971  
##  Forcible rape                       :2346   Median :1982  
##  Larceny-theft                       :2346   Mean   :1982  
##  Motor vehicle theft                 :2346   3rd Qu.:1994  
##  Murder and nonnegligent Manslaughter:2346   Max.   :2005  
##  Robbery                             :2346                 
##      Count       
##  Min.   :     0  
##  1st Qu.:   669  
##  Median :  4620  
##  Mean   : 29606  
##  3rd Qu.: 24114  
##  Max.   :986120  
## 
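# df[r, c]: r selects rows and c selects columns; leaving either one empty
# keeps all rows (or all columns).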

Using Base R

Filtering rows
# Keep only the rows for New York in 2005. The row condition goes before the
# comma; the empty slot after it keeps every column.
crime.ny.2005 <- crime.by.state[crime.by.state$Year == 2005 &
                                  crime.by.state$State == "New York", ]
# Alternative: subset() evaluates its condition inside the data frame, so the
# columns are referenced by bare name. Unlike `[`, it also drops rows where
# the condition is NA.
crime.ny.2005 <- subset(crime.by.state, Year == 2005 & State == "New York")
Arranging and ordering
# Sort the rows from the highest Count to the lowest.
crime.ny.2005 <- crime.ny.2005[
  with(crime.ny.2005, order(Count, decreasing = TRUE)),
]
Selecting columns
# Keep just the crime category and its count; indexing a data frame with a
# character vector alone selects columns.
crime.ny.2005 <- crime.ny.2005[c("Type.of.Crime", "Count")]
Creating new columns
# Share of the overall count contributed by each row.
crime.ny.2005[["Proportion"]] <- with(crime.ny.2005, Count / sum(Count))
Aggregation and summarization
# Total count per crime category.
summary1 <- aggregate(Count ~ Type.of.Crime,
                      data = crime.ny.2005,
                      FUN = sum)
# Number of rows per crime category.
summary2 <- aggregate(Count ~ Type.of.Crime,
                      data = crime.ny.2005,
                      FUN = length)
# Both aggregates call their value column "Count", so a plain merge() would
# produce the ambiguous Count.x / Count.y. Rename the value columns on the
# way in so the result has the same columns as the dplyr summary:
# Type.of.Crime, num.types, counts. summary1 and summary2 are left unchanged.
summary.crime.ny.2005 <- merge(
  setNames(summary2, c("Type.of.Crime", "num.types")),
  setNames(summary1, c("Type.of.Crime", "counts")),
  by = "Type.of.Crime"
)

Using the dplyr package

Filtering rows
# Keep only the rows for New York in 2005; columns are referenced by bare name.
crime.ny.2005 <- filter(crime.by.state, Year == 2005 & State == "New York")
## Warning: package 'bindrcpp' was built under R version 3.4.2
Arranging and ordering
# Sort the rows from the highest Count to the lowest.
crime.ny.2005 <- arrange(.data = crime.ny.2005, desc(Count))
Selecting columns
# Keep just the crime category and its count.
crime.ny.2005 <- select(crime.ny.2005, c(Type.of.Crime, Count))
Creating new columns
# Add each row's share of the overall count as a new column.
crime.ny.2005 <- mutate(
  .data = crime.ny.2005,
  Proportion = Count / sum(Count)
)
Aggregation and summarization
# Group the rows by crime category, then collapse each group to a single row
# holding the number of rows in the group and the total of their counts.
by.type <- group_by(.data = crime.ny.2005, Type.of.Crime)
summary.crime.ny.2005 <- summarise(
  .data = by.type,
  num.types = n(),
  counts = sum(Count)
)

Chaining (nesting function calls)

# The same steps written as nested calls, which read from the inside out:
# filter() runs first, then arrange(), then mutate().
final <- mutate(
  arrange(
    filter(crime.by.state, Year == 2005 & State == "New York"),
    desc(Count)
  ),
  Proportion = Count / sum(Count)
)

Piping, or using the pipe operator (%>%)

# The same pipeline with %>%: each step's result becomes the first argument
# of the next call, so the steps read top to bottom.
final.piped <- crime.by.state %>%
  filter(Year == 2005 & State == "New York") %>%
  arrange(desc(Count)) %>%
  mutate(Proportion = Count / sum(Count))