Step 1: Load the data

HWfiveStepOneurl <- "https://opendata.maryland.gov/api/views/pdvh-tf2u/rows.json?accessType=DOWNLOAD"
readJsonFromURL <- function(url){
   data_fromJson <- fromJSON(url)
   df <- data_fromJson$data
   return (df)
 }
HWfiveStepOnedf <- readJsonFromURL(HWfiveStepOneurl)
summary(HWfiveStepOnedf)
##       V1                 V2                 V3                 V4           
##  Length:18638       Length:18638       Length:18638       Length:18638      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##       V5                 V6                 V7                 V8           
##  Length:18638       Length:18638       Length:18638       Length:18638      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##       V9                V10                V11                V12           
##  Length:18638       Length:18638       Length:18638       Length:18638      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      V13                V14                V15                V16           
##  Length:18638       Length:18638       Length:18638       Length:18638      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      V17                V18                V19                V20           
##  Length:18638       Length:18638       Length:18638       Length:18638      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      V21                V22                V23                V24           
##  Length:18638       Length:18638       Length:18638       Length:18638      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      V25                V26           
##  Length:18638       Length:18638      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character

Step 2: Clean the data

After you load the data, remove the first 8 columns, and then, to make it easier to work with, name the rest of the columns as follows: Note, not surprisingly, it is in JSON format. You should be able to see that the first result is the metadata (information about the data) and the second is the actual data.

namesOfColumns <- c(“CASE_NUMBER”,“BARRACK”,“ACC_DATE”,“ACC_TIME”,“ACC_TIME_CODE”,“DAY_OF_WEEK”,“ROAD”,“INTERSECT_ROAD”, “DIST_FROM_INTERSECT”,“DIST_DIRECTION”,“CITY_NAME”,“COUNTY_CODE”, “COUNTY_NAME”,“VEHICLE_COUNT”,“PROP_DEST”,“INJURY”,“COLLI SION_WITH_1”,“COLLISION_WITH_2”)

HWfiveclean <- HWfiveStepOnedf[,-(1:8)]
namesOfColumns <- c("CASE_NUMBER","BARRACK","ACC_DATE","ACC_TIME","ACC_TIME_CODE","DAY_OF_WEEK","ROAD","INTERSECT_ROAD",  "DIST_FROM_INTERSECT","DIST_DIRECTION","CITY_NAME","COUNTY_CODE",  "COUNTY_NAME","VEHICLE_COUNT","PROP_DEST","INJURY","COLLI SION_WITH_1","COLLISION_WITH_2")
HWfiveCleandf <- function(df,namesOfColumns){
  colnames(df)<-namesOfColumns
  return(data.frame(df))
}
HWfiveNewColumnsdf <-(HWfiveCleandf(HWfiveclean,namesOfColumns))
print(head(HWfiveNewColumnsdf,9))
##   CASE_NUMBER          BARRACK            ACC_DATE ACC_TIME ACC_TIME_CODE
## 1  1363000002        Rockville 2012-01-01T00:00:00     2:01             1
## 2  1296000023           Berlin 2012-01-01T00:00:00    18:01             5
## 3  1283000016 Prince Frederick 2012-01-01T00:00:00     7:01             2
## 4  1282000006      Leonardtown 2012-01-01T00:00:00     0:01             1
## 5  1267000007            Essex 2012-01-01T00:00:00     1:01             1
## 6  1267000006            Essex 2012-01-01T00:00:00     1:01             1
## 7  1267000005            Essex 2012-01-01T00:00:00     1:01             1
## 8  1266000090     College Park 2012-01-01T00:00:00     6:01             2
## 9  1266000025     College Park 2012-01-01T00:00:00    14:01             4
##   DAY_OF_WEEK                      ROAD               INTERSECT_ROAD
## 1   SUNDAY     IS 00495 CAPITAL BELTWAY IS 00270 EISENHOWER MEMORIAL
## 2   SUNDAY    MD 00090 OCEAN CITY EXPWY  CO 00220 ST MARTINS NECK RD
## 3   SUNDAY             MD 00765 MAIN ST             CO 00208 DUKE ST
## 4   SUNDAY     MD 00944 MERVELL DEAN RD      MD 00235 THREE NOTCH RD
## 5   SUNDAY       IS 00695 BALTO BELTWAY    IS 00083 HARRISBURG EXPWY
## 6   SUNDAY    IS 00083 HARRISBURG EXPWY        MD 00137 MT CARMEL RD
## 7   SUNDAY             IS 00070 NO NAME       IS 00695 BALTO BELTWAY
## 8   SUNDAY             IS 00095 NO NAME      CO 00014 CHERRY HILL RD
## 9   SUNDAY     US 00050 JOHN HANSON HWY       MD 00410 VETERANS PKWY
##   DIST_FROM_INTERSECT DIST_DIRECTION      CITY_NAME COUNTY_CODE    COUNTY_NAME
## 1                   0              U Not Applicable          15     Montgomery
## 2                0.25              W Not Applicable          23      Worcester
## 3                 100              S Not Applicable           4        Calvert
## 4                  10              E Not Applicable          18      St. Marys
## 5                 100              S Not Applicable           3      Baltimore
## 6                0.25              S Not Applicable           3      Baltimore
## 7                 1.5              S Not Applicable           3      Baltimore
## 8                0.25              S Not Applicable          16 Prince Georges
## 9                0.25              E Not Applicable          16 Prince Georges
##   VEHICLE_COUNT PROP_DEST INJURY COLLI.SION_WITH_1 COLLISION_WITH_2
## 1             2       YES     NO               VEH  OTHER-COLLISION
## 2             1       YES     NO         FIXED OBJ  OTHER-COLLISION
## 3             1       YES     NO         FIXED OBJ        FIXED OBJ
## 4             1       YES     NO         FIXED OBJ  OTHER-COLLISION
## 5             2       YES     NO               VEH  OTHER-COLLISION
## 6          <NA>        NO    YES         FIXED OBJ  OTHER-COLLISION
## 7             1       YES     NO         FIXED OBJ  OTHER-COLLISION
## 8             2       YES     NO               VEH  OTHER-COLLISION
## 9             1       YES    YES         FIXED OBJ  OTHER-COLLISION

Step 3: Understand the data using SQL (via SQLDF)

How many accidents happen on SUNDAY

SundayAccidents <-sqldf("select RTRIM(DAY_OF_WEEK), COUNT(RTRIM(DAY_OF_WEEK)) as Occurances
                        FROM HWfiveNewColumnsdf
                        WHERE RTRIM(DAY_OF_WEEK) = 'SUNDAY'
                        Group BY 'SUNDAY'")
print(SundayAccidents)
##   RTRIM(DAY_OF_WEEK) Occurances
## 1             SUNDAY       2373

How many accidents had injuries (might need to remove NAs from the data). List the injuries by day.

NOTE: Explore the data using str(). You’ll notice SUNDAY has extra spaces. These need to be removed… or ignored. To ignore the extra space research and use the sql TRIM() command in your query. This transforms " SUNDAY" to “SUNDAY” internally.

InjuriesPerDay <-sqldf("select RTRIM(DAY_OF_WEEK), COUNT(RTRIM(DAY_OF_WEEK)) as NumofAccidents
                        FROM HWfiveNewColumnsdf
                        WHERE INJURY = 'YES'
                        Group BY RTRIM(DAY_OF_WEEK)")
print(InjuriesPerDay)
##   RTRIM(DAY_OF_WEEK) NumofAccidents
## 1             FRIDAY           1043
## 2             MONDAY            915
## 3           SATURDAY            950
## 4             SUNDAY            818
## 5           THURSDAY            968
## 6            TUESDAY            843
## 7          WEDNESDAY            896

Step 4: Understand the data using tapply

Answer the following questions (same as before) – compare results:

How many accidents happen on Sunday

TAPSundayAccidents<-tapply(HWfiveNewColumnsdf$CASE_NUMBER,
                           HWfiveNewColumnsdf$DAY_OF_WEEK,
                           length,simplify = FALSE)
cat("Sunday Accidents = ",TAPSundayAccidents$SUNDAY, "\n")
## Sunday Accidents =  2373

How many accidents had injuries (might need to remove NAs from the data). List the count of injuries by day.

YESInjury<- HWfiveNewColumnsdf[which(HWfiveNewColumnsdf$INJURY == 'YES'),]
TAPInjury <-tapply(YESInjury$INJURY,YESInjury$DAY_OF_WEEK,length)
print(TAPInjury)
## FRIDAY    MONDAY    SATURDAY  SUNDAY    THURSDAY  TUESDAY   WEDNESDAY 
##      1043       915       950       818       968       843       896