HWfiveStepOneurl <- "https://opendata.maryland.gov/api/views/pdvh-tf2u/rows.json?accessType=DOWNLOAD"
readJsonFromURL <- function(url){
data_fromJson <- fromJSON(url)
df <- data_fromJson$data
return (df)
}
HWfiveStepOnedf <- readJsonFromURL(HWfiveStepOneurl)
summary(HWfiveStepOnedf)
## V1 V2 V3 V4
## Length:18638 Length:18638 Length:18638 Length:18638
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V5 V6 V7 V8
## Length:18638 Length:18638 Length:18638 Length:18638
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V9 V10 V11 V12
## Length:18638 Length:18638 Length:18638 Length:18638
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V13 V14 V15 V16
## Length:18638 Length:18638 Length:18638 Length:18638
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V17 V18 V19 V20
## Length:18638 Length:18638 Length:18638 Length:18638
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V21 V22 V23 V24
## Length:18638 Length:18638 Length:18638 Length:18638
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## V25 V26
## Length:18638 Length:18638
## Class :character Class :character
## Mode :character Mode :character
After you load the data, remove the first 8 columns, and then, to make it easier to work with, name the rest of the columns as follows: Note, not surprisingly, it is in JSON format. You should be able to see that the first result is the metadata (information about the data) and the second is the actual data.
namesOfColumns <- c(“CASE_NUMBER”,“BARRACK”,“ACC_DATE”,“ACC_TIME”,“ACC_TIME_CODE”,“DAY_OF_WEEK”,“ROAD”,“INTERSECT_ROAD”, “DIST_FROM_INTERSECT”,“DIST_DIRECTION”,“CITY_NAME”,“COUNTY_CODE”, “COUNTY_NAME”,“VEHICLE_COUNT”,“PROP_DEST”,“INJURY”,“COLLI SION_WITH_1”,“COLLISION_WITH_2”)
HWfiveclean <- HWfiveStepOnedf[,-(1:8)]
namesOfColumns <- c("CASE_NUMBER","BARRACK","ACC_DATE","ACC_TIME","ACC_TIME_CODE","DAY_OF_WEEK","ROAD","INTERSECT_ROAD", "DIST_FROM_INTERSECT","DIST_DIRECTION","CITY_NAME","COUNTY_CODE", "COUNTY_NAME","VEHICLE_COUNT","PROP_DEST","INJURY","COLLI SION_WITH_1","COLLISION_WITH_2")
HWfiveCleandf <- function(df,namesOfColumns){
colnames(df)<-namesOfColumns
return(data.frame(df))
}
HWfiveNewColumnsdf <-(HWfiveCleandf(HWfiveclean,namesOfColumns))
print(head(HWfiveNewColumnsdf,9))
## CASE_NUMBER BARRACK ACC_DATE ACC_TIME ACC_TIME_CODE
## 1 1363000002 Rockville 2012-01-01T00:00:00 2:01 1
## 2 1296000023 Berlin 2012-01-01T00:00:00 18:01 5
## 3 1283000016 Prince Frederick 2012-01-01T00:00:00 7:01 2
## 4 1282000006 Leonardtown 2012-01-01T00:00:00 0:01 1
## 5 1267000007 Essex 2012-01-01T00:00:00 1:01 1
## 6 1267000006 Essex 2012-01-01T00:00:00 1:01 1
## 7 1267000005 Essex 2012-01-01T00:00:00 1:01 1
## 8 1266000090 College Park 2012-01-01T00:00:00 6:01 2
## 9 1266000025 College Park 2012-01-01T00:00:00 14:01 4
## DAY_OF_WEEK ROAD INTERSECT_ROAD
## 1 SUNDAY IS 00495 CAPITAL BELTWAY IS 00270 EISENHOWER MEMORIAL
## 2 SUNDAY MD 00090 OCEAN CITY EXPWY CO 00220 ST MARTINS NECK RD
## 3 SUNDAY MD 00765 MAIN ST CO 00208 DUKE ST
## 4 SUNDAY MD 00944 MERVELL DEAN RD MD 00235 THREE NOTCH RD
## 5 SUNDAY IS 00695 BALTO BELTWAY IS 00083 HARRISBURG EXPWY
## 6 SUNDAY IS 00083 HARRISBURG EXPWY MD 00137 MT CARMEL RD
## 7 SUNDAY IS 00070 NO NAME IS 00695 BALTO BELTWAY
## 8 SUNDAY IS 00095 NO NAME CO 00014 CHERRY HILL RD
## 9 SUNDAY US 00050 JOHN HANSON HWY MD 00410 VETERANS PKWY
## DIST_FROM_INTERSECT DIST_DIRECTION CITY_NAME COUNTY_CODE COUNTY_NAME
## 1 0 U Not Applicable 15 Montgomery
## 2 0.25 W Not Applicable 23 Worcester
## 3 100 S Not Applicable 4 Calvert
## 4 10 E Not Applicable 18 St. Marys
## 5 100 S Not Applicable 3 Baltimore
## 6 0.25 S Not Applicable 3 Baltimore
## 7 1.5 S Not Applicable 3 Baltimore
## 8 0.25 S Not Applicable 16 Prince Georges
## 9 0.25 E Not Applicable 16 Prince Georges
## VEHICLE_COUNT PROP_DEST INJURY COLLI.SION_WITH_1 COLLISION_WITH_2
## 1 2 YES NO VEH OTHER-COLLISION
## 2 1 YES NO FIXED OBJ OTHER-COLLISION
## 3 1 YES NO FIXED OBJ FIXED OBJ
## 4 1 YES NO FIXED OBJ OTHER-COLLISION
## 5 2 YES NO VEH OTHER-COLLISION
## 6 <NA> NO YES FIXED OBJ OTHER-COLLISION
## 7 1 YES NO FIXED OBJ OTHER-COLLISION
## 8 2 YES NO VEH OTHER-COLLISION
## 9 1 YES YES FIXED OBJ OTHER-COLLISION
How many accidents happen on SUNDAY
SundayAccidents <-sqldf("select RTRIM(DAY_OF_WEEK), COUNT(RTRIM(DAY_OF_WEEK)) as Occurances
FROM HWfiveNewColumnsdf
WHERE RTRIM(DAY_OF_WEEK) = 'SUNDAY'
Group BY 'SUNDAY'")
print(SundayAccidents)
## RTRIM(DAY_OF_WEEK) Occurances
## 1 SUNDAY 2373
How many accidents had injuries (might need to remove NAs from the data). List the injuries by day.
NOTE: Explore the data using str(). You’ll notice SUNDAY has extra spaces. These need to be removed… or ignored. To ignore the extra space research and use the sql TRIM() command in your query. This transforms " SUNDAY" to “SUNDAY” internally.
InjuriesPerDay <-sqldf("select RTRIM(DAY_OF_WEEK), COUNT(RTRIM(DAY_OF_WEEK)) as NumofAccidents
FROM HWfiveNewColumnsdf
WHERE INJURY = 'YES'
Group BY RTRIM(DAY_OF_WEEK)")
print(InjuriesPerDay)
## RTRIM(DAY_OF_WEEK) NumofAccidents
## 1 FRIDAY 1043
## 2 MONDAY 915
## 3 SATURDAY 950
## 4 SUNDAY 818
## 5 THURSDAY 968
## 6 TUESDAY 843
## 7 WEDNESDAY 896
Answer the following questions (same as before) – compare results:
How many accidents happen on Sunday
TAPSundayAccidents<-tapply(HWfiveNewColumnsdf$CASE_NUMBER,
HWfiveNewColumnsdf$DAY_OF_WEEK,
length,simplify = FALSE)
cat("Sunday Accidents = ",TAPSundayAccidents$SUNDAY, "\n")
## Sunday Accidents = 2373
How many accidents had injuries (might need to remove NAs from the data). List the count of injuries by day.
YESInjury<- HWfiveNewColumnsdf[which(HWfiveNewColumnsdf$INJURY == 'YES'),]
TAPInjury <-tapply(YESInjury$INJURY,YESInjury$DAY_OF_WEEK,length)
print(TAPInjury)
## FRIDAY MONDAY SATURDAY SUNDAY THURSDAY TUESDAY WEDNESDAY
## 1043 915 950 818 968 843 896