---
title: "MATH2349 Semester 1, 2018"
author: "Mohammad Hanif Razzak (s3650497)"
subtitle: Assignment 1
output:
  html_notebook: default
---
library(readr)
library(readxl)
library(foreign)
library(RODBC)
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
## Load the raw fatalities CSV; the first data row is removed further below ##
getwd()
## [1] "/Users/mohammadrazzak/Documents/University/RMIT/data preprocessing/assignment1"
library(readr)
bitrearddfatalitiesfebruary2018 <- read_csv(
  file = "bitrearddfatalitiesfebruary2018.csv"
)
## Parsed with column specification:
## cols(
## `Crash ID` = col_character(),
## State = col_character(),
## Month = col_character(),
## Year = col_character(),
## Dayweek = col_character(),
## Time = col_character(),
## `Crash Type` = col_character(),
## `Bus
## Involvement` = col_character(),
## `Rigid Truck
## Involvement` = col_character(),
## `Articulated Truck
## Involvement` = col_character(),
## `Speed Limit` = col_integer(),
## `Road User` = col_character(),
## Gender = col_character(),
## Age = col_integer()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 2)
## Warning: 1 parsing failure.
## row # A tibble: 1 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 1 <NA> 14 columns 7 columns 'bitrearddfatalitiesfebruary2018.csv' file # A tibble: 1 x 5
# View() opens an interactive data viewer, so it is only called in an
# interactive session; scripted or knitted runs skip it instead of failing.
if (interactive()) {
  View(bitrearddfatalitiesfebruary2018)
}
# Drop the first data row, which readr reported as a parsing failure
# (7 columns where 14 were expected).
RoadFatalitiesfebruary2018 <- bitrearddfatalitiesfebruary2018[-1, ]
if (interactive()) {
  View(RoadFatalitiesfebruary2018)
}
## View data class and head ##
# class() reports the object classes of the tibble.
class(RoadFatalitiesfebruary2018)
## [1] "tbl_df" "tbl" "data.frame"
# head() must be given the data set itself; calling head(class) only prints
# the `class` primitive function, so that stray call has been removed.
head(RoadFatalitiesfebruary2018)
## # A tibble: 6 x 14
## `Crash ID` State Month Year Dayweek Time `Crash Type` `Bus \nInvolvem…
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 201830006 QLD Febr… 2018 Tuesday 19:00 Single vehi… No
## 2 201850008 WA Febr… 2018 Tuesday 16:30 Single vehi… No
## 3 201810056 NSW Febr… 2018 Monday 12:20 Pedestrian No
## 4 201820024 VIC Febr… 2018 Monday 21:55 Pedestrian -9
## 5 201830032 QLD Febr… 2018 Monday 13:00 Multiple ve… No
## 6 201810012 NSW Febr… 2018 Sunday 13:20 Single vehi… No
## # ... with 6 more variables: `Rigid Truck \nInvolvement` <chr>,
## # `Articulated Truck \nInvolvement` <chr>, `Speed Limit` <int>, `Road
## # User` <chr>, Gender <chr>, Age <int>
## View the data dimension ##
dim(RoadFatalitiesfebruary2018)
## [1] 48782 14
## Check for double variables ##
# A data frame is a list of columns, so typeof() on the whole object always
# reports "list". Query the storage type of every column instead, which shows
# directly whether any column is stored as "double".
dbl_var <- RoadFatalitiesfebruary2018
vapply(dbl_var, typeof, character(1))
# NOTE(review): the previously recorded output ("list") no longer applies;
# re-run this chunk to regenerate the printed result.
## Checking data types of variables ##
# read_csv() keeps the original column names (including spaces), so they must
# be referenced with backticks or [[ ]]. Dotted names such as Crash.Type do
# not exist in this tibble and only return NULL with a warning.
class(RoadFatalitiesfebruary2018$`Crash Type`)
## [1] "character"
class(RoadFatalitiesfebruary2018$`Crash ID`)
## [1] "character"
class(RoadFatalitiesfebruary2018$Month)
## [1] "character"
class(RoadFatalitiesfebruary2018$Year)
## [1] "character"
class(RoadFatalitiesfebruary2018$State)
## [1] "character"
class(RoadFatalitiesfebruary2018$Dayweek)
## [1] "character"
class(RoadFatalitiesfebruary2018$Time)
## [1] "character"
# This column name contains a space followed by a line break, so it is
# spelled out as a string with an explicit "\n".
class(RoadFatalitiesfebruary2018[["Bus \nInvolvement"]])
## [1] "character"
## Convert the categorical columns to factors and list their levels ##
RoadFatalitiesfebruary2018[["State"]] <- factor(
  RoadFatalitiesfebruary2018[["State"]]
)
levels(RoadFatalitiesfebruary2018[["State"]])
## [1] "ACT" "NSW" "NT" "QLD" "SA" "TAS" "VIC" "WA"
RoadFatalitiesfebruary2018[["Month"]] <- factor(
  RoadFatalitiesfebruary2018[["Month"]]
)
levels(RoadFatalitiesfebruary2018[["Month"]])
## [1] "April" "August" "December" "February" "January"
## [6] "July" "June" "March" "May" "November"
## [11] "October" "September"
RoadFatalitiesfebruary2018[["Year"]] <- factor(
  RoadFatalitiesfebruary2018[["Year"]]
)
levels(RoadFatalitiesfebruary2018[["Year"]])
## [1] "1989" "1990" "1991" "1992" "1993" "1994" "1995" "1996" "1997" "1998"
## [11] "1999" "2000" "2001" "2002" "2003" "2004" "2005" "2006" "2007" "2008"
## [21] "2009" "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
RoadFatalitiesfebruary2018[["Dayweek"]] <- factor(
  RoadFatalitiesfebruary2018[["Dayweek"]]
)
levels(RoadFatalitiesfebruary2018[["Dayweek"]])
## [1] "Friday" "Monday" "Saturday" "Sunday" "Thursday" "Tuesday"
## [7] "Wednesday"
RoadFatalitiesfebruary2018[["Time"]] <- factor(
  RoadFatalitiesfebruary2018[["Time"]]
)
levels(RoadFatalitiesfebruary2018[["Time"]])
## [1] "-9" "0:00" "0:01" "0:02" "0:03" "0:04" "0:05" "0:06"
## [9] "0:07" "0:08" "0:09" "0:10" "0:11" "0:12" "0:13" "0:14"
## [17] "0:15" "0:16" "0:17" "0:18" "0:20" "0:21" "0:22" "0:23"
## [25] "0:24" "0:25" "0:26" "0:27" "0:28" "0:29" "0:30" "0:31"
## [33] "0:32" "0:33" "0:34" "0:35" "0:36" "0:37" "0:38" "0:39"
## [41] "0:40" "0:41" "0:42" "0:43" "0:45" "0:46" "0:47" "0:48"
## [49] "0:49" "0:50" "0:51" "0:52" "0:53" "0:54" "0:55" "0:56"
## [57] "0:57" "0:58" "0:59" "1:00" "1:01" "1:02" "1:03" "1:04"
## [65] "1:05" "1:06" "1:07" "1:08" "1:09" "1:10" "1:11" "1:12"
## [73] "1:13" "1:14" "1:15" "1:16" "1:17" "1:18" "1:19" "1:20"
## [81] "1:21" "1:22" "1:23" "1:25" "1:26" "1:27" "1:28" "1:29"
## [89] "1:30" "1:32" "1:33" "1:34" "1:35" "1:36" "1:37" "1:38"
## [97] "1:39" "1:40" "1:41" "1:42" "1:43" "1:44" "1:45" "1:46"
## [105] "1:47" "1:48" "1:49" "1:50" "1:51" "1:52" "1:53" "1:54"
## [113] "1:55" "1:56" "1:57" "1:58" "1:59" "10:00" "10:01" "10:02"
## [121] "10:03" "10:04" "10:05" "10:06" "10:07" "10:08" "10:09" "10:10"
## [129] "10:11" "10:12" "10:13" "10:14" "10:15" "10:16" "10:17" "10:18"
## [137] "10:19" "10:20" "10:22" "10:23" "10:24" "10:25" "10:26" "10:27"
## [145] "10:28" "10:29" "10:30" "10:31" "10:32" "10:33" "10:34" "10:35"
## [153] "10:36" "10:37" "10:38" "10:39" "10:40" "10:41" "10:42" "10:43"
## [161] "10:44" "10:45" "10:46" "10:47" "10:48" "10:49" "10:50" "10:51"
## [169] "10:52" "10:53" "10:54" "10:55" "10:56" "10:57" "10:58" "10:59"
## [177] "11:00" "11:01" "11:02" "11:03" "11:04" "11:05" "11:06" "11:07"
## [185] "11:08" "11:09" "11:10" "11:11" "11:12" "11:13" "11:14" "11:15"
## [193] "11:16" "11:17" "11:18" "11:19" "11:20" "11:21" "11:22" "11:23"
## [201] "11:24" "11:25" "11:26" "11:27" "11:28" "11:29" "11:30" "11:31"
## [209] "11:32" "11:33" "11:34" "11:35" "11:36" "11:37" "11:38" "11:39"
## [217] "11:40" "11:41" "11:42" "11:43" "11:44" "11:45" "11:46" "11:47"
## [225] "11:48" "11:49" "11:50" "11:51" "11:52" "11:53" "11:54" "11:55"
## [233] "11:56" "11:57" "11:58" "11:59" "12:00" "12:01" "12:02" "12:03"
## [241] "12:04" "12:05" "12:06" "12:07" "12:08" "12:09" "12:10" "12:11"
## [249] "12:12" "12:13" "12:14" "12:15" "12:16" "12:17" "12:18" "12:19"
## [257] "12:20" "12:21" "12:22" "12:23" "12:24" "12:25" "12:26" "12:27"
## [265] "12:28" "12:29" "12:30" "12:31" "12:32" "12:33" "12:34" "12:35"
## [273] "12:36" "12:37" "12:38" "12:39" "12:40" "12:41" "12:42" "12:43"
## [281] "12:44" "12:45" "12:46" "12:47" "12:48" "12:49" "12:50" "12:52"
## [289] "12:53" "12:54" "12:55" "12:56" "12:57" "12:58" "12:59" "13:00"
## [297] "13:01" "13:02" "13:03" "13:04" "13:05" "13:06" "13:07" "13:08"
## [305] "13:09" "13:10" "13:11" "13:12" "13:13" "13:14" "13:15" "13:17"
## [313] "13:18" "13:19" "13:20" "13:21" "13:22" "13:23" "13:24" "13:25"
## [321] "13:26" "13:27" "13:28" "13:29" "13:30" "13:31" "13:32" "13:33"
## [329] "13:34" "13:35" "13:36" "13:37" "13:38" "13:39" "13:40" "13:41"
## [337] "13:42" "13:43" "13:44" "13:45" "13:46" "13:47" "13:48" "13:49"
## [345] "13:50" "13:51" "13:52" "13:53" "13:54" "13:55" "13:56" "13:57"
## [353] "13:58" "13:59" "14:00" "14:01" "14:02" "14:03" "14:04" "14:05"
## [361] "14:06" "14:07" "14:08" "14:09" "14:10" "14:11" "14:12" "14:13"
## [369] "14:14" "14:15" "14:16" "14:17" "14:18" "14:19" "14:20" "14:21"
## [377] "14:22" "14:23" "14:24" "14:25" "14:26" "14:27" "14:28" "14:29"
## [385] "14:30" "14:31" "14:32" "14:33" "14:34" "14:35" "14:36" "14:37"
## [393] "14:38" "14:39" "14:40" "14:41" "14:42" "14:43" "14:44" "14:45"
## [401] "14:46" "14:47" "14:48" "14:49" "14:50" "14:51" "14:52" "14:53"
## [409] "14:54" "14:55" "14:56" "14:57" "14:58" "14:59" "15:00" "15:01"
## [417] "15:02" "15:03" "15:04" "15:05" "15:06" "15:07" "15:08" "15:09"
## [425] "15:10" "15:11" "15:12" "15:13" "15:14" "15:15" "15:16" "15:17"
## [433] "15:18" "15:19" "15:20" "15:21" "15:22" "15:23" "15:24" "15:25"
## [441] "15:26" "15:27" "15:28" "15:29" "15:30" "15:31" "15:32" "15:33"
## [449] "15:34" "15:35" "15:36" "15:37" "15:38" "15:39" "15:40" "15:41"
## [457] "15:42" "15:43" "15:44" "15:45" "15:46" "15:47" "15:48" "15:49"
## [465] "15:50" "15:51" "15:52" "15:53" "15:54" "15:55" "15:56" "15:57"
## [473] "15:58" "15:59" "16:00" "16:01" "16:02" "16:03" "16:04" "16:05"
## [481] "16:06" "16:07" "16:08" "16:09" "16:10" "16:11" "16:12" "16:13"
## [489] "16:14" "16:15" "16:16" "16:17" "16:18" "16:19" "16:20" "16:21"
## [497] "16:22" "16:23" "16:24" "16:25" "16:26" "16:27" "16:28" "16:29"
## [505] "16:30" "16:32" "16:33" "16:34" "16:35" "16:36" "16:37" "16:38"
## [513] "16:39" "16:40" "16:41" "16:42" "16:43" "16:44" "16:45" "16:46"
## [521] "16:47" "16:48" "16:49" "16:50" "16:51" "16:52" "16:53" "16:54"
## [529] "16:55" "16:56" "16:57" "16:58" "16:59" "17:00" "17:01" "17:02"
## [537] "17:03" "17:04" "17:05" "17:06" "17:07" "17:08" "17:09" "17:10"
## [545] "17:11" "17:12" "17:13" "17:14" "17:15" "17:16" "17:17" "17:18"
## [553] "17:19" "17:20" "17:21" "17:22" "17:23" "17:24" "17:25" "17:26"
## [561] "17:27" "17:28" "17:29" "17:30" "17:31" "17:32" "17:33" "17:34"
## [569] "17:35" "17:36" "17:37" "17:38" "17:39" "17:40" "17:41" "17:42"
## [577] "17:43" "17:44" "17:45" "17:46" "17:47" "17:48" "17:49" "17:50"
## [585] "17:51" "17:52" "17:53" "17:54" "17:55" "17:56" "17:57" "17:58"
## [593] "17:59" "18:00" "18:01" "18:02" "18:03" "18:04" "18:05" "18:06"
## [601] "18:07" "18:08" "18:09" "18:10" "18:11" "18:12" "18:13" "18:14"
## [609] "18:15" "18:16" "18:17" "18:18" "18:19" "18:20" "18:21" "18:22"
## [617] "18:23" "18:24" "18:25" "18:26" "18:27" "18:28" "18:29" "18:30"
## [625] "18:31" "18:32" "18:33" "18:34" "18:35" "18:36" "18:37" "18:38"
## [633] "18:39" "18:40" "18:41" "18:42" "18:43" "18:44" "18:45" "18:46"
## [641] "18:47" "18:48" "18:49" "18:50" "18:51" "18:52" "18:53" "18:54"
## [649] "18:55" "18:56" "18:57" "18:58" "18:59" "19:00" "19:01" "19:02"
## [657] "19:03" "19:04" "19:05" "19:06" "19:07" "19:08" "19:09" "19:10"
## [665] "19:11" "19:12" "19:13" "19:14" "19:15" "19:16" "19:17" "19:18"
## [673] "19:19" "19:20" "19:21" "19:22" "19:23" "19:24" "19:25" "19:26"
## [681] "19:27" "19:28" "19:29" "19:30" "19:31" "19:32" "19:33" "19:34"
## [689] "19:35" "19:36" "19:37" "19:38" "19:39" "19:40" "19:41" "19:42"
## [697] "19:43" "19:44" "19:45" "19:46" "19:47" "19:48" "19:49" "19:50"
## [705] "19:51" "19:52" "19:53" "19:54" "19:55" "19:56" "19:57" "19:58"
## [713] "19:59" "2:00" "2:01" "2:03" "2:04" "2:05" "2:06" "2:07"
## [721] "2:08" "2:09" "2:10" "2:11" "2:13" "2:14" "2:15" "2:16"
## [729] "2:17" "2:18" "2:19" "2:20" "2:22" "2:23" "2:24" "2:25"
## [737] "2:26" "2:27" "2:28" "2:29" "2:30" "2:31" "2:32" "2:33"
## [745] "2:34" "2:35" "2:36" "2:38" "2:39" "2:40" "2:41" "2:42"
## [753] "2:43" "2:44" "2:45" "2:46" "2:47" "2:48" "2:49" "2:50"
## [761] "2:51" "2:52" "2:53" "2:54" "2:55" "2:57" "2:58" "2:59"
## [769] "20:00" "20:02" "20:03" "20:04" "20:05" "20:06" "20:07" "20:08"
## [777] "20:09" "20:10" "20:11" "20:12" "20:13" "20:14" "20:15" "20:16"
## [785] "20:17" "20:18" "20:19" "20:20" "20:21" "20:22" "20:23" "20:24"
## [793] "20:25" "20:26" "20:27" "20:28" "20:29" "20:30" "20:31" "20:32"
## [801] "20:33" "20:34" "20:35" "20:36" "20:37" "20:38" "20:40" "20:41"
## [809] "20:42" "20:43" "20:44" "20:45" "20:46" "20:47" "20:48" "20:49"
## [817] "20:50" "20:51" "20:52" "20:53" "20:54" "20:55" "20:56" "20:57"
## [825] "20:58" "20:59" "21:00" "21:01" "21:02" "21:03" "21:04" "21:05"
## [833] "21:06" "21:07" "21:08" "21:09" "21:10" "21:11" "21:12" "21:13"
## [841] "21:14" "21:15" "21:16" "21:17" "21:18" "21:19" "21:20" "21:21"
## [849] "21:22" "21:23" "21:24" "21:25" "21:26" "21:27" "21:28" "21:29"
## [857] "21:30" "21:31" "21:32" "21:33" "21:34" "21:35" "21:36" "21:37"
## [865] "21:38" "21:39" "21:40" "21:41" "21:42" "21:43" "21:44" "21:45"
## [873] "21:46" "21:47" "21:48" "21:49" "21:50" "21:51" "21:52" "21:53"
## [881] "21:54" "21:55" "21:56" "21:57" "21:58" "21:59" "22:00" "22:01"
## [889] "22:02" "22:03" "22:04" "22:05" "22:06" "22:07" "22:08" "22:09"
## [897] "22:10" "22:11" "22:12" "22:13" "22:14" "22:15" "22:16" "22:17"
## [905] "22:18" "22:19" "22:20" "22:21" "22:22" "22:23" "22:24" "22:25"
## [913] "22:26" "22:27" "22:28" "22:29" "22:30" "22:31" "22:33" "22:34"
## [921] "22:35" "22:36" "22:37" "22:38" "22:39" "22:40" "22:41" "22:42"
## [929] "22:43" "22:44" "22:45" "22:46" "22:48" "22:49" "22:50" "22:51"
## [937] "22:52" "22:53" "22:54" "22:55" "22:56" "22:58" "22:59" "23:00"
## [945] "23:01" "23:02" "23:03" "23:04" "23:05" "23:06" "23:07" "23:08"
## [953] "23:09" "23:10" "23:11" "23:12" "23:13" "23:14" "23:15" "23:16"
## [961] "23:17" "23:18" "23:20" "23:21" "23:22" "23:23" "23:24" "23:25"
## [969] "23:26" "23:27" "23:28" "23:29" "23:30" "23:31" "23:32" "23:33"
## [977] "23:34" "23:35" "23:36" "23:37" "23:38" "23:39" "23:40" "23:41"
## [985] "23:42" "23:44" "23:45" "23:46" "23:47" "23:48" "23:49" "23:50"
## [993] "23:51" "23:52" "23:54" "23:55" "23:56" "23:57" "23:58" "23:59"
## [1001] "3:00" "3:01" "3:02" "3:03" "3:04" "3:05" "3:06" "3:07"
## [1009] "3:08" "3:09" "3:10" "3:11" "3:12" "3:13" "3:14" "3:15"
## [1017] "3:16" "3:17" "3:18" "3:19" "3:20" "3:21" "3:22" "3:23"
## [1025] "3:24" "3:25" "3:26" "3:28" "3:30" "3:32" "3:33" "3:34"
## [1033] "3:35" "3:36" "3:37" "3:38" "3:39" "3:40" "3:42" "3:44"
## [1041] "3:45" "3:46" "3:47" "3:48" "3:49" "3:50" "3:52" "3:53"
## [1049] "3:54" "3:55" "3:56" "3:57" "3:58" "3:59" "4:00" "4:01"
## [1057] "4:02" "4:04" "4:05" "4:06" "4:07" "4:08" "4:09" "4:10"
## [1065] "4:11" "4:12" "4:13" "4:14" "4:15" "4:16" "4:18" "4:19"
## [1073] "4:20" "4:21" "4:22" "4:24" "4:25" "4:27" "4:28" "4:30"
## [1081] "4:33" "4:34" "4:35" "4:36" "4:37" "4:38" "4:40" "4:41"
## [1089] "4:42" "4:43" "4:44" "4:45" "4:46" "4:47" "4:49" "4:50"
## [1097] "4:51" "4:52" "4:54" "4:55" "4:56" "4:58" "5:00" "5:02"
## [1105] "5:03" "5:04" "5:05" "5:06" "5:07" "5:08" "5:09" "5:10"
## [1113] "5:11" "5:12" "5:13" "5:14" "5:15" "5:16" "5:17" "5:18"
## [1121] "5:19" "5:20" "5:21" "5:24" "5:25" "5:26" "5:28" "5:29"
## [1129] "5:30" "5:31" "5:32" "5:33" "5:34" "5:35" "5:36" "5:37"
## [1137] "5:38" "5:39" "5:40" "5:41" "5:42" "5:43" "5:44" "5:45"
## [1145] "5:46" "5:47" "5:48" "5:50" "5:51" "5:52" "5:53" "5:54"
## [1153] "5:55" "5:56" "5:58" "5:59" "6:00" "6:01" "6:02" "6:03"
## [1161] "6:04" "6:05" "6:06" "6:07" "6:08" "6:09" "6:10" "6:11"
## [1169] "6:12" "6:13" "6:14" "6:15" "6:16" "6:17" "6:18" "6:19"
## [1177] "6:20" "6:21" "6:22" "6:23" "6:24" "6:25" "6:26" "6:27"
## [1185] "6:28" "6:29" "6:30" "6:31" "6:32" "6:33" "6:34" "6:35"
## [1193] "6:36" "6:37" "6:38" "6:39" "6:40" "6:41" "6:42" "6:43"
## [1201] "6:44" "6:45" "6:46" "6:47" "6:48" "6:49" "6:50" "6:51"
## [1209] "6:52" "6:53" "6:54" "6:55" "6:56" "6:57" "6:58" "6:59"
## [1217] "7:00" "7:02" "7:03" "7:04" "7:05" "7:06" "7:07" "7:08"
## [1225] "7:09" "7:10" "7:11" "7:12" "7:13" "7:15" "7:16" "7:17"
## [1233] "7:18" "7:19" "7:20" "7:21" "7:22" "7:23" "7:24" "7:25"
## [1241] "7:26" "7:27" "7:28" "7:29" "7:30" "7:31" "7:32" "7:33"
## [1249] "7:34" "7:35" "7:36" "7:37" "7:38" "7:39" "7:40" "7:41"
## [1257] "7:42" "7:43" "7:44" "7:45" "7:46" "7:47" "7:48" "7:49"
## [1265] "7:50" "7:51" "7:52" "7:53" "7:54" "7:55" "7:56" "7:57"
## [1273] "7:58" "7:59" "8:00" "8:01" "8:02" "8:03" "8:04" "8:05"
## [1281] "8:06" "8:07" "8:08" "8:09" "8:10" "8:11" "8:12" "8:13"
## [1289] "8:14" "8:15" "8:16" "8:17" "8:18" "8:19" "8:20" "8:21"
## [1297] "8:22" "8:23" "8:24" "8:25" "8:26" "8:27" "8:28" "8:29"
## [1305] "8:30" "8:31" "8:32" "8:33" "8:34" "8:35" "8:36" "8:37"
## [1313] "8:38" "8:40" "8:41" "8:42" "8:43" "8:44" "8:45" "8:46"
## [1321] "8:47" "8:48" "8:49" "8:50" "8:51" "8:52" "8:53" "8:54"
## [1329] "8:55" "8:56" "8:57" "8:58" "8:59" "9:00" "9:01" "9:02"
## [1337] "9:03" "9:04" "9:05" "9:06" "9:07" "9:08" "9:09" "9:10"
## [1345] "9:11" "9:12" "9:13" "9:14" "9:15" "9:16" "9:17" "9:18"
## [1353] "9:19" "9:20" "9:21" "9:22" "9:23" "9:24" "9:25" "9:26"
## [1361] "9:27" "9:28" "9:29" "9:30" "9:31" "9:32" "9:33" "9:34"
## [1369] "9:35" "9:36" "9:37" "9:38" "9:39" "9:40" "9:41" "9:42"
## [1377] "9:43" "9:44" "9:45" "9:46" "9:47" "9:48" "9:49" "9:50"
## [1385] "9:51" "9:52" "9:53" "9:54" "9:55" "9:56" "9:57" "9:58"
## [1393] "9:59"
# Convert the crash type and the three vehicle-involvement columns to factors
# and list their levels.
RoadFatalitiesfebruary2018$`Crash Type` <- as.factor(
  RoadFatalitiesfebruary2018$`Crash Type`
)
levels(RoadFatalitiesfebruary2018$`Crash Type`)
## [1] "Multiple vehicle" "Pedestrian" "Single vehicle"
# The involvement column names contain a space followed by a line break
# (e.g. "Bus \nInvolvement"). They are written as strings with an explicit
# "\n" so the code does not depend on an invisible trailing space inside a
# multi-line backtick name.
RoadFatalitiesfebruary2018[["Bus \nInvolvement"]] <- as.factor(
  RoadFatalitiesfebruary2018[["Bus \nInvolvement"]]
)
levels(RoadFatalitiesfebruary2018[["Bus \nInvolvement"]])
## [1] "-9" "No" "Yes"
RoadFatalitiesfebruary2018[["Rigid Truck \nInvolvement"]] <- as.factor(
  RoadFatalitiesfebruary2018[["Rigid Truck \nInvolvement"]]
)
levels(RoadFatalitiesfebruary2018[["Rigid Truck \nInvolvement"]])
## [1] "-9" "No" "Yes"
RoadFatalitiesfebruary2018[["Articulated Truck \nInvolvement"]] <- as.factor(
  RoadFatalitiesfebruary2018[["Articulated Truck \nInvolvement"]]
)
levels(RoadFatalitiesfebruary2018[["Articulated Truck \nInvolvement"]])
## [1] "-9" "No" "Yes"
# Convert the speed limit to a factor and list its levels. The conversion was
# previously repeated twice in a row; a single pass is sufficient because
# as.factor() on an existing factor changes nothing.
RoadFatalitiesfebruary2018$`Speed Limit` <- as.factor(
  RoadFatalitiesfebruary2018$`Speed Limit`
)
levels(RoadFatalitiesfebruary2018$`Speed Limit`)
## [1] "-9" "8" "10" "20" "25" "30" "40" "45" "50" "60" "70"
## [12] "75" "80" "90" "100" "110" "130" "888" "900"
# Turn the road user, gender and age columns into factors and show the
# distinct levels of each.
RoadFatalitiesfebruary2018[["Road User"]] <- factor(
  RoadFatalitiesfebruary2018[["Road User"]]
)
levels(RoadFatalitiesfebruary2018[["Road User"]])
## [1] "9"
## [2] "Bicyclist (includes pillion passengers)"
## [3] "Driver"
## [4] "Motorcycle pillion passenger"
## [5] "Motorcycle rider"
## [6] "Other"
## [7] "Passenger"
## [8] "Pedestrian"
RoadFatalitiesfebruary2018[["Gender"]] <- factor(
  RoadFatalitiesfebruary2018[["Gender"]]
)
levels(RoadFatalitiesfebruary2018[["Gender"]])
## [1] "-9" "Female" "Male" "Unspecified"
RoadFatalitiesfebruary2018[["Age"]] <- factor(
  RoadFatalitiesfebruary2018[["Age"]]
)
levels(RoadFatalitiesfebruary2018[["Age"]])
## [1] "-9" "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
## [12] "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20"
## [23] "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" "31"
## [34] "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
## [45] "43" "44" "45" "46" "47" "48" "49" "50" "51" "52" "53"
## [56] "54" "55" "56" "57" "58" "59" "60" "61" "62" "63" "64"
## [67] "65" "66" "67" "68" "69" "70" "71" "72" "73" "74" "75"
## [78] "76" "77" "78" "79" "80" "81" "82" "83" "84" "85" "86"
## [89] "87" "88" "89" "90" "91" "92" "93" "94" "95" "96" "97"
## [100] "98" "99" "100" "101"
colnames(RoadFatalitiesfebruary2018)
## [1] "Crash ID" "State"
## [3] "Month" "Year"
## [5] "Dayweek" "Time"
## [7] "Crash Type" "Bus \nInvolvement"
## [9] "Rigid Truck \nInvolvement" "Articulated Truck \nInvolvement"
## [11] "Speed Limit" "Road User"
## [13] "Gender" "Age"
## Rename the "Crash ID" column to "Accident_ID" ##
names(RoadFatalitiesfebruary2018)[
  names(RoadFatalitiesfebruary2018) == "Crash ID"
] <- "Accident_ID"
names(RoadFatalitiesfebruary2018)
## [1] "Accident_ID" "State"
## [3] "Month" "Year"
## [5] "Dayweek" "Time"
## [7] "Crash Type" "Bus \nInvolvement"
## [9] "Rigid Truck \nInvolvement" "Articulated Truck \nInvolvement"
## [11] "Speed Limit" "Road User"
## [13] "Gender" "Age"
## Combine the first eight columns into a list of vectors ##
# The identifier column was renamed to Accident_ID above, so the old name
# `Crash ID` no longer exists and would only contribute NULL with a warning.
# The crash type column is also listed once only (it had been repeated).
vect_list <- list(
  RoadFatalitiesfebruary2018$Accident_ID,
  RoadFatalitiesfebruary2018$State,
  RoadFatalitiesfebruary2018$Month,
  RoadFatalitiesfebruary2018$Year,
  RoadFatalitiesfebruary2018$Dayweek,
  RoadFatalitiesfebruary2018$Time,
  RoadFatalitiesfebruary2018$`Crash Type`,
  RoadFatalitiesfebruary2018[["Bus \nInvolvement"]]
)
str(vect_list)
## List of 8
## $ : chr [1:48782] "201830006" "201850008" "201810056" "201820024" ...
## $ : Factor w/ 8 levels "ACT","NSW","NT",..: 4 8 2 7 4 2 8 6 2 2 ...
## $ : Factor w/ 12 levels "April","August",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ : Factor w/ 30 levels "1989","1990",..: 30 30 30 30 30 30 30 30 30 30 ...
## $ : Factor w/ 7 levels "Friday","Monday",..: 6 6 2 2 2 4 4 4 3 3 ...
## $ : Factor w/ 1393 levels "-9","0:00","0:01",..: 654 505 257 882 296 315 257 796 60 1257 ...
## $ : Factor w/ 3 levels "Multiple vehicle",..: 3 3 2 2 1 3 3 1 3 1 ...
## $ : Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
## Take the first 10 rows (all 14 columns) as a small working subset ##
df <- RoadFatalitiesfebruary2018[seq_len(10), seq_len(14)]
print(df)
## # A tibble: 10 x 14
## Accident_ID State Month Year Dayweek Time `Crash Type`
## <chr> <fct> <fct> <fct> <fct> <fct> <fct>
## 1 201830006 QLD February 2018 Tuesday 19:00 Single vehicle
## 2 201850008 WA February 2018 Tuesday 16:30 Single vehicle
## 3 201810056 NSW February 2018 Monday 12:20 Pedestrian
## 4 201820024 VIC February 2018 Monday 21:55 Pedestrian
## 5 201830032 QLD February 2018 Monday 13:00 Multiple vehicle
## 6 201810012 NSW February 2018 Sunday 13:20 Single vehicle
## 7 201850016 WA February 2018 Sunday 12:20 Single vehicle
## 8 201860008 TAS February 2018 Sunday 20:28 Multiple vehicle
## 9 201810034 NSW February 2018 Saturday 1:00 Single vehicle
## 10 201810040 NSW February 2018 Saturday 7:42 Multiple vehicle
## # ... with 7 more variables: `Bus \nInvolvement` <fct>, `Rigid Truck
## # \nInvolvement` <fct>, `Articulated Truck \nInvolvement` <fct>, `Speed
## # Limit` <fct>, `Road User` <fct>, Gender <fct>, Age <fct>
## Converting data to matrix format ##
# as.matrix() turns the data frame into a real 10 x 14 matrix of cell values.
# matrix(df) instead treats the data frame as a list and yields a 14 x 1
# list-matrix with one whole column per cell, which is not a data matrix.
df_matrix <- as.matrix(df)
df_matrix
## Underlying structure of the matrix ##
# Inspect the matrix itself; attributes(df) would describe the tibble instead.
attributes(df_matrix)
# NOTE(review): the previously recorded output for this chunk described the
# list-matrix and the tibble attributes; re-run to regenerate the printed
# results.
## Building matrices with column bind and row bind ##
# Age and Speed Limit are factors at this point. cbind()/rbind() on factors
# use the internal integer level codes (e.g. code 19 for age 17), so convert
# through character to recover the real values first.
age_values <- as.integer(as.character(df$Age))
speed_values <- as.integer(as.character(df$`Speed Limit`))
## Checking Matrix using column bind ##
# deparse.level = 0 keeps the result free of dimnames, as before.
mcbd <- cbind(age_values, speed_values, deparse.level = 0)
mcbd
## [,1] [,2]
## [1,] 17 80
## [2,] 26 70
## [3,] 85 50
## [4,] 25 -9
## [5,] 62 60
## [6,] 63 100
## [7,] 94 60
## [8,] 36 50
## [9,] 34 100
## [10,] 17 100
## Checking Attributes ##
attributes(mcbd)
## $dim
## [1] 10 2
## Checking Matrix using row bind ##
mrbd <- rbind(age_values, speed_values, deparse.level = 0)
mrbd
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 17 26 85 25 62 63 94 36 34 17
## [2,] 80 70 50 -9 60 100 60 50 100 100
## Checking Attributes ##
attributes(mrbd)
## $dim
## [1] 2 10
## Copy the tibble into a base data frame and inspect its structure ##
data_frame1 <- data.frame(
  RoadFatalitiesfebruary2018,
  stringsAsFactors = FALSE
)
str(data_frame1)
## 'data.frame': 48782 obs. of 14 variables:
## $ Accident_ID : chr "201830006" "201850008" "201810056" "201820024" ...
## $ State : Factor w/ 8 levels "ACT","NSW","NT",..: 4 8 2 7 4 2 8 6 2 2 ...
## $ Month : Factor w/ 12 levels "April","August",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Year : Factor w/ 30 levels "1989","1990",..: 30 30 30 30 30 30 30 30 30 30 ...
## $ Dayweek : Factor w/ 7 levels "Friday","Monday",..: 6 6 2 2 2 4 4 4 3 3 ...
## $ Time : Factor w/ 1393 levels "-9","0:00","0:01",..: 654 505 257 882 296 315 257 796 60 1257 ...
## $ Crash.Type : Factor w/ 3 levels "Multiple vehicle",..: 3 3 2 2 1 3 3 1 3 1 ...
## $ Bus..Involvement : Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
## $ Rigid.Truck..Involvement : Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
## $ Articulated.Truck..Involvement: Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
## $ Speed.Limit : Factor w/ 19 levels "-9","8","10",..: 13 11 9 1 10 15 10 9 15 15 ...
## $ Road.User : Factor w/ 8 levels "9","Bicyclist (includes pillion passengers)",..: 3 3 8 8 5 3 3 5 3 3 ...
## $ Gender : Factor w/ 4 levels "-9","Female",..: 3 3 3 3 3 3 2 3 3 3 ...
## $ Age : Factor w/ 103 levels "-9","0","1","2",..: 19 28 87 27 64 65 96 38 36 19 ...
## Subsetting the data frame to its first and last variable ##
# Derive the last column position with ncol() instead of hard-coding 14, so
# the subset stays correct if columns are added or removed.
RoadFatalitiesfebruary2018[, c(1, ncol(RoadFatalitiesfebruary2018))]
## # A tibble: 48,782 x 2
## Accident_ID Age
## <chr> <fct>
## 1 201830006 17
## 2 201850008 26
## 3 201810056 85
## 4 201820024 25
## 5 201830032 62
## 6 201810012 63
## 7 201850016 94
## 8 201860008 36
## 9 201810034 34
## 10 201810040 17
## # ... with 48,772 more rows
## Saving it as an R object (.RData) ##
# save() writes the .RData format that load() reads back. saveRDS() writes a
# different single-object format (conventionally .rds) that load() cannot
# open, so it does not match the .RData file name.
save(RoadFatalitiesfebruary2018, file = "my_data.RData")