---
title: "MATH2349 Semester 1, 2018"
author: "Mohammad Hanif Razzak (s3650497)"
subtitle: "Assignment 1"
output:
  html_notebook: default
---

Setup

library(readr)
library(readxl)
library(foreign)
library(RODBC)
library(rvest)
## Loading required package: xml2
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding

Data Description

Read/Import Data

# Read the fatalities CSV from the current working directory ----
getwd()
## [1] "/Users/mohammadrazzak/Documents/University/RMIT/data preprocessing/assignment1"
library(readr)
bitrearddfatalitiesfebruary2018 <- read_csv(
  file = "bitrearddfatalitiesfebruary2018.csv"
)
## Parsed with column specification:
## cols(
##   `Crash ID` = col_character(),
##   State = col_character(),
##   Month = col_character(),
##   Year = col_character(),
##   Dayweek = col_character(),
##   Time = col_character(),
##   `Crash Type` = col_character(),
##   `Bus 
## Involvement` = col_character(),
##   `Rigid Truck 
## Involvement` = col_character(),
##   `Articulated Truck 
## Involvement` = col_character(),
##   `Speed Limit` = col_integer(),
##   `Road User` = col_character(),
##   Gender = col_character(),
##   Age = col_integer()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 2)
## Warning: 1 parsing failure.
## row # A tibble: 1 x 5 col     row col   expected   actual    file                                  expected   <int> <chr> <chr>      <chr>     <chr>                                 actual 1     1 <NA>  14 columns 7 columns 'bitrearddfatalitiesfebruary2018.csv' file # A tibble: 1 x 5
# Open the raw import in the viewer, then keep everything except its first
# row (the row readr reported as a parsing failure) and view the result.
View(bitrearddfatalitiesfebruary2018)
RoadFatalitiesfebruary2018 <- bitrearddfatalitiesfebruary2018[-1L, ]
View(RoadFatalitiesfebruary2018)

# View the class of the data set and its first rows ----

class(RoadFatalitiesfebruary2018)
## [1] "tbl_df"     "tbl"        "data.frame"
# Preview the first six rows of the data set itself (head() needs the data
# object, not the `class` function).
head(RoadFatalitiesfebruary2018)
## # A tibble: 6 x 14
##   `Crash ID` State Month Year  Dayweek Time  `Crash Type` `Bus \nInvolvem…
##   <chr>      <chr> <chr> <chr> <chr>   <chr> <chr>        <chr>           
## 1 201830006  QLD   Febr… 2018  Tuesday 19:00 Single vehi… No              
## 2 201850008  WA    Febr… 2018  Tuesday 16:30 Single vehi… No              
## 3 201810056  NSW   Febr… 2018  Monday  12:20 Pedestrian   No              
## 4 201820024  VIC   Febr… 2018  Monday  21:55 Pedestrian   -9              
## 5 201830032  QLD   Febr… 2018  Monday  13:00 Multiple ve… No              
## 6 201810012  NSW   Febr… 2018  Sunday  13:20 Single vehi… No              
## # ... with 6 more variables: `Rigid Truck \nInvolvement` <chr>,
## #   `Articulated Truck \nInvolvement` <chr>, `Speed Limit` <int>, `Road
## #   User` <chr>, Gender <chr>, Age <int>

Inspect and Understand

# Dimensions of the data set (rows, columns) ----

dim(RoadFatalitiesfebruary2018)
## [1] 48782    14
# Storage type of the data set as a whole ----
# typeof() on a data frame / tibble reports "list", because a data frame is
# stored as a list of columns.

dbl_var <- RoadFatalitiesfebruary2018
typeof(dbl_var)
## [1] "list"
# Checking data types of variables ----

# Columns whose names contain a space or a line break must be referenced by
# their exact name. Dotted spellings such as Crash.Type do not exist in this
# tibble, so `$` would return NULL with an "Unknown column" warning.
class(RoadFatalitiesfebruary2018[["Crash Type"]])
## [1] "character"
class(RoadFatalitiesfebruary2018[["Crash ID"]])
## [1] "character"
class(RoadFatalitiesfebruary2018$Month)
## [1] "character"
class(RoadFatalitiesfebruary2018$Year)
## [1] "character"
class(RoadFatalitiesfebruary2018$State)
## [1] "character"
class(RoadFatalitiesfebruary2018$Dayweek)
## [1] "character"
class(RoadFatalitiesfebruary2018$Time)
## [1] "character"
# The CSV header for this column contains a line break after "Bus ".
class(RoadFatalitiesfebruary2018[["Bus \nInvolvement"]])
## [1] "character"
# Convert the categorical columns to factors and list their levels ----


RoadFatalitiesfebruary2018[["State"]] <-
  as.factor(RoadFatalitiesfebruary2018[["State"]])
levels(RoadFatalitiesfebruary2018[["State"]])
## [1] "ACT" "NSW" "NT"  "QLD" "SA"  "TAS" "VIC" "WA"
RoadFatalitiesfebruary2018[["Month"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Month"]])
levels(RoadFatalitiesfebruary2018[["Month"]])
##  [1] "April"     "August"    "December"  "February"  "January"  
##  [6] "July"      "June"      "March"     "May"       "November" 
## [11] "October"   "September"
RoadFatalitiesfebruary2018[["Year"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Year"]])
levels(RoadFatalitiesfebruary2018[["Year"]])
##  [1] "1989" "1990" "1991" "1992" "1993" "1994" "1995" "1996" "1997" "1998"
## [11] "1999" "2000" "2001" "2002" "2003" "2004" "2005" "2006" "2007" "2008"
## [21] "2009" "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
RoadFatalitiesfebruary2018[["Dayweek"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Dayweek"]])
levels(RoadFatalitiesfebruary2018[["Dayweek"]])
## [1] "Friday"    "Monday"    "Saturday"  "Sunday"    "Thursday"  "Tuesday"  
## [7] "Wednesday"
RoadFatalitiesfebruary2018[["Time"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Time"]])
levels(RoadFatalitiesfebruary2018[["Time"]])
##    [1] "-9"    "0:00"  "0:01"  "0:02"  "0:03"  "0:04"  "0:05"  "0:06" 
##    [9] "0:07"  "0:08"  "0:09"  "0:10"  "0:11"  "0:12"  "0:13"  "0:14" 
##   [17] "0:15"  "0:16"  "0:17"  "0:18"  "0:20"  "0:21"  "0:22"  "0:23" 
##   [25] "0:24"  "0:25"  "0:26"  "0:27"  "0:28"  "0:29"  "0:30"  "0:31" 
##   [33] "0:32"  "0:33"  "0:34"  "0:35"  "0:36"  "0:37"  "0:38"  "0:39" 
##   [41] "0:40"  "0:41"  "0:42"  "0:43"  "0:45"  "0:46"  "0:47"  "0:48" 
##   [49] "0:49"  "0:50"  "0:51"  "0:52"  "0:53"  "0:54"  "0:55"  "0:56" 
##   [57] "0:57"  "0:58"  "0:59"  "1:00"  "1:01"  "1:02"  "1:03"  "1:04" 
##   [65] "1:05"  "1:06"  "1:07"  "1:08"  "1:09"  "1:10"  "1:11"  "1:12" 
##   [73] "1:13"  "1:14"  "1:15"  "1:16"  "1:17"  "1:18"  "1:19"  "1:20" 
##   [81] "1:21"  "1:22"  "1:23"  "1:25"  "1:26"  "1:27"  "1:28"  "1:29" 
##   [89] "1:30"  "1:32"  "1:33"  "1:34"  "1:35"  "1:36"  "1:37"  "1:38" 
##   [97] "1:39"  "1:40"  "1:41"  "1:42"  "1:43"  "1:44"  "1:45"  "1:46" 
##  [105] "1:47"  "1:48"  "1:49"  "1:50"  "1:51"  "1:52"  "1:53"  "1:54" 
##  [113] "1:55"  "1:56"  "1:57"  "1:58"  "1:59"  "10:00" "10:01" "10:02"
##  [121] "10:03" "10:04" "10:05" "10:06" "10:07" "10:08" "10:09" "10:10"
##  [129] "10:11" "10:12" "10:13" "10:14" "10:15" "10:16" "10:17" "10:18"
##  [137] "10:19" "10:20" "10:22" "10:23" "10:24" "10:25" "10:26" "10:27"
##  [145] "10:28" "10:29" "10:30" "10:31" "10:32" "10:33" "10:34" "10:35"
##  [153] "10:36" "10:37" "10:38" "10:39" "10:40" "10:41" "10:42" "10:43"
##  [161] "10:44" "10:45" "10:46" "10:47" "10:48" "10:49" "10:50" "10:51"
##  [169] "10:52" "10:53" "10:54" "10:55" "10:56" "10:57" "10:58" "10:59"
##  [177] "11:00" "11:01" "11:02" "11:03" "11:04" "11:05" "11:06" "11:07"
##  [185] "11:08" "11:09" "11:10" "11:11" "11:12" "11:13" "11:14" "11:15"
##  [193] "11:16" "11:17" "11:18" "11:19" "11:20" "11:21" "11:22" "11:23"
##  [201] "11:24" "11:25" "11:26" "11:27" "11:28" "11:29" "11:30" "11:31"
##  [209] "11:32" "11:33" "11:34" "11:35" "11:36" "11:37" "11:38" "11:39"
##  [217] "11:40" "11:41" "11:42" "11:43" "11:44" "11:45" "11:46" "11:47"
##  [225] "11:48" "11:49" "11:50" "11:51" "11:52" "11:53" "11:54" "11:55"
##  [233] "11:56" "11:57" "11:58" "11:59" "12:00" "12:01" "12:02" "12:03"
##  [241] "12:04" "12:05" "12:06" "12:07" "12:08" "12:09" "12:10" "12:11"
##  [249] "12:12" "12:13" "12:14" "12:15" "12:16" "12:17" "12:18" "12:19"
##  [257] "12:20" "12:21" "12:22" "12:23" "12:24" "12:25" "12:26" "12:27"
##  [265] "12:28" "12:29" "12:30" "12:31" "12:32" "12:33" "12:34" "12:35"
##  [273] "12:36" "12:37" "12:38" "12:39" "12:40" "12:41" "12:42" "12:43"
##  [281] "12:44" "12:45" "12:46" "12:47" "12:48" "12:49" "12:50" "12:52"
##  [289] "12:53" "12:54" "12:55" "12:56" "12:57" "12:58" "12:59" "13:00"
##  [297] "13:01" "13:02" "13:03" "13:04" "13:05" "13:06" "13:07" "13:08"
##  [305] "13:09" "13:10" "13:11" "13:12" "13:13" "13:14" "13:15" "13:17"
##  [313] "13:18" "13:19" "13:20" "13:21" "13:22" "13:23" "13:24" "13:25"
##  [321] "13:26" "13:27" "13:28" "13:29" "13:30" "13:31" "13:32" "13:33"
##  [329] "13:34" "13:35" "13:36" "13:37" "13:38" "13:39" "13:40" "13:41"
##  [337] "13:42" "13:43" "13:44" "13:45" "13:46" "13:47" "13:48" "13:49"
##  [345] "13:50" "13:51" "13:52" "13:53" "13:54" "13:55" "13:56" "13:57"
##  [353] "13:58" "13:59" "14:00" "14:01" "14:02" "14:03" "14:04" "14:05"
##  [361] "14:06" "14:07" "14:08" "14:09" "14:10" "14:11" "14:12" "14:13"
##  [369] "14:14" "14:15" "14:16" "14:17" "14:18" "14:19" "14:20" "14:21"
##  [377] "14:22" "14:23" "14:24" "14:25" "14:26" "14:27" "14:28" "14:29"
##  [385] "14:30" "14:31" "14:32" "14:33" "14:34" "14:35" "14:36" "14:37"
##  [393] "14:38" "14:39" "14:40" "14:41" "14:42" "14:43" "14:44" "14:45"
##  [401] "14:46" "14:47" "14:48" "14:49" "14:50" "14:51" "14:52" "14:53"
##  [409] "14:54" "14:55" "14:56" "14:57" "14:58" "14:59" "15:00" "15:01"
##  [417] "15:02" "15:03" "15:04" "15:05" "15:06" "15:07" "15:08" "15:09"
##  [425] "15:10" "15:11" "15:12" "15:13" "15:14" "15:15" "15:16" "15:17"
##  [433] "15:18" "15:19" "15:20" "15:21" "15:22" "15:23" "15:24" "15:25"
##  [441] "15:26" "15:27" "15:28" "15:29" "15:30" "15:31" "15:32" "15:33"
##  [449] "15:34" "15:35" "15:36" "15:37" "15:38" "15:39" "15:40" "15:41"
##  [457] "15:42" "15:43" "15:44" "15:45" "15:46" "15:47" "15:48" "15:49"
##  [465] "15:50" "15:51" "15:52" "15:53" "15:54" "15:55" "15:56" "15:57"
##  [473] "15:58" "15:59" "16:00" "16:01" "16:02" "16:03" "16:04" "16:05"
##  [481] "16:06" "16:07" "16:08" "16:09" "16:10" "16:11" "16:12" "16:13"
##  [489] "16:14" "16:15" "16:16" "16:17" "16:18" "16:19" "16:20" "16:21"
##  [497] "16:22" "16:23" "16:24" "16:25" "16:26" "16:27" "16:28" "16:29"
##  [505] "16:30" "16:32" "16:33" "16:34" "16:35" "16:36" "16:37" "16:38"
##  [513] "16:39" "16:40" "16:41" "16:42" "16:43" "16:44" "16:45" "16:46"
##  [521] "16:47" "16:48" "16:49" "16:50" "16:51" "16:52" "16:53" "16:54"
##  [529] "16:55" "16:56" "16:57" "16:58" "16:59" "17:00" "17:01" "17:02"
##  [537] "17:03" "17:04" "17:05" "17:06" "17:07" "17:08" "17:09" "17:10"
##  [545] "17:11" "17:12" "17:13" "17:14" "17:15" "17:16" "17:17" "17:18"
##  [553] "17:19" "17:20" "17:21" "17:22" "17:23" "17:24" "17:25" "17:26"
##  [561] "17:27" "17:28" "17:29" "17:30" "17:31" "17:32" "17:33" "17:34"
##  [569] "17:35" "17:36" "17:37" "17:38" "17:39" "17:40" "17:41" "17:42"
##  [577] "17:43" "17:44" "17:45" "17:46" "17:47" "17:48" "17:49" "17:50"
##  [585] "17:51" "17:52" "17:53" "17:54" "17:55" "17:56" "17:57" "17:58"
##  [593] "17:59" "18:00" "18:01" "18:02" "18:03" "18:04" "18:05" "18:06"
##  [601] "18:07" "18:08" "18:09" "18:10" "18:11" "18:12" "18:13" "18:14"
##  [609] "18:15" "18:16" "18:17" "18:18" "18:19" "18:20" "18:21" "18:22"
##  [617] "18:23" "18:24" "18:25" "18:26" "18:27" "18:28" "18:29" "18:30"
##  [625] "18:31" "18:32" "18:33" "18:34" "18:35" "18:36" "18:37" "18:38"
##  [633] "18:39" "18:40" "18:41" "18:42" "18:43" "18:44" "18:45" "18:46"
##  [641] "18:47" "18:48" "18:49" "18:50" "18:51" "18:52" "18:53" "18:54"
##  [649] "18:55" "18:56" "18:57" "18:58" "18:59" "19:00" "19:01" "19:02"
##  [657] "19:03" "19:04" "19:05" "19:06" "19:07" "19:08" "19:09" "19:10"
##  [665] "19:11" "19:12" "19:13" "19:14" "19:15" "19:16" "19:17" "19:18"
##  [673] "19:19" "19:20" "19:21" "19:22" "19:23" "19:24" "19:25" "19:26"
##  [681] "19:27" "19:28" "19:29" "19:30" "19:31" "19:32" "19:33" "19:34"
##  [689] "19:35" "19:36" "19:37" "19:38" "19:39" "19:40" "19:41" "19:42"
##  [697] "19:43" "19:44" "19:45" "19:46" "19:47" "19:48" "19:49" "19:50"
##  [705] "19:51" "19:52" "19:53" "19:54" "19:55" "19:56" "19:57" "19:58"
##  [713] "19:59" "2:00"  "2:01"  "2:03"  "2:04"  "2:05"  "2:06"  "2:07" 
##  [721] "2:08"  "2:09"  "2:10"  "2:11"  "2:13"  "2:14"  "2:15"  "2:16" 
##  [729] "2:17"  "2:18"  "2:19"  "2:20"  "2:22"  "2:23"  "2:24"  "2:25" 
##  [737] "2:26"  "2:27"  "2:28"  "2:29"  "2:30"  "2:31"  "2:32"  "2:33" 
##  [745] "2:34"  "2:35"  "2:36"  "2:38"  "2:39"  "2:40"  "2:41"  "2:42" 
##  [753] "2:43"  "2:44"  "2:45"  "2:46"  "2:47"  "2:48"  "2:49"  "2:50" 
##  [761] "2:51"  "2:52"  "2:53"  "2:54"  "2:55"  "2:57"  "2:58"  "2:59" 
##  [769] "20:00" "20:02" "20:03" "20:04" "20:05" "20:06" "20:07" "20:08"
##  [777] "20:09" "20:10" "20:11" "20:12" "20:13" "20:14" "20:15" "20:16"
##  [785] "20:17" "20:18" "20:19" "20:20" "20:21" "20:22" "20:23" "20:24"
##  [793] "20:25" "20:26" "20:27" "20:28" "20:29" "20:30" "20:31" "20:32"
##  [801] "20:33" "20:34" "20:35" "20:36" "20:37" "20:38" "20:40" "20:41"
##  [809] "20:42" "20:43" "20:44" "20:45" "20:46" "20:47" "20:48" "20:49"
##  [817] "20:50" "20:51" "20:52" "20:53" "20:54" "20:55" "20:56" "20:57"
##  [825] "20:58" "20:59" "21:00" "21:01" "21:02" "21:03" "21:04" "21:05"
##  [833] "21:06" "21:07" "21:08" "21:09" "21:10" "21:11" "21:12" "21:13"
##  [841] "21:14" "21:15" "21:16" "21:17" "21:18" "21:19" "21:20" "21:21"
##  [849] "21:22" "21:23" "21:24" "21:25" "21:26" "21:27" "21:28" "21:29"
##  [857] "21:30" "21:31" "21:32" "21:33" "21:34" "21:35" "21:36" "21:37"
##  [865] "21:38" "21:39" "21:40" "21:41" "21:42" "21:43" "21:44" "21:45"
##  [873] "21:46" "21:47" "21:48" "21:49" "21:50" "21:51" "21:52" "21:53"
##  [881] "21:54" "21:55" "21:56" "21:57" "21:58" "21:59" "22:00" "22:01"
##  [889] "22:02" "22:03" "22:04" "22:05" "22:06" "22:07" "22:08" "22:09"
##  [897] "22:10" "22:11" "22:12" "22:13" "22:14" "22:15" "22:16" "22:17"
##  [905] "22:18" "22:19" "22:20" "22:21" "22:22" "22:23" "22:24" "22:25"
##  [913] "22:26" "22:27" "22:28" "22:29" "22:30" "22:31" "22:33" "22:34"
##  [921] "22:35" "22:36" "22:37" "22:38" "22:39" "22:40" "22:41" "22:42"
##  [929] "22:43" "22:44" "22:45" "22:46" "22:48" "22:49" "22:50" "22:51"
##  [937] "22:52" "22:53" "22:54" "22:55" "22:56" "22:58" "22:59" "23:00"
##  [945] "23:01" "23:02" "23:03" "23:04" "23:05" "23:06" "23:07" "23:08"
##  [953] "23:09" "23:10" "23:11" "23:12" "23:13" "23:14" "23:15" "23:16"
##  [961] "23:17" "23:18" "23:20" "23:21" "23:22" "23:23" "23:24" "23:25"
##  [969] "23:26" "23:27" "23:28" "23:29" "23:30" "23:31" "23:32" "23:33"
##  [977] "23:34" "23:35" "23:36" "23:37" "23:38" "23:39" "23:40" "23:41"
##  [985] "23:42" "23:44" "23:45" "23:46" "23:47" "23:48" "23:49" "23:50"
##  [993] "23:51" "23:52" "23:54" "23:55" "23:56" "23:57" "23:58" "23:59"
## [1001] "3:00"  "3:01"  "3:02"  "3:03"  "3:04"  "3:05"  "3:06"  "3:07" 
## [1009] "3:08"  "3:09"  "3:10"  "3:11"  "3:12"  "3:13"  "3:14"  "3:15" 
## [1017] "3:16"  "3:17"  "3:18"  "3:19"  "3:20"  "3:21"  "3:22"  "3:23" 
## [1025] "3:24"  "3:25"  "3:26"  "3:28"  "3:30"  "3:32"  "3:33"  "3:34" 
## [1033] "3:35"  "3:36"  "3:37"  "3:38"  "3:39"  "3:40"  "3:42"  "3:44" 
## [1041] "3:45"  "3:46"  "3:47"  "3:48"  "3:49"  "3:50"  "3:52"  "3:53" 
## [1049] "3:54"  "3:55"  "3:56"  "3:57"  "3:58"  "3:59"  "4:00"  "4:01" 
## [1057] "4:02"  "4:04"  "4:05"  "4:06"  "4:07"  "4:08"  "4:09"  "4:10" 
## [1065] "4:11"  "4:12"  "4:13"  "4:14"  "4:15"  "4:16"  "4:18"  "4:19" 
## [1073] "4:20"  "4:21"  "4:22"  "4:24"  "4:25"  "4:27"  "4:28"  "4:30" 
## [1081] "4:33"  "4:34"  "4:35"  "4:36"  "4:37"  "4:38"  "4:40"  "4:41" 
## [1089] "4:42"  "4:43"  "4:44"  "4:45"  "4:46"  "4:47"  "4:49"  "4:50" 
## [1097] "4:51"  "4:52"  "4:54"  "4:55"  "4:56"  "4:58"  "5:00"  "5:02" 
## [1105] "5:03"  "5:04"  "5:05"  "5:06"  "5:07"  "5:08"  "5:09"  "5:10" 
## [1113] "5:11"  "5:12"  "5:13"  "5:14"  "5:15"  "5:16"  "5:17"  "5:18" 
## [1121] "5:19"  "5:20"  "5:21"  "5:24"  "5:25"  "5:26"  "5:28"  "5:29" 
## [1129] "5:30"  "5:31"  "5:32"  "5:33"  "5:34"  "5:35"  "5:36"  "5:37" 
## [1137] "5:38"  "5:39"  "5:40"  "5:41"  "5:42"  "5:43"  "5:44"  "5:45" 
## [1145] "5:46"  "5:47"  "5:48"  "5:50"  "5:51"  "5:52"  "5:53"  "5:54" 
## [1153] "5:55"  "5:56"  "5:58"  "5:59"  "6:00"  "6:01"  "6:02"  "6:03" 
## [1161] "6:04"  "6:05"  "6:06"  "6:07"  "6:08"  "6:09"  "6:10"  "6:11" 
## [1169] "6:12"  "6:13"  "6:14"  "6:15"  "6:16"  "6:17"  "6:18"  "6:19" 
## [1177] "6:20"  "6:21"  "6:22"  "6:23"  "6:24"  "6:25"  "6:26"  "6:27" 
## [1185] "6:28"  "6:29"  "6:30"  "6:31"  "6:32"  "6:33"  "6:34"  "6:35" 
## [1193] "6:36"  "6:37"  "6:38"  "6:39"  "6:40"  "6:41"  "6:42"  "6:43" 
## [1201] "6:44"  "6:45"  "6:46"  "6:47"  "6:48"  "6:49"  "6:50"  "6:51" 
## [1209] "6:52"  "6:53"  "6:54"  "6:55"  "6:56"  "6:57"  "6:58"  "6:59" 
## [1217] "7:00"  "7:02"  "7:03"  "7:04"  "7:05"  "7:06"  "7:07"  "7:08" 
## [1225] "7:09"  "7:10"  "7:11"  "7:12"  "7:13"  "7:15"  "7:16"  "7:17" 
## [1233] "7:18"  "7:19"  "7:20"  "7:21"  "7:22"  "7:23"  "7:24"  "7:25" 
## [1241] "7:26"  "7:27"  "7:28"  "7:29"  "7:30"  "7:31"  "7:32"  "7:33" 
## [1249] "7:34"  "7:35"  "7:36"  "7:37"  "7:38"  "7:39"  "7:40"  "7:41" 
## [1257] "7:42"  "7:43"  "7:44"  "7:45"  "7:46"  "7:47"  "7:48"  "7:49" 
## [1265] "7:50"  "7:51"  "7:52"  "7:53"  "7:54"  "7:55"  "7:56"  "7:57" 
## [1273] "7:58"  "7:59"  "8:00"  "8:01"  "8:02"  "8:03"  "8:04"  "8:05" 
## [1281] "8:06"  "8:07"  "8:08"  "8:09"  "8:10"  "8:11"  "8:12"  "8:13" 
## [1289] "8:14"  "8:15"  "8:16"  "8:17"  "8:18"  "8:19"  "8:20"  "8:21" 
## [1297] "8:22"  "8:23"  "8:24"  "8:25"  "8:26"  "8:27"  "8:28"  "8:29" 
## [1305] "8:30"  "8:31"  "8:32"  "8:33"  "8:34"  "8:35"  "8:36"  "8:37" 
## [1313] "8:38"  "8:40"  "8:41"  "8:42"  "8:43"  "8:44"  "8:45"  "8:46" 
## [1321] "8:47"  "8:48"  "8:49"  "8:50"  "8:51"  "8:52"  "8:53"  "8:54" 
## [1329] "8:55"  "8:56"  "8:57"  "8:58"  "8:59"  "9:00"  "9:01"  "9:02" 
## [1337] "9:03"  "9:04"  "9:05"  "9:06"  "9:07"  "9:08"  "9:09"  "9:10" 
## [1345] "9:11"  "9:12"  "9:13"  "9:14"  "9:15"  "9:16"  "9:17"  "9:18" 
## [1353] "9:19"  "9:20"  "9:21"  "9:22"  "9:23"  "9:24"  "9:25"  "9:26" 
## [1361] "9:27"  "9:28"  "9:29"  "9:30"  "9:31"  "9:32"  "9:33"  "9:34" 
## [1369] "9:35"  "9:36"  "9:37"  "9:38"  "9:39"  "9:40"  "9:41"  "9:42" 
## [1377] "9:43"  "9:44"  "9:45"  "9:46"  "9:47"  "9:48"  "9:49"  "9:50" 
## [1385] "9:51"  "9:52"  "9:53"  "9:54"  "9:55"  "9:56"  "9:57"  "9:58" 
## [1393] "9:59"
# Convert the remaining columns to factors and list their levels ----
# Three column names contain a line break; they are addressed with an explicit
# "\n" in a string rather than a back-quoted name spanning two source lines.
RoadFatalitiesfebruary2018[["Crash Type"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Crash Type"]])
levels(RoadFatalitiesfebruary2018[["Crash Type"]])
## [1] "Multiple vehicle" "Pedestrian"       "Single vehicle"
RoadFatalitiesfebruary2018[["Bus \nInvolvement"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Bus \nInvolvement"]])
levels(RoadFatalitiesfebruary2018[["Bus \nInvolvement"]])
## [1] "-9"  "No"  "Yes"
RoadFatalitiesfebruary2018[["Rigid Truck \nInvolvement"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Rigid Truck \nInvolvement"]])
levels(RoadFatalitiesfebruary2018[["Rigid Truck \nInvolvement"]])
## [1] "-9"  "No"  "Yes"
RoadFatalitiesfebruary2018[["Articulated Truck \nInvolvement"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Articulated Truck \nInvolvement"]])
levels(RoadFatalitiesfebruary2018[["Articulated Truck \nInvolvement"]])
## [1] "-9"  "No"  "Yes"
# Speed Limit and Age were read as integers; once they are factors, their
# internal codes no longer equal the recorded values, so convert back with
# as.integer(as.character(x)) before doing arithmetic or building matrices.
# (Speed Limit is converted once; repeating as.factor() on a factor is a no-op.)
RoadFatalitiesfebruary2018[["Speed Limit"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Speed Limit"]])
levels(RoadFatalitiesfebruary2018[["Speed Limit"]])
##  [1] "-9"  "8"   "10"  "20"  "25"  "30"  "40"  "45"  "50"  "60"  "70" 
## [12] "75"  "80"  "90"  "100" "110" "130" "888" "900"
RoadFatalitiesfebruary2018[["Road User"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Road User"]])
levels(RoadFatalitiesfebruary2018[["Road User"]])
## [1] "9"                                      
## [2] "Bicyclist (includes pillion passengers)"
## [3] "Driver"                                 
## [4] "Motorcycle pillion passenger"           
## [5] "Motorcycle rider"                       
## [6] "Other"                                  
## [7] "Passenger"                              
## [8] "Pedestrian"
RoadFatalitiesfebruary2018[["Gender"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Gender"]])
levels(RoadFatalitiesfebruary2018[["Gender"]])
## [1] "-9"          "Female"      "Male"        "Unspecified"
RoadFatalitiesfebruary2018[["Age"]] <-
  as.factor(RoadFatalitiesfebruary2018[["Age"]])
levels(RoadFatalitiesfebruary2018[["Age"]])
##   [1] "-9"  "0"   "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"  
##  [12] "10"  "11"  "12"  "13"  "14"  "15"  "16"  "17"  "18"  "19"  "20" 
##  [23] "21"  "22"  "23"  "24"  "25"  "26"  "27"  "28"  "29"  "30"  "31" 
##  [34] "32"  "33"  "34"  "35"  "36"  "37"  "38"  "39"  "40"  "41"  "42" 
##  [45] "43"  "44"  "45"  "46"  "47"  "48"  "49"  "50"  "51"  "52"  "53" 
##  [56] "54"  "55"  "56"  "57"  "58"  "59"  "60"  "61"  "62"  "63"  "64" 
##  [67] "65"  "66"  "67"  "68"  "69"  "70"  "71"  "72"  "73"  "74"  "75" 
##  [78] "76"  "77"  "78"  "79"  "80"  "81"  "82"  "83"  "84"  "85"  "86" 
##  [89] "87"  "88"  "89"  "90"  "91"  "92"  "93"  "94"  "95"  "96"  "97" 
## [100] "98"  "99"  "100" "101"
names(RoadFatalitiesfebruary2018)
##  [1] "Crash ID"                        "State"                          
##  [3] "Month"                           "Year"                           
##  [5] "Dayweek"                         "Time"                           
##  [7] "Crash Type"                      "Bus \nInvolvement"              
##  [9] "Rigid Truck \nInvolvement"       "Articulated Truck \nInvolvement"
## [11] "Speed Limit"                     "Road User"                      
## [13] "Gender"                          "Age"
# Rename the "Crash ID" column to "Accident_ID" ----

is_crash_id <- names(RoadFatalitiesfebruary2018) == "Crash ID"
names(RoadFatalitiesfebruary2018)[is_crash_id] <- "Accident_ID"
colnames(RoadFatalitiesfebruary2018)
##  [1] "Accident_ID"                     "State"                          
##  [3] "Month"                           "Year"                           
##  [5] "Dayweek"                         "Time"                           
##  [7] "Crash Type"                      "Bus \nInvolvement"              
##  [9] "Rigid Truck \nInvolvement"       "Articulated Truck \nInvolvement"
## [11] "Speed Limit"                     "Road User"                      
## [13] "Gender"                          "Age"
# Combine several columns into a list and check its structure ----

# The ID column is called Accident_ID at this point ("Crash ID" was renamed),
# so it must be referenced by its new name; the old name yields NULL.
# NOTE(review): `Crash Type` appears twice in the list - presumably a
# copy-paste slip; kept so the list keeps its nine elements. Confirm intent.
vect_list <- list(
  RoadFatalitiesfebruary2018$Accident_ID,
  RoadFatalitiesfebruary2018$State,
  RoadFatalitiesfebruary2018$Month,
  RoadFatalitiesfebruary2018$Year,
  RoadFatalitiesfebruary2018$Dayweek,
  RoadFatalitiesfebruary2018$Time,
  RoadFatalitiesfebruary2018$`Crash Type`,
  RoadFatalitiesfebruary2018$`Crash Type`,
  RoadFatalitiesfebruary2018[["Bus \nInvolvement"]]
)
str(vect_list)
## List of 9
##  $ : chr [1:48782] "201830006" "201850008" "201810056" "201820024" ...
##  $ : Factor w/ 8 levels "ACT","NSW","NT",..: 4 8 2 7 4 2 8 6 2 2 ...
##  $ : Factor w/ 12 levels "April","August",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ : Factor w/ 30 levels "1989","1990",..: 30 30 30 30 30 30 30 30 30 30 ...
##  $ : Factor w/ 7 levels "Friday","Monday",..: 6 6 2 2 2 4 4 4 3 3 ...
##  $ : Factor w/ 1393 levels "-9","0:00","0:01",..: 654 505 257 882 296 315 257 796 60 1257 ...
##  $ : Factor w/ 3 levels "Multiple vehicle",..: 3 3 2 2 1 3 3 1 3 1 ...
##  $ : Factor w/ 3 levels "Multiple vehicle",..: 3 3 2 2 1 3 3 1 3 1 ...
##  $ : Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...

Subsetting I

# Subset the first 10 rows (all 14 columns) for the matrix conversion ----

df <- RoadFatalitiesfebruary2018[1:10, 1:14]
df
## # A tibble: 10 x 14
##    Accident_ID State Month    Year  Dayweek  Time  `Crash Type`    
##    <chr>       <fct> <fct>    <fct> <fct>    <fct> <fct>           
##  1 201830006   QLD   February 2018  Tuesday  19:00 Single vehicle  
##  2 201850008   WA    February 2018  Tuesday  16:30 Single vehicle  
##  3 201810056   NSW   February 2018  Monday   12:20 Pedestrian      
##  4 201820024   VIC   February 2018  Monday   21:55 Pedestrian      
##  5 201830032   QLD   February 2018  Monday   13:00 Multiple vehicle
##  6 201810012   NSW   February 2018  Sunday   13:20 Single vehicle  
##  7 201850016   WA    February 2018  Sunday   12:20 Single vehicle  
##  8 201860008   TAS   February 2018  Sunday   20:28 Multiple vehicle
##  9 201810034   NSW   February 2018  Saturday 1:00  Single vehicle  
## 10 201810040   NSW   February 2018  Saturday 7:42  Multiple vehicle
## # ... with 7 more variables: `Bus \nInvolvement` <fct>, `Rigid Truck
## #   \nInvolvement` <fct>, `Articulated Truck \nInvolvement` <fct>, `Speed
## #   Limit` <fct>, `Road User` <fct>, Gender <fct>, Age <fct>
# Convert the 10-row subset to a matrix ----

# as.matrix() coerces the data frame cell by cell into a single-type matrix
# (character here, because the columns are character/factor). matrix(df)
# would instead wrap the 14 columns as the elements of a 14 x 1 list-matrix.
df_matrix <- as.matrix(df)
dim(df_matrix)
## [1] 10 14
typeof(df_matrix)
## [1] "character"
## Attributes (names, row names, class) of the 10-row subset `df` ##
# NOTE(review): this inspects the tibble `df`, not `df_matrix`; use
# attributes(df_matrix) if the matrix's structure is what is wanted.

attributes(df)
## $names
##  [1] "Accident_ID"                     "State"                          
##  [3] "Month"                           "Year"                           
##  [5] "Dayweek"                         "Time"                           
##  [7] "Crash Type"                      "Bus \nInvolvement"              
##  [9] "Rigid Truck \nInvolvement"       "Articulated Truck \nInvolvement"
## [11] "Speed Limit"                     "Road User"                      
## [13] "Gender"                          "Age"                            
## 
## $row.names
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $class
## [1] "tbl_df"     "tbl"        "data.frame"
# Build matrices from Age and Speed Limit with cbind() and rbind() ----

# Both columns are factors at this point, and cbind()/rbind() would store
# their internal level codes (e.g. 19 for age 17). Going through
# as.character() recovers the recorded values; -9 is the data's own
# missing-value code and is kept as is.
age_values <- as.integer(as.character(df$Age))
speed_limits <- as.integer(as.character(df$`Speed Limit`))

# deparse.level = 0 keeps the result free of dimnames.
mcbd <- cbind(age_values, speed_limits, deparse.level = 0)
mcbd
##       [,1] [,2]
##  [1,]   17   80
##  [2,]   26   70
##  [3,]   85   50
##  [4,]   25   -9
##  [5,]   62   60
##  [6,]   63  100
##  [7,]   94   60
##  [8,]   36   50
##  [9,]   34  100
## [10,]   17  100
# Checking attributes ----

attributes(mcbd)
## $dim
## [1] 10  2
# Same values bound as rows ----

mrbd <- rbind(age_values, speed_limits, deparse.level = 0)
mrbd
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]   17   26   85   25   62   63   94   36   34    17
## [2,]   80   70   50   -9   60  100   60   50  100   100
# Checking attributes ----

attributes(mrbd)
## $dim
## [1]  2 10
# Copy the tibble into a base data frame and inspect its structure ----
# data.frame() makes the column names syntactic, which is why names with
# spaces or line breaks show up dotted in the str() listing.

data_frame1 <- data.frame(
  RoadFatalitiesfebruary2018,
  stringsAsFactors = FALSE
)

str(data_frame1)
## 'data.frame':    48782 obs. of  14 variables:
##  $ Accident_ID                   : chr  "201830006" "201850008" "201810056" "201820024" ...
##  $ State                         : Factor w/ 8 levels "ACT","NSW","NT",..: 4 8 2 7 4 2 8 6 2 2 ...
##  $ Month                         : Factor w/ 12 levels "April","August",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Year                          : Factor w/ 30 levels "1989","1990",..: 30 30 30 30 30 30 30 30 30 30 ...
##  $ Dayweek                       : Factor w/ 7 levels "Friday","Monday",..: 6 6 2 2 2 4 4 4 3 3 ...
##  $ Time                          : Factor w/ 1393 levels "-9","0:00","0:01",..: 654 505 257 882 296 315 257 796 60 1257 ...
##  $ Crash.Type                    : Factor w/ 3 levels "Multiple vehicle",..: 3 3 2 2 1 3 3 1 3 1 ...
##  $ Bus..Involvement              : Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
##  $ Rigid.Truck..Involvement      : Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
##  $ Articulated.Truck..Involvement: Factor w/ 3 levels "-9","No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
##  $ Speed.Limit                   : Factor w/ 19 levels "-9","8","10",..: 13 11 9 1 10 15 10 9 15 15 ...
##  $ Road.User                     : Factor w/ 8 levels "9","Bicyclist (includes pillion passengers)",..: 3 3 8 8 5 3 3 5 3 3 ...
##  $ Gender                        : Factor w/ 4 levels "-9","Female",..: 3 3 3 3 3 3 2 3 3 3 ...
##  $ Age                           : Factor w/ 103 levels "-9","0","1","2",..: 19 28 87 27 64 65 96 38 36 19 ...

Subsetting II

# Subset the data frame to its first and last variables ----

# ncol() picks the last column whatever the width of the data, instead of
# hard-coding position 14.
RoadFatalitiesfebruary2018[, c(1, ncol(RoadFatalitiesfebruary2018))]
## # A tibble: 48,782 x 2
##    Accident_ID Age  
##    <chr>       <fct>
##  1 201830006   17   
##  2 201850008   26   
##  3 201810056   85   
##  4 201820024   25   
##  5 201830032   62   
##  6 201810012   63   
##  7 201850016   94   
##  8 201860008   36   
##  9 201810034   34   
## 10 201810040   17   
## # ... with 48,772 more rows
# Save the data set as an R object file (.RData) ----

# .RData files are written with save() and restored with load(). saveRDS()
# writes a different single-object format that load() cannot read, so it
# should not be paired with the .RData extension.
save(RoadFatalitiesfebruary2018, file = "my_data.RData")