library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
## Warning: package 'readr' was built under R version 3.3.2
library(tidyr)
veh<- read.csv("C:/Users/user/Desktop/UFM/4to semestre/Data2/proyectos/intro_ds2/trafico/VEH_AUX.csv", stringsAsFactors=FALSE)
dim(veh)
## [1] 48923 16
names(veh)
## [1] "YEAR" "ST_CASE" "VEH_NO" "A_BODY" "A_IMP1" "A_IMP2"
## [7] "A_VROLL" "A_LIC_S" "A_LIC_C" "A_CDL_S" "A_MC_L_S" "A_SPVEH"
## [13] "A_SBUS" "A_MOD_YR" "A_DRDIS" "A_DRDRO"
summary(veh)
## YEAR ST_CASE VEH_NO A_BODY
## Min. :2015 Min. : 10001 Min. : 1.000 Min. :1.000
## 1st Qu.:2015 1st Qu.:122161 1st Qu.: 1.000 1st Qu.:1.000
## Median :2015 Median :270341 Median : 1.000 Median :2.000
## Mean :2015 Mean :276903 Mean : 1.504 Mean :2.928
## 3rd Qu.:2015 3rd Qu.:420660 3rd Qu.: 2.000 3rd Qu.:4.000
## Max. :2015 Max. :560130 Max. :58.000 Max. :9.000
## A_IMP1 A_IMP2 A_VROLL A_LIC_S
## Min. :1.000 Min. :0 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:0 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :0 Median :2.000 Median :1.000
## Mean :2.749 Mean :0 Mean :1.837 Mean :1.482
## 3rd Qu.:3.000 3rd Qu.:0 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :7.000 Max. :0 Max. :2.000 Max. :4.000
## A_LIC_C A_CDL_S A_MC_L_S A_SPVEH
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:4.000 1st Qu.:2.00
## Median :1.000 Median :2.000 Median :4.000 Median :2.00
## Mean :1.206 Mean :1.911 Mean :3.719 Mean :1.82
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:2.00
## Max. :3.000 Max. :3.000 Max. :4.000 Max. :2.00
## A_SBUS A_MOD_YR A_DRDIS A_DRDRO
## Min. :1.000 Min. :1923 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2001 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :2005 Median :2.000 Median :2.000
## Mean :2.996 Mean :2193 Mean :1.968 Mean :1.985
## 3rd Qu.:3.000 3rd Qu.:2010 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :3.000 Max. :9999 Max. :2.000 Max. :2.000
per<- read.csv("C:/Users/user/Desktop/UFM/4to semestre/Data2/proyectos/intro_ds2/trafico/PER_AUX.csv", stringsAsFactors=FALSE)
dim(per)
## [1] 80587 22
names(per)
## [1] "A_AGE1" "A_AGE2" "A_AGE3" "A_AGE4" "A_AGE5" "A_AGE6"
## [7] "A_AGE7" "A_AGE8" "A_AGE9" "ST_CASE" "VEH_NO" "PER_NO"
## [13] "YEAR" "A_PTYPE" "A_REST" "A_ALCTES" "A_HISP" "A_RCAT"
## [19] "A_HRACE" "A_EJECT" "A_PERINJ" "A_LOC"
summary(per)
## A_AGE1 A_AGE2 A_AGE3 A_AGE4
## Min. :1.00 Min. :1.000 Min. : 1.000 Min. :1.00
## 1st Qu.:2.00 1st Qu.:3.000 1st Qu.: 6.000 1st Qu.:3.00
## Median :3.00 Median :5.000 Median : 8.000 Median :5.00
## Mean :2.91 Mean :3.997 Mean : 7.754 Mean :4.54
## 3rd Qu.:4.00 3rd Qu.:5.000 3rd Qu.:10.000 3rd Qu.:6.00
## Max. :5.00 Max. :6.000 Max. :13.000 Max. :8.00
## A_AGE5 A_AGE6 A_AGE7 A_AGE8
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. :1.000
## 1st Qu.: 3.000 1st Qu.: 3.000 1st Qu.: 5.000 1st Qu.:2.000
## Median : 5.000 Median : 5.000 Median : 7.000 Median :3.000
## Mean : 4.876 Mean : 4.885 Mean : 6.802 Mean :3.462
## 3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.: 9.000 3rd Qu.:5.000
## Max. :10.000 Max. :10.000 Max. :12.000 Max. :7.000
## A_AGE9 ST_CASE VEH_NO PER_NO
## Min. :1.000 Min. : 10001 Min. : 0.000 Min. : 1.000
## 1st Qu.:2.000 1st Qu.:121977 1st Qu.: 1.000 1st Qu.: 1.000
## Median :2.000 Median :270282 Median : 1.000 Median : 1.000
## Mean :1.937 Mean :275607 Mean : 1.389 Mean : 1.629
## 3rd Qu.:2.000 3rd Qu.:420645 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :3.000 Max. :560130 Max. :58.000 Max. :51.000
## YEAR A_PTYPE A_REST A_ALCTES
## Min. :2015 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:2015 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:2.000
## Median :2015 Median :1.00 Median :1.000 Median :3.000
## Mean :2015 Mean :1.51 Mean :1.575 Mean :2.566
## 3rd Qu.:2015 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :2015 Max. :5.00 Max. :3.000 Max. :5.000
## A_HISP A_RCAT A_HRACE A_EJECT
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000
## Median :0.0000 Median :0.0000 Median :0.000 Median :1.000
## Mean :0.6326 Mean :0.9696 Mean :1.426 Mean :1.101
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :3.0000 Max. :8.0000 Max. :9.000 Max. :3.000
## A_PERINJ A_LOC
## Min. :1.000 Min. :1.00
## 1st Qu.:1.000 1st Qu.:1.00
## Median :6.000 Median :1.00
## Mean :3.823 Mean :1.17
## 3rd Qu.:6.000 3rd Qu.:1.00
## Max. :6.000 Max. :5.00
acc<- read.csv("C:/Users/user/Desktop/UFM/4to semestre/Data2/proyectos/intro_ds2/trafico/ACC_AUX.csv", stringsAsFactors=FALSE)
dim(acc)
## [1] 32166 38
names(acc)
## [1] "YEAR" "STATE" "ST_CASE" "COUNTY" "FATALS"
## [6] "A_CRAINJ" "A_REGION" "A_RU" "A_INTER" "A_RELRD"
## [11] "A_INTSEC" "A_ROADFC" "A_JUNC" "A_MANCOL" "A_TOD"
## [16] "A_DOW" "A_CT" "A_LT" "A_MC" "A_SPCRA"
## [21] "A_PED" "A_PED_F" "A_PEDAL" "A_PEDAL_F" "A_ROLL"
## [26] "A_POLPUR" "A_POSBAC" "A_D15_19" "A_D16_19" "A_D15_20"
## [31] "A_D16_20" "A_D65PLS" "A_D21_24" "A_D16_24" "A_RD"
## [36] "A_HR" "A_DIST" "A_DROWSY"
summary(acc)
## YEAR STATE ST_CASE COUNTY
## Min. :2015 Min. : 1.0 Min. : 10001 Min. : 1.00
## 1st Qu.:2015 1st Qu.:12.0 1st Qu.:122183 1st Qu.: 31.00
## Median :2015 Median :28.0 Median :280003 Median : 71.00
## Mean :2015 Mean :27.6 Mean :276730 Mean : 91.23
## 3rd Qu.:2015 3rd Qu.:42.0 3rd Qu.:420566 3rd Qu.:115.00
## Max. :2015 Max. :56.0 Max. :560130 Max. :999.00
## FATALS A_CRAINJ A_REGION A_RU
## Min. : 1.000 Min. :1 Min. : 1.000 Min. :1.000
## 1st Qu.: 1.000 1st Qu.:1 1st Qu.: 4.000 1st Qu.:1.000
## Median : 1.000 Median :1 Median : 5.000 Median :2.000
## Mean : 1.091 Mean :1 Mean : 5.333 Mean :1.601
## 3rd Qu.: 1.000 3rd Qu.:1 3rd Qu.: 7.000 3rd Qu.:2.000
## Max. :10.000 Max. :1 Max. :10.000 Max. :3.000
## A_INTER A_RELRD A_INTSEC A_ROADFC
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:3.000
## Median :2.000 Median :1.000 Median :2.00 Median :4.000
## Mean :1.961 Mean :2.136 Mean :1.76 Mean :3.927
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:2.00 3rd Qu.:5.000
## Max. :3.000 Max. :6.000 Max. :3.00 Max. :7.000
## A_JUNC A_MANCOL A_TOD A_DOW
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :1.000 Median :2.000 Median :1.000
## Mean :1.835 Mean :1.948 Mean :1.526 Mean :1.412
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :7.000 Max. :3.000 Max. :3.000
## A_CT A_LT A_MC A_SPCRA
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.487 Mean :1.888 Mean :1.848 Mean :1.734
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :3.000 Max. :2.000 Max. :2.000 Max. :2.000
## A_PED A_PED_F A_PEDAL A_PEDAL_F
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.834 Mean :1.835 Mean :1.975 Mean :1.975
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## A_ROLL A_POLPUR A_POSBAC A_D15_19
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :3.000 Median :2.000
## Mean :1.758 Mean :1.991 Mean :2.308 Mean :1.904
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :3.000 Max. :2.000
## A_D16_19 A_D15_20 A_D16_20 A_D65PLS
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.907 Mean :1.871 Mean :1.874 Mean :1.812
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## A_D21_24 A_D16_24 A_RD A_HR
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :1.000 Median :2.000
## Mean :1.852 Mean :1.734 Mean :1.475 Mean :1.947
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## A_DIST A_DROWSY
## Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000
## Mean :1.901 Mean :1.977
## 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000
accident <- read_csv("C:/Users/user/Desktop/UFM/4to semestre/Data2/proyectos/intro_ds2/trafico/accident.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## TWAY_ID = col_character(),
## TWAY_ID2 = col_character(),
## LATITUDE = col_double(),
## LONGITUD = col_double(),
## RAIL = col_character()
## )
## See spec(...) for full column specifications.
person <- read_csv("C:/Users/user/Desktop/UFM/4to semestre/Data2/proyectos/intro_ds2/trafico/person.csv")
## Parsed with column specification:
## cols(
## .default = col_integer()
## )
## See spec(...) for full column specifications.
vehicle <- read_csv("C:/Users/user/Desktop/UFM/4to semestre/Data2/proyectos/intro_ds2/trafico/vehicle.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## VIN = col_character(),
## VIN_1 = col_character(),
## VIN_2 = col_character(),
## VIN_3 = col_character(),
## VIN_4 = col_character(),
## VIN_5 = col_character(),
## VIN_6 = col_character(),
## VIN_7 = col_character(),
## VIN_8 = col_character(),
## VIN_9 = col_character(),
## VIN_10 = col_character(),
## VIN_11 = col_character(),
## VIN_12 = col_character(),
## MCARR_I2 = col_character(),
## MCARR_ID = col_character()
## )
## See spec(...) for full column specifications.
names(vehicle)
## [1] "STATE" "ST_CASE" "VEH_NO" "VE_FORMS" "NUMOCCS" "DAY"
## [7] "MONTH" "HOUR" "MINUTE" "HARM_EV" "MAN_COLL" "UNITTYPE"
## [13] "HIT_RUN" "REG_STAT" "OWNER" "MAKE" "MODEL" "MAK_MOD"
## [19] "BODY_TYP" "MOD_YEAR" "VIN" "VIN_1" "VIN_2" "VIN_3"
## [25] "VIN_4" "VIN_5" "VIN_6" "VIN_7" "VIN_8" "VIN_9"
## [31] "VIN_10" "VIN_11" "VIN_12" "TOW_VEH" "J_KNIFE" "MCARR_I1"
## [37] "MCARR_I2" "MCARR_ID" "GVWR" "V_CONFIG" "CARGO_BT" "HAZ_INV"
## [43] "HAZ_PLAC" "HAZ_ID" "HAZ_CNO" "HAZ_REL" "BUS_USE" "SPEC_USE"
## [49] "EMER_USE" "TRAV_SP" "UNDERIDE" "ROLLOVER" "ROLINLOC" "IMPACT1"
## [55] "DEFORMED" "TOWED" "M_HARM" "VEH_SC1" "VEH_SC2" "FIRE_EXP"
## [61] "DR_PRES" "L_STATE" "DR_ZIP" "L_STATUS" "L_TYPE" "CDL_STAT"
## [67] "L_ENDORS" "L_COMPL" "L_RESTRI" "DR_HGT" "DR_WGT" "PREV_ACC"
## [73] "PREV_SUS" "PREV_DWI" "PREV_SPD" "PREV_OTH" "FIRST_MO" "FIRST_YR"
## [79] "LAST_MO" "LAST_YR" "SPEEDREL" "DR_SF1" "DR_SF2" "DR_SF3"
## [85] "DR_SF4" "VTRAFWAY" "VNUM_LAN" "VSPD_LIM" "VALIGN" "VPROFILE"
## [91] "VPAVETYP" "VSURCOND" "VTRAFCON" "VTCONT_F" "P_CRASH1" "P_CRASH2"
## [97] "P_CRASH3" "PCRASH4" "PCRASH5" "ACC_TYPE" "DEATHS" "DR_DRINK"
names(person)
## [1] "STATE" "ST_CASE" "VE_FORMS" "VEH_NO" "PER_NO"
## [6] "STR_VEH" "COUNTY" "DAY" "MONTH" "HOUR"
## [11] "MINUTE" "RUR_URB" "FUNC_SYS" "HARM_EV" "MAN_COLL"
## [16] "SCH_BUS" "MAKE" "MAK_MOD" "BODY_TYP" "MOD_YEAR"
## [21] "TOW_VEH" "SPEC_USE" "EMER_USE" "ROLLOVER" "IMPACT1"
## [26] "FIRE_EXP" "AGE" "SEX" "PER_TYP" "INJ_SEV"
## [31] "SEAT_POS" "REST_USE" "REST_MIS" "AIR_BAG" "EJECTION"
## [36] "EJ_PATH" "EXTRICAT" "DRINKING" "ALC_DET" "ALC_STATUS"
## [41] "ATST_TYP" "ALC_RES" "DRUGS" "DRUG_DET" "DSTATUS"
## [46] "DRUGTST1" "DRUGTST2" "DRUGTST3" "DRUGRES1" "DRUGRES2"
## [51] "DRUGRES3" "HOSPITAL" "DOA" "DEATH_DA" "DEATH_MO"
## [56] "DEATH_YR" "DEATH_HR" "DEATH_MN" "DEATH_TM" "LAG_HRS"
## [61] "LAG_MINS" "P_SF1" "P_SF2" "P_SF3" "WORK_INJ"
## [66] "HISPANIC" "RACE" "LOCATION"
names(accident)
## [1] "STATE" "ST_CASE" "VE_TOTAL" "VE_FORMS" "PVH_INVL"
## [6] "PEDS" "PERNOTMVIT" "PERMVIT" "PERSONS" "COUNTY"
## [11] "CITY" "DAY" "MONTH" "YEAR" "DAY_WEEK"
## [16] "HOUR" "MINUTE" "NHS" "RUR_URB" "FUNC_SYS"
## [21] "RD_OWNER" "ROUTE" "TWAY_ID" "TWAY_ID2" "MILEPT"
## [26] "LATITUDE" "LONGITUD" "SP_JUR" "HARM_EV" "MAN_COLL"
## [31] "RELJCT1" "RELJCT2" "TYP_INT" "WRK_ZONE" "REL_ROAD"
## [36] "LGT_COND" "WEATHER1" "WEATHER2" "WEATHER" "SCH_BUS"
## [41] "RAIL" "NOT_HOUR" "NOT_MIN" "ARR_HOUR" "ARR_MIN"
## [46] "HOSP_HR" "HOSP_MN" "CF1" "CF2" "CF3"
## [51] "FATALS" "DRUNK_DR"
Unir Datasets
data2 <- left_join(acc,veh,by = "ST_CASE")
data3 <- left_join(data2, per,"ST_CASE")
names(data3)
## [1] "YEAR.x" "STATE" "ST_CASE" "COUNTY" "FATALS"
## [6] "A_CRAINJ" "A_REGION" "A_RU" "A_INTER" "A_RELRD"
## [11] "A_INTSEC" "A_ROADFC" "A_JUNC" "A_MANCOL" "A_TOD"
## [16] "A_DOW" "A_CT" "A_LT" "A_MC" "A_SPCRA"
## [21] "A_PED" "A_PED_F" "A_PEDAL" "A_PEDAL_F" "A_ROLL"
## [26] "A_POLPUR" "A_POSBAC" "A_D15_19" "A_D16_19" "A_D15_20"
## [31] "A_D16_20" "A_D65PLS" "A_D21_24" "A_D16_24" "A_RD"
## [36] "A_HR" "A_DIST" "A_DROWSY" "YEAR.y" "VEH_NO.x"
## [41] "A_BODY" "A_IMP1" "A_IMP2" "A_VROLL" "A_LIC_S"
## [46] "A_LIC_C" "A_CDL_S" "A_MC_L_S" "A_SPVEH" "A_SBUS"
## [51] "A_MOD_YR" "A_DRDIS" "A_DRDRO" "A_AGE1" "A_AGE2"
## [56] "A_AGE3" "A_AGE4" "A_AGE5" "A_AGE6" "A_AGE7"
## [61] "A_AGE8" "A_AGE9" "VEH_NO.y" "PER_NO" "YEAR"
## [66] "A_PTYPE" "A_REST" "A_ALCTES" "A_HISP" "A_RCAT"
## [71] "A_HRACE" "A_EJECT" "A_PERINJ" "A_LOC"
Reporte 1
a <- filter(acc, YEAR == 2015 )
b <- select(a, FATALS, STATE)
d <- count(b, STATE)
c <- group_by(d, STATE)
Reporte 2
e<- filter(data3, YEAR == 2015)
f <- select(e, A_DOW, STATE, PER_NO)
count(f, PER_NO, A_DOW)
## Source: local data frame [73 x 3]
## Groups: PER_NO [?]
##
## PER_NO A_DOW n
## <int> <int> <int>
## 1 1 1 66972
## 2 1 2 37595
## 3 1 3 86
## 4 2 1 17130
## 5 2 2 13148
## 6 2 3 16
## 7 3 1 5627
## 8 3 2 4931
## 9 3 3 4
## 10 4 1 2549
## # ... with 63 more rows
Reporte 3
g <- filter(data3, YEAR == 2015)
h <- select(g, A_DOW, STATE)
Preguntas
1. ¿Cuántas fatalidades fueron causadas por alta velocidad (speeding)?
m<- select(vehicle, SPEEDREL, DEATHS)
n <- count(m, SPEEDREL)
2. Indicar por raza la cantidad de fatalidades en el 2015
i <- filter(person, RACE, DEATH_HR)
count(i, RACE, DEATH_HR)
## Source: local data frame [366 x 3]
## Groups: RACE [?]
##
## RACE DEATH_HR n
## <int> <int> <int>
## 1 1 1 899
## 2 1 2 840
## 3 1 3 675
## 4 1 4 581
## 5 1 5 608
## 6 1 6 804
## 7 1 7 807
## 8 1 8 738
## 9 1 9 703
## 10 1 10 751
## # ... with 356 more rows