DATA 607 - Data Acquisition and Management - Week 1 Hands-on Lab
Data Title: Pittsburgh Bridges Data Set
Source:
Creators:
Yoram Reich & Steven J. Fenves
Department of Civil Engineering and Engineering Design Research Center
Carnegie Mellon University
Pittsburgh, PA 15213
Donor:
Yoram Reich (yoram.reich ‘@’ cs.cmu.edu)
Load necessary R Libraries
library(plyr)
library(htmlTable)
Load bridges data into a Data Frame
# Read the header-less bridges file directly from the UCI repository.
# The file marks missing values with "?", so those are mapped to NA on load.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0 the default is
# FALSE, which would leave the categorical columns as character vectors and
# summary() would no longer report per-level counts for them.
bridges_df <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1",
  header = FALSE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
# Preview the first rows to confirm the load worked.
head(bridges_df)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
## 1 E1 M 3 1818 HIGHWAY NA 2 N THROUGH WOOD SHORT S WOOD
## 2 E2 A 25 1819 HIGHWAY 1037 2 N THROUGH WOOD SHORT S WOOD
## 3 E3 A 39 1829 AQUEDUCT NA 1 N THROUGH WOOD <NA> S WOOD
## 4 E5 A 29 1837 HIGHWAY 1000 2 N THROUGH WOOD SHORT S WOOD
## 5 E6 M 23 1838 HIGHWAY NA 2 N THROUGH WOOD <NA> S WOOD
## 6 E7 A 27 1840 HIGHWAY 990 2 N THROUGH WOOD MEDIUM S WOOD
Rename columns in the data frame
# Replace the default V1..V13 column names with descriptive ones.
# Assignment is positional, so the order here must match the file's columns.
colnames(bridges_df) <- c(
  "Identifier", "River", "Location", "Erected", "Purpose", "Length", "Lanes",
  "Clear-G", "T-Or-D", "Material", "Span", "Rel-L", "Type"
)
# Render the first six rows as an HTML table.
htmlTable(head(bridges_df, n = 6L))
|
Identifier
|
River
|
Location
|
Erected
|
Purpose
|
Length
|
Lanes
|
Clear-G
|
T-Or-D
|
Material
|
Span
|
Rel-L
|
Type
|
1
|
E1
|
M
|
3
|
1818
|
HIGHWAY
|
|
2
|
N
|
THROUGH
|
WOOD
|
SHORT
|
S
|
WOOD
|
2
|
E2
|
A
|
25
|
1819
|
HIGHWAY
|
1037
|
2
|
N
|
THROUGH
|
WOOD
|
SHORT
|
S
|
WOOD
|
3
|
E3
|
A
|
39
|
1829
|
AQUEDUCT
|
|
1
|
N
|
THROUGH
|
WOOD
|
|
S
|
WOOD
|
4
|
E5
|
A
|
29
|
1837
|
HIGHWAY
|
1000
|
2
|
N
|
THROUGH
|
WOOD
|
SHORT
|
S
|
WOOD
|
5
|
E6
|
M
|
23
|
1838
|
HIGHWAY
|
|
2
|
N
|
THROUGH
|
WOOD
|
|
S
|
WOOD
|
6
|
E7
|
A
|
27
|
1840
|
HIGHWAY
|
990
|
2
|
N
|
THROUGH
|
WOOD
|
MEDIUM
|
S
|
WOOD
|
Summarize the bridges data
# Print a per-column summary of the bridges data frame. In the rendered output
# below, numeric columns show quartiles and the mean, categorical columns show
# counts per level, and columns with missing values also show an NA count.
summary(bridges_df)
## Identifier River Location Erected Purpose
## E1 : 1 A:49 Min. : 1.00 Min. :1818 AQUEDUCT: 4
## E10 : 1 M:41 1st Qu.:15.50 1st Qu.:1884 HIGHWAY :71
## E100 : 1 O:15 Median :27.00 Median :1903 RR :32
## E101 : 1 Y: 3 Mean :25.98 Mean :1905 WALK : 1
## E102 : 1 3rd Qu.:37.50 3rd Qu.:1928
## E103 : 1 Max. :52.00 Max. :1986
## (Other):102 NA's :1
## Length Lanes Clear-G T-Or-D Material
## Min. : 804 Min. :1.00 G :80 DECK :15 IRON :11
## 1st Qu.:1000 1st Qu.:2.00 N :26 THROUGH:87 STEEL:79
## Median :1300 Median :2.00 NA's: 2 NA's : 6 WOOD :16
## Mean :1567 Mean :2.63 NA's : 2
## 3rd Qu.:2000 3rd Qu.:4.00
## Max. :4558 Max. :6.00
## NA's :27 NA's :16
## Span Rel-L Type
## LONG :30 F :58 SIMPLE-T:44
## MEDIUM:53 S :30 WOOD :16
## SHORT : 9 S-F :15 ARCH :13
## NA's :16 NA's: 5 CANTILEV:11
## SUSPEN :11
## (Other) :11
## NA's : 2
Distribution of bridges based on the year of erection/build
# Histogram of construction years for all bridges.
# An explicit title and axis label are given so the plot does not fall back to
# the default "Histogram of bridges_df$Erected" text built from the expression.
hist(
  bridges_df$Erected,
  main = "Bridges by Year Erected",
  xlab = "Year erected"
)

A pie chart for bridge purposes
# Count bridges per purpose and convert the counts to proportions.
Purposes <- table(bridges_df$Purpose)
PurposeRatios <- prop.table(Purposes)

# Look up display labels by category code rather than by position, so a slice
# can never be mislabeled if the set or order of categories changes.
purpose_display <- c(
  AQUEDUCT = "Aqueduct",
  HIGHWAY = "Highway",
  RR = "Railroad",
  WALK = "Walking"
)
PurposeLabels <- unname(purpose_display[names(Purposes)])

# Any category without a display label falls back to its raw code.
unlabeled <- is.na(PurposeLabels)
PurposeLabels[unlabeled] <- names(Purposes)[unlabeled]

pie(PurposeRatios, labels = PurposeLabels, main = "Purpose of Bridges Built")

Railroad bridges - a histogram of the dates that they were installed:
# Keep only the railroad bridges (Purpose code "RR"). subset() evaluates the
# condition inside the data frame, so the column is referenced by name.
RRBridges <- subset(bridges_df, Purpose == "RR")

# Histogram of construction years for railroad bridges, with an explicit title
# and axis label instead of the default text built from the R expression.
hist(
  RRBridges$Erected,
  main = "Railroad Bridges by Year Erected",
  xlab = "Year erected"
)

Histogram of highway bridge dates of installation.
# Keep only the highway bridges (Purpose code "HIGHWAY"). subset() evaluates
# the condition inside the data frame, so the column is referenced by name.
HighwayBridges <- subset(bridges_df, Purpose == "HIGHWAY")

# Histogram of construction years for highway bridges, with an explicit title
# and axis label instead of the default text built from the R expression.
hist(
  HighwayBridges$Erected,
  main = "Highway Bridges by Year Erected",
  xlab = "Year erected"
)
