DATA 607 - Data Acquisition and Management - Week1 Hands on Lab

Data Title: Pittsburgh Bridges Data Set

Source:

Creators:

Yoram Reich & Steven J. Fenves

Department of Civil Engineering and Engineering Design Research Center

Carnegie Mellon University

Pittsburgh, PA 15213

Donor:

Yoram Reich (yoram.reich ‘@’ cs.cmu.edu)

Load necessary R Libraries

library(plyr)
library(htmlTable)

Load bridges data into a Data Frame

# Read the UCI Pittsburgh Bridges file (version 1). The file has no header
# row, so the columns arrive as V1..V13, and "?" marks a missing value.
# stringsAsFactors is set explicitly: R >= 4.0.0 defaults it to FALSE, which
# would leave the categorical columns as character and make summary() report
# only length/class/mode instead of per-level counts.
bridges_df <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1",
  header = FALSE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
# Preview the first six rows to confirm the file was parsed as expected.
head(bridges_df)
##   V1 V2 V3   V4       V5   V6 V7 V8      V9  V10    V11 V12  V13
## 1 E1  M  3 1818  HIGHWAY   NA  2  N THROUGH WOOD  SHORT   S WOOD
## 2 E2  A 25 1819  HIGHWAY 1037  2  N THROUGH WOOD  SHORT   S WOOD
## 3 E3  A 39 1829 AQUEDUCT   NA  1  N THROUGH WOOD   <NA>   S WOOD
## 4 E5  A 29 1837  HIGHWAY 1000  2  N THROUGH WOOD  SHORT   S WOOD
## 5 E6  M 23 1838  HIGHWAY   NA  2  N THROUGH WOOD   <NA>   S WOOD
## 6 E7  A 27 1840  HIGHWAY  990  2  N THROUGH WOOD MEDIUM   S WOOD

Rename columns in the data frame

# Give the 13 columns descriptive names in place of the default V1..V13.
# The labels are applied strictly by position (1st label -> V1, ..., 13th
# label -> V13), so their order must follow the column order of the file.
names(bridges_df) <- c(
  "Identifier", "River", "Location", "Erected", "Purpose", "Length", "Lanes",
  "Clear-G", "T-Or-D", "Material", "Span", "Rel-L", "Type"
)
# Render the first six rows as an HTML table.
htmlTable(head(bridges_df))
Identifier River Location Erected Purpose Length Lanes Clear-G T-Or-D Material Span Rel-L Type
1 E1 M 3 1818 HIGHWAY 2 N THROUGH WOOD SHORT S WOOD
2 E2 A 25 1819 HIGHWAY 1037 2 N THROUGH WOOD SHORT S WOOD
3 E3 A 39 1829 AQUEDUCT 1 N THROUGH WOOD S WOOD
4 E5 A 29 1837 HIGHWAY 1000 2 N THROUGH WOOD SHORT S WOOD
5 E6 M 23 1838 HIGHWAY 2 N THROUGH WOOD S WOOD
6 E7 A 27 1840 HIGHWAY 990 2 N THROUGH WOOD MEDIUM S WOOD

Summarize the bridges data

# Per-column overview: quartiles and mean for numeric columns, level counts
# for factor columns, and the number of NA values where any are present.
summary(bridges_df)
##    Identifier  River     Location        Erected         Purpose  
##  E1     :  1   A:49   Min.   : 1.00   Min.   :1818   AQUEDUCT: 4  
##  E10    :  1   M:41   1st Qu.:15.50   1st Qu.:1884   HIGHWAY :71  
##  E100   :  1   O:15   Median :27.00   Median :1903   RR      :32  
##  E101   :  1   Y: 3   Mean   :25.98   Mean   :1905   WALK    : 1  
##  E102   :  1          3rd Qu.:37.50   3rd Qu.:1928                
##  E103   :  1          Max.   :52.00   Max.   :1986                
##  (Other):102          NA's   :1                                   
##      Length         Lanes      Clear-G       T-Or-D    Material 
##  Min.   : 804   Min.   :1.00   G   :80   DECK   :15   IRON :11  
##  1st Qu.:1000   1st Qu.:2.00   N   :26   THROUGH:87   STEEL:79  
##  Median :1300   Median :2.00   NA's: 2   NA's   : 6   WOOD :16  
##  Mean   :1567   Mean   :2.63                          NA's : 2  
##  3rd Qu.:2000   3rd Qu.:4.00                                    
##  Max.   :4558   Max.   :6.00                                    
##  NA's   :27     NA's   :16                                      
##      Span     Rel-L          Type   
##  LONG  :30   F   :58   SIMPLE-T:44  
##  MEDIUM:53   S   :30   WOOD    :16  
##  SHORT : 9   S-F :15   ARCH    :13  
##  NA's  :16   NA's: 5   CANTILEV:11  
##                        SUSPEN  :11  
##                        (Other) :11  
##                        NA's    : 2

Distribution of bridges based on the year of erection/build

# Histogram of the year each bridge was erected, using hist()'s default bins.
# The default plot title and x-axis label are taken from the expression text.
hist(bridges_df$Erected)

A pie chart for bridge purposes

# Count bridges per purpose code and convert the counts to proportions.
Purposes <- table(bridges_df$Purpose)
PurposeRatios <- prop.table(Purposes)
# Look the display labels up by purpose code instead of relying on the order
# in which table() happens to return the categories; a missing or additional
# category can then no longer shift the labels onto the wrong slices.
PurposeLabels <- c(
  AQUEDUCT = "Aqueduct",
  HIGHWAY = "Highway",
  RR = "Railroad",
  WALK = "Walking"
)[names(Purposes)]
# Any code without a display label falls back to the raw code itself.
PurposeLabels <- unname(
  ifelse(is.na(PurposeLabels), names(Purposes), PurposeLabels)
)
pie(PurposeRatios, labels = PurposeLabels, main = "Purpose of Bridges Built")

Railroad bridges - a histogram of the dates that they were installed:

# Keep only the rows whose purpose code is "RR" (rows with a missing purpose
# are dropped by subset()), then plot the years those bridges were erected.
RRBridges <- subset(bridges_df, Purpose == "RR")
hist(RRBridges$Erected)

Histogram of highway bridge dates of installation.

# Keep only the rows whose purpose code is "HIGHWAY" (rows with a missing
# purpose are dropped by subset()), then plot the years they were erected.
HighwayBridges <- subset(bridges_df, Purpose == "HIGHWAY")
hist(HighwayBridges$Erected)