Get web data
# Load RCurl as the original script does. NOTE(review): readLines() below is
# base R and opens the URL itself, so nothing in this step calls RCurl.
library(RCurl)
# Address of the raw bridges data file, split only to keep lines short;
# paste0() reassembles exactly the original URL.
bridges_url <- paste0(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/",
  "bridges/bridges.data.version2"
)
# readLines() returns a character vector with one element per line of the file.
v <- readLines(bridges_url) # start from weblink
summary(v) # character vector of length 108
## Length Class Mode
## 108 character character
Convert webpage to dataframe
# Split each comma-separated line into its fields; `v` becomes a list holding
# one character vector per input line.
v <- strsplit(v, ",") # split into list
# Filling a matrix row by row from unlist(v) is only correct when every line
# has the same number of fields, so fail loudly instead of silently
# misaligning columns.
stopifnot(length(v) > 0L, length(unique(lengths(v))) == 1L)
# One row per input line: take the row count from the data rather than
# hard-coding 108, and spell out TRUE (T is an ordinary, reassignable variable).
df <- data.frame(matrix(unlist(v), nrow = length(v), byrow = TRUE)) # unlist and convert to dataframe
tail(df)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13
## 103 E85 M 9 MODERN HIGHWAY LONG 4 G DECK STEEL LONG F CONT-T
## 104 E84 A 24 MODERN HIGHWAY SHORT 6 G THROUGH STEEL MEDIUM F ARCH
## 105 E91 O 44 MODERN HIGHWAY LONG 6 G THROUGH STEEL LONG F ARCH
## 106 E90 M 7 MODERN HIGHWAY SHORT 6 G THROUGH STEEL LONG F ARCH
## 107 E100 O 43 MODERN HIGHWAY ? ? G ? ? ? F ?
## 108 E109 A 28 MODERN HIGHWAY ? ? G ? ? ? F ?
Subset columns of interest and assign descriptive names
# Keep columns 1-3 and 7-11 of the parsed table, then replace the default
# X* headers with descriptive names.
br <- df[, c(1:3, 7:11)]
names(br) <- c(
  "id", "river", "location", "lanes",
  "clear_g", "t_d", "material", "span"
)
head(br)
## id river location lanes clear_g t_d material span
## 1 E1 M 3 2 N THROUGH WOOD SHORT
## 2 E2 A 25 2 N THROUGH WOOD SHORT
## 3 E3 A 39 1 N THROUGH WOOD ?
## 4 E5 A 29 2 N THROUGH WOOD SHORT
## 5 E6 M 23 2 N THROUGH WOOD ?
## 6 E7 A 27 2 N THROUGH WOOD MEDIUM
Some basic data cleansing
# Make sure span is a factor *before* "?" is blanked out: data.frame() only
# converts strings to factors by default on R < 4.0, and the level rename
# below needs a "?" level to exist. as.factor() leaves an existing factor
# untouched, so this is a no-op on older R versions.
br$span <- as.factor(br$span)
br[br == "?"] <- NA # set missing values to NA
# NOTE(review): the "?" entries were just set to NA, so the renamed level ends
# up with no members; this line only demonstrates how to rename a level.
levels(br$span)[levels(br$span) == "?"] <- "missing" # rename a factor level
str(br$span)
## Factor w/ 4 levels "missing","LONG",..: 4 4 NA 4 NA 3 4 4 NA 3 ...
# Go through as.character() first: as.numeric() applied directly to a factor
# returns the internal level codes, not the numbers the labels spell out
# (the recorded summary ranges over 2..5 for lane counts 1, 2, 4 and 6).
# The extra as.character() is harmless when the column is already character.
br$lanes <- as.numeric(as.character(br$lanes)) # prepare numbers for analysis
# NOTE(review): the "##" output recorded in this file was captured with the
# old conversion and still shows level codes (e.g. 5 where the data has
# 6 lanes); re-run the script to refresh it.
summary(br$lanes)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.000 3.000 3.000 3.293 4.000 5.000 16
Get rows of interest: Bridges with long spans and many lanes
# Keep the bridges whose span is "LONG" and whose lanes value is at least 4.
# which() drops rows where the test evaluates to NA, matching what subset()
# does with NA conditions; original row names are preserved.
bigbr <- br[which(br$span == "LONG" & br$lanes >= 4), ]
bigbr
## id river location lanes clear_g t_d material span
## 67 E60 A 24 4 G THROUGH STEEL LONG
## 78 E67 M 1 4 G THROUGH STEEL LONG
## 83 E78 O 40 4 G THROUGH STEEL LONG
## 84 E77 O 42 4 N THROUGH STEEL LONG
## 85 E76 M 6 4 G THROUGH STEEL LONG
## 86 E93 M 11 4 N DECK STEEL LONG
## 96 E81 M 14 4 G DECK STEEL LONG
## 97 E80 M 19 4 G THROUGH STEEL LONG
## 98 E88 A 37 4 N DECK STEEL LONG
## 101 E83 M 1 5 G THROUGH STEEL LONG
## 103 E85 M 9 4 G DECK STEEL LONG
## 105 E91 O 44 5 G THROUGH STEEL LONG
## 106 E90 M 7 5 G THROUGH STEEL LONG