Set working directory
# Make the project folder the working directory. Relative paths used after
# this call (e.g. "Bridges.csv" passed to write.table() in this file) resolve
# against it.
# NOTE(review): absolute, user-specific path -- setwd() errors on any machine
# where this folder does not exist; consider running from the project root.
setwd("/Users/BK/Documents/GitHub/KreisWk3")
Import data
# Download the raw bridges data; the file has no header row, so the columns
# get the default V1, V2, ... names at this point.
Bridges <- read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1",
  header = FALSE
)
# Keep a local comma-separated copy of the data exactly as downloaded.
write.table(x = Bridges, file = "Bridges.csv", sep = ",")
Set column names
# Label the 13 columns of the downloaded data.
colnames(Bridges) <- c(
  "ID", "RIVER", "LOCATION", "ERECTED", "PURPOSE", "LENGTH", "LANES",
  "CLEAR-G", "T-OR-D", "MATERIAL", "SPAN", "REL-L", "TYPE"
)
Check whether the number of lanes is stored as a factor, then convert it to numeric
# Check how LANES was read in (a factor in the recorded run below).
is.factor(Bridges$LANES)
## [1] TRUE
# Convert through the character labels. Calling as.numeric() directly on a
# factor returns its internal level codes (1, 2, 3, ...), not the lane counts
# the labels spell out, which would corrupt every later comparison on LANES.
lane_labels <- as.character(Bridges$LANES)
# "?" appears as a placeholder in the printed data (see LENGTH); presumably it
# marks missing values in LANES as well -- map it to NA explicitly so the
# numeric conversion does not emit a coercion warning.
lane_labels[lane_labels %in% "?"] <- NA_character_
Bridges$LANES <- as.numeric(lane_labels)
# NOTE(review): console output recorded later in this file may predate this
# fix; re-run the script to refresh it.
View initial dimensions and verify changes
# Report the number of rows and columns.
dim(Bridges)
## [1] 108 13
# Print the first six rows to confirm the column names and the LANES type.
head(Bridges, n = 6L)
## ID RIVER LOCATION ERECTED PURPOSE LENGTH LANES CLEAR-G T-OR-D MATERIAL
## 1 E1 M 3 1818 HIGHWAY ? 3 N THROUGH WOOD
## 2 E2 A 25 1819 HIGHWAY 1037 3 N THROUGH WOOD
## 3 E3 A 39 1829 AQUEDUCT ? 2 N THROUGH WOOD
## 4 E5 A 29 1837 HIGHWAY 1000 3 N THROUGH WOOD
## 5 E6 M 23 1838 HIGHWAY ? 3 N THROUGH WOOD
## 6 E7 A 27 1840 HIGHWAY 990 3 N THROUGH WOOD
## SPAN REL-L TYPE
## 1 SHORT S WOOD
## 2 SHORT S WOOD
## 3 ? S WOOD
## 4 SHORT S WOOD
## 5 ? S WOOD
## 6 MEDIUM S WOOD
Create data frame with selected columns
# Keep only the identifier, river, lane count and length columns, using the
# "Bridges."-prefixed column names that the rest of the analysis prints.
df <- setNames(
  Bridges[c("ID", "RIVER", "LANES", "LENGTH")],
  c("Bridges.ID", "Bridges.RIVER", "Bridges.LANES", "Bridges.LENGTH")
)
Subset data to show bridges with 4 or more lanes and display observations
# Keep the bridges with four or more lanes. The condition uses df's own
# Bridges.LANES column (subset() evaluates it inside df) instead of reaching
# back into the separate Bridges object, so the filter no longer depends on
# the two data frames sharing the same row order. Rows whose lane count is NA
# are dropped by subset().
x <- subset(df, Bridges.LANES >= 4)
# Display the selected observations.
x
## Bridges.ID Bridges.RIVER Bridges.LANES Bridges.LENGTH
## 22 E22 A 4 1200
## 57 E53 A 4 965
## 67 E60 A 4 1000
## 71 E64 A 4 885
## 72 E66 A 4 2365
## 73 E70 A 4 860
## 74 E69 A 4 884
## 77 E72 M 4 2663
## 78 E67 M 4 1330
## 79 E75 A 4 2678
## 81 E71 A 4 860
## 83 E78 O 4 1365
## 84 E77 O 4 1450
## 85 E76 M 4 1500
## 86 E93 M 4 1690
## 87 E79 A 4 1800
## 88 E108 A 4 1060
## 95 E98 M 4 900
## 96 E81 M 4 2423
## 97 E80 M 4 1031
## 98 E88 A 4 2300
## 101 E83 M 5 1000
## 102 E86 A 4 980
## 103 E85 M 4 2213
## 104 E84 A 5 870
## 105 E91 O 5 3756
## 106 E90 M 5 950