Load data as data frame, inspect

# Read the Pittsburgh bridges data straight from the UCI repository.
# The file has no header row, so header = FALSE is required: with the default
# header = TRUE the first record is consumed as column names and dropped from
# the data. Column names are supplied here instead of being patched on after.
# read.csv() already returns a data frame, so no as.data.frame() is needed.
# Missing values are coded as "?" in the file and are left as-is at this stage.
bridges <- read.csv(
  url("https://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1"),
  header = FALSE,
  col.names = c(
    "ID", "River", "State", "Year_Built", "Purpose", "Length", "Lanes",
    "Clear", "Deck_Type", "Material", "Span", "Rel", "Type"
  )
)
summary(bridges)
##        ID      River      State      Year_Built       Purpose  
##  E10    :  1   A:49   28     : 5   Min.   :1819   AQUEDUCT: 4  
##  E100   :  1   M:40   39     : 5   1st Qu.:1884   HIGHWAY :70  
##  E101   :  1   O:15   25     : 4   Median :1903   RR      :32  
##  E102   :  1   Y: 3   27     : 4   Mean   :1906   WALK    : 1  
##  E103   :  1          29     : 4   3rd Qu.:1928                
##  E105   :  1          1      : 3   Max.   :1986                
##  (Other):101          (Other):82                               
##      Length   Lanes  Clear    Deck_Type   Material      Span     Rel    
##  ?      :26   ?:16   ?: 2   ?      : 6   ?    : 2   ?     :16   ?  : 5  
##  1000   :10   1: 4   G:80   DECK   :15   IRON :11   LONG  :30   F  :58  
##  1200   : 5   2:60   N:25   THROUGH:86   STEEL:79   MEDIUM:53   S  :29  
##  1500   : 2   4:23                       WOOD :15   SHORT : 8   S-F:15  
##  2000   : 2   6: 4                                                      
##  2300   : 2                                                             
##  (Other):60                                                             
##        Type   
##  SIMPLE-T:44  
##  WOOD    :15  
##  ARCH    :13  
##  CANTILEV:11  
##  SUSPEN  :11  
##  CONT-T  :10  
##  (Other) : 3
head(bridges)
##   ID River State Year_Built  Purpose Length Lanes Clear Deck_Type Material
## 1 E2     A    25       1819  HIGHWAY   1037     2     N   THROUGH     WOOD
## 2 E3     A    39       1829 AQUEDUCT      ?     1     N   THROUGH     WOOD
## 3 E5     A    29       1837  HIGHWAY   1000     2     N   THROUGH     WOOD
## 4 E6     M    23       1838  HIGHWAY      ?     2     N   THROUGH     WOOD
## 5 E7     A    27       1840  HIGHWAY    990     2     N   THROUGH     WOOD
## 6 E8     A    28       1844 AQUEDUCT   1000     1     N   THROUGH     IRON
##     Span Rel   Type
## 1  SHORT   S   WOOD
## 2      ?   S   WOOD
## 3  SHORT   S   WOOD
## 4      ?   S   WOOD
## 5 MEDIUM   S   WOOD
## 6  SHORT   S SUSPEN

Subset the data using the subset function to select certain columns

# Columns shared by every exploratory subset below.
subBridges <- subset(
  bridges,
  select = c(Lanes, State, Purpose, Material, Year_Built, Length)
)

# Long bridges: Length above 1000, the cut-off named in the title used when
# this subset is plotted (a cut-off of 100 would not match that title).
# Length is stored as text because missing values are coded "?"; those are
# recoded to NA before the numeric conversion so no coercion warning is raised,
# and subset() drops rows whose condition is NA.
subLongBridges <- subset(
  subBridges,
  as.numeric(replace(as.character(Length), Length %in% "?", NA)) > 1000
)

# Bridges whose State code is one of those this analysis labels New England.
# NOTE(review): confirm these codes against the data set's documentation.
subNEBridges <- subset(
  subBridges,
  State %in% c("6", "18", "20", "28", "44", "38")
)

Graphic exploration, using tidyverse

By storing a basic aesthetic plot as an object, several variations can be plotted quickly with + [different geoms]

# Base plot for the long-bridge subset: year built on x, length on y.
# Both columns are converted via character to numeric inside the mapping.
# No geom is attached here, so each variation below adds its own layer.
lengthPlot <- ggplot(
  data = subLongBridges,
  mapping = aes(
    x = as.numeric(as.character(Year_Built)),
    y = as.numeric(as.character(Length))
  )
) +
  labs(
    x = "Year Built",
    y = "Bridge Length",
    caption = "Lab 1 Data 607, Alice Friedman"
  )

# Semi-transparent jittered points, no colour mapping.
lengthPlot + geom_jitter(alpha = 0.5)

# Jittered points coloured by Material.
lengthPlot + geom_jitter(mapping = aes(colour = Material))

# Jittered points coloured by Purpose.
lengthPlot + geom_jitter(mapping = aes(colour = Purpose))

# Jittered points coloured by Lanes.
lengthPlot + geom_jitter(mapping = aes(colour = Lanes))

By creating a function, the same plot type can be used on different tables.

# Jittered scatter plot of one column against another, coloured by a third.
#
# Arguments:
#   yourTable   Data frame to plot.
#   tableTitle  Plot title.
#   xCol, yCol  Names (strings) of the columns for the x and y axes. Defaults
#               are "Year_Built" and "Length", so existing two-argument calls
#               behave as before. Values are converted via as.character() and
#               as.numeric(); the placeholder "?" is treated as missing.
#   colourCol   Name (string) of the column mapped to point colour.
#   xLabel, yLabel  Axis labels.
#
# Returns the ggplot object; printing it draws the plot.
# Stops with an error if yourTable lacks any of the requested columns.
plot_bridges <- function(yourTable, tableTitle,
                         xCol = "Year_Built", yCol = "Length",
                         colourCol = "Material",
                         xLabel = "Year Built", yLabel = "Bridge Length") {
  # Fail early with a clear message instead of a late error at draw time.
  missingCols <- setdiff(c(xCol, yCol, colourCol), names(yourTable))
  if (length(missingCols) > 0) {
    stop(
      "yourTable is missing column(s): ",
      paste(missingCols, collapse = ", "),
      call. = FALSE
    )
  }

  # Recode "?" to NA before converting so the numeric conversion does not
  # emit "NAs introduced by coercion" warnings for the known placeholder.
  toNumber <- function(values) {
    valueText <- as.character(values)
    valueText[valueText %in% "?"] <- NA
    as.numeric(valueText)
  }

  # Work on a copy so the caller's table is never modified.
  plotData <- yourTable
  plotData[[xCol]] <- toNumber(plotData[[xCol]])
  plotData[[yCol]] <- toNumber(plotData[[yCol]])

  ggplot(
    data = plotData,
    mapping = aes(x = .data[[xCol]], y = .data[[yCol]])
  ) +
    labs(
      x = xLabel,
      y = yLabel,
      colour = colourCol, # keep the legend titled by the column name
      caption = "Lab 1 Data 607, Alice Friedman",
      title = tableTitle
    ) +
    geom_jitter(alpha = 0.5, mapping = aes(colour = .data[[colourCol]]))
}

# Plot the subNEBridges subset with its own title.
plot_bridges(
  yourTable = subNEBridges,
  tableTitle = "Subset of Data for New England Bridges"
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 2 rows containing missing values (geom_point).

# Plot the subLongBridges subset with its own title.
plot_bridges(
  yourTable = subLongBridges,
  tableTitle = "Subset of Bridges Longer than 1000'"
)

A more useful version of this would take the column names and labels as arguments, as well!