Choose one dataset, then study the data and its associated description (i.e., the “data dictionary”). You should take the data and create an R data frame with a subset of the columns (and, if you like, the rows) in the dataset.
# Install the packages this script needs, but only those that are missing.
# devtools was installed by the original script; RCurl and DataCombine are
# loaded with library() further down, so they are checked here as well.
# Skipping packages that are already present avoids re-downloading them on
# every run, and the HTTPS CRAN mirror avoids a plain-HTTP download.
# local() keeps the helper variables out of the global environment.
local({
  required_pkgs <- c("devtools", "RCurl", "DataCombine")
  # system.file() returns "" for a package that is not installed; unlike
  # requireNamespace() it does not load the package as a side effect.
  is_installed <- vapply(
    required_pkgs,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  missing_pkgs <- required_pkgs[!is_installed]
  if (length(missing_pkgs) > 0) {
    install.packages(
      missing_pkgs,
      dependencies = TRUE,
      repos = "https://cloud.r-project.org"
    )
  }
})
## Installing package into 'C:/Users/Wisdom Roland/Documents/R/win-library/3.2'
## (as 'lib' is unspecified)
## Warning: dependency 'BiocInstaller' is not available
## package 'devtools' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Wisdom Roland\AppData\Local\Temp\RtmpWcHn6r\downloaded_packages
## Installing package into 'C:/Users/Wisdom Roland/Documents/R/win-library/3.2'
## (as 'lib' is unspecified)
## package 'DataCombine' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Wisdom Roland\AppData\Local\Temp\RtmpWcHn6r\downloaded_packages
# get RCurl on board so csv file can be retrieved
library(RCurl)
## Loading required package: bitops
# get package DataCombine on board so FindReplace can be used
library(DataCombine)
## Warning: package 'DataCombine' was built under R version 3.2.2
# Download the raw bridges data file from GitHub as one character string.
Bridges <- getURL(
  url = "https://raw.githubusercontent.com/Wisdomlite143/MSDA-Repository/master/bridges.data.version1"
)
# Parse the downloaded text as comma-separated values. The file is read as
# having no header row, and text columns stay character vectors rather than
# being converted to factors. No NA strings are declared, so "?" entries in
# the file are kept as literal strings.
Pittsburgh <- read.csv(
  text = Bridges,
  stringsAsFactors = FALSE,
  header = FALSE
)
# head(Pittsburgh) check the first 6 rows of data set Pittsburgh
# Keep only the first 13 columns of the parsed table, since the assignment
# asks for a subset of the columns.
fewercol_Pittsburgh <- Pittsburgh[, seq_len(13)]
# head(fewercol_Pittsburgh)  # uncomment to inspect the first 6 rows
# Store the column subset as its own data frame.
df_fewercol_Pittsburgh <- data.frame(fewercol_Pittsburgh)
# Assign a descriptive label to each of the 13 columns. Several labels
# contain hyphens, so they must be quoted with backticks when used with `$`.
names(df_fewercol_Pittsburgh) <- c(
  "IDENTIF", "RIVER", "LOCATION", "ERECTED", "PURPOSE", "LENGTH", "LANES",
  "CLEAR-G", "T-OR-D", "MATERIAL", "SPAN", "REL-L", "TYPE"
)
# Print the first 6 rows of the relabelled data frame.
head(df_fewercol_Pittsburgh, n = 6L)
## IDENTIF RIVER LOCATION ERECTED PURPOSE LENGTH LANES CLEAR-G T-OR-D
## 1 E1 M 3 1818 HIGHWAY ? 2 N THROUGH
## 2 E2 A 25 1819 HIGHWAY 1037 2 N THROUGH
## 3 E3 A 39 1829 AQUEDUCT ? 1 N THROUGH
## 4 E5 A 29 1837 HIGHWAY 1000 2 N THROUGH
## 5 E6 M 23 1838 HIGHWAY ? 2 N THROUGH
## 6 E7 A 27 1840 HIGHWAY 990 2 N THROUGH
## MATERIAL SPAN REL-L TYPE
## 1 WOOD SHORT S WOOD
## 2 WOOD SHORT S WOOD
## 3 WOOD ? S WOOD
## 4 WOOD SHORT S WOOD
## 5 WOOD ? S WOOD
## 6 WOOD MEDIUM S WOOD