# A quick example of how to download a zip archive, unzip it, and read the text file it contains. Packages used in this example: ggplot2, dplyr, and plyr.
# Download the GeoNames cities archive and read the tab-separated text file
# stored inside the zip into the data frame `geonames.cities`.
file_geonames_cities <- "http://download.geonames.org/export/dump/cities15000.zip"
file_geonames_cities.path <- "cities15000.txt"
temp <- tempfile()
geonames.cities <- tryCatch(
  {
    # A zip archive is binary: request a binary transfer explicitly so the
    # file is never subjected to line-ending translation (matters on Windows).
    download.file(file_geonames_cities, temp, mode = "wb")
    read.csv(
      unz(temp, file_geonames_cities.path),
      header = FALSE,
      stringsAsFactors = FALSE,
      encoding = "UTF-8",
      sep = "\t",
      quote = "",
      # Treat no string as missing. With the default ("NA"), a literal "NA"
      # field -- e.g. the ISO country code of Namibia -- would silently
      # become a missing value. Blank numeric fields are still read as NA.
      na.strings = character(0)
    )
  },
  # Delete the downloaded archive even when the download or the parse fails.
  finally = unlink(temp)
)
# Assign the 19 column names, in file order, to the data frame that was read
# without a header row.
names(geonames.cities) <- c(
  "geonameid", "name", "asciiname", "alternatenames",
  "latitude", "longitude",
  "featureclass", "featurecode",
  "countrycode", "cc2",
  "admin1code", "admin2code", "admin3code", "admin4code",
  "population", "elevation", "dem",
  "timezone", "modificationdate"
)
# Attach the packages the pipeline below relies on. plyr is attached before
# dplyr on purpose: dplyr warns that attaching plyr after it causes masking
# problems.
library(plyr)
library(dplyr)
library(ggplot2)

# Bar chart of the 25 country codes that occur most often in
# `geonames.cities`, drawn from the most frequent to the least frequent.
geonames.cities %>%
  # Number of rows per country code.
  ddply(.(countrycode), summarize, freq = length(countrycode)) %>%
  # Most frequent first, so that head() keeps the top 25.
  arrange(desc(freq)) %>%
  head(25) %>%
  ggplot() +
  # ggplot2 orders a character x axis alphabetically, which would discard the
  # sort above; reorder() makes the bars follow descending frequency.
  geom_col(aes(x = reorder(countrycode, -freq), y = freq)) +
  # Keep the plain column name as the axis title instead of the reorder() call.
  labs(x = "countrycode")