Synopsis:

This file illustrates how to import different data files (csv, text, excel) using two differnt approaches - From local machine and from the web directly (without downloading data locally)

Packages required:

Homework Problems:

1. Importing a csv file stored in the working directory

reddit_imported <- read.csv("reddit.csv", stringsAsFactors = FALSE)
#head_data <- head(reddit_imported)
#library(knitr)
#knitr::kable(head_data, format = "html")
DT::datatable(reddit_imported,options= list(pageLength=5))
#kable(head(reddit_imported,n=4))
str(reddit_imported)
## 'data.frame':    32754 obs. of  14 variables:
##  $ id               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ gender           : int  0 0 1 0 1 0 0 0 0 0 ...
##  $ age.range        : chr  "25-34" "25-34" "18-24" "25-34" ...
##  $ marital.status   : chr  NA NA NA NA ...
##  $ employment.status: chr  "Employed full time" "Employed full time" "Freelance" "Freelance" ...
##  $ military.service : chr  NA NA NA NA ...
##  $ children         : chr  "No" "No" "No" "No" ...
##  $ education        : chr  "Bachelor's degree" "Bachelor's degree" "Some college" "Bachelor's degree" ...
##  $ country          : chr  "United States" "United States" "United States" "United States" ...
##  $ state            : chr  "New York" "New York" "Virginia" "New York" ...
##  $ income.range     : chr  "$150,000 or more" "$150,000 or more" "Under $20,000" "$150,000 or more" ...
##  $ fav.reddit       : chr  "getmotivated" "gaming" "snackexchange" "spacedicks" ...
##  $ dog.cat          : chr  NA NA NA NA ...
##  $ cheese           : chr  NA NA NA NA ...

2. Importing a csv file from web

url <- "https://bradleyboehmke.github.io/public/data/reddit.csv"
reddit_web <- read.csv(url)
DT::datatable(reddit_web, options=list(pageLength=5))

3. Importing Xlsx file stored in working directory

library(xlsx)
library(readxl)
xlsx_imported <- read_excel("FY2017_4050_FMR.xlsx" , sheet = "EXCEL_DATA")
DT::datatable(xlsx_imported,options = list(pageLength=5))
str(xlsx_imported)
## Classes 'tbl_df', 'tbl' and 'data.frame':    4769 obs. of  21 variables:
##  $ fips2010         : chr  "2300512300" "6099999999" "6999999999" "0100199999" ...
##  $ fips2000         : chr  NA NA NA "0100199999" ...
##  $ fmr2             : num  1078 677 666 822 977 ...
##  $ fmr0             : num  755 502 411 587 807 501 665 665 491 464 ...
##  $ fmr1             : num  851 506 498 682 847 505 751 751 494 467 ...
##  $ fmr3             : num  1454 987 961 1054 1422 ...
##  $ fmr4             : num  1579 1038 1158 1425 1634 ...
##  $ State            : num  23 60 69 1 1 1 1 1 1 1 ...
##  $ Metro_code       : chr  "METRO38860MM6400" "NCNTY60999N60999" "NCNTY69999N69999" "METRO33860M33860" ...
##  $ areaname         : chr  "Portland, ME HUD Metro FMR Area" "American Samoa" "Northern Mariana Islands" "Montgomery, AL MSA" ...
##  $ county           : num  NA 999 999 1 3 5 7 9 11 13 ...
##  $ CouSub           : chr  "12300" "99999" "99999" "99999" ...
##  $ countyname       : chr  "Cumberland County" "American Samoa" "Northern Mariana Islands" "Autauga County" ...
##  $ county_town_name : chr  "Chebeague Island town" "American Samoa" "Northern Mariana Islands" "Autauga County" ...
##  $ pop2010          : num  341 55519 53883 54571 182265 ...
##  $ acs_2016_2       : num  1109 653 642 788 873 ...
##  $ state_alpha      : chr  "ME" "AS" "MP" "AL" ...
##  $ fmr_type         : num  40 40 40 40 40 40 40 40 40 40 ...
##  $ metro            : num  1 0 0 1 1 0 1 1 0 0 ...
##  $ FMR_PCT_Change   : num  0.972 1.037 1.037 1.043 1.119 ...
##  $ FMR_Dollar_Change: num  -31 24 24 34 104 35 26 26 52 52 ...

4. Importing Xlsx file from web

library(gdata)
url_xlsx <- "http://www.huduser.gov/portal/datasets/fmr/fmr2017/FY2017_4050_FMR.xlsx"
xlsx_web <- read.xls(url_xlsx , sheet = "EXCEL_DATA")
DT::datatable(xlsx_web,options=list(pageLength=5))

5. Importing file from a web link

library(XML)
library(stringr)
url_udayton <-"http://academic.udayton.edu/kissock/http/Weather/citylistUS.htm"
url_link_cincy <- url_udayton %>%
         getHTMLLinks() %>%
         .[str_detect(.,"CINCIN")] %>%
         paste0(str_sub(url_udayton,1,regexpr("/citylistUS",url_udayton)),.)
cincy_data <- read.table(url_link_cincy, col.names=c("Month","Day","Year","Temp"))
DT::datatable(cincy_data,options=list(pageLength=5))
str(cincy_data)
## 'data.frame':    7963 obs. of  4 variables:
##  $ Month: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Day  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year : int  1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 ...
##  $ Temp : num  41.1 22.2 22.8 14.9 9.5 23.8 31.1 26.9 31.3 31.5 ...