The front side of this sheet shows how to read text files into R with readr.
Try one of the following packages to import other types of files:
haven - SPSS, Stata, and SAS files
readxl - excel files (.xls and .xlsx)
DBI - databases
jsonlite - json
xml2 - XML
httr - Web APIs
rvest - HTML (Web Scraping)
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.2.1 √ purrr 0.3.3
## √ tibble 2.1.3 √ dplyr 0.8.3
## √ tidyr 1.0.0 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(nycflights13)
library(tidyr)
library(corrplot)
## corrplot 0.84 loaded
library(DT)
read_*(file,
col_names = TRUE,
col_types = NULL,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = Inf,
guess_max = min(1000,n_max),
progress = interactive())
write_file(x = "a,b,c\n1,2,3\n4,5,NA", path = "file.csv")
read_csv("file.csv")
write_file(x = "a;b;c\n1;2;3\n4;5;NA", path = "file2.csv")
read_csv2("file2.csv")
write_file(x = "a|b|c\n1|2|3\n4|5|NA", path = "file.txt")
read_delim("file.txt", delim = "|")
write_file(x = "a b c\n1 2 3\n4 5 NA", path = "file.fwf")
read_fwf("file.fwf", col_positions = c(1, 3, 5))
write_file(x = "a\tb\tc\n1\t2\t3\n4\t5\tNA", path = "file.tsv")
read_tsv("file.tsv") # Also read_table()
Use problems() to diagnose problems
x <- read_csv("file.csv");
problems(x)
View full data set with View() or glimpse()
Revert to data frame with as.data.frame()
tidy_example <- tibble(country = LETTERS[1:20],
`1999` = c(0.7,37,212,2,45,8,9,41,12,12,1:10),
`2000` = `1999`+rnorm(20,100,1),
`2001` = `2000` + rnorm(20,10,10))
corrplot::corrplot(cor(tidy_example[,-1]),method = "number",type = "upper")
tidy_example_gather <- tidy_example %>%
gather(key = "year",value = "cases",-country)
tidy_example_gather %>% datatable()
tidy_example_gather %>%
spread(key = "year",value = "cases") %>%
datatable()
drop_na(data, ...) Drop rows containing NA’s in … columns
fill(data, ..., .direction = c("down", "up")) Fill in NA’s in … columns with most recent non-NA values.
replace_na(data,replace = list(), ...) Replace NA’s by column
tidy_example[c(2,5),] <- NA
tidy_example %>% datatable()
drop_na(tidy_example) %>%
datatable()
fill(tidy_example,country:`2001`) %>%
datatable()
tidy_example %>%
mutate(x = replace_na(`1999`,0)) %>% datatable()
tidy_example %>%
replace_na(list(`2000` = 2)) %>%
datatable()
mtcars %>%
select(cyl,gear,carb,everything()) %>%
arrange(cyl,gear) %>%
datatable()
mtcars %>% has_rownames()
## [1] TRUE
complete(mtcars, cyl, gear, carb) %>% datatable()
expand(mtcars, cyl, gear, carb) %>% datatable()
separate(data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE,extra = "warn", fill = "warn", ...) Separate each cell in a column to make several columns.
unite(data, col, ..., sep = "_", remove = TRUE) Collapse cells across several columns to make a single column
separate_rows(data, ..., sep = "[^[:alnum:].]+", convert = FALSE) Separate each cell in a column to make several rows
split_exam <- nycflights13::flights[1:100,1:8]
split_exam %>% datatable()
split_exam <- split_exam %>%
unite(year,month,day,col = "date",sep = "-") # 那几列按照什么规则sep合成col
split_exam$date <- as.Date(split_exam$date)
split_exam %>% datatable()
split_exam %>%
separate(col = date,sep = "-",into = c("year","month","day")) # 那一列col按照什么规则sep拆分into
## # A tibble: 100 x 8
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <chr> <chr> <chr> <int> <int> <dbl> <int> <int>
## 1 2013 01 01 517 515 2 830 819
## 2 2013 01 01 533 529 4 850 830
## 3 2013 01 01 542 540 2 923 850
## 4 2013 01 01 544 545 -1 1004 1022
## 5 2013 01 01 554 600 -6 812 837
## 6 2013 01 01 554 558 -4 740 728
## 7 2013 01 01 555 600 -5 913 854
## 8 2013 01 01 557 600 -3 709 723
## 9 2013 01 01 557 600 -3 838 846
## 10 2013 01 01 558 600 -2 753 745
## # ... with 90 more rows