DataCamp: Importing Data in R

Ctrl + L ## clear screen


# Detach readr from the search path and unload its namespace.
# Note: this errors if readr is not currently attached.
detach("package:readr", unload = TRUE)

# Interactive data entry.
x <- edit(data.frame())        # start an empty GUI spreadsheet editor
x <- edit(x)                   # open an existing data frame in the editor
x <- scan(what = character())  # enter values from the keyboard or by copy & paste

# Inspecting and cleaning up the session.
search()                    # list attached packages/environments (the search path)
ls()                        # list R objects created during the session
rm(my_object1, my_object2)  # remove the named objects (list as many as needed)
rm(list = ls())             # remove all objects from the workspace

# List the files in the current working directory.
dir()

# Comma-separated file; keep text columns as character rather than factor.
pools <- read.csv(file = "swimming_pools.csv", stringsAsFactors = FALSE)

# Build the path in a platform-independent way, then read the file.
# sep = "" makes read.table split fields on any run of whitespace.
path <- file.path("data", "hotdogs.txt")
hotdogs <- read.table(
  file = path,
  sep = "",
  col.names = c("type", "calories", "sodium")
)

# Same import through read.delim(); header must be switched off explicitly
# because read.delim() defaults to header = TRUE.
hotdogs <- read.delim(
  file = "hotdogs.txt",
  header = FALSE,
  sep = "",
  col.names = c("type", "calories", "sodium")
)

# Row holding the hot dog with the fewest calories.
lily <- hotdogs[which.min(hotdogs[["calories"]]), ]
lily

##       type calories sodium
## 50 Poultry       86    358

# colClasses sets one class per column; "NULL" drops that column on import.
hotdogs2 <- read.delim(
  file = "hotdogs.txt",
  header = FALSE,
  col.names = c("type", "calories", "sodium"),
  colClasses = c("factor", "NULL", "numeric")
)

head(hotdogs)

##   type calories sodium
## 1 Beef      186    495
## 2 Beef      181    477
## 3 Beef      176    425
## 4 Beef      149    322
## 5 Beef      184    482
## 6 Beef      190    587

head(hotdogs2)

##   type sodium
## 1 Beef    495
## 2 Beef    477
## 3 Beef    425
## 4 Beef    322
## 5 Beef    482
## 6 Beef    587

# The two imports differ (hotdogs2 has no calories column).
identical(hotdogs, hotdogs2)

## [1] FALSE

readr package

# Attach readr for read_tsv().
library(readr)

# Column names are supplied by hand, so read_tsv() treats the first line of
# potatoes.txt as data rather than as a header.
properties <- c(
  "area", "temp", "size", "storage",
  "method", "texture", "flavor", "moistness"
)
potatoes <- read_tsv(file = "potatoes.txt", col_names = properties)

## Parsed with column specification:
## cols(
##   area = col_integer(),
##   temp = col_integer(),
##   size = col_integer(),
##   storage = col_integer(),
##   method = col_integer(),
##   texture = col_double(),
##   flavor = col_double(),
##   moistness = col_double()
## )

# Peek at the first rows of the imported tibble.
head(potatoes)

## # A tibble: 6 x 8
##    area  temp  size storage method texture flavor moistness
##   <int> <int> <int>   <int>  <int>   <dbl>  <dbl>     <dbl>
## 1     1     1     1       1      1     2.9    3.2       3.0
## 2     1     1     1       1      1     2.3    2.3       2.6
## 3     1     1     1       1      1     2.5    2.8       2.8
## 4     1     1     1       1      1     2.1    2.9       2.4
## 5     1     1     1       1      1     1.9    2.8       2.2
## 6     1     1     1       1      1     1.1    3.0       1.7

# Read only a slice of potatoes.txt: skip the first 3 lines, then import
# at most 4 observations, naming the columns from `properties`.
potatoes_fragment <- read_tsv(
  file = "potatoes.txt",
  skip = 3,
  n_max = 4,
  col_names = properties
)

## Parsed with column specification:
## cols(
##   area = col_integer(),
##   temp = col_integer(),
##   size = col_integer(),
##   storage = col_integer(),
##   method = col_integer(),
##   texture = col_double(),
##   flavor = col_double(),
##   moistness = col_double()
## )

potatoes_fragment

## # A tibble: 4 x 8
##    area  temp  size storage method texture flavor moistness
##   <int> <int> <int>   <int>  <int>   <dbl>  <dbl>     <dbl>
## 1     1     1     1       1      1     2.1    2.9       2.4
## 2     1     1     1       1      1     1.9    2.8       2.2
## 3     1     1     1       1      1     1.1    3.0       1.7
## 4     1     1     1       1      1     2.6    3.1       2.4

# Import every row, forcing all eight columns to character:
# each "c" in col_types stands for one character column.
potatoes_char <- read_tsv(
  file = "potatoes.txt",
  col_types = "cccccccc",
  col_names = properties
)

# Inspect the structure to confirm the column types.
str(potatoes_char)

## Classes 'tbl_df', 'tbl' and 'data.frame':    10 obs. of  8 variables:
##  $ area     : chr  "1" "1" "1" "1" ...
##  $ temp     : chr  "1" "1" "1" "1" ...
##  $ size     : chr  "1" "1" "1" "1" ...
##  $ storage  : chr  "1" "1" "1" "1" ...
##  $ method   : chr  "1" "1" "1" "1" ...
##  $ texture  : chr  "2.9" "2.3" "2.5" "2.1" ...
##  $ flavor   : chr  "3.2" "2.3" "2.8" "2.9" ...
##  $ moistness: chr  "3.0" "2.6" "2.8" "2.4" ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 8
##   .. ..$ area     : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ temp     : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ size     : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ storage  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ method   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ texture  : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ flavor   : list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ moistness: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"

collectors

# Without col_types, readr guesses each column's type from the data.
hotdogs <- read_tsv(
  file = "hotdogs.txt",
  col_names = c("type", "calories", "sodium")
)

## Parsed with column specification:
## cols(
##   type = col_character(),
##   calories = col_integer(),
##   sodium = col_integer()
## )

summary(hotdogs)

##      type              calories         sodium     
##  Length:54          Min.   : 86.0   Min.   :144.0  
##  Class :character   1st Qu.:132.0   1st Qu.:362.5  
##  Mode  :character   Median :145.0   Median :405.0  
##                     Mean   :145.4   Mean   :424.8  
##                     3rd Qu.:172.8   3rd Qu.:503.5  
##                     Max.   :195.0   Max.   :645.0

# With col_types, collector objects fix the type of each column up front:
# a factor with known levels for `type`, integers for the two measurements.
fac <- col_factor(levels = c("Beef", "Meat", "Poultry"))
int <- col_integer()

hotdogs_factor <- read_tsv(
  file = "hotdogs.txt",
  col_names = c("type", "calories", "sodium"),
  col_types = list(fac, int, int)
)
summary(hotdogs_factor)

##       type       calories         sodium     
##  Beef   :20   Min.   : 86.0   Min.   :144.0  
##  Meat   :17   1st Qu.:132.0   1st Qu.:362.5  
##  Poultry:17   Median :145.0   Median :405.0  
##               Mean   :145.4   Mean   :424.8  
##               3rd Qu.:172.8   3rd Qu.:503.5  
##               Max.   :195.0   Max.   :645.0

# The result is a tibble, which is also a data.frame.
class(hotdogs_factor)

## [1] "tbl_df"     "tbl"        "data.frame"

fread

# Attach data.table for fread().
library(data.table)

# Keep only columns 3 and 6. Selecting by name is the alternative:
# potatoes <- fread("potatoes.csv", select = c("texture", "moistness"))
potatoes <- fread(input = "potatoes.csv", select = c(3, 6))
head(potatoes)

##    V3  V6
## 1:  1 2.9
## 2:  1 2.3
## 3:  1 2.5
## 4:  1 2.1
## 5:  1 1.9
## 6:  1 1.1

# fread() returns a data.table, which is also a data.frame.
class(potatoes)

## [1] "data.table" "data.frame"

paste

# paste() joins its arguments with a separator (a single space by default)
# and recycles the shorter argument across the longer one.
paste("test", 1:5, sep = " ")

## [1] "test 1" "test 2" "test 3" "test 4" "test 5"

# paste0() does the same with no separator at all.
paste0("test", 1:5)

## [1] "test1" "test2" "test3" "test4" "test5"

Read xlsx files

# excel_sheets() and read_excel() come from readxl, so attach it first.
library(readxl)

# Read all Excel sheets with lapply(): each sheet name returned by
# excel_sheets() is passed to read_excel() as its `sheet` argument.
pop_list <- lapply(excel_sheets("urbanpop.xlsx"), read_excel, path = "urbanpop.xlsx")

# Import the first Excel sheet of urbanpop_nonames.xlsx (R gives names)
pop_a <- read_excel("urbanpop_nonames.xlsx", sheet = 1, col_names = FALSE)

# Import the first Excel sheet of urbanpop_nonames.xlsx (specify col_names)
cols <- c("country", paste0("year_", 1960:1966))
pop_b <- read_excel("urbanpop_nonames.xlsx", sheet = 1, col_names = cols)

# Import the second sheet of urbanpop.xlsx, skipping the first 21 rows
urbanpop_sel <- read_excel("urbanpop.xlsx", sheet = 2, col_names = FALSE, skip = 21)

# Print out the first observation from urbanpop_sel
head(urbanpop_sel, n = 1)

Read xls files

# Attach gdata for read.xls().
library(gdata)

# Plain import of the second sheet.
urban_pop <- read.xls("urbanpop.xls", sheet = 2)

# Re-import the second sheet, skipping the first 50 lines, reading no header,
# keeping strings as character and supplying the column names by hand.
columns <- c("country", paste0("year_", 1967:1974))
urban_pop <- read.xls(
  "urbanpop.xls",
  sheet = 2,
  skip = 50,
  header = FALSE,
  stringsAsFactors = FALSE,
  col.names = columns
)

# Import each of the three sheets separately.
path <- "urbanpop.xls"
urban_sheet1 <- read.xls(path, sheet = 1, stringsAsFactors = FALSE)
urban_sheet2 <- read.xls(path, sheet = 2, stringsAsFactors = FALSE)
urban_sheet3 <- read.xls(path, sheet = 3, stringsAsFactors = FALSE)

# Bind the sheets side by side; the first column of sheets 2 and 3 is
# dropped so that it is not repeated in the combined data frame.
urban <- cbind(
  urban_sheet1,
  urban_sheet2[-1],
  urban_sheet3[-1]
)

# Drop every row that contains at least one NA.
urban_clean <- na.omit(urban)

# Summarise the cleaned data.
summary(urban_clean)

Mac용 R에서 잘 읽지 못하는 한글 파일을 잘 읽는 방법

http://www.epistemology.pe.kr/2017/07/13/1121?ckattempt=1

DataCamp: Importing Data in R

Rose Park

July 19, 2017

readr package

collectors

fread

paste

Read xlsx files

Read xls files

Mac용 R에서 잘 읽지 못하는 한글 파일을 잘 읽는 방법