read.csv 에서 encoding 옵션(파일 자체의 인코딩을 지정해 변환하며 읽으려면 fileEncoding 옵션)을 활용하여 인코딩을 설정할 수 있다. csv는 쉼표(,)를 구분자로 사용하며, tsv는 탭(tab)을 구분자로 사용한다. sep 옵션을 사용하여 구분자를 설정할 수 있다.
# Point the session at the folder that holds the iris data.
setwd("R:/HDD1/data/iris")
# Load the CSV using read.csv()'s defaults.
iris <- read.csv(file = "iris.csv")
# Preview the first six rows.
head(iris, n = 6L)
## X Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 1 5.1 3.5 1.4 0.2 setosa
## 2 2 4.9 3.0 1.4 0.2 setosa
## 3 3 4.7 3.2 1.3 0.2 setosa
## 4 4 4.6 3.1 1.5 0.2 setosa
## 5 5 5.0 3.6 1.4 0.2 setosa
## 6 6 5.4 3.9 1.7 0.4 setosa
# Re-read the CSV, declaring that the file on disk is CP949-encoded.
# `fileEncoding` makes R re-encode the file's contents while reading it.
# The `encoding` argument used previously only marks already-read strings as
# Latin-1 or UTF-8 and never re-encodes the input, so it could not decode a
# CP949 file.
# Relies on the working directory set by an earlier setwd() call.
iris <- read.csv("iris.csv", fileEncoding = "cp949")
# Show the first six rows to confirm the columns were parsed as expected.
head(iris)
## X Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 1 5.1 3.5 1.4 0.2 setosa
## 2 2 4.9 3.0 1.4 0.2 setosa
## 3 3 4.7 3.2 1.3 0.2 setosa
## 4 4 4.6 3.1 1.5 0.2 setosa
## 5 5 5.0 3.6 1.4 0.2 setosa
## 6 6 5.4 3.9 1.7 0.4 setosa
read.table 을 통해서도 자료를 가져올 수 있다.
setwd("R:/HDD1/data/iris")
# No `sep` is supplied here, so the comma-separated fields are not split
# into their own columns (see the printed result).
iris <- read.table(file = "iris.csv")
# Show the first three rows.
head(iris, n = 3)
## V1 V2
## 1 NA ,"Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species"
## 2 1 ,5.1,3.5,1.4,0.2,"setosa"
## 3 2 ,4.9,3,1.4,0.2,"setosa"
setwd("R:/HDD1/data/iris")
# A single space is used as the separator, so each comma-separated line
# stays in one column (see the printed result).
iris <- read.table(file = "iris.csv", sep = " ")
# Show the last four rows.
tail(iris, n = 4)
## V1
## 148 147,6.3,2.5,5,1.9,virginica
## 149 148,6.5,3,5.2,2,virginica
## 150 149,6.2,3.4,5.4,2.3,virginica
## 151 150,5.9,3,5.1,1.8,virginica
setwd("R:/HDD1/data/iris")
# With a comma as the separator the fields land in separate columns.
iris <- read.table(file = "iris.csv", sep = ",")
# Show the last four rows.
tail(iris, n = 4)
## V1 V2 V3 V4 V5 V6
## 148 147 6.3 2.5 5 1.9 virginica
## 149 148 6.5 3 5.2 2 virginica
## 150 149 6.2 3.4 5.4 2.3 virginica
## 151 150 5.9 3 5.1 1.8 virginica
library(xlsx)
## Warning: package 'xlsx' was built under R version 3.5.2
setwd("R:/HDD1/data/iris")
# Export the current `iris` data frame under both Excel file extensions.
write.xlsx(x = iris, file = "iris.xls")
write.xlsx(x = iris, file = "iris.xlsx")
# Read the .xls file back from its first sheet and preview the result.
iris <- read.xlsx(file = "iris.xls", sheetIndex = 1, encoding = "cp949")
head(iris, n = 6L)
## NA. V1 V2 V3 V4 V5 V6
## 1 1 NA Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 2 2 1 5.1 3.5 1.4 0.2 setosa
## 3 3 2 4.9 3 1.4 0.2 setosa
## 4 4 3 4.7 3.2 1.3 0.2 setosa
## 5 5 4 4.6 3.1 1.5 0.2 setosa
## 6 6 5 5 3.6 1.4 0.2 setosa
# Read the .xlsx file back from its first sheet and preview the result.
# Relies on the working directory set by an earlier setwd() call.
iris <- read.xlsx(file = "iris.xlsx", sheetIndex = 1, encoding = "utf-8")
head(iris, n = 6L)
## NA. V1 V2 V3 V4 V5 V6
## 1 1 NA Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 2 2 1 5.1 3.5 1.4 0.2 setosa
## 3 3 2 4.9 3 1.4 0.2 setosa
## 4 4 3 4.7 3.2 1.3 0.2 setosa
## 5 5 4 4.6 3.1 1.5 0.2 setosa
## 6 6 5 5 3.6 1.4 0.2 setosa
library('readxl')
setwd("R:/HDD1/data/iris")
# Read the first sheet of the .xlsx file with readxl; the result is printed
# directly rather than stored.
read_excel(path = "iris.xlsx", sheet = 1)
## # A tibble: 151 x 7
## X__1 V1 V2 V3 V4 V5 V6
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 NA Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 2 2 1 5.1 3.5 1.4 0.2 setosa
## 3 3 2 4.9 3 1.4 0.2 setosa
## 4 4 3 4.7 3.2 1.3 0.2 setosa
## 5 5 4 4.6 3.1 1.5 0.2 setosa
## 6 6 5 5 3.6 1.4 0.2 setosa
## 7 7 6 5.4 3.9 1.7 0.4 setosa
## 8 8 7 4.6 3.4 1.4 0.3 setosa
## 9 9 8 5 3.4 1.5 0.2 setosa
## 10 10 9 4.4 2.9 1.4 0.2 setosa
## # ... with 141 more rows
# Read the first sheet of the .xls file with readxl; the result is printed
# directly rather than stored.
read_excel(path = "iris.xls", sheet = 1)
## # A tibble: 151 x 7
## X__1 V1 V2 V3 V4 V5 V6
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 NA Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 2 2 1 5.1 3.5 1.4 0.2 setosa
## 3 3 2 4.9 3 1.4 0.2 setosa
## 4 4 3 4.7 3.2 1.3 0.2 setosa
## 5 5 4 4.6 3.1 1.5 0.2 setosa
## 6 6 5 5 3.6 1.4 0.2 setosa
## 7 7 6 5.4 3.9 1.7 0.4 setosa
## 8 8 7 4.6 3.4 1.4 0.3 setosa
## 9 9 8 5 3.4 1.5 0.2 setosa
## 10 10 9 4.4 2.9 1.4 0.2 setosa
## # ... with 141 more rows
readr 의 경우 자료를 읽는 중 파싱 오류가 발생하면 경고로 오류 내용을 출력해 주고, 오류가 난 값을 제외한 나머지 자료를 불러온다.
library(readr)
setwd("R:/HDD1/data/iris")
# Read the CSV with readr, declaring the file encoding through the locale.
iris <- read_csv(
  file = "iris.csv",
  locale = locale(encoding = "cp949")
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## Sepal.Length = col_double(),
## Sepal.Width = col_double(),
## Petal.Length = col_double(),
## Petal.Width = col_double(),
## Species = col_character()
## )
# Display the tibble that was just read.
print(iris)
## # A tibble: 150 x 6
## X1 Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1 5.1 3.5 1.4 0.2 setosa
## 2 2 4.9 3 1.4 0.2 setosa
## 3 3 4.7 3.2 1.3 0.2 setosa
## 4 4 4.6 3.1 1.5 0.2 setosa
## 5 5 5 3.6 1.4 0.2 setosa
## 6 6 5.4 3.9 1.7 0.4 setosa
## 7 7 4.6 3.4 1.4 0.3 setosa
## 8 8 5 3.4 1.5 0.2 setosa
## 9 9 4.4 2.9 1.4 0.2 setosa
## 10 10 4.9 3.1 1.5 0.1 setosa
## # ... with 140 more rows
# Read the same CSV again, this time declaring UTF-8 through the locale.
# Relies on the working directory set by an earlier setwd() call.
iris <- read_csv(
  file = "iris.csv",
  locale = locale(encoding = "utf-8")
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## Sepal.Length = col_double(),
## Sepal.Width = col_double(),
## Petal.Length = col_double(),
## Petal.Width = col_double(),
## Species = col_character()
## )
# Display the tibble that was just read.
print(iris)
## # A tibble: 150 x 6
## X1 Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1 5.1 3.5 1.4 0.2 setosa
## 2 2 4.9 3 1.4 0.2 setosa
## 3 3 4.7 3.2 1.3 0.2 setosa
## 4 4 4.6 3.1 1.5 0.2 setosa
## 5 5 5 3.6 1.4 0.2 setosa
## 6 6 5.4 3.9 1.7 0.4 setosa
## 7 7 4.6 3.4 1.4 0.3 setosa
## 8 8 5 3.4 1.5 0.2 setosa
## 9 9 4.4 2.9 1.4 0.2 setosa
## 10 10 4.9 3.1 1.5 0.1 setosa
## # ... with 140 more rows
일반적인 방법으로는 자료가 불러와지지 않는데도 어쩔 수 없이 불러와야 할 때에는, readLines 를 활용하여 줄 단위로 data를 불러오는 것도 방법이 될 수 있다.
# Create a connection object for the file (it is not opened here; the
# printed summary reports it as "closed"), then display its description.
f <- file(description = "./input/train.csv")
print(f)
## A connection with
## description "./input/train.csv"
## class "file"
## mode "r"
## text "text"
## opened "closed"
## can read "yes"
## can write "yes"
# Read the first 150 lines of the CSV as raw text. Passing the path directly
# lets readLines() manage its own connection, instead of leaving the
# connection object created by file() behind for the garbage collector.
line <- readLines("R:/HDD1/data/iris/iris.csv", n = 150)
# Parse the captured lines as comma-separated data, keeping the first line as
# data rather than as a header. `text =` replaces the textConnection() that
# was never closed, and FALSE is spelled out because `F` can be reassigned.
head(read.csv(text = line, header = FALSE, sep = ","))
## V1 V2 V3 V4 V5 V6
## 1 NA Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 2 1 5.1 3.5 1.4 0.2 setosa
## 3 2 4.9 3 1.4 0.2 setosa
## 4 3 4.7 3.2 1.3 0.2 setosa
## 5 4 4.6 3.1 1.5 0.2 setosa
## 6 5 5 3.6 1.4 0.2 setosa
github에 있는 자료를 바로 다운로드 받아 불러오는 코드
# URL of the CSV file hosted on GitHub.
url <- "https://github.com/mrchypark/sejongFinData/raw/master/dataAll.csv"
setwd("R:/HDD1/data/iris")
# Save the remote file into the working directory set on the line above.
download.file(url, destfile = "./dataAll.csv")
# Keep text columns as character vectors. FALSE is spelled out because `F`
# is an ordinary variable that can be reassigned.
dataAll <- read.csv("./dataAll.csv", stringsAsFactors = FALSE)
# Preview the first six rows.
head(dataAll)
## country year 매출액 영업이익 순이익 연결순이익 자산총계
## 1 삼성전자 1997.12(GAAP연결) 226,820 21,387 -9,383 -6,069 320,316
## 2 삼성전자 1998.12(GAAP연결) 257,723 27,063 -4,128 -3,547 240,757
## 3 삼성전자 1999.12(GAAP연결) 320,877 53,760 31,857 31,753 291,786
## 4 삼성전자 2000.12(GAAP연결) 435,278 90,603 61,921 60,029 464,215
## 5 삼성전자 2001.12(GAAP연결) 464,438 39,514 33,709 30,551 521,149
## 6 삼성전자 2002.12(GAAP연결) 595,687 92,456 73,246 70,528 649,550
## 부채총계 자본총계 부채비율 영업이익률 순이익률 연결순이익률 ROE.순이익.
## 1 273,860 46,457 589.49 9.43 -4.14 -2.68 -20.2
## 2 190,162 50,595 375.85 10.5 -1.6 -1.38 -8.16
## 3 160,039 131,747 121.47 16.75 9.93 9.9 24.18
## 4 296,360 167,855 176.56 20.82 14.23 13.79 36.89
## 5 317,164 203,985 155.48 8.51 7.26 6.58 16.53
## 6 394,623 254,927 154.8 15.52 12.3 11.84 28.73
## ROE.연결순이익. 매출액.성장률 영업이익.성장률 순이익.성장률
## 1 -13.06 14.11 55.95 적지
## 2 -7.01 13.62 26.54 적지
## 3 24.1 24.5 98.64 흑전
## 4 35.76 35.65 68.53 94.37
## 5 14.98 6.7 -56.39 -45.56
## 6 27.67 28.26 133.98 117.29