第三章 用R获取数据

3.1获取内置数据集

library("MASS")
data( package = "MASS" ) # 注意这里没有s
?Insurance # 查看数据集的情况
## starting httpd help server ...
##  done
data( package = .packages(all.available=T) ) #查看所有数据集
## Warning in data(package = .packages(all.available = T)): datasets have been
## moved from package 'base' to package 'datasets'
## Warning in data(package = .packages(all.available = T)): datasets have been
## moved from package 'stats' to package 'datasets'
data( Insurance ) # 获取数据集

3.2 获取数据

3.2.1 从csv和txt格式文件获取数据
setwd("./") # 设定保存目录为当前目录
write.csv(Insurance,"Insurance.csv")
content1<-read.csv( "./Insurance.csv" )
head( content1 )
##   X District  Group   Age Holders Claims
## 1 1        1    <1l   <25     197     38
## 2 2        1    <1l 25-29     264     35
## 3 3        1    <1l 30-35     246     20
## 4 4        1    <1l   >35    1680    156
## 5 5        1 1-1.5l   <25     284     63
## 6 6        1 1-1.5l 25-29     536     84
content2<-read.table("./Insurance.csv") # 默认head=F,sep=""
head(content2)
##   V1                                           V2
## 1 NA ,"District","Group","Age","Holders","Claims"
## 2  1                      ,"1","<1l","<25",197,38
## 3  2                    ,"1","<1l","25-29",264,35
## 4  3                    ,"1","<1l","30-35",246,20
## 5  4                    ,"1","<1l",">35",1680,156
## 6  5                   ,"1","1-1.5l","<25",284,63
dim(content2)
## [1] 65  2
content3<-read.table("./Insurance.csv",sep=",",head=T)
head(content3)
##   X District  Group   Age Holders Claims
## 1 1        1    <1l   <25     197     38
## 2 2        1    <1l 25-29     264     35
## 3 3        1    <1l 30-35     246     20
## 4 4        1    <1l   >35    1680    156
## 5 5        1 1-1.5l   <25     284     63
## 6 6        1 1-1.5l 25-29     536     84
dim(content3)
## [1] 64  6
write.table(Insurance[1:10,],"Insurance.txt")
content1<-read.table( "./Insurance.txt" )
head( content1 )
##   District  Group   Age Holders Claims
## 1        1    <1l   <25     197     38
## 2        1    <1l 25-29     264     35
## 3        1    <1l 30-35     246     20
## 4        1    <1l   >35    1680    156
## 5        1 1-1.5l   <25     284     63
## 6        1 1-1.5l 25-29     536     84
content2<-read.csv("./Insurance.txt") # 默认head=F,sep=""
head(content2)
##   District.Group.Age.Holders.Claims
## 1                1 1 <1l <25 197 38
## 2              2 1 <1l 25-29 264 35
## 3              3 1 <1l 30-35 246 20
## 4              4 1 <1l >35 1680 156
## 5             5 1 1-1.5l <25 284 63
## 6           6 1 1-1.5l 25-29 536 84
dim(content2)
## [1] 10  1
content3<-read.table("./Insurance.txt",sep=" ",head=T)
head(content3)
##   District  Group   Age Holders Claims
## 1        1    <1l   <25     197     38
## 2        1    <1l 25-29     264     35
## 3        1    <1l 30-35     246     20
## 4        1    <1l   >35    1680    156
## 5        1 1-1.5l   <25     284     63
## 6        1 1-1.5l 25-29     536     84
dim(content3)
## [1] 10  5

注意seq和head的设置
##### 3.2.2 从Excel中获取数据

library("RODBC")
channel<-odbcConnectExcel( file.choose() ) # 只能是xls格式的
channel
## RODBC Connection 1
## Details:
##   case=nochange
##   DBQ=E:\Lang\R\practise\test.xls
##   DefaultDir=E:\Lang\R\practise
##   Driver={Microsoft Excel Driver (*.xls)}
##   DriverId=790
##   MaxBufferSize=2048
##   PageTimeout=5
sqlTables( channel ) # 列出ODBC连接到的表格
##                     TABLE_CAT TABLE_SCHEM TABLE_NAME   TABLE_TYPE REMARKS
## 1 E:\\Lang\\R\\practise\\test        <NA>    Sheet1$ SYSTEM TABLE    <NA>
sqlFetch( channel,'Sheet1' ) # 显示表格中的Sheet1
##   姓名    学号
## 1 张珊 2013222
## 2 李四 2013289
## 3 王五 2013356
## 4 刘店 2013423
odbcClose( channel)

3.3 从数据库获取数据

library("RODBC")
conn <- odbcConnect( "R_data",uid="root",pwd="" )
sqlFetch(conn,"stu") # 从数据库取出表
## [1] name  score
## <0 rows> (or 0-length row.names)
sqlQuery(conn,"select * from stu limit 1") # 向数据库递交查询
## [1] name  score
## <0 rows> (or 0-length row.names)
table<-data.frame(name=c("zhangsan","lier"),
                  score=c(77,78) )
sqlSave(conn,table,tablename="stu2",append=F) #appent=F 新建表
sqlFetch( conn,"stu2" )
##       name score
## 1 zhangsan    77
## 2     lier    78
table<-data.frame(name=c("zhangsan","lier"),
                  score=c(100,100) )
sqlSave(conn,table,tablename="stu2",append=T) # append=T 插入而非更新,不存在表时新建
sqlQuery(conn,"select * from stu2")
##   rownames     name score
## 1        1 zhangsan    77
## 2        2     lier    78
## 3        1 zhangsan   100
## 4        2     lier   100
sqlDrop( conn,"stu2" ) # 删除表
sqlQuery( conn,"show tables" )
##   Tables_in_r_data
## 1              stu
## 2              ???
sqlClear( conn,"stu" ) # 清除表内容
sqlFetch(conn,"stu") # 从数据库取出表
## [1] name  score
## <0 rows> (or 0-length row.names)
sqlTables(conn) # 返回数据库表信息
##   TABLE_CAT TABLE_SCHEM TABLE_NAME TABLE_TYPE REMARKS
## 1    r_data                    stu      TABLE        
## 2    r_data                    ???      TABLE
sqlColumns( conn,"stu" ) # 返回表列信息
##   TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME DATA_TYPE TYPE_NAME
## 1                  <NA>        stu        name         1      char
## 2                  <NA>        stu       score         4   integer
##   COLUMN_SIZE BUFFER_LENGTH DECIMAL_DIGITS NUM_PREC_RADIX NULLABLE REMARKS
## 1          50            50             NA             NA        1        
## 2          10             4              0             10        1        
##   COLUMN_DEF SQL_DATA_TYPE SQL_DATETIME_SUB CHAR_OCTET_LENGTH
## 1       <NA>             1               NA                50
## 2       <NA>             4               NA                NA
##   ORDINAL_POSITION IS_NULLABLE
## 1                1         YES
## 2                2         YES
close( conn ) # 关闭连接

3.4 从网页获取数据

library("XML")
url<-"http://stockdata.stock.hexun.com/2008en/zxcwzb.aspx?stockid=000002&type=1&date=2013.06.30"
course<-readHTMLTable(url)
names(course)
## [1] "NULL" "NULL" "NULL" "NULL"
course[[2]]
##                                       Period End Date       June 30 2013
## 1                                   Operating  Income  41,390,345,567.72
## 2                                          Net Profit   4,556,304,906.89
## 3                                        Total Profit   7,133,412,305.51
## 4            Net Profit Excluding Extraordinary Items   4,536,753,831.66
## 5                                        Total Assets 432,241,960,220.85
## 6                                Shareholders' Equity  66,644,627,234.02
## 7            Net Cash Flows From Operating Activities  -9,792,399,309.57
## 8                            Basic Earnings Per Share               0.41
## 9                        The Rate Of Return On Equity               6.84
## 10 Net Cash Flows From Operating Activities Per Share              -0.89
## 11                         Net Assets Value Per Share               6.05
## 12                Net Assets Per Share After Adjusted               0.00
## 13   Foreign Financial Accounting Standard Net Profit               0.00
## 14                  EPS Excluding Extraordinary Items               0.41
## 15                                  Report Start Time         2013-01-01
## 16                                    Report End Time         2013-06-30
##         March 31 2013
## 1   13,999,905,876.13
## 2    1,613,904,228.33
## 3    2,394,885,503.42
## 4    1,615,472,700.40
## 5  417,894,248,034.75
## 6   65,578,003,059.72
## 7   -2,383,260,770.37
## 8                0.15
## 9                2.46
## 10              -0.22
## 11               5.96
## 12               0.00
## 13               0.00
## 14               0.15
## 15         2013-01-01
## 16         2013-03-31