R data structures
a <- c(1, 2, 5, 3, 6, -2, 4)
a
## [1] 1 2 5 3 6 -2 4
b <- c("one", "two", "three")
b
## [1] "one" "two" "three"
c <- c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE)
c
## [1] TRUE TRUE TRUE FALSE TRUE FALSE
Here, a is numeric vector, b is a character vector, and c is a logical vector.
e <- c(1, 3, 5)
f <- c(2, 4, 6)
c(e, f)
## [1] 1 3 5 2 4 6
:
used in the last statement is used to generate a sequence of numbers. For examplea <- c(1:6)
a
## [1] 1 2 3 4 5 6
or
a <- 1:6
a
## [1] 1 2 3 4 5 6
is equivalent to
a <- c(1, 2, 3, 4, 5, 6)
a
## [1] 1 2 3 4 5 6
a <- 6:1
a
## [1] 6 5 4 3 2 1
seq()
is a more general facility for generating sequences. For exampleseq(-5, 5, by = .2)
## [1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4
## [15] -2.2 -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4
## [29] 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2
## [43] 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0
seq(from = -5, to = 5, length.out = 51)
## [1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4
## [15] -2.2 -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4
## [29] 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2
## [43] 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0
seq.int(-5, 5, by = 2)
## [1] -5 -3 -1 1 3 5
seq(10)
## [1] 1 2 3 4 5 6 7 8 9 10
or
seq(length.out= 10)
## [1] 1 2 3 4 5 6 7 8 9 10
is equivalent to
1:10
## [1] 1 2 3 4 5 6 7 8 9 10
rep()
which can be used for replicating an object in various complicated ways. The simplest form is a1 <- rep(x, times = 5)
which will put five copies of x
end-to-end in a1
. Another useful version is
a2 <- rep(x, each = 5)
which repeats each element of x
five times before moving on to the next.
For example
rep(1:3, times = 5)
## [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
rep(1:3, each = 5)
## [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
a[c(2, 4)]
refers to the 2th and 4th element of vector a
.a <- c(1, 2, 5, 3, 5, -2, 5)
a[c(2, 4)]
## [1] 2 3
a[5]
## [1] 5
a[c(1, 3, 5)]
## [1] 1 5 5
a[2:6]
## [1] 2 5 3 5 -2
a[-1]
## [1] 2 5 3 5 -2 5
a[-c(1, 3, 5)]
## [1] 2 3 -2 5
which(a==5)
## [1] 3 5 7
matrix
function . The general format ismatrix(vector, nrow = number_of_rows, ncol = number_of_columns,
byrow = logical_value,
dimnames = list(char_vector_rownames, char_vector_colnames))
where vector contains the elements for the matrix, nrow
and ncol
specify the row and column dimensions, and dimnames
contains optional row and column labels stored in character vectors. The option byrow
indicates whether the matrix should be filled in by row (byrow=TRUE
) or by column (byrow=FALSE
). The default is by column. The following listing demonstrates the matrix
function .
y <- matrix(1:20, nrow=5, ncol=4)
y
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
cells <- c(1,26,24,68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2")
matrix(cells, nrow=2, ncol=2, byrow=TRUE, dimnames=list(rnames, cnames))
## C1 C2
## R1 1 26
## R2 24 68
matrix(cells, nrow=2, ncol=2, byrow=FALSE, dimnames=list(rnames, cnames))
## C1 C2
## R1 1 24
## R2 26 68
dim
attribute. For examplesx <- 1:15
dim(x)
## NULL
dim(x) <- c(5, 3)
x
## [,1] [,2] [,3]
## [1,] 1 6 11
## [2,] 2 7 12
## [3,] 3 8 13
## [4,] 4 9 14
## [5,] 5 10 15
x <- matrix(1:15,5,3)
x
## [,1] [,2] [,3]
## [1,] 1 6 11
## [2,] 2 7 12
## [3,] 3 8 13
## [4,] 4 9 14
## [5,] 5 10 15
dim(x) <- c(3, 5)
x
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 4 7 10 13
## [2,] 2 5 8 11 14
## [3,] 3 6 9 12 15
matrix(rnorm(24),6,4)
## [,1] [,2] [,3] [,4]
## [1,] -1.1132946 0.4269011 0.4083483 -1.1517921
## [2,] -1.7783563 -0.4838834 1.0144093 0.1171856
## [3,] -1.0865778 -0.6550947 0.7400840 1.1979374
## [4,] 1.5422933 0.2837415 1.6786151 1.1635374
## [5,] 1.7875962 0.2856816 -0.6179048 -0.6974258
## [6,] 0.7060166 0.2504176 -1.1966705 -0.3744002
x <- 1:6
diag(x)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 0 0 0 0 0
## [2,] 0 2 0 0 0 0
## [3,] 0 0 3 0 0 0
## [4,] 0 0 0 4 0 0
## [5,] 0 0 0 0 5 0
## [6,] 0 0 0 0 0 6
x <- rep(1,6)
diag(x)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 0 0 0 0 0
## [2,] 0 1 0 0 0 0
## [3,] 0 0 1 0 0 0
## [4,] 0 0 0 1 0 0
## [5,] 0 0 0 0 1 0
## [6,] 0 0 0 0 0 1
or
x <- 1
diag(x, nrow=6)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 0 0 0 0 0
## [2,] 0 1 0 0 0 0
## [3,] 0 0 1 0 0 0
## [4,] 0 0 0 1 0 0
## [5,] 0 0 0 0 1 0
## [6,] 0 0 0 0 0 1
x <- matrix(1:36,6,6)
x
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 7 13 19 25 31
## [2,] 2 8 14 20 26 32
## [3,] 3 9 15 21 27 33
## [4,] 4 10 16 22 28 34
## [5,] 5 11 17 23 29 35
## [6,] 6 12 18 24 30 36
diag(x)
## [1] 1 8 15 22 29 36
matrix(rep(0,20),4,5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
## [3,] 0 0 0 0 0
## [4,] 0 0 0 0 0
or
matrix(0,4,5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
## [3,] 0 0 0 0 0
## [4,] 0 0 0 0 0
matrix(1,4,5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 1 1 1 1
## [2,] 1 1 1 1 1
## [3,] 1 1 1 1 1
## [4,] 1 1 1 1 1
X[i,]
refers to the i
th row of matrix X
, X[,j]
refers to jth column, and X[i, j]
refers to the ij
th element, respectively. The subscripts i
and j
can be numeric vectors in order to select multiple rows or columns, as shown in the following listing. x <- matrix(1:10, nrow=2)
x
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10
x[2,]
## [1] 2 4 6 8 10
x[,2]
## [1] 3 4
x[1,4]
## [1] 7
x[1, c(4,5)]
## [1] 7 9
x[-1, ]
will suppress the first row, while x[-c(1, 5), ]
will do the same for the 1st and 15th rows. For examplex <- matrix(1:20, nrow=5)
x
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
x[-c(1, 5), ]
## [,1] [,2] [,3] [,4]
## [1,] 2 7 12 17
## [2,] 3 8 13 18
## [3,] 4 9 14 19
x[-1,]
## [,1] [,2] [,3] [,4]
## [1,] 2 7 12 17
## [2,] 3 8 13 18
## [3,] 4 9 14 19
## [4,] 5 10 15 20
x[,-1]
## [,1] [,2] [,3]
## [1,] 6 11 16
## [2,] 7 12 17
## [3,] 8 13 18
## [4,] 9 14 19
## [5,] 10 15 20
x[,-c(1, 3)]
## [,1] [,2]
## [1,] 6 16
## [2,] 7 17
## [3,] 8 18
## [4,] 9 19
## [5,] 10 20
array(vector, dimensions, dimnames)
where vector contains the data for the array, dimensions is a numeric vector giving the maximal index for each dimension, and dimnames is an optional list of dimension labels. For example
array(1:24, dim = c(3, 4, 2))
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 13 16 19 22
## [2,] 14 17 20 23
## [3,] 15 18 21 24
dim1 <- c("A1", "A2")
dim2 <- c("B1", "B2", "B3")
dim3 <- c("C1", "C2", "C3", "C4")
z <- array(1:24, c(2, 3, 4), dimnames=list(dim1, dim2, dim3))
z
## , , C1
##
## B1 B2 B3
## A1 1 3 5
## A2 2 4 6
##
## , , C2
##
## B1 B2 B3
## A1 7 9 11
## A2 8 10 12
##
## , , C3
##
## B1 B2 B3
## A1 13 15 17
## A2 14 16 18
##
## , , C4
##
## B1 B2 B3
## A1 19 21 23
## A2 20 22 24
z[,,1];z[,,2];z[,,3];z[,,4]
## B1 B2 B3
## A1 1 3 5
## A2 2 4 6
## B1 B2 B3
## A1 7 9 11
## A2 8 10 12
## B1 B2 B3
## A1 13 15 17
## A2 14 16 18
## B1 B2 B3
## A1 19 21 23
## A2 20 22 24
z[1,,]
## C1 C2 C3 C4
## B1 1 7 13 19
## B2 3 9 15 21
## B3 5 11 17 23
z[,2,]
## C1 C2 C3 C4
## A1 3 9 15 21
## A2 4 10 16 22
z[1,2,3]
## [1] 15
data.frame()
function:mydata <- data.frame(col1, col2, col3,...)
where col1, col2, col3, … are column vectors of any type (such as character, numeric, or logical). Names for each column can be provided with the names function. The following listing makes this clear.
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(patientID, age, diabetes, status)
patientdata
patientID | age | diabetes | status |
---|---|---|---|
1 | 25 | Type1 | Poor |
2 | 34 | Type2 | Improved |
3 | 28 | Type1 | Excellent |
4 | 52 | Type1 | Poor |
NOTE: Each column must have only one mode, but you can put columns of different modes together to form the data frame.
patientdata[1:2,]
patientID | age | diabetes | status |
---|---|---|---|
1 | 25 | Type1 | Poor |
2 | 34 | Type2 | Improved |
patientdata[,1:2]
patientID | age |
---|---|
1 | 25 |
2 | 34 |
3 | 28 |
4 | 52 |
patientdata[,-1]
age | diabetes | status |
---|---|---|
25 | Type1 | Poor |
34 | Type2 | Improved |
28 | Type1 | Excellent |
52 | Type1 | Poor |
patientdata[,c("diabetes", "status")]
diabetes | status |
---|---|
Type1 | Poor |
Type2 | Improved |
Type1 | Excellent |
Type1 | Poor |
patientdata[,"age"]
## [1] 25 34 28 52
patientdata$age
## [1] 25 34 28 52
The $
is used to indicate a particular variable from a given data frame.
Diabetes (Type1, Type2)
is an example of a nominal variable.Status (poor, improved, excellent)
is a good example of an ordinal variable.Categorical (nominal) and ordered categorical (ordinal) variables in R are called factors.
The function factor()
stores the categorical values as a vector of integers in the range [1... k]
(where k
is the number of unique values in the nominal variable), and an internal vector of character strings (the original values) mapped to these integers.
For example, assume that you have the vector
diabetes <- c("Type1", "Type2", "Type1", "Type1")
diabetes <- factor(diabetes)
diabetes
## [1] Type1 Type2 Type1 Type1
## Levels: Type1 Type2
The statement diabetes <- factor(diabetes)
stores this vector as (1, 2, 1, 1)
and associates it with 1=Type1 and 2=Type2 internally (the assignment is alphabetical). Any analyses performed on the vector diabetes will treat the variable as nominal and select the statistical methods appropriate for this level of measurement.
ordered=TRUE
is required in the function factor()
. For examplestatus <- c("Poor", "Improved", "Excellent", "Poor")
status <- factor(status, ordered=TRUE)
status
## [1] Poor Improved Excellent Poor
## Levels: Excellent < Improved < Poor
the statement status <- factor(status, ordered=TRUE)
will encode the vector as (3, 2, 1, 3)
and associate these values internally as 1=Excellent, 2=Improved, and 3=Poor. Additionally, any analyses performed on this vector will treat the variable as ordinal and select the statistical methods appropriately.
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
diabetes <- factor(diabetes)
status <- factor(status, order=TRUE)
patientdata <- data.frame(patientID, age, diabetes, status)
patientdata
patientID | age | diabetes | status |
---|---|---|---|
1 | 25 | Type1 | Poor |
2 | 34 | Type2 | Improved |
3 | 28 | Type1 | Excellent |
4 | 52 | Type1 | Poor |
str(patientdata)
## 'data.frame': 4 obs. of 4 variables:
## $ patientID: num 1 2 3 4
## $ age : num 25 34 28 52
## $ diabetes : Factor w/ 2 levels "Type1","Type2": 1 2 1 1
## $ status : Ord.factor w/ 3 levels "Excellent"<"Improved"<..: 3 2 1 3
summary(patientdata)
## patientID age diabetes status
## Min. :1.00 Min. :25.00 Type1:3 Excellent:1
## 1st Qu.:1.75 1st Qu.:27.25 Type2:1 Improved :1
## Median :2.50 Median :31.00 Poor :2
## Mean :2.50 Mean :34.75
## 3rd Qu.:3.25 3rd Qu.:38.50
## Max. :4.00 Max. :52.00
NOTE: The function str(object)
provides information on an object in R. The summary()
function treats the variables differently. It provides the minimum, maximum, mean, and quartiles for the continuous variable age, and frequency counts for the categorical variables diabetes and status.
list()
function:mylist <- list(object1, object2, ...)
where the objects are any of the structures seen so far. Optionally, you can name the objects in a list:
mylist <- list(name1=object1, name2=object2,...)
The following listing shows an example.
g <- "My First List"
h <- c(25, 26, 18, 39)
j <- matrix(1:10, nrow=5)
k <- c("one", "two", "three")
mylist <- list(title=g, ages=h, j, k)
mylist
## $title
## [1] "My First List"
##
## $ages
## [1] 25 26 18 39
##
## [[3]]
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
##
## [[4]]
## [1] "one" "two" "three"
mylist[[1]]
## [1] "My First List"
mylist[['title']]
## [1] "My First List"
mylist$title
## [1] "My First List"
mylist[[2]]
## [1] 25 26 18 39
mylist[['ages']]
## [1] 25 26 18 39
mylist$ages
## [1] 25 26 18 39
mylist[[3]]
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
mylist[[4]]
## [1] "one" "two" "three"
mymatrix=matrix(c(1,3,5,7,9,11,13,15,17),3,3)
mymatrix
## [,1] [,2] [,3]
## [1,] 1 7 13
## [2,] 3 9 15
## [3,] 5 11 17
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(patientID, age, diabetes, status)
patientdata
patientID | age | diabetes | status |
---|---|---|---|
1 | 25 | Type1 | Poor |
2 | 34 | Type2 | Improved |
3 | 28 | Type1 | Excellent |
4 | 52 | Type1 | Poor |
read.table()
, a function that reads a file in table format and saves it as a data frame. Here’s the syntax:mydataframe <- read.table(file, header=logical_value, sep="delimiter"...)
where file is a delimited ASCII file , header is a logical value indicating whether the first row contains variable names (TRUE or FALSE). sep specifies the delimiter separating data values. For example
grades <- read.table("studentgrades.txt", header=TRUE, sep=",")
grades
StudentID | First | Last | Math | Science | Social.Studies |
---|---|---|---|---|---|
11 | Bob | Smith | 90 | 80 | 67 |
12 | Jane | Weary | 75 | NA | 80 |
10 | Dan | Thornton | 65 | 75 | 70 |
40 | Mary | O’Leary | 90 | 95 | 92 |
read.table
are useful, for example, read.csv
.grades <- read.csv("studentgrades.csv", header=TRUE)##Usefull
grades
StudentID | First | Last | Math | Science | Social.Studies |
---|---|---|---|---|---|
11 | Bob | Smith | 90 | 80 | 67 |
12 | Jane | Weary | 75 | NA | 80 |
10 | Dan | Thornton | 65 | 75 | 70 |
40 | Mary | O’Leary | 90 | 95 | 92 |
Excel 2007 uses an XLSX file format, which is essentially a zipped set of XML files. The XLSX package can be used to access spreadsheets in this format. Be sure to download and install it before first use. The read.xlsx()
function imports a worksheet from an XLSX file into a data frame. The simplest format is read.xlsx(file, n)
where file is the path to an Excel 2007 workbook and n is the number of the worksheet to be imported. For example,
#install.packages('xlsx')
library(xlsx)
mydataframe <- read.xlsx("studentgrades.xlsx", 1)
mydataframe
StudentID | First | Last | Math | Science | Social.Studies | NA. |
---|---|---|---|---|---|---|
11 | Bob | Smith | 90 | 80 | 67 | NA |
12 | Jane | Weary | 75 | NA | 80 | NA |
10 | Dan | Thornton | 65 | 75 | 70 | NA |
40 | Mary | O’Leary | 90 | 95 | 92 | NA |
Install following packages sequentially
```r
install.packages("rJava")
install.packages("xlsxjars")
install.packages("xlsx")
library(rJava)
library(xlsxjars)
library(xlsx)
```
Remember to setup environment variables. On cmd command line, type
set JAVA_HOME=C:\Program Files\Java\jdk1.8.0_171 Path=C:\Program Files\Java\jdk1.8.0_211\bin
Or setup environment variables manually by adding path C:\Program Files\Java\jdk1.8.0_211\bin
Before using RODBC, it is required to configure the ODBC connection. After configuring R to communicate with your database, it is ready to use RODBC.
Installing the R package RODBC. A quick way to install the RODBC package is to use the function install.packages
:
install.packages("RODBC")
library(RODBC)
help(package="RODBC")
odbcConnect()
.close()
.odbcConnect()
:odbcConnect(dsn, uid = "", pwd = "", ...)
For example:
library(RODBC)
con<-odbcConnect('mydata',uid="root",pwd='lx5609')
sqlTables(con) # List the table-like objects accessible from an ODBC connection.
TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS |
---|---|---|---|---|
mysql | bmw_stock | TABLE | ||
mysql | columns_priv | TABLE | Column privileges | |
mysql | db | TABLE | Database privileges | |
mysql | engine_cost | TABLE | ||
mysql | event | TABLE | Events | |
mysql | func | TABLE | User defined functions | |
mysql | general_log | TABLE | General log | |
mysql | gtid_executed | TABLE | ||
mysql | help_category | TABLE | help categories | |
mysql | help_keyword | TABLE | help keywords | |
mysql | help_relation | TABLE | keyword-topic relation | |
mysql | help_topic | TABLE | help topics | |
mysql | innodb_index_stats | TABLE | ||
mysql | innodb_table_stats | TABLE | ||
mysql | ndb_binlog_index | TABLE | ||
mysql | patient | TABLE | ||
mysql | patientdata | TABLE | ||
mysql | plugin | TABLE | MySQL plugins | |
mysql | proc | TABLE | Stored Procedures | |
mysql | procs_priv | TABLE | Procedure privileges | |
mysql | proxies_priv | TABLE | User proxy privileges | |
mysql | server_cost | TABLE | ||
mysql | servers | TABLE | MySQL Foreign Servers table | |
mysql | slave_master_info | TABLE | Master Information | |
mysql | slave_relay_log_info | TABLE | Relay Log Information | |
mysql | slave_worker_info | TABLE | Worker Information | |
mysql | slow_log | TABLE | Slow log | |
mysql | tables_priv | TABLE | Table privileges | |
mysql | time_zone | TABLE | Time zones | |
mysql | time_zone_leap_second | TABLE | Leap seconds information for time zones | |
mysql | time_zone_name | TABLE | Time zone names | |
mysql | time_zone_transition | TABLE | Time zone transitions | |
mysql | time_zone_transition_type | TABLE | Time zone transition types | |
mysql | usarrests | TABLE | ||
mysql | user | TABLE | Users and global privileges |
odbcGetInfo(con) #Request information on an ODBC connection.
## DBMS_Name DBMS_Ver Driver_ODBC_Ver
## "MySQL" "5.7.26" "03.51"
## Data_Source_Name Driver_Name Driver_Ver
## "mydata" "myodbc5a.dll" "05.02.0007"
## ODBC_Ver Server_Name
## "03.80.0000" "127.0.0.1 via TCP/IP"
sqlSave
. For example, write the data in R into MySQL. data(USArrests)
head(USArrests)
Murder | Assault | UrbanPop | Rape | |
---|---|---|---|---|
Alabama | 13.2 | 236 | 58 | 21.2 |
Alaska | 10.0 | 263 | 48 | 44.5 |
Arizona | 8.1 | 294 | 80 | 31.0 |
Arkansas | 8.8 | 190 | 50 | 19.5 |
California | 9.0 | 276 | 91 | 40.6 |
Colorado | 7.9 | 204 | 78 | 38.7 |
sqlDrop(con,"USArrests") # Delete the USArrests data from MySQL.
sqlSave(con,USArrests, rownames = "state", addPK = TRUE)
rm(USArrests)
sqlFetch()
. For example: result = sqlFetch( con,'USArrests' )
head(result)
state | murder | assault | urbanpop | rape |
---|---|---|---|---|
Alabama | 13.2 | 236 | 58 | 21.2 |
Alaska | 10.0 | 263 | 48 | 44.5 |
Arizona | 8.1 | 294 | 80 | 31.0 |
Arkansas | 8.8 | 190 | 50 | 19.5 |
California | 9.0 | 276 | 91 | 40.6 |
Colorado | 7.9 | 204 | 78 | 38.7 |
close( con )
help(package="RMySQL")
Or get the detailed usage from RMySQL index
Use the function dbConnect
to establish a connection. For example
#install.packages("RMySQL")
library(RMySQL)
## Loading required package: DBI
#con <- dbConnect(RMySQL::MySQL(), dbname = "mydata", username="root", password="lx5609",
# host="127.0.0.1", port=3306)
con <- dbConnect(MySQL(), dbname = "mysql", username="root", password="lx5609")
dbGetInfo(con)
## $host
## [1] "localhost"
##
## $user
## [1] "root"
##
## $dbname
## [1] "mysql"
##
## $conType
## [1] "localhost via TCP/IP"
##
## $serverVersion
## [1] "5.7.26"
##
## $protocolVersion
## [1] 10
##
## $threadId
## [1] 194
##
## $rsId
## list()
dbListTables(con)
## [1] "bmw_stock" "columns_priv"
## [3] "db" "engine_cost"
## [5] "event" "func"
## [7] "general_log" "gtid_executed"
## [9] "help_category" "help_keyword"
## [11] "help_relation" "help_topic"
## [13] "innodb_index_stats" "innodb_table_stats"
## [15] "ndb_binlog_index" "patient"
## [17] "patientdata" "plugin"
## [19] "proc" "procs_priv"
## [21] "proxies_priv" "server_cost"
## [23] "servers" "slave_master_info"
## [25] "slave_relay_log_info" "slave_worker_info"
## [27] "slow_log" "tables_priv"
## [29] "time_zone" "time_zone_leap_second"
## [31] "time_zone_name" "time_zone_transition"
## [33] "time_zone_transition_type" "usarrests"
## [35] "user"
Use the function dbWriteTable
to write or update a table. For example, write the BMW stock data into MySQL.
#install.packages('quantmod')
library(quantmod)
bmw_stock = new.env()
getSymbols('BMW.DE', env = bmw_stock, src = 'yahoo',
from = as.Date("2016-01-01"), to = as.Date("2018-12-31"))
BMW = bmw_stock$BMW.DE
write.csv(BMW,'BMW_stock.csv',row.names = index(BMW))
BMW_STOCK <- read.csv("BMW_stock.csv", head = T)
head(BMW_STOCK)
Date | BMW.DE.Open | BMW.DE.High | BMW.DE.Low | BMW.DE.Close | BMW.DE.Volume | BMW.DE.Adjusted |
---|---|---|---|---|---|---|
2016/1/4 | 94.71 | 94.71 | 91.76 | 92.25 | 2376296 | 77.20447 |
2016/1/5 | 93.42 | 93.50 | 90.63 | 91.82 | 1533191 | 76.84460 |
2016/1/6 | 91.00 | 91.00 | 87.14 | 88.78 | 2301978 | 74.30040 |
2016/1/7 | 85.30 | 86.00 | 83.52 | 85.44 | 2812427 | 71.50513 |
2016/1/8 | 86.16 | 87.14 | 83.41 | 83.44 | 1969156 | 69.83134 |
2016/1/11 | 83.11 | 85.59 | 82.88 | 83.14 | 1832479 | 69.58026 |
dbRemoveTable(con, "bmw_stock")#Delete the data from MySQL.
## [1] TRUE
dbWriteTable(con,"BMW_STOCK",BMW_STOCK)
## [1] TRUE
dbListTables(con)
## [1] "bmw_stock" "columns_priv"
## [3] "db" "engine_cost"
## [5] "event" "func"
## [7] "general_log" "gtid_executed"
## [9] "help_category" "help_keyword"
## [11] "help_relation" "help_topic"
## [13] "innodb_index_stats" "innodb_table_stats"
## [15] "ndb_binlog_index" "patient"
## [17] "patientdata" "plugin"
## [19] "proc" "procs_priv"
## [21] "proxies_priv" "server_cost"
## [23] "servers" "slave_master_info"
## [25] "slave_relay_log_info" "slave_worker_info"
## [27] "slow_log" "tables_priv"
## [29] "time_zone" "time_zone_leap_second"
## [31] "time_zone_name" "time_zone_transition"
## [33] "time_zone_transition_type" "usarrests"
## [35] "user"
If there is something wrong, please see this website.
Weite the BMW_stock.xlsx data into MySQL.
BMW_STOCK <- read.xlsx("BMW_stock.xlsx", sheetIndex = 1, head = T,colIndex = 1:7)
head(BMW_STOCK)
Date | BMW.DE.Open | BMW.DE.High | BMW.DE.Low | BMW.DE.Close | BMW.DE.Volume | BMW.DE.Adjusted |
---|---|---|---|---|---|---|
2016-01-04 | 94.71 | 94.71 | 91.76 | 92.25 | 2376296 | 77.20447 |
2016-01-05 | 93.42 | 93.50 | 90.63 | 91.82 | 1533191 | 76.84460 |
2016-01-06 | 91.00 | 91.00 | 87.14 | 88.78 | 2301978 | 74.30040 |
2016-01-07 | 85.30 | 86.00 | 83.52 | 85.44 | 2812427 | 71.50513 |
2016-01-08 | 86.16 | 87.14 | 83.41 | 83.44 | 1969156 | 69.83134 |
2016-01-11 | 83.11 | 85.59 | 82.88 | 83.14 | 1832479 | 69.58026 |
dbRemoveTable(con, "bmw_stock")#Delete the data from MySQL.
## [1] TRUE
dbWriteTable(con,"BMW_STOCK",BMW_STOCK)
## [1] TRUE
dbListTables(con)
## [1] "bmw_stock" "columns_priv"
## [3] "db" "engine_cost"
## [5] "event" "func"
## [7] "general_log" "gtid_executed"
## [9] "help_category" "help_keyword"
## [11] "help_relation" "help_topic"
## [13] "innodb_index_stats" "innodb_table_stats"
## [15] "ndb_binlog_index" "patient"
## [17] "patientdata" "plugin"
## [19] "proc" "procs_priv"
## [21] "proxies_priv" "server_cost"
## [23] "servers" "slave_master_info"
## [25] "slave_relay_log_info" "slave_worker_info"
## [27] "slow_log" "tables_priv"
## [29] "time_zone" "time_zone_leap_second"
## [31] "time_zone_name" "time_zone_transition"
## [33] "time_zone_transition_type" "usarrests"
## [35] "user"
dbReadTable
function. For example: result = dbReadTable(con,"BMW_STOCK")
head(result)
Date | BMW.DE.Open | BMW.DE.High | BMW.DE.Low | BMW.DE.Close | BMW.DE.Volume | BMW.DE.Adjusted |
---|---|---|---|---|---|---|
2016-01-04 | 94.71 | 94.71 | 91.76 | 92.25 | 2376296 | 77.20447 |
2016-01-05 | 93.42 | 93.50 | 90.63 | 91.82 | 1533191 | 76.84460 |
2016-01-06 | 91.00 | 91.00 | 87.14 | 88.78 | 2301978 | 74.30040 |
2016-01-07 | 85.30 | 86.00 | 83.52 | 85.44 | 2812427 | 71.50513 |
2016-01-08 | 86.16 | 87.14 | 83.41 | 83.44 | 1969156 | 69.83134 |
2016-01-11 | 83.11 | 85.59 | 82.88 | 83.14 | 1832479 | 69.58026 |
Remember to disconnect con
dbDisconnect(con)
## [1] TRUE
data()
data(infert)
head(infert)
education | age | parity | induced | case | spontaneous | stratum | pooled.stratum |
---|---|---|---|---|---|---|---|
0-5yrs | 26 | 6 | 1 | 1 | 2 | 1 | 3 |
0-5yrs | 42 | 1 | 1 | 1 | 0 | 2 | 1 |
0-5yrs | 39 | 6 | 2 | 1 | 0 | 3 | 4 |
0-5yrs | 34 | 4 | 2 | 1 | 0 | 4 | 2 |
6-11yrs | 35 | 3 | 1 | 1 | 1 | 5 | 32 |
6-11yrs | 36 | 4 | 2 | 1 | 1 | 6 | 36 |
str(infert)
## 'data.frame': 248 obs. of 8 variables:
## $ education : Factor w/ 3 levels "0-5yrs","6-11yrs",..: 1 1 1 1 2 2 2 2 2 2 ...
## $ age : num 26 42 39 34 35 36 23 32 21 28 ...
## $ parity : num 6 1 6 4 3 4 1 2 1 2 ...
## $ induced : num 1 1 2 2 1 2 0 0 0 0 ...
## $ case : num 1 1 1 1 1 1 1 1 1 1 ...
## $ spontaneous : num 2 0 0 0 1 1 0 0 1 0 ...
## $ stratum : int 1 2 3 4 5 6 7 8 9 10 ...
## $ pooled.stratum: num 3 1 4 2 32 36 6 22 5 19 ...
data()
. For example#install.packages('rpart')
#data(package="rpart")#To see the list of datasets in R package rpart.
data(car90, package="rpart")
car90
data(car90, package="rpart")
dim(car90)
## [1] 111 34
car90[1:15,1:8]
Country | Disp | Disp2 | Eng.Rev | Front.Hd | Frt.Leg.Room | Frt.Shld | Gear.Ratio | |
---|---|---|---|---|---|---|---|---|
Acura Integra | Japan | 112 | 1.8 | 2935 | 3.5 | 41.5 | 53.0 | 3.26 |
Acura Legend | Japan | 163 | 2.7 | 2505 | 2.0 | 41.5 | 55.5 | 2.95 |
Audi 100 | Germany | 141 | 2.3 | 2775 | 2.5 | 41.5 | 56.5 | 3.27 |
Audi 80 | Germany | 121 | 2.0 | 2835 | 4.0 | 42.0 | 52.5 | 3.25 |
BMW 325i | Germany | 152 | 2.5 | 2625 | 2.0 | 42.0 | 52.0 | 3.02 |
BMW 535i | Germany | 209 | 3.5 | 2285 | 3.0 | 42.0 | 54.5 | 2.80 |
Buick Century | USA | 151 | 2.5 | NA | 4.0 | 42.0 | 56.5 | NA |
Buick Electra | USA | 231 | 3.8 | NA | 6.0 | 42.0 | 58.5 | NA |
Buick Le Sabre | USA | 231 | 3.8 | NA | 5.0 | 41.0 | 59.0 | NA |
Buick Regal | NA | 189 | 3.1 | NA | 5.5 | 41.0 | 58.0 | NA |
Buick Riviera | USA | 231 | 3.8 | NA | 3.5 | 41.5 | 58.0 | NA |
Cadillac Brougham | USA | 307 | 5.0 | NA | 5.0 | 43.0 | 60.5 | NA |
Cadillac De Ville | USA | 273 | 4.5 | NA | 6.0 | 42.0 | 59.5 | NA |
Cadillac Eldorado | USA | 273 | 4.5 | NA | 3.0 | 42.0 | 58.0 | NA |
Cadillac Seville | NA | NA | NA | NA | 3.0 | 42.0 | 57.5 | NA |
Table Useful functions for working with data objects
Function | Purpose |
---|---|
length(object) | Number of elements/components. |
dim(object) | Dimensions of an object. |
str(object) | Structure of an object. |
class(object) | Class or type of an object. |
mode(object) | How an object is stored. |
names(object) | Names of components in an object. |
c(object, object,…) | Combines objects into a vector. |
cbind(object, object, …) | Combines objects as columns. |
rbind(object, object, …) | Combines objects as rows. |
object | Prints the object. |
head(object) | Lists the first part of the object. |
tail(object) | Lists the last part of the object. |
hist(x) | histogram of the frequencies of x. |
barplot(x) | histogram of the values of x. |
boxplot(x) | “box-and-whiskers”" plot. |
mean(x) | Mean of x. |
median(x) | Median of x. |
sd(x) | Standard deviation of x. |
var(x) | Variance or covariance of x. |
quantile(x, probs = τ) | The τth quantile of x. |
range(x) | Range of x. |
list() | Construct, coerce and check for both kinds of R lists. |
sum(x) | Sum of x. |
diff(x, lag = n) | Lagged Differences. |
min(x) | Maximum of x. |
max(x) | Minimum of x. |
scale(x, center = TURE, scale = TRUE) | Scaling and centering of matrix-like objects x. |
ls() | List objects. |
rm(object, object, …) | Deletes one or more objects. The statement rm(list = ls()) will remove most objects from the working environment. |
For example
x1 <- matrix(rnorm(40), 10, 4)
x1
## [,1] [,2] [,3] [,4]
## [1,] 0.4131018 -2.8646636 0.01274560 2.68080296
## [2,] 1.6479966 -0.4008321 -0.32436041 0.75550749
## [3,] 0.5800363 0.4664694 1.58975393 0.82006017
## [4,] 2.4732208 -0.9401913 1.06583860 -2.31966693
## [5,] -0.8146677 0.4089989 0.30808113 1.58804158
## [6,] 0.7027294 1.9608918 0.07449545 -0.78445920
## [7,] -1.1837963 1.4603943 1.67093577 0.04816794
## [8,] -0.6992176 1.3177634 0.08922955 -0.15467310
## [9,] -1.4003259 0.5958976 -0.56039363 0.17799311
## [10,] 0.1367936 1.6772493 0.40355336 2.59258884
x2 <- matrix(rnorm(40), 10, 4)
x2
## [,1] [,2] [,3] [,4]
## [1,] 1.080078810 1.78448289 -0.18895411 -0.46901667
## [2,] 0.782677273 -0.02549573 -0.45202248 -0.01574861
## [3,] 0.006482925 0.13343757 1.51852686 -0.43071095
## [4,] 0.719970749 0.31709912 -0.91860203 -0.37460270
## [5,] -0.830066950 -0.65177550 0.11903770 -0.14453095
## [6,] 0.260424362 0.58089884 2.23171698 0.06341242
## [7,] -1.571724660 3.01377261 0.60592513 0.08943512
## [8,] -1.323866284 -0.76753685 -0.23633026 -0.62377987
## [9,] 0.517493301 0.17316816 0.31482968 0.81807969
## [10,] 0.560142214 -0.38316495 -0.08698075 0.48187483
length(x1)
## [1] 40
length(x1[1,])
## [1] 4
dim(x1);dim(x2)
## [1] 10 4
## [1] 10 4
class(x1)
## [1] "matrix"
mode(x1)
## [1] "numeric"
c(x1[1,],x2[,2])
## [1] 0.41310176 -2.86466361 0.01274560 2.68080296 1.78448289
## [6] -0.02549573 0.13343757 0.31709912 -0.65177550 0.58089884
## [11] 3.01377261 -0.76753685 0.17316816 -0.38316495
cbind(x1,x2)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.4131018 -2.8646636 0.01274560 2.68080296 1.080078810
## [2,] 1.6479966 -0.4008321 -0.32436041 0.75550749 0.782677273
## [3,] 0.5800363 0.4664694 1.58975393 0.82006017 0.006482925
## [4,] 2.4732208 -0.9401913 1.06583860 -2.31966693 0.719970749
## [5,] -0.8146677 0.4089989 0.30808113 1.58804158 -0.830066950
## [6,] 0.7027294 1.9608918 0.07449545 -0.78445920 0.260424362
## [7,] -1.1837963 1.4603943 1.67093577 0.04816794 -1.571724660
## [8,] -0.6992176 1.3177634 0.08922955 -0.15467310 -1.323866284
## [9,] -1.4003259 0.5958976 -0.56039363 0.17799311 0.517493301
## [10,] 0.1367936 1.6772493 0.40355336 2.59258884 0.560142214
## [,6] [,7] [,8]
## [1,] 1.78448289 -0.18895411 -0.46901667
## [2,] -0.02549573 -0.45202248 -0.01574861
## [3,] 0.13343757 1.51852686 -0.43071095
## [4,] 0.31709912 -0.91860203 -0.37460270
## [5,] -0.65177550 0.11903770 -0.14453095
## [6,] 0.58089884 2.23171698 0.06341242
## [7,] 3.01377261 0.60592513 0.08943512
## [8,] -0.76753685 -0.23633026 -0.62377987
## [9,] 0.17316816 0.31482968 0.81807969
## [10,] -0.38316495 -0.08698075 0.48187483
rbind(x1,x2)
## [,1] [,2] [,3] [,4]
## [1,] 0.413101757 -2.86466361 0.01274560 2.68080296
## [2,] 1.647996617 -0.40083208 -0.32436041 0.75550749
## [3,] 0.580036319 0.46646936 1.58975393 0.82006017
## [4,] 2.473220803 -0.94019133 1.06583860 -2.31966693
## [5,] -0.814667671 0.40899895 0.30808113 1.58804158
## [6,] 0.702729374 1.96089178 0.07449545 -0.78445920
## [7,] -1.183796316 1.46039430 1.67093577 0.04816794
## [8,] -0.699217572 1.31776344 0.08922955 -0.15467310
## [9,] -1.400325945 0.59589760 -0.56039363 0.17799311
## [10,] 0.136793553 1.67724930 0.40355336 2.59258884
## [11,] 1.080078810 1.78448289 -0.18895411 -0.46901667
## [12,] 0.782677273 -0.02549573 -0.45202248 -0.01574861
## [13,] 0.006482925 0.13343757 1.51852686 -0.43071095
## [14,] 0.719970749 0.31709912 -0.91860203 -0.37460270
## [15,] -0.830066950 -0.65177550 0.11903770 -0.14453095
## [16,] 0.260424362 0.58089884 2.23171698 0.06341242
## [17,] -1.571724660 3.01377261 0.60592513 0.08943512
## [18,] -1.323866284 -0.76753685 -0.23633026 -0.62377987
## [19,] 0.517493301 0.17316816 0.31482968 0.81807969
## [20,] 0.560142214 -0.38316495 -0.08698075 0.48187483
head(x1)
## [,1] [,2] [,3] [,4]
## [1,] 0.4131018 -2.8646636 0.01274560 2.6808030
## [2,] 1.6479966 -0.4008321 -0.32436041 0.7555075
## [3,] 0.5800363 0.4664694 1.58975393 0.8200602
## [4,] 2.4732208 -0.9401913 1.06583860 -2.3196669
## [5,] -0.8146677 0.4089989 0.30808113 1.5880416
## [6,] 0.7027294 1.9608918 0.07449545 -0.7844592
tail(x2)
## [,1] [,2] [,3] [,4]
## [5,] -0.8300669 -0.6517755 0.11903770 -0.14453095
## [6,] 0.2604244 0.5808988 2.23171698 0.06341242
## [7,] -1.5717247 3.0137726 0.60592513 0.08943512
## [8,] -1.3238663 -0.7675369 -0.23633026 -0.62377987
## [9,] 0.5174933 0.1731682 0.31482968 0.81807969
## [10,] 0.5601422 -0.3831650 -0.08698075 0.48187483
x3 <- rnorm(1000)
hist(x3)
barplot(x3)
boxplot(x3)
mean(x3)
## [1] 0.007581723
median(x3)
## [1] -0.006744484
sd(x3)
## [1] 0.9926357
var(x3)
## [1] 0.9853256
quantile(x3, 0.3)
## 30%
## -0.4686491
quantile(x3, 0.7)
## 70%
## 0.5246369
range(x3)
## [1] -3.358779 3.522037
sum(x3)
## [1] 7.581723
min(x3)
## [1] -3.358779
max(x3)
## [1] 3.522037
ls()
## [1] "a" "age" "b" "BMW_STOCK" "c"
## [6] "car90" "cells" "cnames" "con" "diabetes"
## [11] "dim1" "dim2" "dim3" "e" "f"
## [16] "g" "grades" "h" "infert" "j"
## [21] "k" "mydataframe" "mylist" "mymatrix" "patientdata"
## [26] "patientID" "result" "rnames" "status" "x"
## [31] "x1" "x2" "x3" "y" "z"
rm(list=ls())
ls()
## character(0)