This tutorial provides hands-on practice in R programming. It assumes basic familiarity with programming in some other language. Little theory is provided; if you have a question about a function, run ?functionName (for example, ?mean) to open its help page.
# install.packages("ggplot2")
# install.packages("coefplot")
I’ve commented out the install instructions since I already have these packages installed.
# Attach the packages used in this tutorial.
# library() is used for every package: it stops with an error when a package
# is not installed, whereas require() only warns and returns FALSE, letting
# the script continue until some later call fails in a confusing way.
library(ggplot2)
library(coefplot)
library(useful)
ggplot
## function (data = NULL, ...)
## UseMethod("ggplot")
## <environment: namespace:ggplot2>
1 + 1
## [1] 2
2 * 3
## [1] 6
3 / 4
## [1] 0.75
3 %% 2
## [1] 1
5 - 3
## [1] 2
8 / (3 + 4)
## [1] 1.142857
# `**` is accepted by the R parser as a synonym for `^`; `^` is the documented
# exponentiation operator and the form normally used in R code.
2 ** 3
## [1] 8
1 + 2i
## [1] 1+2i
(1 + 2i) - (3 + 4i)
## [1] -2-2i
R follows the standard order of operations (PEMDAS: parentheses, exponents, multiplication and division, addition and subtraction).
x <- 2
x + 1
## [1] 3
y <- 3
x * y
## [1] 6
a <- b <- 5
a
## [1] 5
b
## [1] 5
assign(x = "var", value = 10)
var
## [1] 10
rm(a)
rm(list = ls())
Using = for assignment is not recommended in the R community, although it works equally well in most situations.
a = 23
R is case-sensitive
hero <- 1
# Names are case-sensitive, so `Hero` is a different (undefined) name.
# Uncommenting the next line fails:
# Hero
# Error: object 'Hero' not found
rm(list = ls())
R is dynamically-typed.
x <- 2
class(x)
## [1] "numeric"
is.numeric(x)
## [1] TRUE
i <- 5L
class(i)
## [1] "integer"
is.integer(i)
## [1] TRUE
is.numeric(i)
## [1] TRUE
class(4L)
## [1] "integer"
4L * 2.8
## [1] 11.2
class(4L * 2.8)
## [1] "numeric"
5L / 2L
## [1] 2.5
class(5L / 2L)
## [1] "numeric"
x <- "data"
x
## [1] "data"
class(x)
## [1] "character"
y <- factor("data")
y
## [1] data
## Levels: data
class(y)
## [1] "factor"
nchar(x)
## [1] 4
nchar("hello")
## [1] 5
nchar(3)
## [1] 1
nchar(452)
## [1] 3
Note: nchar() does not work on factors (nchar(y) raises an error for the factor y above); convert with as.character(y) first.
date1 <- as.Date("2012-06-23")
date1
## [1] "2012-06-23"
class(date1)
## [1] "Date"
as.numeric(date1)
## [1] 15514
date2 <- as.POSIXct(x = "2012-06-23 17:32")
date2
## [1] "2012-06-23 17:32:00 IST"
class(date2)
## [1] "POSIXct" "POSIXt"
as.numeric(date2)
## [1] 1340452920
TRUE
## [1] TRUE
FALSE
## [1] FALSE
TRUE * 5
## [1] 5
FALSE * 5
## [1] 0
as.numeric(TRUE)
## [1] 1
as.numeric(FALSE)
## [1] 0
class(TRUE)
## [1] "logical"
is.logical(TRUE)
## [1] TRUE
# T and F are ordinary variables that start out bound to TRUE and FALSE.
T
## [1] TRUE
F
## [1] FALSE
# Unlike TRUE/FALSE (reserved words), T can be reassigned to anything, which
# is why TRUE and FALSE should always be spelled out in real code.
T <- 1
T <- 100
T
## [1] 100
class(T)
## [1] "numeric"
# Restore the usual meaning so later examples are unaffected.
T <- TRUE
T
## [1] TRUE
# Relational operators return logical values.
2 == 2
## [1] TRUE
2 == 3
## [1] FALSE
2 != 3
## [1] TRUE
2 < 3
## [1] TRUE
2 > 3
## [1] FALSE
2 <= 3
## [1] TRUE
2 >= 3
## [1] FALSE
# String equality is exact and case-sensitive.
"data" == "data"
## [1] TRUE
"data" == "Data"
## [1] FALSE
# String ordering uses the collation order of the current locale, so this
# result is system-dependent: TRUE in typical English locales, FALSE in the
# "C" locale (where upper-case letters sort before lower-case ones).
"data" < "Data"
## [1] TRUE
rm(list = ls())
x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
x * 3
## [1] 3 6 9 12 15 18 21 24 27 30
x + 2
## [1] 3 4 5 6 7 8 9 10 11 12
x - 4
## [1] -3 -2 -1 0 1 2 3 4 5 6
x / 8
## [1] 0.125 0.250 0.375 0.500 0.625 0.750 0.875 1.000 1.125 1.250
x ** 6
## [1] 1 64 729 4096 15625 46656 117649 262144
## [9] 531441 1000000
sqrt(x)
## [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751
## [8] 2.828427 3.000000 3.162278
1:10
## [1] 1 2 3 4 5 6 7 8 9 10
10:1
## [1] 10 9 8 7 6 5 4 3 2 1
-2:5
## [1] -2 -1 0 1 2 3 4 5
5:-6
## [1] 5 4 3 2 1 0 -1 -2 -3 -4 -5 -6
x <- 1:10
y <- -5:4
x + y
## [1] -4 -2 0 2 4 6 8 10 12 14
x - y
## [1] 6 6 6 6 6 6 6 6 6 6
x * y
## [1] -5 -8 -9 -8 -5 0 7 16 27 40
x / y
## [1] -0.2 -0.5 -1.0 -2.0 -5.0 Inf 7.0 4.0 3.0 2.5
x ** y
## [1] 1.000000e+00 6.250000e-02 3.703704e-02 6.250000e-02 2.000000e-01
## [6] 1.000000e+00 7.000000e+00 6.400000e+01 7.290000e+02 1.000000e+04
x ^ y
## [1] 1.000000e+00 6.250000e-02 3.703704e-02 6.250000e-02 2.000000e-01
## [6] 1.000000e+00 7.000000e+00 6.400000e+01 7.290000e+02 1.000000e+04
length(x)
## [1] 10
length(y)
## [1] 10
length(x + y)
## [1] 10
x + c(1, 3)
## [1] 2 5 4 7 6 9 8 11 10 13
x + c(1, 3, 5)
## Warning in x + c(1, 3, 5): longer object length is not a multiple of
## shorter object length
## [1] 2 5 8 5 8 11 8 11 14 11
x <= 4
## [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
x[x <= 4]
## [1] 1 2 3 4
x > y
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
y > x
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
x <- 10:1
y <- -4:5
x < y
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
any(x < y)
## [1] TRUE
all(x < y)
## [1] FALSE
sports <- c("Hockey", "Basketball", "Football", "Cricket", "Badminton", "Table Tennis", "Rugby", "Baseball", "Lawn Tennis", "Soccer")
sports
## [1] "Hockey" "Basketball" "Football" "Cricket"
## [5] "Badminton" "Table Tennis" "Rugby" "Baseball"
## [9] "Lawn Tennis" "Soccer"
nchar(sports)
## [1] 6 10 8 7 9 12 5 8 11 6
number <- 7
number
## [1] 7
x
## [1] 10 9 8 7 6 5 4 3 2 1
x[1]
## [1] 10
x[c(1, 2)]
## [1] 10 9
x[1:2]
## [1] 10 9
x[c(1, 3, 5, 9)]
## [1] 10 8 6 2
a <- c(One = "a", Two = "y", Three = "r")
names(a)
## [1] "One" "Two" "Three"
w <- 1:3
names(w)
## NULL
names(w) <- c("One", "Two", "Three")
w
## One Two Three
## 1 2 3
names(w)
## [1] "One" "Two" "Three"
sports2 <- c(sports, "Hockey", "Badminton", "Cricket", "Football", "Hockey", "Water Polo")
sports2
## [1] "Hockey" "Basketball" "Football" "Cricket"
## [5] "Badminton" "Table Tennis" "Rugby" "Baseball"
## [9] "Lawn Tennis" "Soccer" "Hockey" "Badminton"
## [13] "Cricket" "Football" "Hockey" "Water Polo"
sports2.factor <- factor(sports2)
sports2.factor
## [1] Hockey Basketball Football Cricket Badminton
## [6] Table Tennis Rugby Baseball Lawn Tennis Soccer
## [11] Hockey Badminton Cricket Football Hockey
## [16] Water Polo
## 11 Levels: Badminton Baseball Basketball Cricket Football ... Water Polo
table(sports2.factor)
## sports2.factor
## Badminton Baseball Basketball Cricket Football
## 2 1 1 2 2
## Hockey Lawn Tennis Rugby Soccer Table Tennis
## 3 1 1 1 1
## Water Polo
## 1
class(sports2.factor)
## [1] "factor"
as.numeric(sports2.factor)
## [1] 6 3 5 4 1 10 8 2 7 9 6 1 4 5 6 11
rm(list = ls())
z <- c(1, 2, 3, NA, 8, 9, 10, NA)
z
## [1] 1 2 3 NA 8 9 10 NA
is.na(z)
## [1] FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
z[!is.na(z)]
## [1] 1 2 3 8 9 10
z.character <- c("Hockey", NA, "Cricket")
is.na(z.character)
## [1] FALSE TRUE FALSE
z <- c(1, NULL, 3)
z
## [1] 1 3
d <- NULL
is.null(d)
## [1] TRUE
is.null(z)
## [1] FALSE
rm(list = ls())
x <- 1:10
mean(x)
## [1] 5.5
sum(x)
## [1] 55
nchar(x)
## [1] 1 1 1 1 1 1 1 1 1 2
# Put a run of missing values into a longer vector and compare how mean()
# treats them.
x <- seq_len(1000)
x[seq(200, 300)] <- NA
# A single NA is enough to make the plain mean NA.
mean(x)
## [1] NA
# na.rm = TRUE drops the missing values before averaging.
mean(x, na.rm = TRUE)
## [1] 528.6429
# trim = 0.1 additionally discards the lowest and highest 10% of the
# remaining values before averaging.
mean(x, na.rm = TRUE, trim = 0.1)
## [1] 535.5908
x <- 10:1
y <- -4:5
q <- c("Hockey", "Basketball", "Cricket", "Billiards", "Chess", "Table Tennis", "Rugby", "Water Polo", "Lawn Tennis", "Football")
df <- data.frame(x, y, q)
df
## x y q
## 1 10 -4 Hockey
## 2 9 -3 Basketball
## 3 8 -2 Cricket
## 4 7 -1 Billiards
## 5 6 0 Chess
## 6 5 1 Table Tennis
## 7 4 2 Rugby
## 8 3 3 Water Polo
## 9 2 4 Lawn Tennis
## 10 1 5 Football
df <- data.frame(First = x, Second = y, Sports = q)
df
## First Second Sports
## 1 10 -4 Hockey
## 2 9 -3 Basketball
## 3 8 -2 Cricket
## 4 7 -1 Billiards
## 5 6 0 Chess
## 6 5 1 Table Tennis
## 7 4 2 Rugby
## 8 3 3 Water Polo
## 9 2 4 Lawn Tennis
## 10 1 5 Football
# NOTE: the "factor" result below was produced with R < 4.0.0. Since R 4.0.0,
# data.frame() defaults to stringsAsFactors = FALSE, so this call returns
# "character" unless stringsAsFactors = TRUE is passed explicitly.
class(df$Sports)
## [1] "factor"
# Passing stringsAsFactors explicitly gives the same result on every R version.
df <- data.frame(First = x, Second = y, Sports = q, stringsAsFactors = FALSE)
class(df$Sports)
## [1] "character"
nrow(df)
## [1] 10
ncol(df)
## [1] 3
dim(df)
## [1] 10 3
NROW(x = df)
## [1] 10
NCOL(x = df)
## [1] 3
x
## [1] 10 9 8 7 6 5 4 3 2 1
nrow(x)
## NULL
NROW(x)
## [1] 10
length(x)
## [1] 10
ncol(x)
## NULL
NCOL(x)
## [1] 1
names(df)
## [1] "First" "Second" "Sports"
names(df)[3]
## [1] "Sports"
rownames(df)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
rownames(df) <- letters[1:10]
df
## First Second Sports
## a 10 -4 Hockey
## b 9 -3 Basketball
## c 8 -2 Cricket
## d 7 -1 Billiards
## e 6 0 Chess
## f 5 1 Table Tennis
## g 4 2 Rugby
## h 3 3 Water Polo
## i 2 4 Lawn Tennis
## j 1 5 Football
rownames(df)
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
rownames(df) <- NULL
rownames(df)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
head(df)
## First Second Sports
## 1 10 -4 Hockey
## 2 9 -3 Basketball
## 3 8 -2 Cricket
## 4 7 -1 Billiards
## 5 6 0 Chess
## 6 5 1 Table Tennis
tail(df)
## First Second Sports
## 5 6 0 Chess
## 6 5 1 Table Tennis
## 7 4 2 Rugby
## 8 3 3 Water Polo
## 9 2 4 Lawn Tennis
## 10 1 5 Football
head(x = df, n = 7)
## First Second Sports
## 1 10 -4 Hockey
## 2 9 -3 Basketball
## 3 8 -2 Cricket
## 4 7 -1 Billiards
## 5 6 0 Chess
## 6 5 1 Table Tennis
## 7 4 2 Rugby
tail(x = df, n = 7)
## First Second Sports
## 4 7 -1 Billiards
## 5 6 0 Chess
## 6 5 1 Table Tennis
## 7 4 2 Rugby
## 8 3 3 Water Polo
## 9 2 4 Lawn Tennis
## 10 1 5 Football
class(df)
## [1] "data.frame"
df$Sports
## [1] "Hockey" "Basketball" "Cricket" "Billiards"
## [5] "Chess" "Table Tennis" "Rugby" "Water Polo"
## [9] "Lawn Tennis" "Football"
df[3, 2]
## [1] -2
df[4, 3]
## [1] "Billiards"
df[3:5, 2:3]
## Second Sports
## 3 -2 Cricket
## 4 -1 Billiards
## 5 0 Chess
df[c(3, 6), 2]
## [1] -2 1
df[, c(1, 3)]
## First Sports
## 1 10 Hockey
## 2 9 Basketball
## 3 8 Cricket
## 4 7 Billiards
## 5 6 Chess
## 6 5 Table Tennis
## 7 4 Rugby
## 8 3 Water Polo
## 9 2 Lawn Tennis
## 10 1 Football
df[, 3]
## [1] "Hockey" "Basketball" "Cricket" "Billiards"
## [5] "Chess" "Table Tennis" "Rugby" "Water Polo"
## [9] "Lawn Tennis" "Football"
class(df[, 3])
## [1] "character"
df[, 3, drop = FALSE]
## Sports
## 1 Hockey
## 2 Basketball
## 3 Cricket
## 4 Billiards
## 5 Chess
## 6 Table Tennis
## 7 Rugby
## 8 Water Polo
## 9 Lawn Tennis
## 10 Football
class(df[, 3, drop = FALSE])
## [1] "data.frame"
df[2, ]
## First Second Sports
## 2 9 -3 Basketball
class(df[2, ])
## [1] "data.frame"
df[2:4, ]
## First Second Sports
## 2 9 -3 Basketball
## 3 8 -2 Cricket
## 4 7 -1 Billiards
df[, "Sports"]
## [1] "Hockey" "Basketball" "Cricket" "Billiards"
## [5] "Chess" "Table Tennis" "Rugby" "Water Polo"
## [9] "Lawn Tennis" "Football"
df[, c("First", "Sports")]
## First Sports
## 1 10 Hockey
## 2 9 Basketball
## 3 8 Cricket
## 4 7 Billiards
## 5 6 Chess
## 6 5 Table Tennis
## 7 4 Rugby
## 8 3 Water Polo
## 9 2 Lawn Tennis
## 10 1 Football
df[, c("Sports", "First")]
## Sports First
## 1 Hockey 10
## 2 Basketball 9
## 3 Cricket 8
## 4 Billiards 7
## 5 Chess 6
## 6 Table Tennis 5
## 7 Rugby 4
## 8 Water Polo 3
## 9 Lawn Tennis 2
## 10 Football 1
df[, "Sports", drop = FALSE]
## Sports
## 1 Hockey
## 2 Basketball
## 3 Cricket
## 4 Billiards
## 5 Chess
## 6 Table Tennis
## 7 Rugby
## 8 Water Polo
## 9 Lawn Tennis
## 10 Football
df["Sports"]
## Sports
## 1 Hockey
## 2 Basketball
## 3 Cricket
## 4 Billiards
## 5 Chess
## 6 Table Tennis
## 7 Rugby
## 8 Water Polo
## 9 Lawn Tennis
## 10 Football
df[["Sports"]]
## [1] "Hockey" "Basketball" "Cricket" "Billiards"
## [5] "Chess" "Table Tennis" "Rugby" "Water Polo"
## [9] "Lawn Tennis" "Football"
df[c("First", "Sports")]
## First Sports
## 1 10 Hockey
## 2 9 Basketball
## 3 8 Cricket
## 4 7 Billiards
## 5 6 Chess
## 6 5 Table Tennis
## 7 4 Rugby
## 8 3 Water Polo
## 9 2 Lawn Tennis
## 10 1 Football
rm(list = ls())
list1 <- list(1, 2, 3)
list1
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
list2 <- list(c(1, 2, 3))
list2
## [[1]]
## [1] 1 2 3
list3 <- list(c(1, 2, 3), 3:7)
list3
## [[1]]
## [1] 1 2 3
##
## [[2]]
## [1] 3 4 5 6 7
df <- data.frame(First = 1:5, Second = 5:1, Sport = c("Hockey", "Cricket", "Football", "Rugby", "Badminton"), stringsAsFactors = FALSE)
df
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
list4 <- list(df, 1:10)
list4
## [[1]]
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10
list5 <- list(df, 1:10, list3)
list5
## [[1]]
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## [[2]]
## [1] 1 2 3 4 5 6 7 8 9 10
##
## [[3]]
## [[3]][[1]]
## [1] 1 2 3
##
## [[3]][[2]]
## [1] 3 4 5 6 7
names(list5)
## NULL
names(list5) <- c("data.frame", "vector", "list")
names(list5)
## [1] "data.frame" "vector" "list"
list5
## $data.frame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## $vector
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $list
## $list[[1]]
## [1] 1 2 3
##
## $list[[2]]
## [1] 3 4 5 6 7
list6 <- list(DataFrame = df, Vector = 1:10, List = list3)
names(list6)
## [1] "DataFrame" "Vector" "List"
list6
## $DataFrame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## $Vector
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $List
## $List[[1]]
## [1] 1 2 3
##
## $List[[2]]
## [1] 3 4 5 6 7
empty.list <- vector(mode = "list", length = 4L)
empty.list
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
empty.list[[1]] <- 5
empty.list
## [[1]]
## [1] 5
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
list5[1]
## $data.frame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
list5[[1]]
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
list5[["data.frame"]]
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
list5$data.frame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
list5[["data.frame"]]$First
## [1] 1 2 3 4 5
list5[[1]][2]
## Second
## 1 5
## 2 4
## 3 3
## 4 2
## 5 1
list5[[1]][[2]]
## [1] 5 4 3 2 1
list5[[1]][, "Second", drop = FALSE]
## Second
## 1 5
## 2 4
## 3 3
## 4 2
## 5 1
length(list5)
## [1] 3
NROW(list5)
## [1] 3
list5[[4]] <- 2 # memory and processor inefficient
list5
## $data.frame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## $vector
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $list
## $list[[1]]
## [1] 1 2 3
##
## $list[[2]]
## [1] 3 4 5 6 7
##
##
## [[4]]
## [1] 2
list5[["new.element"]] <- 3:7
list5
## $data.frame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## $vector
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $list
## $list[[1]]
## [1] 1 2 3
##
## $list[[2]]
## [1] 3 4 5 6 7
##
##
## [[4]]
## [1] 2
##
## $new.element
## [1] 3 4 5 6 7
names(list5)
## [1] "data.frame" "vector" "list" "" "new.element"
list5[1:3]
## $data.frame
## First Second Sport
## 1 1 5 Hockey
## 2 2 4 Cricket
## 3 3 3 Football
## 4 4 2 Rugby
## 5 5 1 Badminton
##
## $vector
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $list
## $list[[1]]
## [1] 1 2 3
##
## $list[[2]]
## [1] 3 4 5 6 7
rm(list = ls())
A <- matrix(data = 1:10, nrow = 5, ncol = 2)
A
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
B <- matrix(data = 21:30, nrow = 5, ncol = 2)
B
## [,1] [,2]
## [1,] 21 26
## [2,] 22 27
## [3,] 23 28
## [4,] 24 29
## [5,] 25 30
C <- matrix(data = 21:40, nrow = 2, ncol = 10)
C
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 21 23 25 27 29 31 33 35 37 39
## [2,] 22 24 26 28 30 32 34 36 38 40
nrow(A)
## [1] 5
ncol(A)
## [1] 2
dim(A)
## [1] 5 2
A + B
## [,1] [,2]
## [1,] 22 32
## [2,] 24 34
## [3,] 26 36
## [4,] 28 38
## [5,] 30 40
A * B
## [,1] [,2]
## [1,] 21 156
## [2,] 44 189
## [3,] 69 224
## [4,] 96 261
## [5,] 125 300
A == B
## [,1] [,2]
## [1,] FALSE FALSE
## [2,] FALSE FALSE
## [3,] FALSE FALSE
## [4,] FALSE FALSE
## [5,] FALSE FALSE
ncol(A)
## [1] 2
nrow(B)
## [1] 5
t(B)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 21 22 23 24 25
## [2,] 26 27 28 29 30
A %*% t(B)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 177 184 191 198 205
## [2,] 224 233 242 251 260
## [3,] 271 282 293 304 315
## [4,] 318 331 344 357 370
## [5,] 365 380 395 410 425
A %*% C
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 153 167 181 195 209 223 237 251 265 279
## [2,] 196 214 232 250 268 286 304 322 340 358
## [3,] 239 261 283 305 327 349 371 393 415 437
## [4,] 282 308 334 360 386 412 438 464 490 516
## [5,] 325 355 385 415 445 475 505 535 565 595
colnames(A)
## NULL
rownames(A)
## NULL
colnames(A) <- c("Left", "Right")
rownames(A) <- c("First", "Second", "Third", "Fourth", "Fifth")
A
## Left Right
## First 1 6
## Second 2 7
## Third 3 8
## Fourth 4 9
## Fifth 5 10
colnames(B) <- c("First", "Second")
rownames(B) <- c("One", "Two", "Three", "Four", "Five")
B
## First Second
## One 21 26
## Two 22 27
## Three 23 28
## Four 24 29
## Five 25 30
letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
LETTERS
## [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q"
## [18] "R" "S" "T" "U" "V" "W" "X" "Y" "Z"
colnames(C) <- LETTERS[1:10]
rownames(C) <- c("Top", "Bottom")
C
## A B C D E F G H I J
## Top 21 23 25 27 29 31 33 35 37 39
## Bottom 22 24 26 28 30 32 34 36 38 40
A
## Left Right
## First 1 6
## Second 2 7
## Third 3 8
## Fourth 4 9
## Fifth 5 10
t(A)
## First Second Third Fourth Fifth
## Left 1 2 3 4 5
## Right 6 7 8 9 10
A %*% C
## A B C D E F G H I J
## First 153 167 181 195 209 223 237 251 265 279
## Second 196 214 232 250 268 286 304 322 340 358
## Third 239 261 283 305 327 349 371 393 415 437
## Fourth 282 308 334 360 386 412 438 464 490 516
## Fifth 325 355 385 415 445 475 505 535 565 595
arr <- array(data = 1:12, dim = c(2, 3, 2))
arr
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
arr[1, , ]
## [,1] [,2]
## [1,] 1 7
## [2,] 3 9
## [3,] 5 11
arr[, 2, ]
## [,1] [,2]
## [1,] 3 9
## [2,] 4 10
arr[, , 2]
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
arr[1, , 1]
## [1] 1 3 5
arr[, , 1]
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
rm(list = ls())
fileURL <- "http://www.jaredlander.com/data/Tomato%20First.csv"
tomato <- read.table(file = fileURL, header = TRUE, sep = ",")
head(tomato)
## Round Tomato Price Source Sweet Acid Color Texture
## 1 1 Simpson SM 3.99 Whole Foods 2.8 2.8 3.7 3.4
## 2 1 Tuttorosso (blue) 2.99 Pioneer 3.3 2.8 3.4 3.0
## 3 1 Tuttorosso (green) 0.99 Pioneer 2.8 2.6 3.3 2.8
## 4 1 La Fede SM DOP 3.99 Shop Rite 2.6 2.8 3.0 2.3
## 5 2 Cento SM DOP 5.49 D Agostino 3.3 3.1 2.9 2.8
## 6 2 Cento Organic 4.99 D Agostino 3.2 2.9 2.9 3.1
## Overall Avg.of.Totals Total.of.Avg
## 1 3.4 16.1 16.1
## 2 2.9 15.3 15.3
## 3 2.9 14.3 14.3
## 4 2.8 13.4 13.4
## 5 3.1 14.4 15.2
## 6 2.9 15.5 15.1
class(tomato)
## [1] "data.frame"
# NOTE: "factor" is the result under R < 4.0.0. From R 4.0.0 onwards
# read.table() keeps strings as character by default, so this returns
# "character" unless stringsAsFactors = TRUE is requested.
class(tomato$Tomato)
## [1] "factor"
tomato <- read.table(file = fileURL, header = TRUE, sep = ",", stringsAsFactors = FALSE)
head(tomato)
## Round Tomato Price Source Sweet Acid Color Texture
## 1 1 Simpson SM 3.99 Whole Foods 2.8 2.8 3.7 3.4
## 2 1 Tuttorosso (blue) 2.99 Pioneer 3.3 2.8 3.4 3.0
## 3 1 Tuttorosso (green) 0.99 Pioneer 2.8 2.6 3.3 2.8
## 4 1 La Fede SM DOP 3.99 Shop Rite 2.6 2.8 3.0 2.3
## 5 2 Cento SM DOP 5.49 D Agostino 3.3 3.1 2.9 2.8
## 6 2 Cento Organic 4.99 D Agostino 3.2 2.9 2.9 3.1
## Overall Avg.of.Totals Total.of.Avg
## 1 3.4 16.1 16.1
## 2 2.9 15.3 15.3
## 3 2.9 14.3 14.3
## 4 2.8 13.4 13.4
## 5 3.1 14.4 15.2
## 6 2.9 15.5 15.1
class(tomato$Tomato)
## [1] "character"
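See also ?read.csv and ?read.csv2, which are wrappers around read.table with defaults suited to comma-separated and semicolon-separated files respectively.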
# library(RODBC)
# RShowDoc("RODBC", package = "RODBC")
Uncomment and run the two lines above to open the RODBC vignette for more information.
Download and read the documentation of the foreign package from CRAN.
In particular, read about the following functions:
read.spss, read.dta, read.ssd, read.octave, read.mtp, read.systat
fileURL <- "http://www.jaredlander.com/data/Tomato%20First.csv"
tomato <- read.csv(file = fileURL, stringsAsFactors = FALSE)
save(list = c("tomato"), file = "tomato.rdata")
rm(tomato)
load(file = "tomato.rdata")
head(tomato)
## Round Tomato Price Source Sweet Acid Color Texture
## 1 1 Simpson SM 3.99 Whole Foods 2.8 2.8 3.7 3.4
## 2 1 Tuttorosso (blue) 2.99 Pioneer 3.3 2.8 3.4 3.0
## 3 1 Tuttorosso (green) 0.99 Pioneer 2.8 2.6 3.3 2.8
## 4 1 La Fede SM DOP 3.99 Shop Rite 2.6 2.8 3.0 2.3
## 5 2 Cento SM DOP 5.49 D Agostino 3.3 3.1 2.9 2.8
## 6 2 Cento Organic 4.99 D Agostino 3.2 2.9 2.9 3.1
## Overall Avg.of.Totals Total.of.Avg
## 1 3.4 16.1 16.1
## 2 2.9 15.3 15.3
## 3 2.9 14.3 14.3
## 4 2.8 13.4 13.4
## 5 3.1 14.4 15.2
## 6 2.9 15.5 15.1
# Build a scalar, a vector and a data frame that combines them; the scalar n
# is recycled across all ten rows.
n <- 20
r <- seq_len(10)
w <- data.frame(n = n, r = r)
w
## n r
## 1 20 1
## 2 20 2
## 3 20 3
## 4 20 4
## 5 20 5
## 6 20 6
## 7 20 7
## 8 20 8
## 9 20 9
## 10 20 10
# Store several objects in one file, remove them from the workspace, then
# restore all of them with a single load().
save(n, r, w, file = "multiple.rdata")
rm(n, r, w)
load("multiple.rdata")
n
## [1] 20
r
## [1] 1 2 3 4 5 6 7 8 9 10
w
## n r
## 1 20 1
## 2 20 2
## 3 20 3
## 4 20 4
## 5 20 5
## 6 20 6
## 7 20 7
## 8 20 8
## 9 20 9
## 10 20 10
rm(list = ls())
library(ggplot2)
data("diamonds")
head(diamonds)
## carat cut color clarity depth table price x y z
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
data(list = c("tips"), package = "reshape2")
head(tips)
## total_bill tip sex smoker day time size
## 1 16.99 1.01 Female No Sun Dinner 2
## 2 10.34 1.66 Male No Sun Dinner 3
## 3 21.01 3.50 Male No Sun Dinner 3
## 4 23.68 3.31 Male No Sun Dinner 2
## 5 24.59 3.61 Female No Sun Dinner 4
## 6 25.29 4.71 Male No Sun Dinner 4
data()
rm(list = ls())
library(XML)
fileURL <- "http://www.w3schools.com/html/html_tables.asp"
myTable <- readHTMLTable(doc = fileURL, which = 1, header = TRUE, stringsAsFactors = FALSE)
myTable
## Number First Name Last Name Points
## 1 1 Eve Jackson 94
## 2 2 John Doe 80
## 3 3 Adam Johnson 67
## 4 4 Jill Smith 50
rm(list = ls())
diamonds dataset
library(ggplot2)
data("diamonds")
head(diamonds)
## carat cut color clarity depth table price x y z
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
hist(x = diamonds$carat)
hist(x = diamonds$carat, main = "Carat Histogram", xlab = "Carat")
plot(x = diamonds$carat, y = diamonds$price)
plot(formula = price ~ carat, data = diamonds)
plot(formula = price ~ carat, data = diamonds, main = "Price vs Carat")
boxplot(x = diamonds$carat)
ggplot2
library(ggplot2)
data(diamonds)
# Histograms of carat at the default and at two explicit bin widths.
# Columns are referenced by bare name inside aes(): the name is looked up in
# the data given to ggplot(). Writing diamonds$carat bypasses that lookup and
# gives wrong results once the plot is faceted or reused with different data.
ggplot(data = diamonds) + geom_histogram(mapping = aes(x = carat))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data = diamonds) + geom_histogram(mapping = aes(x = carat), binwidth = 0.5)
ggplot(data = diamonds) + geom_histogram(mapping = aes(x = carat), binwidth = 0.1)
ggplot(data = diamonds) + geom_density(mapping = aes(x = carat))
ggplot(data = diamonds) + geom_density(mapping = aes(x = carat), fill = "grey50")
ggplot(data = diamonds, aes(x = carat, y = price)) + geom_point()
g <- ggplot(data = diamonds, aes(x = carat, y = price))
g + geom_point()
g + geom_point(aes(color = color))
g + geom_point(aes(color = color, shape = clarity))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 8.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 5445 rows containing missing values (geom_point).
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 8.
## Consider specifying shapes manually if you must have them.
g + geom_point(aes(color = color, shape = cut))
ggplot(data = diamonds, aes(y = carat, x = 1)) + geom_boxplot()
ggplot(data = diamonds, aes(y = carat, x = cut)) + geom_boxplot()
ggplot(data = diamonds, aes(y = carat, x = cut)) + geom_violin()
g <- ggplot(data = diamonds, aes(y = carat, x = cut))
g + geom_point() + geom_violin()
g + geom_violin() + geom_point()
g + geom_jitter()
g + geom_jitter() + geom_violin()
g + geom_jitter(aes(color = color)) + geom_violin()
rm(list = ls())
data("economics")
head(economics)
## date pce pop psavert uempmed unemploy
## 1 1967-06-30 507.8 198712 9.8 4.5 2944
## 2 1967-07-31 510.9 198911 9.8 4.7 2945
## 3 1967-08-31 516.7 199113 9.0 4.6 2958
## 4 1967-09-30 513.3 199311 9.8 4.9 3143
## 5 1967-10-31 518.5 199498 9.7 4.7 3066
## 6 1967-11-30 526.2 199657 9.4 4.8 3018
ggplot(data = economics, mapping = aes(x = date, y = pop)) + geom_line()
library(lubridate)
economics$year <- year(economics$date)
economics$month <- month(economics$date)
head(economics)
## date pce pop psavert uempmed unemploy year month
## 1 1967-06-30 507.8 198712 9.8 4.5 2944 1967 6
## 2 1967-07-31 510.9 198911 9.8 4.7 2945 1967 7
## 3 1967-08-31 516.7 199113 9.0 4.6 2958 1967 8
## 4 1967-09-30 513.3 199311 9.8 4.9 3143 1967 9
## 5 1967-10-31 518.5 199498 9.7 4.7 3066 1967 10
## 6 1967-11-30 526.2 199657 9.4 4.8 3018 1967 11
econ2000 <- economics[which(economics$year >= 2000), ]
nrow(economics)
## [1] 478
nrow(econ2000)
## [1] 87
head(econ2000)
## date pce pop psavert uempmed unemploy year month
## 392 2000-01-31 6618.5 281190 2.4 6.1 5858 2000 1
## 393 2000-02-29 6685.3 281409 2.0 6.0 5733 2000 2
## 394 2000-03-31 6664.2 281653 2.4 6.1 5481 2000 3
## 395 2000-04-30 6688.0 281891 2.4 5.8 5758 2000 4
## 396 2000-05-31 6712.1 282156 2.5 5.7 5651 2000 5
## 397 2000-06-30 6745.8 282430 2.9 6.0 5747 2000 6
econ2000$month <- month(econ2000$date, label = TRUE)
head(econ2000)
## date pce pop psavert uempmed unemploy year month
## 392 2000-01-31 6618.5 281190 2.4 6.1 5858 2000 Jan
## 393 2000-02-29 6685.3 281409 2.0 6.0 5733 2000 Feb
## 394 2000-03-31 6664.2 281653 2.4 6.1 5481 2000 Mar
## 395 2000-04-30 6688.0 281891 2.4 5.8 5758 2000 Apr
## 396 2000-05-31 6712.1 282156 2.5 5.7 5651 2000 May
## 397 2000-06-30 6745.8 282430 2.9 6.0 5747 2000 Jun
library(scales)
g <- ggplot(data = econ2000, aes(x = month, y = pop))
g <- g + geom_line(aes(color = factor(year), group = year))
g
g <- g + scale_color_discrete(name = "Year")
g
g <- g + scale_y_continuous(labels = comma)
g
g <- g + labs(title = "Population Growth", x = "Month", y = "Population")
g
g <- g + theme(axis.text.x = element_text(angle = 90, hjust = 1))
g
g <- ggplot(data = diamonds, mapping = aes(x = carat, y = price))
g + geom_point(mapping = aes(color = color)) + facet_wrap(~color)
g + geom_point(mapping = aes(color = color)) + facet_grid(cut ~ clarity)
ggplot(diamonds, aes(x = carat)) + geom_histogram() + facet_wrap(~color)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data = diamonds, aes(x = carat, y = price, shape = cut, size = depth, color = color)) + geom_point()
library(ggthemes)
g <- ggplot(data = diamonds, aes(x = carat, y = price, color = color)) + geom_point()
g + theme_wsj()
g + theme_economist() + scale_color_economist()
g + theme_tufte()
g + theme_excel() + scale_color_excel()
rm(list = ls())
ggplot2: Digging Deeper
qplot function
library(ggplot2)
data("mpg")
str(mpg)
## 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: Factor w/ 15 levels "audi","chevrolet",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ model : Factor w/ 38 levels "4runner 4wd",..: 2 2 2 2 2 2 2 3 3 3 ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : Factor w/ 10 levels "auto(av)","auto(l3)",..: 4 9 10 1 4 9 1 9 4 10 ...
## $ drv : Factor w/ 3 levels "4","f","r": 2 2 2 2 2 2 2 1 1 1 ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : Factor w/ 5 levels "c","d","e","p",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ class : Factor w/ 7 levels "2seater","compact",..: 2 2 2 2 2 2 2 2 2 2 ...
# Quick scatter plots of highway mileage against engine displacement; the
# second call colours the points by drive type.
# NOTE: qplot() is deprecated as of ggplot2 3.4.0; the ggplot() + geom_*()
# form used later in this tutorial is the supported interface.
qplot(x = displ, y = hwy, data = mpg)
qplot(x = displ, y = hwy, data = mpg, color = drv)
qplot(x = displ, y = hwy, data = mpg, geom = c("point", "smooth"))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
qplot(x = displ, y = hwy, data = mpg, color = drv, geom = c("point", "smooth"))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
qplot(x = hwy, data = mpg)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(x = hwy, data = mpg, fill = drv)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(x = displ, y = hwy, data = mpg, facets = . ~ drv)
qplot(x = hwy, data = mpg, facets = drv ~ ., binwidth = 2)
# The data file can be made available upon request
load("maacs.Rda")
head(maacs)
## id eno duBedMusM pm25 mopos
## 1 1 141 2423 15.560 yes
## 2 2 124 2793 34.370 yes
## 3 3 126 3055 38.953 yes
## 4 4 164 775 33.249 yes
## 5 5 99 1634 27.060 yes
## 6 6 68 939 18.890 yes
str(maacs)
## 'data.frame': 750 obs. of 5 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ eno : num 141 124 126 164 99 68 41 50 12 30 ...
## $ duBedMusM: num 2423 2793 3055 775 1634 ...
## $ pm25 : num 15.6 34.4 39 33.2 27.1 ...
## $ mopos : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
qplot(x = log(eno), data = maacs)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(x = log(eno), data = maacs, fill = mopos)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(x = log(eno), data = maacs, geom = "density")
## Warning: Removed 108 rows containing non-finite values (stat_density).
qplot(x = log(eno), data = maacs, geom = "density", color = mopos)
## Warning: Removed 49 rows containing non-finite values (stat_density).
## Warning: Removed 59 rows containing non-finite values (stat_density).
qplot(x = log(pm25), y = log(eno), data = maacs)
## Warning: Removed 184 rows containing missing values (geom_point).
qplot(x = log(pm25), y = log(eno), data = maacs, shape = mopos)
## Warning: Removed 184 rows containing missing values (geom_point).
qplot(x = log(pm25), y = log(eno), data = maacs, color = mopos)
## Warning: Removed 184 rows containing missing values (geom_point).
qplot(x = log(pm25), y = log(eno), data = maacs, facets = . ~ mopos)
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 98 rows containing missing values (geom_point).
qplot(x = log(pm25), y = log(eno), data = maacs, color = mopos, geom = c("point", "smooth"), method = "lm")
## Warning: Removed 86 rows containing missing values (stat_smooth).
## Warning: Removed 98 rows containing missing values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).
qplot(x = log(pm25), y = log(eno), data = maacs, facets = . ~ mopos, geom = c("point", "smooth"), method = "lm")
## Warning: Removed 86 rows containing missing values (stat_smooth).
## Warning: Removed 98 rows containing missing values (stat_smooth).
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 98 rows containing missing values (geom_point).
ggplot function
qplot(x = log(pm25), y = eno, data = maacs, facets = . ~ mopos, geom = c("point", "smooth"), method = "lm")
## Warning: Removed 86 rows containing missing values (stat_smooth).
## Warning: Removed 98 rows containing missing values (stat_smooth).
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 98 rows containing missing values (geom_point).
# Base plot object: data and aesthetic mapping only, no layers yet.
g <- ggplot(
  data = maacs,
  mapping = aes(x = log(pm25), y = eno)
)
summary(g)
## data: id, eno, duBedMusM, pm25, mopos [750x5]
## mapping: x = log(pm25), y = eno
## faceting: facet_null()

# Add a point layer.
g +
  geom_point()
## Warning: Removed 184 rows containing missing values (geom_point).

# Points plus a smoother using the default method.
g +
  geom_point() +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 184 rows containing missing values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# Points plus a linear-model smoother.
g +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 184 rows containing missing values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# Same as above, split into one panel per level of mopos.
g +
  geom_point() +
  facet_grid(facets = . ~ mopos) +
  geom_smooth(method = "lm")
## Warning: Removed 86 rows containing missing values (stat_smooth).
## Warning: Removed 98 rows containing missing values (stat_smooth).
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 98 rows containing missing values (geom_point).
# Constant aesthetics: every point shares one colour, size and transparency.
g +
  geom_point(color = "steelblue", size = 4, alpha = 1 / 2)
## Warning: Removed 184 rows containing missing values (geom_point).

# Mapped aesthetic: colour now varies with mopos (note the aes() wrapper).
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1 / 2)
## Warning: Removed 184 rows containing missing values (geom_point).

# Add axis labels and a title.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1 / 2) +
  labs(x = "Log of PM25", y = "ENO", title = "MAACS")
## Warning: Removed 184 rows containing missing values (geom_point).

# Add a thick, dotted linear-model smoother.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1 / 2) +
  labs(x = "Log of PM25", y = "ENO", title = "MAACS") +
  geom_smooth(size = 4, linetype = 3, method = "lm")
## Warning: Removed 184 rows containing missing values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# Same smoother without the confidence band.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1 / 2) +
  labs(x = "Log of PM25", y = "ENO", title = "MAACS") +
  geom_smooth(size = 4, linetype = 3, method = "lm", se = FALSE)
## Warning: Removed 184 rows containing missing values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# The previous plot, drawn a second time with identical layers.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1 / 2) +
  labs(x = "Log of PM25", y = "ENO", title = "MAACS") +
  geom_smooth(size = 4, linetype = 3, method = "lm", se = FALSE)
## Warning: Removed 184 rows containing missing values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).
# Simulated series: 100 random normal draws indexed 1 to 100.
testData <- data.frame(
  x = 1:100,
  y = rnorm(100)
)
head(testData)
## x y
## 1 1 1.03867599
## 2 2 -0.82879997
## 3 3 -0.74745592
## 4 4 -0.96679162
## 5 5 0.08761936
## 6 6 -0.96893042
# Setting Outlier
testData[50, "y"] <- 100

# Base graphics: restrict the y axis to [-3, 3].
plot(
  testData$x,
  testData$y,
  type = "l",
  ylim = c(-3, 3)
)

# ggplot2 equivalent, first with default axis limits.
g <- ggplot(testData, aes(x = x, y = y))
g +
  geom_line()
# Outlier Missing
g +
  geom_line() +
  ylim(c(-3, 3))
# Outlier Included
g +
  geom_line() +
  coord_cartesian(ylim = c(-3, 3))
# Cut points for binning duBedMusM: its minimum, quartiles and maximum.
# quantile() takes the probability grid through `probs`; an argument under any
# other name is absorbed by `...` and silently ignored, which leaves the default
# quartile grid in effect. Passing the grid as `probs` makes the call say what
# it actually computes.
cutpoints <- quantile(
  x = maacs$duBedMusM,
  probs = seq(0, 1, length.out = 5),
  na.rm = TRUE
)
cutpoints
## 0% 25% 50% 75% 100%
## 0.01 308.00 1151.00 3881.00 124919.00
# Bin duBedMusM into the four intervals delimited by the cut points.
# NOTE(review): the intervals are left-open (see the levels below), so the
# minimum value 0.01 falls outside the first bin and becomes NA. Pass
# include.lowest = TRUE to cut() if that observation should be kept.
maacs$newCol <- cut(x = maacs$duBedMusM, cutpoints)
levels(maacs$newCol)
## [1] "(0.01,308]" "(308,1.15e+03]" "(1.15e+03,3.88e+03]"
## [4] "(3.88e+03,1.25e+05]"
library(ggthemes)

# Rebuild the base plot so it picks up the newCol column added to maacs.
g <- ggplot(
  data = maacs,
  aes(x = log(pm25), y = eno)
)

# One panel per newCol x mopos combination, each with translucent points and
# a linear fit without a confidence band.
g +
  geom_point(alpha = 1 / 3) +
  facet_wrap(facets = newCol ~ mopos) +
  geom_smooth(method = "lm", se = FALSE, col = "steelblue") +
  theme_bw(base_size = 10) +
  labs(
    x = expression("log " * PM[2.5]),
    title = "MAACS"
  )
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 15 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 14 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 40 rows containing missing values (stat_smooth).
## Warning: Removed 67 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 40 rows containing missing values (geom_point).
## Warning: Removed 67 rows containing missing values (geom_point).

# Remove every object from the workspace.
rm(list = ls())