Base Graphics
# Plot the whole object with its default plot method; the formula version is
# kept for reference.
# plot(finger ~ height, data = crimtab.long.df)
plot(crimtab.long.df)

# Draw the two histograms side by side. par() returns the previous settings,
# which are restored afterwards so the 1 x 2 layout does not leak into any
# later base-graphics plot.
opar <- par(mfrow = c(1, 2))
hist(crimtab.long.df$height, main = "Histogram of Height", xlab = "height(inches)")
hist(crimtab.long.df$finger, main = "Histogram of Finger Length", xlab = "finger length(cm)")
par(opar)

# NOTE(review): single-bracket indexing returns a one-column data frame rather
# than a numeric vector, which is presumably why these variants are disabled.
# hist(crimtab.long.df["height"], main="Histogram of Height", xlab="height(inches)")
# hist(crimtab.long.df["finger"], main="Histogram of Finger Length", xlab= "finger length(cm)")
- 평균과 표준편차를 한번에 구하려면 다음과 같이 사용자 정의 함수 `mean_sd()`를 작성하고 `mapply()`를 이용하는 게 편함. 이를 모수로 하는 정규곡선을 덧씌워 볼 것. `mean_sd()`는 평균과 표준편차를 계산해서 리스트로 반환하는 함수임(이름을 붙여 저장하므로 엄밀히는 anonymous function이 아님). 이와 같은 함수를 저장해 놓으려면 `dump()`를 이용함.
- 이와 같이 계산한 평균과 표준편차를 모수로 하는 정규곡선을 덧씌워 볼 것.
# Compute the mean and standard deviation of a numeric vector in one call.
#
# Args:
#   x: a numeric vector; missing values are ignored for both statistics.
#
# Returns:
#   A list with two elements, `mean` and `sd`.
mean_sd <- function(x) {
  # Both statistics drop NAs so that they describe the same set of
  # observations; previously only the mean did, so a single NA produced a
  # valid mean paired with an NA standard deviation.
  list(
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE)
  )
}
# Apply mean_sd() to each element (column) of crimtab.long.df. The outer
# parentheses print the result in addition to assigning it.
(crimtab.stat <- mapply(mean_sd, crimtab.long.df))
## finger height
## mean 11.54737 65.473
## sd 0.5487137 2.557757
# str() shows that the result is a 4-element list carrying dim and dimnames
# attributes, i.e. a 2 x 2 matrix whose cells are list elements.
str(crimtab.stat)
## List of 4
## $ : num 11.5
## $ : num 0.549
## $ : num 65.5
## $ : num 2.56
## - attr(*, "dim")= int [1:2] 2 2
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:2] "mean" "sd"
## ..$ : chr [1:2] "finger" "height"
# apply(crimtab.long, 2, mean)
# apply(crimtab.long, 2, sd)
# Write the definition of mean_sd to the file mean_sd.R in the working
# directory so it can be reused later.
dump("mean_sd", file = "mean_sd.R")
`crimtab.stat`이 어떤 성격을 갖는지 다음 질문과 추출 작업을 통해서 알아보자.
# Type checks: a list with a dim attribute is reported as both a matrix and a
# list, but it is neither a table nor a data frame.
is.matrix(crimtab.stat)
## [1] TRUE
is.table(crimtab.stat)
## [1] FALSE
is.list(crimtab.stat)
## [1] TRUE
is.data.frame(crimtab.stat)
## [1] FALSE
# Matrix-style column extraction, by position or by name, gives a named list
# holding that column's mean and sd.
crimtab.stat[, 1]
## $mean
## [1] 11.54737
##
## $sd
## [1] 0.5487137
crimtab.stat[, "finger"]
## $mean
## [1] 11.54737
##
## $sd
## [1] 0.5487137
# On that list, `[` keeps the list wrapper while `[[` extracts the bare number.
crimtab.stat[, "finger"][1]
## $mean
## [1] 11.54737
crimtab.stat[, "finger"][[1]]
## [1] 11.54737
# With a single index the object is treated as a plain list of cells, so [1]
# is the first cell, returned without a name.
crimtab.stat[1]
## [[1]]
## [1] 11.54737
# "finger" is a column dimname, not an element name, so name-based list
# lookups find nothing.
crimtab.stat["finger"]
## [[1]]
## NULL
crimtab.stat$finger
## NULL
# Convert the list-matrix to a data frame. The printout looks the same, but
# the type checks and name-based lookups below behave differently.
(crimtab.stat.df <- data.frame(crimtab.stat))
## finger height
## mean 11.54737 65.473
## sd 0.5487137 2.557757
is.matrix(crimtab.stat.df)
## [1] FALSE
is.table(crimtab.stat.df)
## [1] FALSE
is.list(crimtab.stat.df)
## [1] TRUE
is.data.frame(crimtab.stat.df)
## [1] TRUE
# Matrix-style column extraction drops the data frame wrapper and returns the
# column itself, which str() shows to be a named list of two numbers.
crimtab.stat.df[, 1]
## $mean
## [1] 11.54737
##
## $sd
## [1] 0.5487137
str(crimtab.stat.df[, 1])
## List of 2
## $ mean: num 11.5
## $ sd : num 0.549
crimtab.stat.df[, "finger"]
## $mean
## [1] 11.54737
##
## $sd
## [1] 0.5487137
str(crimtab.stat.df[, "finger"])
## List of 2
## $ mean: num 11.5
## $ sd : num 0.549
# On the extracted column, `[` returns a one-element list and `[[` the number.
crimtab.stat.df[, "finger"][1]
## $mean
## [1] 11.54737
str(crimtab.stat.df[, "finger"][1])
## List of 1
## $ mean: num 11.5
crimtab.stat.df[, "finger"][[1]]
## [1] 11.54737
str(crimtab.stat.df[, "finger"][[1]])
## num 11.5
# List-style single-bracket indexing, by position or by name, keeps the
# result a one-column data frame.
crimtab.stat.df[1]
## finger
## mean 11.54737
## sd 0.5487137
str(crimtab.stat.df[1])
## 'data.frame': 2 obs. of 1 variable:
## $ finger:List of 2
## ..$ mean: num 11.5
## ..$ sd : num 0.549
crimtab.stat.df["finger"]
## finger
## mean 11.54737
## sd 0.5487137
str(crimtab.stat.df["finger"])
## 'data.frame': 2 obs. of 1 variable:
## $ finger:List of 2
## ..$ mean: num 11.5
## ..$ sd : num 0.549
# Applying [1] to a one-column data frame selects its only column again, so
# the result is unchanged.
crimtab.stat.df["finger"][1]
## finger
## mean 11.54737
## sd 0.5487137
str(crimtab.stat.df["finger"][1])
## 'data.frame': 2 obs. of 1 variable:
## $ finger:List of 2
## ..$ mean: num 11.5
## ..$ sd : num 0.549
# [[1]] on the one-column data frame extracts the column (a list of two).
crimtab.stat.df["finger"][[1]]
## $mean
## [1] 11.54737
##
## $sd
## [1] 0.5487137
str(crimtab.stat.df["finger"][[1]])
## List of 2
## $ mean: num 11.5
## $ sd : num 0.549
# Unlike the list-matrix, the data frame has column names, so `$finger` now
# returns the column instead of NULL.
crimtab.stat.df$finger
## $mean
## [1] 11.54737
##
## $sd
## [1] 0.5487137
str(crimtab.stat.df$finger)
## List of 2
## $ mean: num 11.5
## $ sd : num 0.549
crimtab.stat.df$finger[1]
## $mean
## [1] 11.54737
str(crimtab.stat.df$finger[1])
## List of 1
## $ mean: num 11.5
crimtab.stat.df$finger[[1]]
## [1] 11.54737
str(crimtab.stat.df$finger[[1]])
## num 11.5
ggplot
library(ggplot2)
# Shared base plot: height on the x-axis, finger length on the y-axis.
g1 <- ggplot(
  crimtab.long.df,
  aes(x = height, y = finger)
)
# Plain scatterplot.
g1 +
  geom_point()

# Progressively more transparent points.
g1 +
  geom_point(alpha = 0.9)

g1 +
  geom_point(alpha = 0.5)

g1 +
  geom_point(alpha = 0.1)

# Jittered points, first with the default jitter ...
g1 +
  geom_point(position = "jitter")

# ... then with smaller points ...
g1 +
  geom_point(position = "jitter", size = 0.7)

g1 +
  geom_point(position = position_jitter(), size = 0.7)

# ... and with explicit jitter amounts in each direction.
g1 +
  geom_point(
    position = position_jitter(width = 1, height = 0),
    size = 0.7
  )

g1 +
  geom_point(
    position = position_jitter(width = 1, height = 0.1),
    size = 0.7
  )

# Same plot with the black-and-white theme.
g1 +
  geom_point(
    position = position_jitter(width = 1, height = 0.1),
    size = 0.7
  ) +
  theme_bw()

# Base plot for the height histograms.
h1 <- ggplot(crimtab.long.df, aes(x = height))
# Default binning; ggplot2 reports that it fell back to 30 bins.
h1 + geom_histogram(alpha = 0.5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Base plot for the finger-length histograms.
f1 <- ggplot(crimtab.long.df, aes(x = finger))
f1 + geom_histogram(alpha = 0.5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density-scaled histograms with explicit bin widths. after_stat(density)
# replaces the deprecated `..density..` notation (needs ggplot2 >= 3.3.0).
h1 + geom_histogram(aes(y = after_stat(density)), binwidth = 1, alpha = 0.5)

f1 + geom_histogram(aes(y = after_stat(density)), binwidth = 0.1, alpha = 0.5)

# White bars with black outlines; stored so that later layers can be added.
(g.h.1 <- h1 +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, fill = "white", colour = "black"
  ))

(g.f.1 <- f1 +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.2, fill = "white", colour = "black"
  ))

`breaks` 설정의 필요성
- default 로 그린 히스토그램에 평균과 표준편차를 위치시켜보면, Quetelet의 가슴둘레 자료를 히스토그램으로 표현할 때와 같이 0.5만큼 치우친 것을 알 수 있음.
- 따라서 `geom_histogram`의 원형인 `stat_bin`에 `breaks=`를 재설정할 필요.
library(grid)
# Pull the four summary statistics out of crimtab.stat by row and column
# name; the wrapping parentheses echo each value.
(mean.finger <- crimtab.stat[["mean", "finger"]])
## [1] 11.54737
(sd.finger <- crimtab.stat[["sd", "finger"]])
## [1] 0.5487137
(mean.height <- crimtab.stat[["mean", "height"]])
## [1] 65.473
(sd.height <- crimtab.stat[["sd", "height"]])
## [1] 2.557757
# Evaluation grids for the normal curves.
# NOTE(review): the geom_line() calls further down map these vectors through
# aes() on top of crimtab.long.df, so the length of 3000 presumably has to
# match that data frame's row count; confirm before changing it.
x.finger <- seq(from = 9.5, to = 13.5, length.out = 3000)
x.height <- seq(from = 56, to = 77, length.out = 3000)
# Normal densities using the sample mean and sd as parameters.
y.finger <- dnorm(x.finger, mean = mean.finger, sd = sd.finger)
y.height <- dnorm(x.height, mean = mean.height, sd = sd.height)
# Mark the sample mean of height with a small arrow just below the x-axis.
(g.h.2 <- g.h.1 +
  annotate(
    "segment",
    x = mean.height, xend = mean.height, y = -0.02, yend = 0,
    arrow = arrow(length = unit(0.3, "cm"))
  ))

(g.h.3 <- g.h.2 + theme_bw())

# Normal curve as its own data frame. Mapping the bare vectors through aes()
# only worked while their length happened to equal nrow(crimtab.long.df);
# giving the layer its own data removes that hidden coupling.
normal.height <- data.frame(x = x.height, y = y.height)
(g.h.4 <- g.h.3 +
  geom_line(data = normal.height, aes(x = x, y = y), inherit.aes = FALSE))

# Same histogram with explicit bin boundaries at the half-integers.
# `binwidth` is dropped because `breaks` overrides it.
(g.h.5 <- ggplot(crimtab.long.df, aes(x = height)) +
  stat_bin(
    aes(y = after_stat(density)),
    breaks = 55.5:77.5, fill = "white", colour = "black"
  ))

(g.h.6 <- g.h.5 +
  annotate(
    "segment",
    x = mean.height, xend = mean.height, y = -0.02, yend = 0,
    arrow = arrow(length = unit(0.3, "cm"))
  ))

(g.h.7 <- g.h.6 + theme_bw())

(g.h.8 <- g.h.7 +
  geom_line(data = normal.height, aes(x = x, y = y), inherit.aes = FALSE))

# Density-scaled histogram of finger length with a fixed bin width.
# after_stat(density) replaces the deprecated `..density..` notation.
(g.f.1 <- ggplot(crimtab.long.df, aes(x = finger)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.2, fill = "white", colour = "black"
  ))

# Mark the sample mean of finger length with an arrow below the x-axis.
(g.f.2 <- g.f.1 +
  annotate(
    "segment",
    x = mean.finger, xend = mean.finger, y = -0.1, yend = 0,
    arrow = arrow(length = unit(0.3, "cm"))
  ))

(g.f.3 <- g.f.2 + theme_bw())

# Normal curve as its own data frame. Mapping the bare vectors through aes()
# only worked while their length happened to equal nrow(crimtab.long.df);
# giving the layer its own data removes that hidden coupling.
normal.finger <- data.frame(x = x.finger, y = y.finger)
(g.f.4 <- g.f.3 +
  geom_line(data = normal.finger, aes(x = x, y = y), inherit.aes = FALSE))

# Same histogram with explicit bin boundaries from 9.45 to 13.55 in steps of
# 0.2.
(g.f.5 <- ggplot(crimtab.long.df, aes(x = finger)) +
  stat_bin(
    aes(y = after_stat(density)),
    breaks = seq(9.45, 13.55, by = 0.2), fill = "white", colour = "black"
  ))

(g.f.6 <- g.f.5 +
  annotate(
    "segment",
    x = mean.finger, xend = mean.finger, y = -0.1, yend = 0,
    arrow = arrow(length = unit(0.3, "cm"))
  ))

(g.f.7 <- g.f.6 + theme_bw())

(g.f.8 <- g.f.7 +
  geom_line(data = normal.finger, aes(x = x, y = y), inherit.aes = FALSE))
