library(slidify)
# Create the slide deck skeleton named "Topic02" with git enabled.
# Plain ASCII double quotes are required: typographic quotes are not string
# delimiters in R and make the line unparseable.
create_deck("Topic02", git = TRUE)
用R來玩玩Sudoku
# Install the 'sudoku' package only when it is missing, so that re-running
# the script does not download and reinstall it every time.
if (!requireNamespace("sudoku", quietly = TRUE)) {
  install.packages("sudoku")
}
library(help = sudoku) # show the package's help index
library(sudoku)
generateSudoku() # generate a puzzle with the default settings
example(generateSudoku) # run the examples from the function's help page
外觀比較漂亮的Sudoku
# Generate a puzzle with 20 blank cells without printing it, then draw it
# with printSudoku(). FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
myPuzzle <- generateSudoku(Nblank = 20, print.it = FALSE)
printSudoku(myPuzzle)
互動式的玩法
# Play a fetched puzzle interactively, then print a fetched puzzle.
# Each line calls fetchSudokuUK() on its own. TRUE is spelled out because
# `T` is an ordinary variable that can be reassigned.
playSudoku(fetchSudokuUK(), solve = TRUE)
printSudoku(fetchSudokuUK())
# Load the saved objects from "wgcoll.rda"; the calls below use `wgc`,
# which is expected to come from that file.
load("wgcoll.rda")
# Minimum, quartiles, median, mean and maximum of column `aa`.
summary(wgc[["aa"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.0 59.0 72.5 71.4 85.8 99.0
head(wgc, n = 6) # first 6 rows (the default)
## id aa pe sm ae r g c
## 1 1 93 19 1 2 0 0 1
## 2 2 46 12 0 0 0 0 0
## 3 3 57 15 1 1 0 0 0
## 4 4 94 18 2 2 1 1 1
## 5 5 82 13 2 1 1 1 1
## 6 6 59 12 0 0 2 0 0
head(wgc, n = 20) # show the first 20 rows
## id aa pe sm ae r g c
## 1 1 93 19 1 2 0 0 1
## 2 2 46 12 0 0 0 0 0
## 3 3 57 15 1 1 0 0 0
## 4 4 94 18 2 2 1 1 1
## 5 5 82 13 2 1 1 1 1
## 6 6 59 12 0 0 2 0 0
## 7 7 61 12 1 2 0 0 0
## 8 8 29 9 0 0 1 1 0
## 9 9 36 13 1 1 0 0 0
## 10 10 91 16 2 2 1 1 0
## 11 11 55 10 0 0 1 0 0
## 12 12 58 11 0 1 0 0 0
## 13 13 67 14 1 1 0 1 1
## 14 14 77 14 1 2 2 1 0
## 15 15 71 12 0 0 2 1 0
## 16 16 83 16 2 2 1 0 1
## 17 17 96 15 2 2 2 0 1
## 18 18 87 12 1 1 0 0 1
## 19 19 62 11 0 0 0 0 0
## 20 20 52 9 0 1 2 1 0
tail(wgc, n = 20) # show the last 20 rows
## id aa pe sm ae r g c
## 31 31 64 13 1 1 0 0 0
## 32 32 77 13 1 0 1 1 1
## 33 33 88 16 2 2 0 1 0
## 34 34 54 9 0 1 1 0 0
## 35 35 86 17 1 2 1 0 1
## 36 36 73 15 1 1 0 1 0
## 37 37 79 15 2 1 0 0 1
## 38 38 85 14 2 1 2 1 1
## 39 39 96 16 0 1 1 0 1
## 40 40 59 12 1 0 0 1 0
## 41 41 84 14 1 0 1 0 1
## 42 42 71 15 2 1 1 0 0
## 43 43 89 15 0 1 0 1 1
## 44 44 38 12 1 0 1 1 0
## 45 45 62 11 1 1 2 0 1
## 46 46 93 16 1 0 1 0 1
## 47 47 71 13 2 1 1 0 0
## 48 48 55 11 0 1 0 0 0
## 49 49 74 15 1 2 0 1 0
## 50 50 88 18 1 1 0 1 0
# Box plot of column `aa`.
boxplot(wgc[["aa"]])
# Send the current plot straight to the printer.
dev.print(height = 6, width = 6, horizontal = FALSE)
# "Print" the current plot into a PDF file instead.
dev.print(device = pdf, file = "test_boxplot.pdf")
# Histogram of the same column.
hist(wgc[["aa"]])
-Summarizing data using bar charts.
barplot(height = wgc$g) # incorrect example: raw values are plotted untabulated
# Correct approach: tabulate the values first, then plot the counts.
barplot(height = table(wgc$g), ylim = c(0, 30))
pie(x = wgc$g) # incorrect example: raw values are plotted untabulated
pie(x = table(wgc$g), init.angle = 270, radius = 0.8)
-例如ggplot2和googleVis:
# Install googleVis only when it is missing. The original line misspelled
# install.packages() and passed the package name unquoted, so it failed.
if (!requireNamespace("googleVis", quietly = TRUE)) {
  install.packages("googleVis")
}
library(googleVis)
# Frequency table of column `g` as a data frame (columns Var1 and Freq).
gender <- as.data.frame(table(wgc$g))
# Assumes `g` has exactly two levels, in this order -- TODO confirm.
rownames(gender) <- c("女", "男")
# NOTE(review): "row.names" is not a column of `gender`; confirm that
# gvisPieChart() accepts it as `labelvar`.
# The object is named `pie`, like graphics::pie(); calls to pie() still
# resolve to the function because R skips non-functions in call position.
pie <- gvisPieChart(gender, labelvar = "row.names")
plot(pie)
準備1:製作兩個資料物件x與y
# x: 30 normal draws with mean 1, arranged column-wise into a 6 x 5 matrix.
x <- matrix(rnorm(n = 30, mean = 1), nrow = 6, ncol = 5)
# y: a length-6 vector 1, 1, 2, 3, 4, 5 (one value per row of x).
y <- c(1, seq_len(5))
準備2:把物件x與物件y合併為一個矩陣z(matrix)
# Append y to x as an extra column; the new column is named "y".
z <- cbind(x, y = y)
準備3:把物件z轉為資料框格式(data frame)
# Convert the matrix to a data frame; the unnamed columns show up as
# V1..V5 in the printed result below, next to the named column y.
z.df <- data.frame(z, check.names = TRUE)
z.df
## V1 V2 V3 V4 V5 y
## 1 1.23351 -0.14503 1.4441 1.4516 1.60937 1
## 2 0.04472 0.18228 3.1658 1.6901 1.75227 1
## 3 0.18440 1.21110 1.5945 3.0075 1.63307 2
## 4 1.05453 1.65460 1.4643 -1.1552 -0.09981 3
## 5 3.26648 0.91051 1.4718 1.8692 1.06903 4
## 6 1.85547 -0.08774 0.3348 0.5796 -0.37785 5
用 subset() 指令來切割資料框
names(z.df)
## [1] "V1" "V2" "V3" "V4" "V5" "y"
# Rows where y is greater than 2.
z.sub <- subset(z.df, subset = y > 2)
z.sub
## V1 V2 V3 V4 V5 y
## 4 1.055 1.65460 1.4643 -1.1552 -0.09981 3
## 5 3.266 0.91051 1.4718 1.8692 1.06903 4
## 6 1.855 -0.08774 0.3348 0.5796 -0.37785 5
# Two conditions combined with the elementwise `&`.
z.sub1 <- subset(z.df, subset = y > 2 & V1 > 0.6)
z.sub1
## V1 V2 V3 V4 V5 y
## 4 1.055 1.65460 1.4643 -1.1552 -0.09981 3
## 5 3.266 0.91051 1.4718 1.8692 1.06903 4
## 6 1.855 -0.08774 0.3348 0.5796 -0.37785 5
# Filter rows and keep only columns V1 and V4.
z.sub2 <- subset(z.df, subset = y > 2 & V2 > 0.4, select = c(V1, V4))
z.sub2
## V1 V4
## 4 1.055 -1.155
## 5 3.266 1.869
# Filter rows and keep the column range V2 through V5.
z.sub3 <- subset(z.df, subset = y > 3, select = V2:V5)
z.sub3
## V2 V3 V4 V5
## 5 0.91051 1.4718 1.8692 1.0690
## 6 -0.08774 0.3348 0.5796 -0.3779
# The same kind of row filter written with bracket indexing.
z.sub4 <- z.df[z.df[["y"]] == 1, ]
z.sub4
## V1 V2 V3 V4 V5 y
## 1 1.23351 -0.1450 1.444 1.452 1.609 1
## 2 0.04472 0.1823 3.166 1.690 1.752 1
用 %in% 來選取在一個變數有不同值的觀察值
z.sub5 <- z.df[z.df$y %in% c(1, 4), ] #只選出y=1 或y=4的觀察值
# Show the rows selected with %in% (y equal to 1 or 4).
z.sub5
## V1 V2 V3 V4 V5 y
## 1 1.23351 -0.1450 1.444 1.452 1.609 1
## 2 0.04472 0.1823 3.166 1.690 1.752 1
## 5 3.26648 0.9105 1.472 1.869 1.069 4
# All rows, first two columns.
z.sub6 <- z.df[, c(1, 2)]
z.sub6
## V1 V2
## 1 1.23351 -0.14503
## 2 0.04472 0.18228
## 3 0.18440 1.21110
## 4 1.05453 1.65460
## 5 3.26648 0.91051
## 6 1.85547 -0.08774
# All rows, columns 1, 3 and 5.
z.sub7 <- z.df[, seq(1, 5, by = 2)]
z.sub7
## V1 V3 V5
## 1 1.23351 1.4441 1.60937
## 2 0.04472 3.1658 1.75227
## 3 0.18440 1.5945 1.63307
## 4 1.05453 1.4643 -0.09981
## 5 3.26648 1.4718 1.06903
## 6 1.85547 0.3348 -0.37785
# Rows 1 and 3, columns 3 through 6.
z.sub8 <- z.df[c(1, 3), seq(3, 6)]
z.sub8
## V3 V4 V5 y
## 1 1.444 1.452 1.609 1
## 3 1.595 3.007 1.633 2
When all of the possible sample means are computed, then the following properties are true:
# Normal parent population: normal densities with mean 0 and sd 1/sqrt(n),
# drawn on the same axes for n = 1, 25 and 100.
# `n` inside the expression is looked up in the calling environment each
# time curve() evaluates it, so it must be set before every call.
n <- 1
curve(
  dnorm(x, mean = 0, sd = 1 / sqrt(n)),
  from = -3, to = 3, ylim = c(0, 5), bty = "l",
  xlab = "x", ylab = "Densities of sample mean"
)
n <- 25
curve(dnorm(x, mean = 0, sd = 1 / sqrt(n)), lty = 2, add = TRUE)
n <- 100
curve(dnorm(x, mean = 0, sd = 1 / sqrt(n)), lty = 3, add = TRUE)
# Nonnormal parent population: for each n, draw m binomial(n, p) values,
# plot their density-scaled histogram, and overlay the normal density with
# mean n * p and sd sqrt(n * p * (1 - p)).
# Author's note from the original: by n = 10 the histogram is already close
# to a normal distribution.
m <- 200
p <- 1 / 2
for (n in c(5, 10, 25)) {
  res <- rbinom(m, n, p)
  # `probability` is written out in full; the original `prob = TRUE` only
  # worked through partial argument matching.
  hist(res, probability = TRUE, main = paste0("n=", n))
  curve(dnorm(x, n * p, sqrt(n * p * (1 - p))), add = TRUE)
}
# Density estimates of the sample mean of n uniform(a, b) draws, for
# n = 2, 10 and 100, overlaid on one empty plotting frame.
plot(0, 0, type = "n", xlim = c(0, 1), ylim = c(0, 13.5),
     xlab = "Density estimate", ylab = "f(x)")
m <- 500
a <- 0
b <- 1
for (n in c(2, 10, 100)) {
  # Build a fresh vector of m sample means for this n. The original filled
  # an inherited `res` element by element, which fails if `res` does not
  # already exist.
  res <- replicate(m, mean(runif(n, a, b)))
  # Draw one curve per n, after all m means are available. The original
  # n = 100 loop called lines() on every iteration, overplotting 500
  # curves computed from a partly updated `res`.
  lines(density(res), lwd = 2)
}