Flow Control
# Two-way branch on a single (scalar) condition.
x <- 5
if (x > 3) {
  print("x > 3")
} else {
  print("x <= 3")
}
## [1] "x > 3"
# The same branch written on one line.
if (x > 3) print("x > 3") else print("x <= 3")
## [1] "x > 3"
# ifelse() returns the selected value instead of running a branch.
test <- ifelse(x > 3, "x > 3", "x <= 3")
test
## [1] "x > 3"
# Three-way branch using else if.
x <- 5
if (x > 3) {
  print("x > 3")
} else if (x == 3) {
  print("x == 3")
} else {
  print("x <= 3")
}
## [1] "x > 3"
# switch() on a number evaluates the alternative at that position ...
switch(
  2,
  print("aaa"),
  print("bbb"),
  print("ccc")
)
## [1] "bbb"
# ... and on a string evaluates the alternative with that name.
switch(
  "third",
  first = print("aaa"),
  second = print("bbb"),
  third = print("ccc")
)
## [1] "ccc"
# Counted loop printing 1 through 10.
for (i in seq_len(10)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Add up 1..100 with a for loop.
# NOTE: the accumulator is named `sum`, which masks base::sum as a variable.
# Calls such as sum(1:100) still reach the function, because R skips
# non-function bindings when it looks up the name of a call.
sum <- 0
for (i in seq_len(100)) {
  sum <- sum + i
}
sum
## [1] 5050
# Vectorised equivalent of the loop above.
sum(1:100)
## [1] 5050
# The same total with a while loop; cnt runs from 0 through 100.
sum <- 0
cnt <- 0
while (cnt <= 100) {
  sum <- sum + cnt
  cnt <- cnt + 1
}
sum
## [1] 5050
# Build a 3 x 3 matrix filled row by row, then print every cell in
# row-major order.
mat <- matrix(1:9, byrow = TRUE, nrow = 3)
# seq_len() yields an empty sequence for a zero-extent matrix, whereas
# 1:nrow(mat) would count down as c(1, 0) and index out of bounds.
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    print(mat[i, j])
  }
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
# Visit every cell of `mat` row by row and print it; dim(mat) supplies the
# row and column counts, and seq_len() keeps both loops empty-safe.
for (i in seq_len(dim(mat)[1L])) {
  for (j in seq_len(dim(mat)[2L])) {
    print(mat[i, j])
  }
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
p86 example
# Method 1: fill a 9 x 9 multiplication table cell by cell.
# The cells hold formatted text, so start from a character matrix rather than
# a numeric one that is silently coerced to character on the first assignment.
mat <- matrix("", nrow = 9, ncol = 9)
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    # Alternative cell contents (a numeric `mat` is needed for the first one):
    #   mat[i, j] <- i * j
    #   mat[i, j] <- paste(i, "*", j, "= ", i * j)
    mat[i, j] <- sprintf(" %s * %s = %s", i, j, i * j)
  }
}
# Method 2: the same table as the matrix product of a 9 x 1 column (1..9)
# and a 1 x 9 row (1..9).
mat1 <- matrix(1:9, ncol = 1)
mat2 <- matrix(1:9, ncol = 9)
mat <- mat1 %*% mat2
mat
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 1 2 3 4 5 6 7 8 9
## [2,] 2 4 6 8 10 12 14 16 18
## [3,] 3 6 9 12 15 18 21 24 27
## [4,] 4 8 12 16 20 24 28 32 36
## [5,] 5 10 15 20 25 30 35 40 45
## [6,] 6 12 18 24 30 36 42 48 54
## [7,] 7 14 21 28 35 42 49 56 63
## [8,] 8 16 24 32 40 48 56 64 72
## [9,] 9 18 27 36 45 54 63 72 81
p90 example
# Build the 5 x 5 lookup matrix for the names A-E from a "|"-separated file.
#
# filename: path to a headerless file; in each row the first field selects
#           the matrix row, the second field the matrix column, and the third
#           field is the value stored in that cell.
# Returns a numeric matrix with rows and columns named A-E; cells that have
# no record in the file keep the placeholder -1.
match_func <- function(filename) {
  teams <- c("A", "B", "C", "D", "E")
  records <- read.table(filename, sep = "|")
  mat <- matrix(-1, nrow = length(teams), ncol = length(teams),
                dimnames = list(teams, teams))
  # Index by name explicitly: a factor index would be read as integer codes,
  # which only line up with A-E when every name occurs in the column.
  # A two-column character matrix addresses one (row, column) cell per record.
  cells <- cbind(as.character(records[[1]]), as.character(records[[2]]))
  mat[cells] <- records[[3]]
  mat
}
# Run the fixed A-E version on the course data file.
match_func(filename = "~/lecture/riii/data/match.txt")
## A B C D E
## A -1 1 3 2 0
## B 2 -1 1 3 0
## C 2 0 -1 1 5
## D 1 1 2 -1 0
## E 1 1 2 3 -1
# General case: build a square lookup matrix for whatever names the file lists.
#
# filename: path to a delimited text file; column 1 selects the matrix row,
#           column 2 the matrix column, column 3 is the value to store.
# header:   passed to read.table(); TRUE when the first line holds column names.
# sep:      field separator passed to read.table().
# Returns a numeric matrix whose rows and columns are both named after the
# sorted distinct names found in columns 1 and 2; cells without a record
# keep the placeholder -1.
match_func <- function(filename, header = TRUE, sep = "|") {
  records <- read.table(filename, header = header, sep = sep)
  row_names <- as.character(records[[1]])
  col_names <- as.character(records[[2]])
  # Derive the names from the values themselves: read.table() returns
  # character columns since R 4.0, and levels() of a character vector is NULL,
  # which would size the matrix 0 x 0.
  teams <- sort(unique(c(row_names, col_names)))
  mat <- matrix(-1, nrow = length(teams), ncol = length(teams),
                dimnames = list(teams, teams))
  # One (row name, column name) pair per record, assigned in a single step.
  mat[cbind(row_names, col_names)] <- records[[3]]
  mat
}
# Run the general version on the same file, which has no header line.
match_func("~/lecture/riii/data/match.txt", header = FALSE)
## A B C D E
## A -1 1 3 2 0
## B 2 -1 1 3 0
## C 2 0 -1 1 5
## D 1 1 2 -1 0
## E 1 1 2 3 -1
lapply sapply apply tapply
# lapply() applies a function to every list element and returns a list.
x <- list(
  c(1, 2, 3, 4),
  c(5, 6, 7, 8)
)
lapply(X = x, FUN = sum)
## [[1]]
## [1] 10
##
## [[2]]
## [1] 26
# The elements may be matrices as well: mean() averages all cells of each.
m1 <- matrix(1:4, nrow = 2, byrow = TRUE)
m2 <- matrix(5:8, nrow = 2, byrow = TRUE)
li <- list(m1, m2)
lapply(X = li, FUN = mean)
## [[1]]
## [1] 2.5
##
## [[2]]
## [1] 6.5
# A named list of score vectors of different lengths.
grades <- list(
  kevin = c(80, 60, 92),
  marry = c(56, 75, 64, 84, 56),
  QOO = c(10, 20, 3, 4, 10)
)
# Per-person total; the element names carry over to the result.
lapply(X = grades, FUN = sum)
## $kevin
## [1] 232
##
## $marry
## [1] 335
##
## $QOO
## [1] 47
# Per-person average.
lapply(X = grades, FUN = mean)
## $kevin
## [1] 77.33333
##
## $marry
## [1] 67
##
## $QOO
## [1] 9.4
# An anonymous function can return several statistics per person.
lapply(
  X = grades,
  FUN = function(scores) list(sum = sum(scores), min = min(scores))
)
## $kevin
## $kevin$sum
## [1] 232
##
## $kevin$min
## [1] 60
##
##
## $marry
## $marry$sum
## [1] 335
##
## $marry$min
## [1] 56
##
##
## $QOO
## $QOO$sum
## [1] 47
##
## $QOO$min
## [1] 3
# lapply() always returns a list ...
class(lapply(X = grades, FUN = sum))
## [1] "list"
# ... while sapply() simplifies the result to a named vector here.
sapply(X = grades, FUN = sum)
## kevin marry QOO
## 232 335 47
class(sapply(X = grades, FUN = sum))
## [1] "numeric"
# sapply() over the list of matrices: one mean per matrix, as a plain vector.
sapply(X = li, FUN = mean)
## [1] 2.5 6.5
# Taking the first row of each matrix yields equal-length vectors, which
# sapply() binds together as the columns of a matrix.
sapply(X = li, FUN = function(one_matrix) one_matrix[1, ])
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
# apply() runs a function over one margin of a matrix:
# MARGIN = 1 works row by row, MARGIN = 2 column by column.
m <- matrix(1:4, nrow = 2, byrow = TRUE)
apply(X = m, MARGIN = 1, FUN = sum) # row sums
## [1] 3 7
apply(X = m, MARGIN = 2, FUN = sum) # column sums
## [1] 4 6
rowmeans <- apply(X = m, MARGIN = 1, FUN = mean)
colmeans <- apply(X = m, MARGIN = 2, FUN = mean)
# tapply() splits the first vector by the grouping vector and applies the
# function to each group.
# NOTE: the grouping vector is named `t`, which masks base::t as a variable.
x <- c(80, 70, 59, 88, 72, 57)
t <- c(1, 1, 2, 1, 1, 2)
tapply(X = x, INDEX = t, FUN = mean)
## 1 2
## 77.5 58.0
# Mean sepal length per species in the built-in iris data.
data("iris")
tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN = mean)
## setosa versicolor virginica
## 5.006 5.936 6.588
# The same per-species mean for each of the first four columns.
lapply(
  names(iris)[1:4],
  function(column) tapply(iris[[column]], iris[["Species"]], mean)
)
## [[1]]
## setosa versicolor virginica
## 5.006 5.936 6.588
##
## [[2]]
## setosa versicolor virginica
## 3.428 2.770 2.974
##
## [[3]]
## setosa versicolor virginica
## 1.462 4.260 5.552
##
## [[4]]
## setosa versicolor virginica
## 0.246 1.326 2.026
探索性資料分析
表格
# Download the data file (one-off; kept commented out):
# download.file("https://github.com/YuHsuanLin/riii/raw/master/Statistics/cdc.Rdata","~/lecture/riii/Statistics/cdc.Rdata")
# Import the data. The path is relative, so the working directory is set
# first; check the current one with getwd() if the load fails.
setwd("~/lecture/riii")
load(file = file.path("Statistics", "cdc.Rdata"))
# Show the structure of the loaded `cdc` object.
str(object = cdc)
## 'data.frame': 20000 obs. of 9 variables:
## $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
## $ exerany : num 0 0 1 1 0 1 1 0 0 1 ...
## $ hlthplan: num 1 1 1 1 1 1 1 1 1 1 ...
## $ smoke100: num 0 1 1 0 0 0 0 0 1 0 ...
## $ height : num 70 64 60 66 61 64 71 67 65 70 ...
## $ weight : int 175 125 105 132 150 114 194 170 150 180 ...
## $ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
## $ age : int 77 33 49 42 55 55 31 45 27 44 ...
## $ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
# First six rows of the data.
head(cdc, n = 6L)
## genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1 good 0 1 0 70 175 175 77 m
## 2 good 0 1 1 64 125 115 33 f
## 3 good 1 1 1 60 105 105 49 f
## 4 good 1 1 0 66 132 124 42 f
## 5 very good 0 1 0 61 150 130 55 f
## 6 very good 1 1 0 64 114 114 55 f
colnames(cdc)
## [1] "genhlth" "exerany" "hlthplan" "smoke100" "height" "weight"
## [7] "wtdesire" "age" "gender"
# Convert data types: the three numeric 0/1 columns become factors.
cdc[c("exerany", "hlthplan", "smoke100")] <-
  lapply(cdc[c("exerany", "hlthplan", "smoke100")], as.factor)
str(object = cdc)
## 'data.frame': 20000 obs. of 9 variables:
## $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
## $ exerany : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 2 1 1 2 ...
## $ hlthplan: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ smoke100: Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
## $ height : num 70 64 60 66 61 64 71 67 65 70 ...
## $ weight : int 175 125 105 132 150 114 194 170 150 180 ...
## $ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
## $ age : int 77 33 49 42 55 55 31 45 27 44 ...
## $ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
# One-way frequency table
table(cdc[["exerany"]])
##
## 0 1
## 5086 14914
# Relative proportions: counts divided by the number of observations
table(cdc[["exerany"]]) / length(cdc[["exerany"]])
##
## 0 1
## 0.2543 0.7457
# Two-way frequency table (rows: gender, columns: exerany)
table(cdc[["gender"]], cdc[["exerany"]])
##
## 0 1
## m 2149 7420
## f 2937 7494
# Tabulating a continuous variable gives one cell per distinct value
table(cdc[["height"]])
##
## 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
## 2 1 1 2 2 7 3 4 17 20 51 170 613 594 1272
## 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
## 1368 1662 1568 1843 1671 1505 1380 1500 1296 1393 784 605 321 189 80
## 78 79 80 81 82 83 84 93
## 43 15 10 3 2 1 1 1
summary(cdc$height)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 48.00 64.00 67.00 67.18 70.00 93.00
# Break points shared by every grouping below (45, 50, ..., 95), defined once
# so the tables and the stored column cannot drift apart.
height_breaks <- seq(45, 95, by = 5)
head(cut(cdc$height, height_breaks))
## [1] (65,70] (60,65] (55,60] (65,70] (60,65] (60,65]
## 10 Levels: (45,50] (50,55] (55,60] (60,65] (65,70] (70,75] ... (90,95]
# Intervals include the upper bound and exclude the lower bound
table(cut(cdc$height, height_breaks, right = TRUE))
##
## (45,50] (50,55] (55,60] (60,65] (65,70] (70,75] (75,80] (80,85] (85,90]
## 4 18 871 6464 7899 4399 337 7 0
## (90,95]
## 1
# Intervals include the lower bound and exclude the upper bound
table(cut(cdc$height, height_breaks, right = FALSE))
##
## [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85) [85,90)
## 3 15 262 5509 7967 5578 648 17 0
## [90,95)
## 1
## Add labels (group names): one label per interval, i.e. one fewer than
## the number of break points
table(cut(cdc$height, height_breaks, right = FALSE,
          labels = seq_len(length(height_breaks) - 1)))
##
## 1 2 3 4 5 6 7 8 9 10
## 3 15 262 5509 7967 5578 648 17 0 1
## Store the grouped values in the h_group column of cdc
cdc$h_group <- cut(cdc$height, height_breaks, right = FALSE)
str(cdc)
## 'data.frame': 20000 obs. of 10 variables:
## $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
## $ exerany : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 2 1 1 2 ...
## $ hlthplan: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ smoke100: Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
## $ height : num 70 64 60 66 61 64 71 67 65 70 ...
## $ weight : int 175 125 105 132 150 114 194 170 150 180 ...
## $ wtdesire: int 175 115 105 124 130 114 185 160 130 170 ...
## $ age : int 77 33 49 42 55 55 31 45 27 44 ...
## $ gender : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
## $ h_group : Factor w/ 10 levels "[45,50)","[50,55)",..: 6 4 4 5 4 4 6 5 5 6 ...
apply example
# For each exerany level (one row of the table), divide the genhlth counts by
# that row's total; apply() returns the per-row results as columns.
apply(
  X = table(cdc[["exerany"]], cdc[["genhlth"]]),
  MARGIN = 1,
  FUN = function(counts) counts / sum(counts)
)
##
## 0 1
## excellent 0.14982304 0.26116401
## very good 0.26582776 0.37682714
## good 0.34034605 0.26444951
## fair 0.16850177 0.07791337
## poor 0.07550138 0.01964597
統計量
# Measures of central tendency: mean, median, mode
a <- c(100, 120, 130, 110, 100, 90, 80, 90, 100, 110)
# Mean by hand: divide by the actual number of values rather than a
# hard-coded 10, so the line stays correct if `a` changes.
sum(a) / length(a)
## [1] 103
mean(a)
## [1] 103
# With an extreme value the mean shifts a lot ...
b <- c(a, 10000)
mean(b)
## [1] 1002.727
a
## [1] 100 120 130 110 100 90 80 90 100 110
sort(a)
## [1] 80 90 90 100 100 100 110 110 120 130
median(a)
## [1] 100
sort(b)
## [1] 80 90 90 100 100 100 110 110 120 130 10000
# ... while the median does not move.
median(b)
## [1] 100
# Mode: tabulate the values, then pick the entry with the highest count.
table(c(1, 4, 4, 3))
##
## 1 3 4
## 1 1 2
# which.max() returns the position of the largest count, named by its value.
which.max(table(c(1, 4, 4, 3)))
## 4
## 3
names(which.max(table(c(1, 4, 4, 3))))
## [1] "4"
# Mean, median and mode of the weight column.
mean(cdc[["weight"]])
## [1] 169.683
median(cdc[["weight"]])
## [1] 165
# Mode: the table entry with the highest count, converted back to a number.
as.integer(names(which.max(table(cdc[["weight"]]))))
## [1] 160
# Measures of dispersion: range, IQR, variance, standard deviation
a <- c(173, 162, 150, 160, 155, 168, 171, 185, 175, 178, 182)
sort(a)
## [1] 150 155 160 162 168 171 173 175 178 182 185
range(a)
## [1] 150 185
# Median and quartiles via quantile().
quantile(a, probs = 0.5)
## 50%
## 171
quantile(a, probs = 0.25)
## 25%
## 161
quantile(a, probs = 0.75)
## 75%
## 176.5
# Interquartile range by hand, then with the built-in helper.
quantile(a, probs = 0.75) - quantile(a, probs = 0.25)
## 75%
## 15.5
IQR(a)
## [1] 15.5
fivenum(a)
## [1] 150.0 161.0 171.0 176.5 185.0
summary(a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 150.0 161.0 171.0 169.0 176.5 185.0
boxplot(a, horizontal = TRUE)

# Append one much larger value and compare range and IQR with the above.
b <- c(a, 226)
range(b)
## [1] 150 226
IQR(b)
## [1] 17.5
boxplot(b, horizontal = TRUE)

# Range
range(cdc[["weight"]])
## [1] 68 500
# Interquartile range
IQR(cdc[["weight"]])
## [1] 50
# Variance
var(cdc[["weight"]])
## [1] 1606.484
# Standard deviation: square root of the variance, or sd() directly
sqrt(var(cdc[["weight"]]))
## [1] 40.08097
sd(cdc[["weight"]])
## [1] 40.08097
# Summary statistics
summary(cdc[["weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 68.0 140.0 165.0 169.7 190.0 500.0
Covariance & Correlation
x <- c(160, 170, 180)
y <- c(64, 68, 72)
# Covariance by hand: sum of the products of deviations from the means,
# divided by n - 1. The divisor comes from length(x) instead of a literal 2,
# so it stays right when the vectors change length.
cov_xy <- sum((x - mean(x)) * (y - mean(y))) / (length(x) - 1)
cov_xy
## [1] 40
cov(x, y)
## [1] 40
# Correlation coefficient: covariance scaled by both standard deviations
cor_xy <- cov(x, y) / (sd(x) * sd(y))
cor_xy
## [1] 1
cor(x, y)
## [1] 1
plot(x, y)

# Example 1: the built-in mtcars data set, printed in full.
data("mtcars")
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Covariance matrix of every pair of mtcars columns.
cov(x = mtcars)
## mpg cyl disp hp drat
## mpg 36.324103 -9.1723790 -633.09721 -320.732056 2.19506351
## cyl -9.172379 3.1895161 199.66028 101.931452 -0.66836694
## disp -633.097208 199.6602823 15360.79983 6721.158669 -47.06401915
## hp -320.732056 101.9314516 6721.15867 4700.866935 -16.45110887
## drat 2.195064 -0.6683669 -47.06402 -16.451109 0.28588135
## wt -5.116685 1.3673710 107.68420 44.192661 -0.37272073
## qsec 4.509149 -1.8868548 -96.05168 -86.770081 0.08714073
## vs 2.017137 -0.7298387 -44.37762 -24.987903 0.11864919
## am 1.803931 -0.4657258 -36.56401 -8.320565 0.19015121
## gear 2.135685 -0.6491935 -50.80262 -6.358871 0.27598790
## carb -5.363105 1.5201613 79.06875 83.036290 -0.07840726
## wt qsec vs am gear
## mpg -5.1166847 4.50914919 2.01713710 1.80393145 2.1356855
## cyl 1.3673710 -1.88685484 -0.72983871 -0.46572581 -0.6491935
## disp 107.6842040 -96.05168145 -44.37762097 -36.56401210 -50.8026210
## hp 44.1926613 -86.77008065 -24.98790323 -8.32056452 -6.3588710
## drat -0.3727207 0.08714073 0.11864919 0.19015121 0.2759879
## wt 0.9573790 -0.30548161 -0.27366129 -0.33810484 -0.4210806
## qsec -0.3054816 3.19316613 0.67056452 -0.20495968 -0.2804032
## vs -0.2736613 0.67056452 0.25403226 0.04233871 0.0766129
## am -0.3381048 -0.20495968 0.04233871 0.24899194 0.2923387
## gear -0.4210806 -0.28040323 0.07661290 0.29233871 0.5443548
## carb 0.6757903 -1.89411290 -0.46370968 0.04637097 0.3266129
## carb
## mpg -5.36310484
## cyl 1.52016129
## disp 79.06875000
## hp 83.03629032
## drat -0.07840726
## wt 0.67579032
## qsec -1.89411290
## vs -0.46370968
## am 0.04637097
## gear 0.32661290
## carb 2.60887097
# Correlation matrix of every pair of mtcars columns.
cor(x = mtcars)
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
## qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
## vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
## am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
## gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
## carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
## cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
## hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
## drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
## wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
## qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
## am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
## gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
## carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# Covariance matrix restricted to the first three columns (mpg, cyl, disp).
cov(x = mtcars[c("mpg", "cyl", "disp")])
## mpg cyl disp
## mpg 36.324103 -9.172379 -633.0972
## cyl -9.172379 3.189516 199.6603
## disp -633.097208 199.660282 15360.7998
# Example 2: correlation between exports and GDP.
setwd('~/lecture/riii')
gdp <- read.csv("data/gdp.csv", header = TRUE)
# gdp <- gdp[1:15, ]
# Keep only rows without missing values.
gdp <- gdp[complete.cases(gdp), ]
# The figures carry thousands separators. gsub() removes every comma,
# whereas sub() would strip only the first one and leave values such as
# "1,234,567" as "1234,567", which as.numeric() turns into NA.
gdp$GDP <- as.numeric(gsub(",", "", gdp$GDP, fixed = TRUE))
gdp$Export <- as.numeric(gsub(",", "", gdp$Export, fixed = TRUE))
cor(gdp$Export, gdp$GDP)
## [1] 0.982525
統計圖
# Qualitative data: bar chart, pie chart
barplot(table(cdc$smoke100))

help("barplot")
# Same chart with axis labels, a title and a fill colour.
# NOTE(review): "Songti SC" is presumably a font available on the author's
# machine for the Chinese labels; confirm before running elsewhere.
barplot(
  table(cdc$smoke100),
  xlab = "有無吸菸",
  ylab = "人數",
  main = "title",
  col = "blue",
  family = "Songti SC"
)

pie(table(cdc$smoke100))

pie(table(cdc$genhlth))

pie(table(cdc$genhlth), col = rainbow(5))

# Add the share of each category to the slice labels
pct <- round(table(cdc$genhlth) / length(cdc$genhlth) * 100, 1)
labels <- paste(names(pct), pct, "%")
pie(table(cdc$genhlth), labels = labels)

gender_smokers <- table(cdc$gender, cdc$smoke100)
mosaicplot(gender_smokers)

# Quantitative data: histogram, box plot, stem-and-leaf plot
hist(cdc$age)

# Stack three histograms of height with different `breaks` settings.
par(mfrow = c(3, 1))
hist(cdc$height)
hist(cdc$height, breaks = 30)
hist(cdc$height, breaks = 50)

stem(cdc$age)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 1 | 88888888888888888888888888888888888888888888888888888888888888888888+509
## 2 | 00000000000000000000000000000000000000000000000000000000000000000000+1694
## 2 | 55555555555555555555555555555555555555555555555555555555555555555555+1835
## 3 | 00000000000000000000000000000000000000000000000000000000000000000000+1954
## 3 | 55555555555555555555555555555555555555555555555555555555555555555555+2154
## 4 | 00000000000000000000000000000000000000000000000000000000000000000000+2109
## 4 | 55555555555555555555555555555555555555555555555555555555555555555555+1842
## 5 | 00000000000000000000000000000000000000000000000000000000000000000000+1578
## 5 | 55555555555555555555555555555555555555555555555555555555555555555555+1224
## 6 | 00000000000000000000000000000000000000000000000000000000000000000000+969
## 6 | 55555555555555555555555555555555555555555555555555555555555555555555+975
## 7 | 00000000000000000000000000000000000000000000000000000000000000000000+889
## 7 | 55555555555555555555555555555555555555555555555555555555555555555555+614
## 8 | 00000000000000000000000000000000000000000000000000000000000000000000+344
## 8 | 55555555555555555555555555555555555555555555555555555555566666666666+69
## 9 | 00000000011111111112222223333333444
## 9 | 556799
# Draw one random sample of 100 ages and plot that same sample; previously
# `tmp` was stored but a second, different sample was plotted instead.
tmp <- sample(cdc$age, 100)
stem(tmp)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 1 | 88889
## 2 | 122234455677777788899
## 3 | 011222333444577999
## 4 | 00001111233555667889
## 5 | 00011224566789
## 6 | 001135577799
## 7 | 24568
## 8 | 1125
## 9 | 4
help("stem")
# With scale = 2 each stem is split over two lines (compare the output
# below with the previous plot).
stem(sample(cdc$age, size = 100), scale = 2)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 1 | 8889
## 2 | 112333444
## 2 | 555577889
## 3 | 00011233444
## 3 | 5678888899
## 4 | 00001122223334444
## 4 | 55666678889
## 5 | 01133334
## 5 | 5556777799
## 6 | 2
## 6 | 588
## 7 | 02444
## 7 |
## 8 | 03
# Back to a single plot per page after the 3 x 1 layout used earlier.
par(mfrow = c(1, 1))
boxplot(cdc$weight)

boxplot(cdc$weight, horizontal = TRUE)

# Formula interface: one box per level of the grouping variable.
boxplot(cdc$weight ~ cdc$gender)

boxplot(cdc$height ~ cdc$gender)

# NOTE(review): 703 is presumably the conversion factor for a BMI computed
# from pounds and inches; confirm the units of weight and height.
bmi <- (cdc$weight / cdc$height^2) * 703
boxplot(bmi ~ cdc$genhlth)

# Relationship between two variables: scatter plot
plot(cdc$weight, cdc$height)

plot(cdc$weight, cdc$wtdesire)

# Write the scatter plot to a PNG file: open the file device, draw, then
# close the device so the file is finished.
png(filename = "test123.png")
plot(cdc$weight, cdc$height)
dev.off()
## quartz_off_screen
## 2
DataExplorer
# install.packages("DataExplorer")
library("DataExplorer")
help(package = "DataExplorer")
# One-row overview of iris: row/column counts, discrete vs continuous
# columns, missing values and memory usage.
introduce(iris)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1 150 5 1 4 0
## total_missing_values total_observations memory_usage
## 1 0 750 7256
# Replace the Species factor with one 0/1 column per level.
dummify(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species_setosa
## 1 5.1 3.5 1.4 0.2 1
## 2 4.9 3.0 1.4 0.2 1
## 3 4.7 3.2 1.3 0.2 1
## 4 4.6 3.1 1.5 0.2 1
## 5 5.0 3.6 1.4 0.2 1
## 6 5.4 3.9 1.7 0.4 1
## 7 4.6 3.4 1.4 0.3 1
## 8 5.0 3.4 1.5 0.2 1
## 9 4.4 2.9 1.4 0.2 1
## 10 4.9 3.1 1.5 0.1 1
## 11 5.4 3.7 1.5 0.2 1
## 12 4.8 3.4 1.6 0.2 1
## 13 4.8 3.0 1.4 0.1 1
## 14 4.3 3.0 1.1 0.1 1
## 15 5.8 4.0 1.2 0.2 1
## 16 5.7 4.4 1.5 0.4 1
## 17 5.4 3.9 1.3 0.4 1
## 18 5.1 3.5 1.4 0.3 1
## 19 5.7 3.8 1.7 0.3 1
## 20 5.1 3.8 1.5 0.3 1
## 21 5.4 3.4 1.7 0.2 1
## 22 5.1 3.7 1.5 0.4 1
## 23 4.6 3.6 1.0 0.2 1
## 24 5.1 3.3 1.7 0.5 1
## 25 4.8 3.4 1.9 0.2 1
## 26 5.0 3.0 1.6 0.2 1
## 27 5.0 3.4 1.6 0.4 1
## 28 5.2 3.5 1.5 0.2 1
## 29 5.2 3.4 1.4 0.2 1
## 30 4.7 3.2 1.6 0.2 1
## 31 4.8 3.1 1.6 0.2 1
## 32 5.4 3.4 1.5 0.4 1
## 33 5.2 4.1 1.5 0.1 1
## 34 5.5 4.2 1.4 0.2 1
## 35 4.9 3.1 1.5 0.2 1
## 36 5.0 3.2 1.2 0.2 1
## 37 5.5 3.5 1.3 0.2 1
## 38 4.9 3.6 1.4 0.1 1
## 39 4.4 3.0 1.3 0.2 1
## 40 5.1 3.4 1.5 0.2 1
## 41 5.0 3.5 1.3 0.3 1
## 42 4.5 2.3 1.3 0.3 1
## 43 4.4 3.2 1.3 0.2 1
## 44 5.0 3.5 1.6 0.6 1
## 45 5.1 3.8 1.9 0.4 1
## 46 4.8 3.0 1.4 0.3 1
## 47 5.1 3.8 1.6 0.2 1
## 48 4.6 3.2 1.4 0.2 1
## 49 5.3 3.7 1.5 0.2 1
## 50 5.0 3.3 1.4 0.2 1
## 51 7.0 3.2 4.7 1.4 0
## 52 6.4 3.2 4.5 1.5 0
## 53 6.9 3.1 4.9 1.5 0
## 54 5.5 2.3 4.0 1.3 0
## 55 6.5 2.8 4.6 1.5 0
## 56 5.7 2.8 4.5 1.3 0
## 57 6.3 3.3 4.7 1.6 0
## 58 4.9 2.4 3.3 1.0 0
## 59 6.6 2.9 4.6 1.3 0
## 60 5.2 2.7 3.9 1.4 0
## 61 5.0 2.0 3.5 1.0 0
## 62 5.9 3.0 4.2 1.5 0
## 63 6.0 2.2 4.0 1.0 0
## 64 6.1 2.9 4.7 1.4 0
## 65 5.6 2.9 3.6 1.3 0
## 66 6.7 3.1 4.4 1.4 0
## 67 5.6 3.0 4.5 1.5 0
## 68 5.8 2.7 4.1 1.0 0
## 69 6.2 2.2 4.5 1.5 0
## 70 5.6 2.5 3.9 1.1 0
## 71 5.9 3.2 4.8 1.8 0
## 72 6.1 2.8 4.0 1.3 0
## 73 6.3 2.5 4.9 1.5 0
## 74 6.1 2.8 4.7 1.2 0
## 75 6.4 2.9 4.3 1.3 0
## 76 6.6 3.0 4.4 1.4 0
## 77 6.8 2.8 4.8 1.4 0
## 78 6.7 3.0 5.0 1.7 0
## 79 6.0 2.9 4.5 1.5 0
## 80 5.7 2.6 3.5 1.0 0
## 81 5.5 2.4 3.8 1.1 0
## 82 5.5 2.4 3.7 1.0 0
## 83 5.8 2.7 3.9 1.2 0
## 84 6.0 2.7 5.1 1.6 0
## 85 5.4 3.0 4.5 1.5 0
## 86 6.0 3.4 4.5 1.6 0
## 87 6.7 3.1 4.7 1.5 0
## 88 6.3 2.3 4.4 1.3 0
## 89 5.6 3.0 4.1 1.3 0
## 90 5.5 2.5 4.0 1.3 0
## 91 5.5 2.6 4.4 1.2 0
## 92 6.1 3.0 4.6 1.4 0
## 93 5.8 2.6 4.0 1.2 0
## 94 5.0 2.3 3.3 1.0 0
## 95 5.6 2.7 4.2 1.3 0
## 96 5.7 3.0 4.2 1.2 0
## 97 5.7 2.9 4.2 1.3 0
## 98 6.2 2.9 4.3 1.3 0
## 99 5.1 2.5 3.0 1.1 0
## 100 5.7 2.8 4.1 1.3 0
## 101 6.3 3.3 6.0 2.5 0
## 102 5.8 2.7 5.1 1.9 0
## 103 7.1 3.0 5.9 2.1 0
## 104 6.3 2.9 5.6 1.8 0
## 105 6.5 3.0 5.8 2.2 0
## 106 7.6 3.0 6.6 2.1 0
## 107 4.9 2.5 4.5 1.7 0
## 108 7.3 2.9 6.3 1.8 0
## 109 6.7 2.5 5.8 1.8 0
## 110 7.2 3.6 6.1 2.5 0
## 111 6.5 3.2 5.1 2.0 0
## 112 6.4 2.7 5.3 1.9 0
## 113 6.8 3.0 5.5 2.1 0
## 114 5.7 2.5 5.0 2.0 0
## 115 5.8 2.8 5.1 2.4 0
## 116 6.4 3.2 5.3 2.3 0
## 117 6.5 3.0 5.5 1.8 0
## 118 7.7 3.8 6.7 2.2 0
## 119 7.7 2.6 6.9 2.3 0
## 120 6.0 2.2 5.0 1.5 0
## 121 6.9 3.2 5.7 2.3 0
## 122 5.6 2.8 4.9 2.0 0
## 123 7.7 2.8 6.7 2.0 0
## 124 6.3 2.7 4.9 1.8 0
## 125 6.7 3.3 5.7 2.1 0
## 126 7.2 3.2 6.0 1.8 0
## 127 6.2 2.8 4.8 1.8 0
## 128 6.1 3.0 4.9 1.8 0
## 129 6.4 2.8 5.6 2.1 0
## 130 7.2 3.0 5.8 1.6 0
## 131 7.4 2.8 6.1 1.9 0
## 132 7.9 3.8 6.4 2.0 0
## 133 6.4 2.8 5.6 2.2 0
## 134 6.3 2.8 5.1 1.5 0
## 135 6.1 2.6 5.6 1.4 0
## 136 7.7 3.0 6.1 2.3 0
## 137 6.3 3.4 5.6 2.4 0
## 138 6.4 3.1 5.5 1.8 0
## 139 6.0 3.0 4.8 1.8 0
## 140 6.9 3.1 5.4 2.1 0
## 141 6.7 3.1 5.6 2.4 0
## 142 6.9 3.1 5.1 2.3 0
## 143 5.8 2.7 5.1 1.9 0
## 144 6.8 3.2 5.9 2.3 0
## 145 6.7 3.3 5.7 2.5 0
## 146 6.7 3.0 5.2 2.3 0
## 147 6.3 2.5 5.0 1.9 0
## 148 6.5 3.0 5.2 2.0 0
## 149 6.2 3.4 5.4 2.3 0
## 150 5.9 3.0 5.1 1.8 0
## Species_versicolor Species_virginica
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
## 11 0 0
## 12 0 0
## 13 0 0
## 14 0 0
## 15 0 0
## 16 0 0
## 17 0 0
## 18 0 0
## 19 0 0
## 20 0 0
## 21 0 0
## 22 0 0
## 23 0 0
## 24 0 0
## 25 0 0
## 26 0 0
## 27 0 0
## 28 0 0
## 29 0 0
## 30 0 0
## 31 0 0
## 32 0 0
## 33 0 0
## 34 0 0
## 35 0 0
## 36 0 0
## 37 0 0
## 38 0 0
## 39 0 0
## 40 0 0
## 41 0 0
## 42 0 0
## 43 0 0
## 44 0 0
## 45 0 0
## 46 0 0
## 47 0 0
## 48 0 0
## 49 0 0
## 50 0 0
## 51 1 0
## 52 1 0
## 53 1 0
## 54 1 0
## 55 1 0
## 56 1 0
## 57 1 0
## 58 1 0
## 59 1 0
## 60 1 0
## 61 1 0
## 62 1 0
## 63 1 0
## 64 1 0
## 65 1 0
## 66 1 0
## 67 1 0
## 68 1 0
## 69 1 0
## 70 1 0
## 71 1 0
## 72 1 0
## 73 1 0
## 74 1 0
## 75 1 0
## 76 1 0
## 77 1 0
## 78 1 0
## 79 1 0
## 80 1 0
## 81 1 0
## 82 1 0
## 83 1 0
## 84 1 0
## 85 1 0
## 86 1 0
## 87 1 0
## 88 1 0
## 89 1 0
## 90 1 0
## 91 1 0
## 92 1 0
## 93 1 0
## 94 1 0
## 95 1 0
## 96 1 0
## 97 1 0
## 98 1 0
## 99 1 0
## 100 1 0
## 101 0 1
## 102 0 1
## 103 0 1
## 104 0 1
## 105 0 1
## 106 0 1
## 107 0 1
## 108 0 1
## 109 0 1
## 110 0 1
## 111 0 1
## 112 0 1
## 113 0 1
## 114 0 1
## 115 0 1
## 116 0 1
## 117 0 1
## 118 0 1
## 119 0 1
## 120 0 1
## 121 0 1
## 122 0 1
## 123 0 1
## 124 0 1
## 125 0 1
## 126 0 1
## 127 0 1
## 128 0 1
## 129 0 1
## 130 0 1
## 131 0 1
## 132 0 1
## 133 0 1
## 134 0 1
## 135 0 1
## 136 0 1
## 137 0 1
## 138 0 1
## 139 0 1
## 140 0 1
## 141 0 1
## 142 0 1
## 143 0 1
## 144 0 1
## 145 0 1
## 146 0 1
## 147 0 1
## 148 0 1
## 149 0 1
## 150 0 1
# DataExplorer plotting helpers applied to iris, one chart per call.
plot_missing(iris)

plot_histogram(iris)

# Box plots split by the Species column.
plot_boxplot(iris, by = "Species")

# Column 5 (Species) is dropped, leaving the four measurement columns.
plot_correlation(iris[-5])

plot_prcomp(iris)


# create_report(iris)