零、開始之前

一、R的環境介紹

二、開始吧:介紹基本運算子與變數指派

# Assign a value with `<-`; typing the variable name prints its value.
x <- 5566
x
## [1] 5566
# Assigning again replaces the previous value (here with a character string).
x <- "不能亡"
x
## [1] "不能亡"
# rm(x) removes the object x; rm(list = ls()) removes every object that ls() lists.
rm(x)
rm(list = ls())
# Addition, subtraction, multiplication, division
1+1
## [1] 2
1-1
## [1] 0
1*2
## [1] 2
1/2
## [1] 0.5
# Further operators: `^` (power) and `%%` (remainder)
2^10
## [1] 1024
5%%2
## [1] 1
# Logical (comparison) operators
5==5
## [1] TRUE
3>5
## [1] FALSE
1!=1
## [1] FALSE
# Common numeric functions
sqrt(2)    # square root
## [1] 1.414214
sum(1:10)  # sum
## [1] 55
sd(1:10)   # standard deviation
## [1] 3.02765
median(1:10)  # median
## [1] 5.5
mean(1:10)  # mean
## [1] 5.5
var(1:10) # variance
## [1] 9.166667
round(var(1:10),digits = 3) # round to the third decimal place
## [1] 9.167

三、基本資料屬性

R所使用的數值屬性大概分為下列幾個大類,我們可以用測量尺度的方式來想像

# Try it! class() reports the class of each value.
class(2)
## [1] "numeric"
class("2")
## [1] "character"
class(2L)
## [1] "integer"
class(2==2)
## [1] "logical"
class(2+2i)
## [1] "complex"

同場加映:

# Numeric survey codes and the same codes stored as a factor
x <- c(4,4,3,2,2,1,4,2)
y <- factor(x)
factor(x, labels = c("非常不滿意", "不滿意", "滿意", "非常滿意")) # level names can be customised via `labels`
## [1] 非常滿意   非常滿意   滿意       不滿意     不滿意     非常不滿意
## [7] 非常滿意   不滿意    
## Levels: 非常不滿意 不滿意 滿意 非常滿意
# ordered = TRUE gives the levels an order (printed with `<`)
factor(x, ordered = TRUE)
## [1] 4 4 3 2 2 1 4 2
## Levels: 1 < 2 < 3 < 4
# The numeric vector can be summed; the factor version is left commented out for comparison.
sum(x)  # sum(y)
## [1] 22

四、R的資料格式與結構

在瞭解單個變數的屬性之後,我們開始介紹資料結構。資料結構就像是容器一樣,將我們剛剛前面提到的變數或是數值排列成特定的格式,依照複雜程度由低到高大概可以分為:

# A single value is a vector of length 1
x <- 5566
length(x)
## [1] 1
# c() combines values into one vector; mixing a number with a string
# yields a character vector (5566 is printed as "5566" below).
x <- 5566
y <- "得第一"
z <- c(x,y)
z
## [1] "5566"   "得第一"
length(z)
## [1] 2
# cbind() binds vectors as columns, rbind() binds them as rows
x <- 1:10
y <- letters[1:10]
cbind(x,y)
##       x    y  
##  [1,] "1"  "a"
##  [2,] "2"  "b"
##  [3,] "3"  "c"
##  [4,] "4"  "d"
##  [5,] "5"  "e"
##  [6,] "6"  "f"
##  [7,] "7"  "g"
##  [8,] "8"  "h"
##  [9,] "9"  "i"
## [10,] "10" "j"
rbind(x,y)
##   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## x "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" 
## y "a"  "b"  "c"  "d"  "e"  "f"  "g"  "h"  "i"  "j"
# seq(from, to, by) builds a sequence; the third argument is the step
seq(1,10)
##  [1]  1  2  3  4  5  6  7  8  9 10
seq(1,10,2)
## [1] 1 3 5 7 9
# rep() repeats the whole vector; x[5] picks the fifth element
x <- seq(1,10,2)
rep(x,3)
##  [1] 1 3 5 7 9 1 3 5 7 9 1 3 5 7 9
x[5]
## [1] 9
# byrow = FALSE fills the matrix column by column (see the output below).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
matrix(data=1:6,nrow=2,byrow=FALSE,dimnames = list(c("row1","row2"),
                                               c("col.1","col.2","col.3")))
##      col.1 col.2 col.3
## row1     1     3     5
## row2     2     4     6
# byrow = TRUE fills the matrix row by row.
matrix(data=1:6,nrow=2,byrow=TRUE,dimnames = list(c("row1","row2"),
                                               c("col.1","col.2","col.3")))
##      col.1 col.2 col.3
## row1     1     2     3
## row2     4     5     6
help("matrix")
# Dimensions of a matrix
mat <- matrix(1:6,nrow=2,byrow=TRUE)
nrow(mat)
## [1] 2
ncol(mat)
## [1] 3
dim(mat)
## [1] 2 3
# Row and column names can also be attached after creation
rownames(mat) <- c("row1","row2")
colnames(mat) <- c("col.1","col.2","col.3")
mat
##      col.1 col.2 col.3
## row1     1     2     3
## row2     4     5     6
# Index by position or by name: [row, column]
mat[1,2]
## [1] 2
mat["row1","col.2"]
## [1] 2
mat[1,]  # everything in the first row
## col.1 col.2 col.3 
##     1     2     3
mat[,2]  # everything in the second column
## row1 row2 
##    2    5
# Build a data frame directly from three equally long columns.
data.frame(
  name = c("Rory", "文峰", "EJ", "yen", "丞堯", "Sam", "Anna", "Ting", "承彥"),
  age = c(18, 25, 24, 24, 24, 24, 23, 23, 20),
  height = c(168, 172, 165, 156, 170, 168, 160, 158, 168)
)
##   name age height
## 1 Rory  18    168
## 2 文峰  25    172
## 3   EJ  24    165
## 4  yen  24    156
## 5 丞堯  24    170
## 6  Sam  24    168
## 7 Anna  23    160
## 8 Ting  23    158
## 9 承彥  20    168
# Another way: create the column vectors first, then combine them.
name <- c("Rory","文峰","EJ","yen","丞堯","Sam","Anna","Ting","承彥")
age <- c(18,25,24,24,24,24,23,23,20)
height <- c(168,172,165,156,170,168,160,158,168)
data.frame(name=name,age=age,height=height)
##   name age height
## 1 Rory  18    168
## 2 文峰  25    172
## 3   EJ  24    165
## 4  yen  24    156
## 5 丞堯  24    170
## 6  Sam  24    168
## 7 Anna  23    160
## 8 Ting  23    158
## 9 承彥  20    168
# Keep the data frame for inspection. stringsAsFactors = TRUE is spelled out
# because the summary()/str() output recorded below shows `name` as a factor,
# and data.frame() no longer converts strings to factors by default (R >= 4.0.0).
df <- data.frame(name = name, age = age, height = height,
                 stringsAsFactors = TRUE)
summary(df)
##       name        age            height   
##  丞堯   :1   Min.   :18.00   Min.   :156  
##  承彥   :1   1st Qu.:23.00   1st Qu.:160  
##  文峰   :1   Median :24.00   Median :168  
##  Anna   :1   Mean   :22.78   Mean   :165  
##  EJ     :1   3rd Qu.:24.00   3rd Qu.:168  
##  Rory   :1   Max.   :25.00   Max.   :172  
##  (Other):3
str(df)
## 'data.frame':    9 obs. of  3 variables:
##  $ name  : Factor w/ 9 levels "丞堯","承彥",..: 6 3 5 9 1 7 4 8 2
##  $ age   : num  18 25 24 24 24 24 23 23 20
##  $ height: num  168 172 165 156 170 168 160 158 168
# head() shows the first six rows
head(df)
##   name age height
## 1 Rory  18    168
## 2 文峰  25    172
## 3   EJ  24    165
## 4  yen  24    156
## 5 丞堯  24    170
## 6  Sam  24    168
# A list can hold objects of different kinds: here a string, a matrix and a
# data frame. (byrow = TRUE rather than T: T is a reassignable variable.)
vec.1 <- "這是一個列表說明"
mat.1 <- matrix(1:15,nrow=3,byrow = TRUE)
df.1 <- data.frame(name=c("Rory","文峰","EJ","yen","Sam","Anna"),age=c(18,25,24,24,24,23),
           height=c(168,172,165,158,168,160))
list.1 <- list(vec.1,mat.1,df.1)
names(list.1)<-c("向量","矩陣","資料框架")
# [[ ]] extracts one element, by position or by name
list.1[[1]]
## [1] "這是一個列表說明"
list.1[["向量"]]
## [1] "這是一個列表說明"
# Extract an element, then index inside it
list.1[[2]][3,1]
## [1] 11
list.1[[3]][2,]
##   name age height
## 2 文峰  25    172
# Assigning NULL deletes the element
list.1[[1]] <- NULL
list.1[["向量"]]  # the vector that used to sit in first position has been deleted
## NULL

五、資料清理與運用

介紹完資料屬性與資料結構,想必大家對於R的運用有個初步的瞭解。那麼,要如何找出特定的數值並處理呢? 主要有下列三種方式: - 利用[]取出位置 - 利用邏輯值TRUE/FALSE - 利用名稱

x <- c(1,4,5,6,7,8,12)
x[c(1,3,5)] # select by position
## [1] 1 5 7
# Select with a logical vector: elements at TRUE positions are kept.
# TRUE/FALSE are written out in full because T and F can be reassigned.
x[c(TRUE,FALSE,TRUE,FALSE,TRUE,FALSE,FALSE)]
## [1] 1 5 7
x[which(x%%2!=0)] # find the odd numbers
## [1] 1 5 7
x[which(x%%2==0)] # find the even numbers
## [1]  4  6  8 12
# which() returns the positions of the TRUE values
which(c(TRUE,FALSE,TRUE,TRUE,FALSE))
## [1] 1 3 4
which(c(TRUE,TRUE,TRUE,FALSE,TRUE))
## [1] 1 2 3 5
# A negative index drops the given positions; x itself only changes when
# the result is assigned back.
x <- 1:10
x[-c(1,3,5,7,9)]
## [1]  2  4  6  8 10
x
##  [1]  1  2  3  4  5  6  7  8  9 10
x <- x[-c(1,3,5,7,9)]
x
## [1]  2  4  6  8 10

QUIZ!!

請找出1~100之間,無法被2與3整除的數字,並指派為一個新的向量

# Numbers from 1 to 100 that are divisible by neither 2 nor 3,
# selected with a logical mask and stored in a new vector.
x <- 1:100
not_div_by_2 <- x %% 2 != 0
not_div_by_3 <- x %% 3 != 0
y <- x[not_div_by_2 & not_div_by_3]
y
##  [1]  1  5  7 11 13 17 19 23 25 29 31 35 37 41 43 47 49 53 55 59 61 65 67
## [24] 71 73 77 79 83 85 89 91 95 97
# Convert a vector between types with the as.*() functions and check the
# result with class() after each step.
x <- c(1,3,5,7)
class(x)
## [1] "numeric"
x <- as.integer(x)
class(x)
## [1] "integer"
x <- as.character(x)
class(x)
## [1] "character"
x <- as.numeric(x)
class(x)
## [1] "numeric"
# Strings that are not numbers become NA (with a warning) when converted
y <- c("你","好","嗎")
y <- as.numeric(y)
## Warning: 強制變更過程中產生了 NA
y
## [1] NA NA NA
name <- c("Rory","文峰","EJ","yen","Sam","Anna")
age <- c(18,25,24,24,24,23)
height <- c(168,172,165,158,168,160)
# stringsAsFactors = TRUE is explicit because the output recorded below prints
# `Levels:` for df$name, i.e. the column is a factor; data.frame() no longer
# converts strings to factors by default (R >= 4.0.0).
df <- data.frame(name = name, age = age, height = height,
                 stringsAsFactors = TRUE)
# `$` extracts one column; [6] then picks its sixth element
df$name[6]
## [1] Anna
## Levels: 文峰 Anna EJ Rory Sam yen
df$age[6]
## [1] 23
df$height[6]
## [1] 160
# [row, ] with an empty column index returns the whole row
df[6,]
##   name age height
## 6 Anna  23    160
# (installation step left commented out)
#install.packages("datasets")
library(datasets)
# List the data sets available in every installed package
data(package = .packages(all.available = TRUE))
## Warning in data(package = .packages(all.available = TRUE)): datasets have
## been moved from package 'base' to package 'datasets'
## Warning in data(package = .packages(all.available = TRUE)): datasets have
## been moved from package 'stats' to package 'datasets'
# Open the help page of the airquality data set
?airquality
# Work on dt.1 from here on and look at its structure
dt.1 <-  airquality
str(dt.1)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary statistics; the NA's row reports the missing values per column
summary(dt.1)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# Logical matrix with one cell per value: TRUE marks a missing value (NA)
is.na(dt.1)
##        Ozone Solar.R  Wind  Temp Month   Day
##   [1,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [2,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [3,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [4,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [5,]  TRUE    TRUE FALSE FALSE FALSE FALSE
##   [6,] FALSE    TRUE FALSE FALSE FALSE FALSE
##   [7,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [8,] FALSE   FALSE FALSE FALSE FALSE FALSE
##   [9,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [10,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [11,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [12,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [13,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [14,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [15,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [16,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [17,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [18,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [19,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [20,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [21,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [22,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [23,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [24,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [25,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [26,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [27,]  TRUE    TRUE FALSE FALSE FALSE FALSE
##  [28,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [29,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [30,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [31,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [32,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [33,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [34,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [35,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [36,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [37,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [38,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [39,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [40,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [41,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [42,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [43,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [44,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [45,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [46,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [47,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [48,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [49,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [50,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [51,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [52,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [53,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [54,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [55,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [56,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [57,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [58,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [59,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [60,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [61,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [62,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [63,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [64,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [65,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [66,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [67,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [68,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [69,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [70,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [71,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [72,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [73,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [74,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [75,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [76,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [77,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [78,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [79,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [80,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [81,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [82,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [83,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [84,]  TRUE   FALSE FALSE FALSE FALSE FALSE
##  [85,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [86,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [87,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [88,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [89,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [90,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [91,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [92,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [93,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [94,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [95,] FALSE   FALSE FALSE FALSE FALSE FALSE
##  [96,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [97,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [98,] FALSE    TRUE FALSE FALSE FALSE FALSE
##  [99,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [100,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [101,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [102,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [103,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [104,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [105,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [106,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [107,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [108,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [109,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [110,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [111,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [112,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [113,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [114,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [115,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [116,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [117,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [118,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [119,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [120,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [121,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [122,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [123,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [124,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [125,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [126,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [127,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [128,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [129,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [130,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [131,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [132,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [133,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [134,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [135,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [136,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [137,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [138,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [139,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [140,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [141,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [142,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [143,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [144,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [145,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [146,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [147,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [148,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [149,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [150,]  TRUE   FALSE FALSE FALSE FALSE FALSE
## [151,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [152,] FALSE   FALSE FALSE FALSE FALSE FALSE
## [153,] FALSE   FALSE FALSE FALSE FALSE FALSE
# Drop every row that contains a missing value
dt.1 <- na.omit(dt.1)
# Keep only the rows for July and September
in_jul_or_sep <- dt.1$Month %in% c(7, 9)
dt.2 <- dt.1[in_jul_or_sep, ]
# Rows whose solar radiation is above the mean of that subset
above_mean_solar <- dt.2$Solar.R > mean(dt.2$Solar.R)
dt.2[above_mean_solar, ]
##     Ozone Solar.R Wind Temp Month Day
## 62    135     269  4.1   84     7   1
## 63     49     248  9.2   85     7   2
## 64     32     236  9.2   81     7   3
## 67     40     314 10.9   83     7   6
## 68     77     276  5.1   88     7   7
## 69     97     267  6.3   92     7   8
## 70     97     272  5.7   92     7   9
## 73     10     264 14.3   73     7  12
## 77     48     260  6.9   81     7  16
## 78     35     274 10.3   82     7  17
## 79     61     285  6.3   84     7  18
## 81     63     220 11.5   85     7  20
## 85     80     294  8.6   86     7  24
## 86    108     223  8.0   85     7  25
## 89     82     213  7.4   88     7  28
## 90     50     275  7.4   86     7  29
## 91     64     253  7.4   83     7  30
## 92     59     254  9.2   81     7  31
## 125    78     197  5.1   92     9   2
## 130    20     252 10.9   80     9   7
## 131    23     220 10.3   78     9   8
## 132    21     230 10.9   75     9   9
## 133    24     259  9.7   73     9  10
## 134    44     236 14.9   81     9  11
## 135    21     259 15.5   76     9  12
## 136    28     238  6.3   77     9  13
## 139    46     237  6.9   78     9  16
## 140    18     224 13.8   67     9  17
## 142    24     238 10.3   68     9  19
## 143    16     201  8.0   82     9  20
## 144    13     238 12.6   64     9  21
## 149    30     193  6.9   70     9  26
## 153    20     223 11.5   68     9  30

六、流程控制與迴圈

在進行流程控制之前,我們必須先瞭解「判斷式」,判斷式就是讓程式判斷後會吐出TRUE or FALSE,再藉由TRUE or FALSE 的情況進行流程控制

# Conditions evaluate to TRUE or FALSE
'得第一'=='巧芯巧芯得第一'
## [1] FALSE
3>7
## [1] FALSE
# `&` is AND (both sides must be TRUE), `|` is OR (one side is enough)
5>3 & 5>4
## [1] TRUE
5>3 | 5>7
## [1] TRUE

當你今天想要做某些事情,而你需要設定一些條件的時候,就可以通過設條件式與指令的方式來達成。

if(判斷){#do something} else if (判斷){#do something} else{#do something}

# if on its own: the body runs only when the condition is TRUE.
age <- 20
if (age > 18) {
  print("你可以喝酒")
}
## [1] "你可以喝酒"
# Here the condition is FALSE, so nothing is printed.
age <- 16
if (age > 18) {
  print("你可以喝酒")
}

# if / else: exactly one of the two branches runs.
age <- 16
if (age > 18) {
  print("你可以喝酒")
} else {
  print("你不能喝酒")
}
## [1] "你不能喝酒"
# Draw one random value between -1000 and 20 and round it to a whole number,
# so the result below varies from run to run.
age <- round(runif(1, min = -1000, max = 20))

# if / else if / else: conditions are tested from top to bottom.
if (age > 18) {
  print("你可以喝酒")
} else if (age < 0) {
  print("年齡不會有負數")
} else {
  print("你不能喝酒")
}
## [1] "年齡不會有負數"

當你需要對多個數值(例如同時處理很多列的數值)做相同的事情時,你可以透過迴圈(for-loop)指令搭配前面講的流程控制指令來完成。

# A for loop runs its body once for every value of i
for (i in 1:10){
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Store one result per iteration. The result vector is created at its final
# length first and filled by position, instead of being re-created with c()
# on every pass.
all <- numeric(10)
for (i in seq_len(10)) {
  all[i] <- i * 2 + 1
}
all
##  [1]  3  5  7  9 11 13 15 17 19 21
# Pick out the even numbers from 1 to 100
all <- c()
for (i in 1:100) {
  # odd value: move straight on to the next i
  if (i %% 2 != 0) {
    next
  }
  all <- c(all, i)
}
all
##  [1]   2   4   6   8  10  12  14  16  18  20  22  24  26  28  30  32  34
## [18]  36  38  40  42  44  46  48  50  52  54  56  58  60  62  64  66  68
## [35]  70  72  74  76  78  80  82  84  86  88  90  92  94  96  98 100

- while 的應用:while 後面接的是一個條件,當這個條件為 TRUE 的時候才會(重複)執行下列指令。 - while 與 if 很常寫出殊途同歸的結果

# Compute the sum of the integers 1 to 100. The loop adds i to result on
# every pass (it accumulates a sum, not a product) and stops once i reaches 101.
i <- 1
result <- 0
while (i < 101) {
  result <- result + i
  i <- i + 1
}
# break: leave the loop entirely as soon as i is greater than 10
for (i in 1:100) {
  if (i > 10) {
    break
  }
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# next: skip printing for i strictly between 10 and 50, but keep looping
for (i in 1:100) {
  if (i > 10 && i < 50) {
    next
  }
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 50
## [1] 51
## [1] 52
## [1] 53
## [1] 54
## [1] 55
## [1] 56
## [1] 57
## [1] 58
## [1] 59
## [1] 60
## [1] 61
## [1] 62
## [1] 63
## [1] 64
## [1] 65
## [1] 66
## [1] 67
## [1] 68
## [1] 69
## [1] 70
## [1] 71
## [1] 72
## [1] 73
## [1] 74
## [1] 75
## [1] 76
## [1] 77
## [1] 78
## [1] 79
## [1] 80
## [1] 81
## [1] 82
## [1] 83
## [1] 84
## [1] 85
## [1] 86
## [1] 87
## [1] 88
## [1] 89
## [1] 90
## [1] 91
## [1] 92
## [1] 93
## [1] 94
## [1] 95
## [1] 96
## [1] 97
## [1] 98
## [1] 99
## [1] 100

七、綜合練習!!!

首先大家可以先把內建的data叫出來,是一個關於鳶尾花的資料集iris: practice <- iris。 首先我們同樣先對這個資料集作一些檢視,包含 head()、str()、summary()。 接著因為它是 data.frame,可以點開檢視一下資料集的樣式。 接著以下是今天的綜合練習,會用到今天教的一些技巧。 請大家花?分鐘完成

# answer
# 00 -- copy the built-in iris data set into `practice` and show its first rows
practice <- iris
head(practice)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Structure of the data frame: number of rows and the type of each column
str(practice)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Summary statistics for each column
summary(practice)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# 01 -- convert the Species column to character; the three lines select the
# same column by `$`, by position and by name
practice$Species <- as.character(practice$Species)
practice[,5] <- as.character(practice[,5])
practice[,'Species'] <- as.character(practice[,'Species'])

# 02 -- standard deviation of Sepal.Length, selected in the same three ways
sd(practice$Sepal.Length)
## [1] 0.8280661
sd(practice[,1])
## [1] 0.8280661
sd(practice[,'Sepal.Length'])
## [1] 0.8280661
# 03 -- four equivalent ways to extract the Sepal.Width column:
# by position, by name, with `$`, and with a logical column mask
# (TRUE/FALSE written out in full; T and F can be reassigned)
t1 <- practice[,2]
t1 <- practice[,'Sepal.Width']
t1 <- practice$Sepal.Width
t1 <- practice[, c(FALSE, TRUE, FALSE, FALSE, FALSE)]

# 04 -- name each element by its position (seq_along() is safe even for an
# empty vector, unlike 1:length()), then select by index or by name
names(t1) <- seq_along(t1)
t1[c(7,21)]
##   7  21 
## 3.4 3.4
t1[c('7','21')]
##   7  21 
## 3.4 3.4
# 05 -- compare a length against the mean petal length.
# The mean is computed once. Non-positive values are rejected first, and a
# value exactly equal to the mean gets its own message instead of falling
# through to the "no negatives" branch.
leng <- 5.5
avg_petal_length <- mean(practice$Petal.Length)
if (leng <= 0) {
  print("不會有負數")
} else if (leng > avg_petal_length) {
  print("超過平均")
} else if (leng < avg_petal_length) {
  print("小於平均")
} else {
  print("等於平均")
}
## [1] "超過平均"
# 06 -- variance of the first four columns, rounded to two decimals.
# The result vector is created at its final length and filled by position.
result <- numeric(4)
for (i in seq_len(4)) {
  result[i] <- round(var(practice[, i]), digits = 2)
}
result
## [1] 0.69 0.19 3.12 0.58
# The apply family gets the same numbers without an explicit loop. vapply()
# works column by column on the data frame and guarantees one numeric value
# per column.
vapply(practice[1:4], var, numeric(1))
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##    0.6856935    0.1899794    3.1162779    0.5810063

八、在上多變量之前

# Attach the I/O packages first. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script run on
# until a later call fails.
library(readxl)   # read_excel()
library(foreign)  # read.spss()
library(xlsx)     # write.xlsx()
library(haven)    # write_sav()

# Import: CSV, Excel, SPSS and RData files
csv_sample <- read.csv("sample.csv")
xls_sample <- read_excel("sample.xlsx")
sav_sample <- read.spss("TEDS2016_indQ_sample.sav", to.data.frame = TRUE,
                        use.value.labels = FALSE)
#sav_sample<- read.spss("TEDS2016_indQ_sample.sav",to.data.frame = T,use.value.labels = T)
load("sample.RData")

# Export each object again under a new file name
write.csv(csv_sample, file = "sample1.csv")
write.xlsx(xls_sample, file = "sample1.xlsx")
write_sav(sav_sample, "sample1.sav")
# NOTE(review): `datlis` is not created in this block; presumably it is
# restored by load("sample.RData") above -- confirm.
save(datlis, file = "sample1.Rdata")

九、迴圈能幹嘛?為什麼要用R來麻煩自己

# Start this section from an empty workspace (removes every object in it).
rm(list = ls())
load("sample.RData")
data <- datlis[[5]]  # the 2017 data, 7530 rows in total
vars <- c("gender","age","edu") # keep only gender, age and education level
df <- data[names(data) %in% vars] # cut out the sub-dataset

# Split df into consecutive chunks of chunk_size rows. The number of chunks is
# derived from nrow(df) rather than hard-coded, so the loop stays correct when
# the row count changes; the last chunk is capped at the final row.
chunk_size <- 500
n_chunks <- ceiling(nrow(df) / chunk_size)
for (i in seq_len(n_chunks) - 1L) {
  a <- i * chunk_size + 1
  b <- min(i * chunk_size + chunk_size, nrow(df))
  assign(paste0("dataset", i), df[a:b, ])
  print(paste("dataset", i, "From row", a, "to row", b))
}
## [1] "dataset 0 From row 1 to row 500"
## [1] "dataset 1 From row 501 to row 1000"
## [1] "dataset 2 From row 1001 to row 1500"
## [1] "dataset 3 From row 1501 to row 2000"
## [1] "dataset 4 From row 2001 to row 2500"
## [1] "dataset 5 From row 2501 to row 3000"
## [1] "dataset 6 From row 3001 to row 3500"
## [1] "dataset 7 From row 3501 to row 4000"
## [1] "dataset 8 From row 4001 to row 4500"
## [1] "dataset 9 From row 4501 to row 5000"
## [1] "dataset 10 From row 5001 to row 5500"
## [1] "dataset 11 From row 5501 to row 6000"
## [1] "dataset 12 From row 6001 to row 6500"
## [1] "dataset 13 From row 6501 to row 7000"
## [1] "dataset 14 From row 7001 to row 7500"
## [1] "dataset 15 From row 7501 to row 7530"
# Save all data
rm(data, datlis, df, a, b, i, vars, chunk_size, n_chunks)  # remove what is no longer needed
# Collect only the datasetN objects created above, so that any other object
# in the workspace is never written out by accident.
files <- mget(ls(pattern = "^dataset[0-9]+$"))
for (i in seq_along(files)) {
  write.csv(files[[i]], paste0(names(files)[i], ".csv"), fileEncoding = "BIG5")
}