R, RStudio 설치하기

R 설치하기

R에서 쓰는 표현식, 연산자

# Basic expressions: assignment and arithmetic operators.
r <- 2
circle <- pi * r^2 # area of a circle with radius r
total <- 100
n <- 10
average <- total / n
# Exponentiation; `^` is the usual spelling of the `**` operator.
5^2
## [1] 25
# Parentheses override the default operator precedence.
(1 + 2) * 3
## [1] 9

R에서 모든 데이터는 벡터다

변수에 데이터를 넣는 방법

# Ways of putting data into a variable. `=` and `<-` both assign at top level;
# `<-` is used here throughout.
x <- 1
y <- 2
# c() combines its arguments into a single vector.
a <- c(1, 2, 3)
a
## [1] 1 2 3
# Indexing is 1-based: this is the second element.
a[2]
## [1] 2
# `from:to` builds an integer sequence.
b <- 1:10
b[9]
## [1] 9
# NOTE: this binds a variable named `c`. Calls such as c(a, b) below still
# reach the function, because R skips non-function bindings when it looks up
# the name of a function call.
c <- seq(5)
d <- seq(from = 1, to = 3, by = 0.25)
e <- c(a, b)
# Repeat the whole vector three times.
f <- rep(a, times = 3)
f
## [1] 1 2 3 1 2 3 1 2 3

기본 데이터형

데이터 구조

왜 벡터로 되어있을까?

# A vector is extended by combining it with further values.
a <- 1:5
a <- c(a, 101, 102)
b <- c(a, 103)
b
## [1]   1   2   3   4   5 101 102 103
# Arithmetic is element-wise, so BMI is computed for all five entries at once.
Height <- c(168, 173, 160, 145, 180)
Weight <- c(80, 65, 92, 53, 76)
BMI <- Weight / (Height / 100)^2
BMI
## [1] 28.34 21.72 35.94 25.21 23.46
# Recycling: the two-element b is reused along the ten elements of a.
a <- 1:10
b <- c(1, -1)
a + b
##  [1]  2  1  4  3  6  5  8  7 10  9
b <- 10
# Chained assignment: both c and a become 2.
a <- c <- 2
# c(1, -1) is recycled against the scalars, giving two results.
b^2 + c(1, -1) * 4 * a * c
## [1] 116  84

행렬이란 무엇인가?

# matrix() fills column by column; 12 values in 3 columns give 4 rows.
a <- matrix(1:12, ncol = 3)
a
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
# The first twelve upper-case letters as a character vector.
b <- LETTERS[1:12]
b
##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L"
# Reshape the same values into a 3 x 4 character matrix.
b <- matrix(b, ncol = 4)
b
##      [,1] [,2] [,3] [,4]
## [1,] "A"  "D"  "G"  "J" 
## [2,] "B"  "E"  "H"  "K" 
## [3,] "C"  "F"  "I"  "L"
# Single elements are addressed as [row, column].
b[3, 2]
## [1] "F"
b[2, 4]
## [1] "K"
# Leaving an index empty selects the whole row or column.
b[2, ]
## [1] "B" "E" "H" "K"
b[, 3]
## [1] "G" "H" "I"
# length() counts every element of the matrix.
length(b)
## [1] 12

범주형 자료

# Categorical data: build a factor straight from the character values.
sex <- factor(c("Male", "Female", "Female", "Male", "Male"))
sex
## [1] Male   Female Female Male   Male  
## Levels: Female Male
# str() shows the levels plus the integer code stored for each element.
str(sex)
##  Factor w/ 2 levels "Female","Male": 2 1 1 2 2
levels(sex)
## [1] "Female" "Male"
length(sex)
## [1] 5
# Numeric codes become a factor, then the levels are renamed in order
# (1 -> "none", 2 -> "ex-smoker", 3 -> "smoker").
smoking <- c(1, 1, 2, 3, 1)
smoking <- factor(smoking)
levels(smoking) <- c("none", "ex-smoker", "smoker")
smoking
## [1] none      none      ex-smoker smoker    none     
## Levels: none ex-smoker smoker

배열 (array)

데이터 프레임

# Combine the vectors and factors created above into one data frame,
# one column per variable.
mydata <- data.frame(
  height = Height,
  weight = Weight,
  sex = sex,
  smoking = smoking
)
mydata
##   height weight    sex   smoking
## 1    168     80   Male      none
## 2    173     65 Female      none
## 3    160     92 Female ex-smoker
## 4    145     53   Male    smoker
## 5    180     76   Male      none
# Row 3, all columns.
mydata[3, ]
##   height weight    sex   smoking
## 3    160     92 Female ex-smoker
# Column 1 by position, then the same column by name.
mydata[, 1]
## [1] 168 173 160 145 180
mydata$height
## [1] 168 173 160 145 180
# Assigning to a new name with `$` appends a column.
mydata$BMI <- mydata$weight * 10000 / mydata$height^2
mydata
##   height weight    sex   smoking   BMI
## 1    168     80   Male      none 28.34
## 2    173     65 Female      none 21.72
## 3    160     92 Female ex-smoker 35.94
## 4    145     53   Male    smoker 25.21
## 5    180     76   Male      none 23.46
# Structure: the type of every column.
str(mydata)
## 'data.frame':    5 obs. of  5 variables:
##  $ height : num  168 173 160 145 180
##  $ weight : num  80 65 92 53 76
##  $ sex    : Factor w/ 2 levels "Female","Male": 2 1 1 2 2
##  $ smoking: Factor w/ 3 levels "none","ex-smoker",..: 1 1 2 3 1
##  $ BMI    : num  28.3 21.7 35.9 25.2 23.5
# Per-column summary: quartiles for numeric columns, counts for factors.
summary(mydata)
##      height        weight         sex         smoking       BMI      
##  Min.   :145   Min.   :53.0   Female:2   none     :3   Min.   :21.7  
##  1st Qu.:160   1st Qu.:65.0   Male  :3   ex-smoker:1   1st Qu.:23.5  
##  Median :168   Median :76.0              smoker   :1   Median :25.2  
##  Mean   :165   Mean   :73.2                            Mean   :26.9  
##  3rd Qu.:173   3rd Qu.:80.0                            3rd Qu.:28.3  
##  Max.   :180   Max.   :92.0                            Max.   :35.9
# Draw the default plot for the data frame.
plot(mydata)

plot of chunk unnamed-chunk-13

실제 데이터를 가지고 실습

# Load the built-in mtcars data set and look at its first ten rows.
data(mtcars)
head(mtcars, n = 10)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D         24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Column types and a preview of the values.
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Six-number summary of every column.
summary(mtcars)
##       mpg            cyl            disp             hp       
##  Min.   :10.4   Min.   :4.00   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.4   1st Qu.:4.00   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.2   Median :6.00   Median :196.3   Median :123.0  
##  Mean   :20.1   Mean   :6.19   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.8   3rd Qu.:8.00   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.9   Max.   :8.00   Max.   :472.0   Max.   :335.0  
##       drat            wt            qsec            vs       
##  Min.   :2.76   Min.   :1.51   Min.   :14.5   Min.   :0.000  
##  1st Qu.:3.08   1st Qu.:2.58   1st Qu.:16.9   1st Qu.:0.000  
##  Median :3.69   Median :3.33   Median :17.7   Median :0.000  
##  Mean   :3.60   Mean   :3.22   Mean   :17.8   Mean   :0.438  
##  3rd Qu.:3.92   3rd Qu.:3.61   3rd Qu.:18.9   3rd Qu.:1.000  
##  Max.   :4.93   Max.   :5.42   Max.   :22.9   Max.   :1.000  
##        am             gear           carb     
##  Min.   :0.000   Min.   :3.00   Min.   :1.00  
##  1st Qu.:0.000   1st Qu.:3.00   1st Qu.:2.00  
##  Median :0.000   Median :4.00   Median :2.00  
##  Mean   :0.406   Mean   :3.69   Mean   :2.81  
##  3rd Qu.:1.000   3rd Qu.:4.00   3rd Qu.:4.00  
##  Max.   :1.000   Max.   :5.00   Max.   :8.00
# The mpg column as a plain numeric vector.
mtcars[["mpg"]]
##  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
## [29] 15.8 19.7 15.0 21.4
# Stem-and-leaf display of mpg.
stem(mtcars[["mpg"]])
## 
##   The decimal point is at the |
## 
##   10 | 44
##   12 | 3
##   14 | 3702258
##   16 | 438
##   18 | 17227
##   20 | 00445
##   22 | 88
##   24 | 4
##   26 | 03
##   28 | 
##   30 | 44
##   32 | 49
# Histogram of mpg. The call is kept as mtcars$mpg because hist() derives the
# default title from the argument expression.
hist(mtcars$mpg)

plot of chunk unnamed-chunk-14

# Box-and-whisker plot of the mpg column.
boxplot(mtcars$mpg)

plot of chunk unnamed-chunk-14

# Tukey's five-number summary of mpg (minimum, lower hinge, median,
# upper hinge, maximum).
fivenum(mtcars[["mpg"]])
## [1] 10.40 15.35 19.20 22.80 33.90
# Sample quantiles at the default probabilities 0, 25, 50, 75 and 100 %.
# The 25 % value differs slightly from the lower hinge reported by fivenum().
quantile(mtcars[["mpg"]])
##    0%   25%   50%   75%  100% 
## 10.40 15.43 19.20 22.80 33.90

데이터의 정렬(order)

# order() returns the row positions that put the values in ascending order.
order(mtcars$mpg)
##  [1] 15 16 24  7 17 31 14 23 22 29 12 13 11  6  5 10 25 30  1  2  4 32 21
## [24]  3  9  8 27 26 19 28 18 20
# Indexing the rows with that permutation sorts the data frame by mpg.
mtcars <- mtcars[order(mtcars$mpg), ]
head(mtcars)
##                      mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Cadillac Fleetwood  10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
## Camaro Z28          13.3   8  350 245 3.73 3.840 15.41  0  0    3    4
## Duster 360          14.3   8  360 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial   14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
## Maserati Bora       15.0   8  301 335 3.54 3.570 14.60  0  1    5    8
# The car names are stored as row names.
rownames(mtcars)
##  [1] "Cadillac Fleetwood"  "Lincoln Continental" "Camaro Z28"         
##  [4] "Duster 360"          "Chrysler Imperial"   "Maserati Bora"      
##  [7] "Merc 450SLC"         "AMC Javelin"         "Dodge Challenger"   
## [10] "Ford Pantera L"      "Merc 450SE"          "Merc 450SL"         
## [13] "Merc 280C"           "Valiant"             "Hornet Sportabout"  
## [16] "Merc 280"            "Pontiac Firebird"    "Ferrari Dino"       
## [19] "Mazda RX4"           "Mazda RX4 Wag"       "Hornet 4 Drive"     
## [22] "Volvo 142E"          "Toyota Corona"       "Datsun 710"         
## [25] "Merc 230"            "Merc 240D"           "Porsche 914-2"      
## [28] "Fiat X1-9"           "Honda Civic"         "Lotus Europa"       
## [31] "Fiat 128"            "Toyota Corolla"
# Character vectors can be ordered too; here, alphabetically by car name.
order(rownames(mtcars))
##  [1]  8  1  3  5 24  9  4 18 31 28 10 29 21 15  2 30  6 19 20 25 26 16 13
## [24] 11 12  7 17 27 32 23 14 22
mtcars <- mtcars[order(rownames(mtcars)), ]
# Several keys: sort by mpg first and break ties by wt.
mtcars <- mtcars[order(mtcars$mpg, mtcars$wt), ]

데이터의 일부 선택(subset)

# Of the 4-, 6- and 8-cylinder cars, keep only the 4- and 6-cylinder ones.
table(mtcars$cyl)
## 
##  4  6  8 
## 11  7 14
# A comparison on a column yields one TRUE/FALSE per row.
mtcars$cyl < 7
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [23]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# Using that logical vector as the row index keeps only the TRUE rows.
mtcars1 <- mtcars[mtcars$cyl < 7, ]
table(mtcars1$cyl)
## 
##  4  6 
## 11  7
# subset(data.frame, subset = <row condition>, select = <columns>)
mtcars1 <- subset(mtcars, subset = cyl < 7)
mtcars2 <- subset(mtcars1, select = c(mpg, cyl))

데이터의 요약: 테이블 만들기, 카이제곱 검정, 피셔 검정

# One-way frequency table of the number of cylinders.
table(mtcars$cyl)
## 
##  4  6  8 
## 11  7 14
# Open the documentation of the data set to look up what the columns mean.
help(mtcars)
# Two-way table: cyl in the rows, am in the columns.
table(mtcars$cyl, mtcars$am)
##    
##      0  1
##   4  3  8
##   6  4  3
##   8 12  2
# Turn the 0/1 am column into a labelled factor
# (0 -> "automatic", 1 -> "manual").
mtcars$tm <- factor(mtcars$am, labels = c("automatic", "manual"))
# Alternative that yields a character vector instead of a factor:
# mtcars$tm <- ifelse(mtcars$am == 0, "automatic", "manual")
str(mtcars)
## 'data.frame':    32 obs. of  12 variables:
##  $ mpg : num  10.4 10.4 13.3 14.3 14.7 15 15.2 15.2 15.5 15.8 ...
##  $ cyl : num  8 8 8 8 8 8 8 8 8 8 ...
##  $ disp: num  472 460 350 360 440 ...
##  $ hp  : num  205 215 245 245 230 335 150 180 150 264 ...
##  $ drat: num  2.93 3 3.73 3.21 3.23 3.54 3.15 3.07 2.76 4.22 ...
##  $ wt  : num  5.25 5.42 3.84 3.57 5.34 ...
##  $ qsec: num  18 17.8 15.4 15.8 17.4 ...
##  $ vs  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ am  : num  0 0 0 0 0 1 0 0 0 1 ...
##  $ gear: num  3 3 3 3 3 5 3 3 3 5 ...
##  $ carb: num  4 4 4 4 4 8 2 3 2 4 ...
##  $ tm  : Factor w/ 2 levels "automatic","manual": 1 1 1 1 1 2 1 1 1 2 ...
# The same cross-tabulation, now with readable column labels.
result <- table(mtcars$cyl, mtcars$tm)
result
##    
##     automatic manual
##   4         3      8
##   6         4      3
##   8        12      2
# Pearson chi-squared test on the table. The warning below is emitted by
# chisq.test() itself; it is part of the recorded output.
chisq.test(result)
## Warning: Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  result
## X-squared = 8.741, df = 2, p-value = 0.01265
# Default plot of the contingency table.
plot(result)

plot of chunk unnamed-chunk-17

# Bar chart of the cylinder-by-transmission table `result`, with one legend
# entry per table row ("4 cyl", "6 cyl", "8 cyl").
# The legend argument is spelled out in full (legend.text) rather than relying
# on partial matching of the abbreviated name `legend`.
barplot(result, legend.text = paste(rownames(result), "cyl"))

plot of chunk unnamed-chunk-17

# xtabs(counts ~ row variable + column variable); with nothing on the
# left-hand side of the formula, the rows of the data are counted.
result1 <- xtabs(~ cyl + tm, data = mtcars)
result1
##    tm
## cyl automatic manual
##   4         3      8
##   6         4      3
##   8        12      2
# Append row and column totals.
addmargins(result1)
##      tm
## cyl   automatic manual Sum
##   4           3      8  11
##   6           4      3   7
##   8          12      2  14
##   Sum        19     13  32
# Same test as before, this time on the xtabs result.
chisq.test(result1)
## Warning: Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  result1
## X-squared = 8.741, df = 2, p-value = 0.01265
# Fisher's exact test on the same table (left disabled, as in the original):
# fisher.test(result1)

데이터의 요약: 평균 구하기

# Draw the default plot for the mtcars data frame.
plot(mtcars)