Rことはじめ

# Basic arithmetic: each expression is evaluated and its value is printed.
2 + 3
## [1] 5
3 - 4
## [1] -1
5 * 6
## [1] 30
6 / 3
## [1] 2
# Functions are called with their arguments in parentheses.
sqrt(x = 4)
## [1] 2
# `<-` stores a value under a name; typing the name prints the value.
obj <- 2
obj
## [1] 2
# Assigning to the same name again replaces the old value.
obj <- 3
obj
## [1] 3
# Stored objects can be used in expressions like plain numbers.
obj2 <- 2
obj3 <- 3
obj2 + obj3
## [1] 5
# c() combines several values into one vector.
obj <- c(1, 2, 3)
obj
## [1] 1 2 3
# `from:to` already yields the integer sequence, so no c() is needed.
obj <- 1:10
obj
##  [1]  1  2  3  4  5  6  7  8  9 10
# Arithmetic on a vector is applied to every element.
obj * 2
##  [1]  2  4  6  8 10 12 14 16 18 20
# Reshape 1..10 into a matrix with 5 rows. By default matrix() fills the
# cells column by column, as the printed result shows.
obj <- matrix(1:10, nrow = 5)
obj
##      [,1] [,2]
## [1,]    1    6
## [2,]    2    7
## [3,]    3    8
## [4,]    4    9
## [5,]    5   10
# byrow = TRUE fills row by row instead. TRUE is written out in full because
# `T` is an ordinary variable that can be reassigned, while TRUE is reserved.
obj <- matrix(1:10, nrow = 5, byrow = TRUE)
obj
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6
## [4,]    7    8
## [5,]    9   10
# Arithmetic on a matrix is applied to every cell.
obj * 2
##      [,1] [,2]
## [1,]    2    4
## [2,]    6    8
## [3,]   10   12
## [4,]   14   16
## [5,]   18   20
# A list bundles named elements that may differ in type and in length
# (here: 3 names, 3 genders, 2 heights and 4 weights).
obj <- list(
  name   = c("kosugi", "tanaka", "suzuki"),
  gender = c("male", "female", "male"),
  hight  = c(170, 160),
  weight = c(70.6, 80.9, 90.6, 40.3)
)
obj
## $name
## [1] "kosugi" "tanaka" "suzuki"
## 
## $gender
## [1] "male"   "female" "male"  
## 
## $hight
## [1] 170 160
## 
## $weight
## [1] 70.6 80.9 90.6 40.3
# `$` pulls a single element out of the list by name.
obj$name
## [1] "kosugi" "tanaka" "suzuki"
obj$weight
## [1] 70.6 80.9 90.6 40.3
# str() gives a compact overview of the type and length of each element.
str(object = obj)
## List of 4
##  $ name  : chr [1:3] "kosugi" "tanaka" "suzuki"
##  $ gender: chr [1:3] "male" "female" "male"
##  $ hight : num [1:2] 170 160
##  $ weight: num [1:4] 70.6 80.9 90.6 40.3
# A data frame holds equal-length columns, one row per observation.
# stringsAsFactors = TRUE is set explicitly: its default became FALSE in
# R 4.0.0, and without it `name` would be a character column, so the str()
# and summary() output shown below would no longer be reproduced.
obj <- data.frame(
  name = c("kosugi", "tanaka", "suzuki"),
  gender = c(1, 2, 1),
  hight = c(170, 160, 170),
  weight = c(70.6, 80.9, 90.6),
  stringsAsFactors = TRUE
)
obj
##     name gender hight weight
## 1 kosugi      1   170   70.6
## 2 tanaka      2   160   80.9
## 3 suzuki      1   170   90.6
str(obj)
## 'data.frame':    3 obs. of  4 variables:
##  $ name  : Factor w/ 3 levels "kosugi","suzuki",..: 1 3 2
##  $ gender: num  1 2 1
##  $ hight : num  170 160 170
##  $ weight: num  70.6 80.9 90.6
obj$gender
## [1] 1 2 1
# Turn the numeric codes into a labelled factor. `levels` is given explicitly
# so that 1 is always "male" and 2 is always "female", even if one of the
# codes did not occur in the data.
obj$gender <- factor(
  obj$gender,
  levels = c(1, 2),
  labels = c("male", "female")
)
obj
##     name gender hight weight
## 1 kosugi   male   170   70.6
## 2 tanaka female   160   80.9
## 3 suzuki   male   170   90.6
str(obj)
## 'data.frame':    3 obs. of  4 variables:
##  $ name  : Factor w/ 3 levels "kosugi","suzuki",..: 1 3 2
##  $ gender: Factor w/ 2 levels "male","female": 1 2 1
##  $ hight : num  170 160 170
##  $ weight: num  70.6 80.9 90.6
obj$hight
## [1] 170 160 170
# [row, column] indexing: leave one side empty to take every row or column.
obj[3, ]
##     name gender hight weight
## 3 suzuki   male   170   90.6
obj[, 2]
## [1] male   female male  
## Levels: male female
obj[3, 2]
## [1] male
## Levels: male female
# NA marks a missing value; summary() counts it separately as NA's.
obj[3, 2] <- NA
obj
##     name gender hight weight
## 1 kosugi   male   170   70.6
## 2 tanaka female   160   80.9
## 3 suzuki   <NA>   170   90.6
summary(obj)
##      name      gender      hight         weight    
##  kosugi:1   male  :1   Min.   :160   Min.   :70.6  
##  suzuki:1   female:1   1st Qu.:165   1st Qu.:75.8  
##  tanaka:1   NA's  :1   Median :170   Median :80.9  
##                        Mean   :167   Mean   :80.7  
##                        3rd Qu.:170   3rd Qu.:85.8  
##                        Max.   :170   Max.   :90.6

Rで描画

Rがもっているサンプルデータセットを使って練習。サンプルデータセットの特徴についてはこちらを参照>> http://goo.gl/7nCscZ

# Load the built-in VADeaths data set; it prints as a numeric matrix with
# age groups in the rows and four population groups in the columns.
data("VADeaths")
VADeaths
##       Rural Male Rural Female Urban Male Urban Female
## 50-54       11.7          8.7       15.4          8.4
## 55-59       18.1         11.7       24.3         13.6
## 60-64       26.9         20.3       37.0         19.3
## 65-69       41.0         30.9       54.6         35.1
## 70-74       66.0         54.3       71.1         50.0
# Draw a bar chart of the matrix using the barplot() defaults.
barplot(height = VADeaths)

plot of chunk unnamed-chunk-2

# beside = TRUE draws the bars of each column side by side, not stacked.
barplot(height = VADeaths, beside = TRUE)

plot of chunk unnamed-chunk-2

別のデータ例。

# Load the built-in iris data set and print per-column summaries: quartiles
# and mean for the four numeric columns, counts per level for Species.
data("iris")
summary(object = iris)
##   Sepal.Length   Sepal.Width    Petal.Length   Petal.Width 
##  Min.   :4.30   Min.   :2.00   Min.   :1.00   Min.   :0.1  
##  1st Qu.:5.10   1st Qu.:2.80   1st Qu.:1.60   1st Qu.:0.3  
##  Median :5.80   Median :3.00   Median :4.35   Median :1.3  
##  Mean   :5.84   Mean   :3.06   Mean   :3.76   Mean   :1.2  
##  3rd Qu.:6.40   3rd Qu.:3.30   3rd Qu.:5.10   3rd Qu.:1.8  
##  Max.   :7.90   Max.   :4.40   Max.   :6.90   Max.   :2.5  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Histogram of a single numeric column.
hist(x = iris$Sepal.Length)

plot of chunk unnamed-chunk-3

# Scatter plot of sepal length (x axis) against sepal width (y axis).
plot(x = iris$Sepal.Length, y = iris$Sepal.Width)

plot of chunk unnamed-chunk-3

# Same scatter plot with filled circles (pch = 21). The integer code of each
# row's Species level (1, 2 or 3) picks the fill colour 2, 3 or 4.
plot(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  pch = 21,
  bg = c(2, 3, 4)[as.integer(iris$Species)]
)

plot of chunk unnamed-chunk-3

Rで記述統計

# Location: arithmetic mean and median of sepal length.
mean(x = iris$Sepal.Length)
## [1] 5.843
median(x = iris$Sepal.Length)
## [1] 5.8
# Spread: variance and standard deviation of sepal length.
var(x = iris$Sepal.Length)
## [1] 0.6857
sd(x = iris$Sepal.Length)
## [1] 0.8281
# Association: correlation between sepal length and sepal width.
cor(x = iris$Sepal.Length, y = iris$Sepal.Width)
## [1] -0.1176

データ・ハンドリング

# Read the sample data from a UTF-8 encoded CSV file in the working directory.
#  - header = TRUE: the first line holds the column names (written in full
#    instead of the partially matched, reassignable `head=T`).
#  - na.strings = "*": cells containing "*" are read as missing values (NA).
#  - fileEncoding = "UTF-8": decode the file as UTF-8 while reading. The
#    `encoding` argument does not re-encode the input, so it cannot do this.
#  - stringsAsFactors = TRUE: keep `class` a factor on R >= 4.0.0 too, so the
#    per-level counts in the summary below are reproduced.
# NOTE: the name `sample` is also used by a base R function; it is kept here
# because the statements that follow refer to the data under this name.
sample <- read.csv(
  "sample_utf8.csv",
  header = TRUE,
  na.strings = "*",
  fileEncoding = "UTF-8",
  stringsAsFactors = TRUE
)
summary(sample)
##        ID        class      gender        height        weight    
##  Min.   :  1.0   A:34   Min.   :1.0   Min.   :132   Min.   :33.2  
##  1st Qu.: 25.8   B:33   1st Qu.:1.0   1st Qu.:145   1st Qu.:50.6  
##  Median : 50.5   C:33   Median :1.5   Median :150   Median :56.0  
##  Mean   : 50.5          Mean   :1.5   Mean   :151   Mean   :56.8  
##  3rd Qu.: 75.2          3rd Qu.:2.0   3rd Qu.:157   3rd Qu.:63.1  
##  Max.   :100.0          Max.   :2.0   Max.   :172   Max.   :87.0  
##                                                                   
##      kokugo         sansuu          rika          syakai    
##  Min.   :34.0   Min.   :58.0   Min.   :34.0   Min.   :20.0  
##  1st Qu.:55.0   1st Qu.:68.0   1st Qu.:46.5   1st Qu.:40.8  
##  Median :64.0   Median :72.0   Median :51.0   Median :48.0  
##  Mean   :64.5   Mean   :71.5   Mean   :50.5   Mean   :49.4  
##  3rd Qu.:74.0   3rd Qu.:75.5   3rd Qu.:54.0   3rd Qu.:57.2  
##  Max.   :94.0   Max.   :86.0   Max.   :66.0   Max.   :86.0  
##  NA's   :1      NA's   :1      NA's   :1                    
##       eigo     
##  Min.   :25.0  
##  1st Qu.:49.0  
##  Median :61.0  
##  Mean   :59.9  
##  3rd Qu.:71.0  
##  Max.   :94.0  
## 
# head() and tail() show the first and last six rows.
head(sample)
##   ID class gender height weight kokugo sansuu rika syakai eigo
## 1  1     A      1  170.2  64.16     34     74   43     20   28
## 2  2     B      1  165.7  75.63     82     63   44     54   72
## 3  3     C      1  157.8  62.64     50     74   55     26   44
## 4  4     A      1  161.6  69.57     57     75   55     46   44
## 5  5     B      1  161.1  60.23     74     73   54     41   65
## 6  6     C      1  156.2  54.99     NA     58   38     47   58
tail(sample)
##      ID class gender height weight kokugo sansuu rika syakai eigo
## 95   95     B      2  141.1  53.34     65     77   53     51   52
## 96   96     C      2  142.9  47.64     65     66   46     44   52
## 97   97     A      2  146.7  63.39     61     77   53     53   50
## 98   98     B      2  147.0  60.59     91     78   52     76   92
## 99   99     C      2  151.0  63.11     44     82   66     23   38
## 100 100     A      2  137.8  47.48     56     64   52     36   60
# Keep only the rows that have no missing value in any column.
# complete.cases() returns one TRUE/FALSE per row, used here as a row filter.
subdata <- sample[complete.cases(sample), , drop = FALSE]
summary(object = subdata)
##        ID        class      gender         height        weight    
##  Min.   :  1.0   A:33   Min.   :1.00   Min.   :132   Min.   :33.2  
##  1st Qu.: 28.0   B:33   1st Qu.:1.00   1st Qu.:144   1st Qu.:50.1  
##  Median : 52.0   C:31   Median :2.00   Median :150   Median :56.1  
##  Mean   : 51.8          Mean   :1.52   Mean   :151   Mean   :56.9  
##  3rd Qu.: 76.0          3rd Qu.:2.00   3rd Qu.:158   3rd Qu.:63.1  
##  Max.   :100.0          Max.   :2.00   Max.   :172   Max.   :87.0  
##      kokugo         sansuu          rika          syakai    
##  Min.   :34.0   Min.   :58.0   Min.   :34.0   Min.   :20.0  
##  1st Qu.:55.0   1st Qu.:68.0   1st Qu.:47.0   1st Qu.:40.0  
##  Median :63.0   Median :72.0   Median :51.0   Median :48.0  
##  Mean   :64.3   Mean   :71.6   Mean   :50.6   Mean   :49.3  
##  3rd Qu.:74.0   3rd Qu.:76.0   3rd Qu.:54.0   3rd Qu.:57.0  
##  Max.   :94.0   Max.   :86.0   Max.   :66.0   Max.   :86.0  
##       eigo     
##  Min.   :25.0  
##  1st Qu.:49.0  
##  Median :61.0  
##  Mean   :59.7  
##  3rd Qu.:71.0  
##  Max.   :94.0
# Keep all rows but only the four listed columns, in the listed order.
subdata2 <- sample[, c("gender", "class", "eigo", "syakai"), drop = FALSE]
summary(object = subdata2)
##      gender    class       eigo          syakai    
##  Min.   :1.0   A:34   Min.   :25.0   Min.   :20.0  
##  1st Qu.:1.0   B:33   1st Qu.:49.0   1st Qu.:40.8  
##  Median :1.5   C:33   Median :61.0   Median :48.0  
##  Mean   :1.5          Mean   :59.9   Mean   :49.4  
##  3rd Qu.:2.0          3rd Qu.:71.0   3rd Qu.:57.2  
##  Max.   :2.0          Max.   :94.0   Max.   :86.0
# Add a new column holding the row-wise sum of the two scores.
subdata2$total <- with(subdata2, eigo + syakai)
hist(x = subdata2$total)

plot of chunk unnamed-chunk-5

# Quartiles of the total score. The argument is spelled `probs` in full
# instead of relying on partial matching of `prob`.
quantile(subdata2$total, probs = c(0, 0.25, 0.5, 0.75, 1))
##    0%   25%   50%   75%  100% 
##  48.0  91.0 109.0 124.5 168.0
# Split at the median printed above: code 1 when total >= 109.0, else 2.
subdata2$grade <- ifelse(subdata2$total >= 109.0, 1, 2)
# `levels` is given explicitly so that code 1 is always "good" and code 2 is
# always "poor", even if one of the two groups turned out to be empty.
subdata2$grade <- factor(
  subdata2$grade,
  levels = c(1, 2),
  labels = c("good", "poor")
)
# The factor's integer codes pick the fill colour: good -> 2, poor -> 3.
plot(subdata2$eigo, subdata2$syakai, pch = 21, bg = c(2, 3)[subdata2$grade])

plot of chunk unnamed-chunk-5

# Four-level grade built from the quartiles of `total` (91.0, 109.0, 124.5):
#   1 "Great": total >= 124.5
#   2 "Good" : 109.0 <= total < 124.5
#   3 "Poor" : 91.0  <  total < 109.0
#   4 "Bad"  : total <= 91.0
# The median boundary uses `>=`, the same rule as the two-level `grade`
# (total >= 109.0 is "good"), so a total of exactly 109 is not "good" in one
# variable and "Poor" in the other.
subdata2$grade2 <- ifelse(
  subdata2$total >= 124.5, 1,
  ifelse(
    subdata2$total >= 109.0, 2,
    ifelse(subdata2$total > 91.0, 3, 4)
  )
)
# `levels` is given explicitly so every code keeps its own label even if one
# of the four groups turned out to be empty.
subdata2$grade2 <- factor(
  subdata2$grade2,
  levels = 1:4,
  labels = c("Great", "Good", "Poor", "Bad")
)
# The factor's integer codes pick the fill colour: 2, 3, 4 or 5.
plot(
  subdata2$eigo, subdata2$syakai,
  pch = 21, bg = c(2, 3, 4, 5)[subdata2$grade2]
)

plot of chunk unnamed-chunk-5

本日の課題

  • サンプルデータのBMIを計算してください。BMIは「体重(kg) ÷ 身長(m)の2乗」で求めます(サンプルデータの身長はcm単位のようなので,100で割ってmに直してから計算してください)。計算した値は,BMIという変数を作って保存してください。
  • BMI値の判定基準は一般的には、18.5未満で「やせ」、18.5以上25未満で「標準」、 25以上30未満で「肥満」、30以上で「高度肥満」と判定されるそうです。この基準にしたがって,サンプルを分類してください。分類した変数は,BMI2という変数名で保存してください。
  • サンプルデータの身長と体重の散布図を描いてください。また,その散布図をBMI2のランク別に塗り分けてください。