# Session setup: resolve paths from the project root, switch into the chapter
# folder, and attach swirl. Lines starting with "##" are echoed console output.
# One-time install of `here` (left commented out):
#install.packages("here")
# Make it possible to use paths relative to the project root directory.
library(here)
## here() starts at /cloud/project
# Declare this R Markdown file's location as a path relative to the root.
i_am("INTRO/qss01.Rmd")
## here() starts at /cloud/project
# Change the working directory to the INTRO folder under the project root.
setwd(here("INTRO"))
getwd()
## [1] "/cloud/project/INTRO"
# One-time swirl install and course download (left commented out):
#install.packages("swirl")
#library(swirl)
#install_course_github("kosukeimai","qss-swirl")
library(swirl)
## 
## | Hi! Type swirl() when you are ready to begin.
# Interactive entry point; kept commented out so the script runs unattended.
#swirl()

1.3 Rの基礎

1.3.1 算術演算

# R used as a calculator; each "##" line is the echoed result of the
# expression directly above it.
5 + 3
## [1] 8
5 - 3
## [1] 2
5 / 3
## [1] 1.666667
# `^` is exponentiation.
5 ^ 3
## [1] 125
# Parentheses are evaluated before the multiplication.
5 * (10-3)
## [1] 35
# sqrt() is a built-in function: square root.
sqrt(4)
## [1] 2

1.3.2 オブジェクト

# Objects: store a value under a name with `<-`, then type the name to print it.
result <- 5 + 3
result
## [1] 8
# Assigning to the same name again replaces the stored value.
result <- 5 - 3
result
## [1] 2
# ls() lists the names of the objects currently defined.
ls()
## [1] "result"
# Objects can hold character strings as well as numbers.
kosuke <- "instructor"
kosuke
## [1] "instructor"
kosuke <- "instructor and author"
kosuke
## [1] "instructor and author"
# Names are case-sensitive: `Result` (the string "5") is a different object
# from `result` (the number 2).
Result <- "5"
Result
## [1] "5"
result
## [1] 2
# class() reports the kind of object a name refers to.
class(result)
## [1] "numeric"
Result
## [1] "5"
class(Result)
## [1] "character"
class(sqrt)
## [1] "function"

1.3.3 ベクトル

# Vectors: c() combines values into a single vector.
world.pop <- c(2525779, 3026003, 3691173, 4449049, 5320817, 6127700, 6916183)
world.pop
## [1] 2525779 3026003 3691173 4449049 5320817 6127700 6916183
# c() also concatenates existing vectors (and single values) in order.
pop.first <- c(2525779, 3026003, 3691173)
pop.second <- c(4449049, 5320817, 6127700)
pop.all <- c(pop.first, pop.second, 6916183)
pop.all
## [1] 2525779 3026003 3691173 4449049 5320817 6127700 6916183
# Square brackets select elements by position; the order of the index vector
# determines the order of the result.
world.pop[2]
## [1] 3026003
world.pop[c(2, 4)]
## [1] 3026003 4449049
world.pop[c(4, 2)]
## [1] 4449049 3026003
# A negative index drops that element instead of selecting it.
world.pop[-3]
## [1] 2525779 3026003 4449049 5320817 6127700 6916183
# Arithmetic applies to every element of the vector.
# NOTE(review): the name suggests the result is in millions -- confirm units.
pop.million <- world.pop / 1000
pop.million
## [1] 2525.779 3026.003 3691.173 4449.049 5320.817 6127.700 6916.183
# Each value expressed as a ratio to the first element.
pop.rate <-  world.pop / world.pop[1]
pop.rate
## [1] 1.000000 1.198047 1.461400 1.761456 2.106604 2.426063 2.738238
# Dropping the first element and dropping the seventh (last) element gives two
# aligned vectors, so the subtraction yields each value minus its predecessor.
pop.increase <- world.pop[-1] -  world.pop[-7]
# Increase as a percentage of the preceding value.
percent.increase <- (pop.increase / world.pop[-7]) *100
percent.increase
## [1] 19.80474 21.98180 20.53212 19.59448 15.16464 12.86752
# Indexing on the left-hand side replaces the selected elements in place.
percent.increase[c(1,2)] <- c(20, 22)
percent.increase
## [1] 20.00000 22.00000 20.53212 19.59448 15.16464 12.86752

1.3.4 関数

# Built-in summary functions applied to the world.pop vector.
length(world.pop)
## [1] 7
min(world.pop)
## [1] 2525779
max(world.pop)
## [1] 6916183
range(world.pop)
## [1] 2525779 6916183
mean(world.pop)
## [1] 4579529
# The mean computed by hand gives the same result as mean().
sum(world.pop) / length(world.pop)
## [1] 4579529
# seq() builds a regular sequence; here one value per decade.
year <- seq(from = 1950, to = 2010, by =10)
year
## [1] 1950 1960 1970 1980 1990 2000 2010
# Named arguments may be supplied in any order.
seq(to = 2010, by =10, from = 1950)
## [1] 1950 1960 1970 1980 1990 2000 2010
# A negative step produces a decreasing sequence.
seq(from = 2010, to = 1950, by =-10)
## [1] 2010 2000 1990 1980 1970 1960 1950
# The `:` operator is shorthand for a sequence with step 1 (or -1).
2008:2012
## [1] 2008 2009 2010 2011 2012
2012:2008
## [1] 2012 2011 2010 2009 2008
# names() returns NULL until element names have been assigned.
names(world.pop)
## NULL
# Label each element with its year; the echoed names are character strings.
names(world.pop) <- year
names(world.pop)
## [1] "1950" "1960" "1970" "1980" "1990" "2000" "2010"
world.pop
##    1950    1960    1970    1980    1990    2000    2010 
## 2525779 3026003 3691173 4449049 5320817 6127700 6916183
# Summarise a vector by its total, its number of elements, and its mean
# (the total divided by the number of elements).
#
# x: a vector accepted by sum() and length().
# Returns a vector whose elements are named "sum", "length" and "mean".
my.summary <- function(x) {
  total <- sum(x)
  count <- length(x)
  # Build the named result in one step; the last expression is the return value.
  c(sum = total, length = count, mean = total / count)
}

# Try my.summary() on a simple integer sequence and on the population vector.
z <- 1:10
my.summary(z)
##    sum length   mean 
##   55.0   10.0    5.5
my.summary(world.pop)
##      sum   length     mean 
## 32056704        7  4579529

例)通学時間

# Worked example: five commuting times summarised with my.summary().
# NOTE(review): units are not stated in the code -- presumably minutes.
time <- c(40, 90, 50, 50, 90)
my.summary(time)
##    sum length   mean 
##    320      5     64
# Attach a one-letter label to each element.
namae <- c("N", "I", "K", "S", "F")
names(time) <- namae
time
##  N  I  K  S  F 
## 40 90 50 50 90

1.3.5 データファイル

# Data files: read a CSV file from the working directory into a data frame.
UNpop <- read.csv("UNpop.csv")
class(UNpop)
## [1] "data.frame"
# load() restores the objects stored in an .RData file.
# NOTE(review): presumably this re-creates `UNpop`, replacing the object read
# from the CSV above -- confirm against the contents of UNpop.RData.
load("UNpop.RData")
# Inspect the column names and the dimensions of the data frame.
names(UNpop)
## [1] "year"      "world.pop"
nrow(UNpop)
## [1] 7
ncol(UNpop)
## [1] 2
dim(UNpop)
## [1] 7 2
# summary() prints per-column descriptive statistics.
summary(UNpop)
##       year        world.pop      
##  Min.   :1950   Min.   :2525779  
##  1st Qu.:1965   1st Qu.:3358588  
##  Median :1980   Median :4449049  
##  Mean   :1980   Mean   :4579529  
##  3rd Qu.:1995   3rd Qu.:5724258  
##  Max.   :2010   Max.   :6916183
# Two equivalent ways to extract one column as a vector.
UNpop$world.pop
## [1] 2525779 3026003 3691173 4449049 5320817 6127700 6916183
UNpop[,"world.pop"]
## [1] 2525779 3026003 3691173 4449049 5320817 6127700 6916183
# [rows, columns]: leaving the column index empty keeps every column.
UNpop[c(1,2,3),]
##   year world.pop
## 1 1950   2525779
## 2 1960   3026003
## 3 1970   3691173
UNpop[1:3,"year"]
## [1] 1950 1960 1970
# Every other element of the column, starting from the first row.
UNpop$world.pop[seq(from = 1, to = nrow(UNpop), by = 2)]
## [1] 2525779 3691173 5320817 6916183
# Append a missing value to show how NA affects summaries.
world.pop <- c(UNpop$world.pop, NA)
world.pop
## [1] 2525779 3026003 3691173 4449049 5320817 6127700 6916183      NA
# With an NA present mean() returns NA ...
mean(world.pop)
## [1] NA
# ... unless missing values are removed first with na.rm = TRUE.
mean(world.pop, na.rm = TRUE)
## [1] 4579529

1.3.6 オブジェクトを保存する

# Persist objects from this session to files in the working directory.
getwd()
## [1] "/cloud/project/INTRO"
# Save every object in the workspace.
save.image("Chapter1.RData")

# Alternatively save only selected objects. Each call below writes to the same
# file name, so it replaces whatever the previous call stored there.
save(UNpop, file = "Chapter1.RData")
save(world.pop, year, file = "Chapter1.RData")

# Export the data frame as CSV. row.names = FALSE keeps the row index out of
# the file; without it, reading "UNpop.csv" back with read.csv() produces an
# extra leading column (named "X") alongside the real data columns.
write.csv(UNpop, file = "UNpop.csv", row.names = FALSE)

# Restore the objects stored in the .RData file.
load("Chapter1.RData")

1.3.7 パッケージ

# Packages: `foreign` provides read.dta() / write.dta() for .dta data files.
# One-time install (left commented out):
#install.packages("foreign")
library(foreign)
# Read the .dta file from the working directory; the result is only printed,
# not assigned to an object.
# NOTE(review): the echoed data has an extra `X` index column and the column
# name `world_pop` -- presumably the file was produced by an earlier run.
read.dta("UNpop.dta")
##   X year world_pop
## 1 1 1950  2525.779
## 2 2 1960  3026.003
## 3 3 1970  3691.173
## 4 4 1980  4449.049
## 5 5 1990  5320.817
## 6 6 2000  6127.700
## 7 7 2010  6916.183
# Write the current UNpop data frame to "UNpop.dta", the file read above.
write.dta(UNpop, file = "UNpop.dta")

1.3.8 プログラミングと学習のコツ

# Run every statement in the script file UNpop.R.
source("UNpop.R")
# One-time install of the linter package (left commented out):
#install.packages("lintr")
library(lintr)
# Check UNpop.R for style problems. The "##" lines below are the linter's
# echoed report, reproduced verbatim (including text quoted from UNpop.R).
lint("UNpop.R")
## /cloud/project/INTRO/UNpop.R:10:1: style: [object_name_linter] Variable and function name style should match snake_case or symbols.
## UNpop <- read.csv("UNpop.csv")
## ^~~~~
## /cloud/project/INTRO/UNpop.R:11:1: style: [object_name_linter] Variable and function name style should match snake_case or symbols.
## UNpop$world.pop <- UNpop$world.pop / 1000 #100万人単位の人口
## ^~~~~
## /cloud/project/INTRO/UNpop.R:13:1: style: [trailing_blank_lines_linter] Trailing blank lines are superfluous.
## 
## ^
## /cloud/project/INTRO/UNpop.R:14:1: style: [trailing_blank_lines_linter] Trailing blank lines are superfluous.
## 
## ^

テキスト

  • 今井耕介 (2018)『社会科学のためのデータ分析入門 (上)』岩波書店。