# 第4章「予測」 (Chapter 4: Prediction)

# Double each element of `values`, printing one progress line per element.
values <- c(2, 4, 6)
n <- length(values)

# Holder for the doubled values; allocated up front and filled in the loop.
results <- rep(NA, n)

for (i in seq_len(n)) {
  results[i] <- 2 * values[i]
  cat(values[i], "times 2 is equal to", results[i], "\n")
}

## 2 times 2 is equal to 4 
## 4 times 2 is equal to 8 
## 6 times 2 is equal to 12

# Debugging aid: replay a single pass of the loop body by hand, with the
# index pinned to the first element.
i <- 1
x <- 2 * values[i]
cat(values[i], "times 2 is equal to", x, "\n")

## 2 times 2 is equal to 4

# Take the median of every column of a small data frame, printing the
# iteration number first so the trace shows which column causes trouble.
# Column "b" is character; per the recorded output below, median() warns on
# it and returns NA, while the loop continues on to column "c".
data <- data.frame("a" = 1:2, "b" = c("hi", "hey"), "c" = 3:4)
results <- rep(NA, ncol(data))
for (i in seq_along(data)) {
  cat("iteration", i, "\n")
  results[i] <- median(data[[i]])
}

## iteration 1 
## iteration 2

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]):
## argument is not numeric or logical: returning NA

## iteration 3

# A lone `if`: the braced body runs only when the condition is TRUE, and the
# value of its last expression is printed at top level.
operation <- "add"
if (identical(operation, "add")) {
  cat("I will perform addition 4 + 4\n")
  4 + 4
}

## I will perform addition 4 + 4

## [1] 8

# Multiplication counterpart of the `if` above. When `operation` is not
# "multiply" the condition is FALSE and the body is skipped, so nothing is
# printed.
if (operation == "multiply") {
  cat("I will perform multiplication 4 * 4\n")
  4 * 4
}

# if / else: exactly one of the two branches runs. Each message ends with a
# newline so the auto-printed result starts on its own line.
operation <- "multiply"
if (operation == "add") {
  cat("I will perform addition 4 + 4\n")
  4 + 4
} else {
  cat("I will perform multiplication 4 * 4\n")
  4 * 4
}

## I will perform multiplication 4 * 4

## [1] 16

# Three-way branch: any value other than "add" or "multiply" falls through
# to the final `else`, which reports the offending value.
operation <- "subtract"
if (operation == "add") {
  cat("I will perform addition 4 + 4 \n")
  4 + 4
} else if (operation == "multiply") {
  cat("I will perform multiplication 4 * 4 \n")
  4 * 4
} else {
  cat(
    paste0("`", operation, "' is invalid. Use either `add' or `multiply'.\n")
  )
}

## `subtract' is invalid. Use either `add' or `multiply'.

# Apply a parity-dependent rule to each of 1..5: odd values are multiplied
# by themselves, even values are added to themselves. Every step is
# reported with cat() before the result is stored.
values <- 1:5
n <- length(values)
results <- rep(NA, n)
for (i in seq_len(n)) {
  x <- values[i]
  r <- x %% 2 # remainder after dividing by 2; 0 means even
  if (r != 0) {
    cat(x, "is odd and I will perform multiplication", x, "*", x, "\n")
    results[i] <- x * x
  } else {
    cat(x, "is even and I will perform addition", x, "+", x, "\n")
    results[i] <- x + x
  }
}

## 1 is odd and I will perform multiplication 1 * 1 
## 2 is even and I will perform addition 2 + 2 
## 3 is odd and I will perform multiplication 3 * 3 
## 4 is even and I will perform addition 4 + 4 
## 5 is odd and I will perform multiplication 5 * 5

# Show the filled-in vector.
results

## [1]  1  4  9  8 25

# readr supplies read_csv(), which is used for both CSV files in this script.
library(readr)
# Read pres08.csv from the working directory. The column summary recorded
# below lists state.name, state, Obama, McCain and EV.
pres08 <- read_csv("pres08.csv")

## Rows: 51 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state.name, state
## dbl (3): Obama, McCain, EV
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read polls08.csv from the working directory. The column summary recorded
# below lists state, Pollster, Obama, McCain and middate (parsed as a date).
polls08 <- read_csv("polls08.csv")

## Rows: 1332 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): state, Pollster
## dbl  (2): Obama, McCain
## date (1): middate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Margin = Obama minus McCain, computed the same way in both data frames so
# the two margins can be compared later.
polls08$margin <- with(polls08, Obama - McCain)

pres08$margin <- with(pres08, Obama - McCain)

# Date arithmetic demo: the gap between two Date objects is a time
# difference expressed in days.
x <- as.Date("2008-11-04")
y <- as.Date("2008/9/1")

difftime(x, y, units = "days")

## Time difference of 64 days

# For every poll, the number of days between its middate and 2008-11-04.
# middate is passed through as.Date() first so the subtraction is Date
# arithmetic.
polls08[["middate"]] <- as.Date(polls08[["middate"]])
polls08[["DaysToElection"]] <- as.Date("2008-11-04") - polls08[["middate"]]

# Poll-based prediction for each state: the mean margin of the poll(s)
# taken closest to the election (smallest DaysToElection). If several polls
# share that minimum, their margins are averaged.
#
# The number of states comes from the data rather than a fixed 51, so the
# holder, its names and the loop bounds always agree with `st.names`.
st.names <- unique(polls08$state)

poll.pred <- rep(NA, length(st.names))

names(poll.pred) <- as.character(st.names)

for (i in seq_along(st.names)) {
  # All polls for the i-th state.
  state.data <- subset(polls08, subset = (state == st.names[i]))
  # Keep only the poll(s) nearest to election day.
  latest <- subset(state.data, DaysToElection == min(DaysToElection))
  poll.pred[i] <- mean(latest$margin)
}


# poll.pred <- as.data.frame(poll.pred)

# Prediction error per state: the pres08 margin minus the poll-based
# prediction. Rows of pres08 are looked up by state code instead of being
# paired by position, so the subtraction stays correct even if pres08 and
# the polls list the states in different orders.
# NOTE(review): assumes pres08$state uses the same state codes as
# polls08$state; the check below stops with an error if any state is missing.
stopifnot(st.names %in% pres08$state)
errors <- pres08$margin[match(st.names, pres08$state)] - poll.pred
names(errors) <- st.names
errors

##          AL          AK          AZ          AR          CA          CO 
##   4.0000000  -2.0000000  -6.5000000 -13.0000000   0.0000000   2.0000000 
##          CT          DC          DE          FL          GA          HI 
##  -2.0000000  16.0000000  -5.0000000   1.0000000   0.0000000   4.0000000 
##          ID          IL          IN          IA          KS          KY 
##   3.0000000   3.0000000   6.0000000  -7.0000000   6.0000000   0.0000000 
##          LA          ME          MD          MA          MI          MN 
## -16.0000000   5.0000000   3.0000000   9.0000000   0.0000000   7.0000000 
##          MS          MO          MT          NE          NV          NH 
##   0.0000000  -2.0000000   0.6666667   4.0000000   4.0000000   5.0000000 
##          NJ          NM          NY          NC          ND          OH 
##   5.0000000   8.0000000  -2.0000000   2.0000000  -7.0000000  -2.0000000 
##          OK          OR          PA          RI          SC          SD 
##  -3.0000000   5.0000000   5.0000000   6.0000000  -1.0000000   1.0000000 
##          TN          TX          UT          VT          VA          WA 
##   1.0000000  -1.0000000  -4.0000000  16.0000000   2.0000000   6.0000000 
##          WV          WI          WY 
##  -3.0000000  -2.0000000  -7.0000000

# Mean of the signed errors: positive and negative errors offset each other,
# so this measures the average direction of the miss, not its size.
mean(errors)

## [1] 1.062092

# Root mean squared error: square the errors, average them, take the square
# root. Squaring keeps positive and negative errors from cancelling.
sqrt(mean(errors^2))

## [1] 5.90894

# 第4章「予測」 (Chapter 4: Prediction)
#
# Ayumu Tanaka
#
# 2024-06-10