require(UsingR)
require(ggplot2)
require(tidyr)
require(dplyr)
require(plotly)
require(coefplot)

1 非線形最小二乗法

load("../data/wifi.rdata")
head(wifi)
##   Distance        x         y
## 1 21.87559 28.60461 68.429628
## 2 67.68198 90.29680 29.155945
## 3 79.25427 83.48934  0.371902
## 4 44.73767 61.39133 80.258138
## 5 39.71233 19.55080 83.805855
## 6 56.65595 71.93928 65.551340
ggplot(wifi, aes(x=x, y=y, color=Distance)) + geom_point() +
 scale_color_gradient2(low="blue", mid="white", high="red",
 midpoint=mean(wifi$Distance))

wifiMod1 <- nls(Distance ~ sqrt((betaX - x)^2 + (betaY - y)^2),
data = wifi, start = list(betaX = 50, betaY = 50))
summary(wifiMod1)
## 
## Formula: Distance ~ sqrt((betaX - x)^2 + (betaY - y)^2)
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## betaX   17.851      1.289   13.85   <2e-16 ***
## betaY   52.906      1.476   35.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.73 on 198 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 3.846e-06
ggplot(wifi, aes(x = x, y = y, color = Distance)) + geom_point() +
 scale_color_gradient2(low = "blue", mid = "white", high = "red",
 midpoint = mean(wifi$Distance)) +
 geom_point(data = as.data.frame(t(coef(wifiMod1))),
 aes(x = betaX, y = betaY), size = 5, color = "green")

青い点が密集しているところにあるのでよい推定であることが分かる.

2 スプライン

data("diamond")
ggplot(diamond, aes(x=carat, y=price)) + geom_point()

diaSpline1 <- smooth.spline(x=diamond$carat, y=diamond$price )
df <- data.frame(x=diaSpline1$x, y=diaSpline1$y)
ggplot(diamond, aes(x=carat, y=price))+ geom_point() +
  geom_line(data=df, aes(x=x,y=y)) + scale_x_continuous(limits = c(0.1, 0.36)) + 
  scale_y_continuous(limits = c(0, 1100))

3 一般化加法モデル

4 決定木

creditNames <- c("Checking", "Duration", "CreditHistory",
"Purpose", "CreditAmount", "Savings", "Employment",
"InstallmentRate", "GenderMarital", "OtherDebtors",
"YearsAtResidence", "RealEstate", "Age",
"OtherInstallment", "Housing", "ExistingCredits", "Job",
"NumLiable", "Phone", "Foreign", "Credit")
theURL <- "http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
credit <- read.table(theURL, sep = " ",header=FALSE,col.names=creditNames,stringsAsFactors = FALSE)
require(rpart)
## Loading required package: rpart
require(rpart.plot)
## Loading required package: rpart.plot
creditTree <- rpart(Credit ~ CreditAmount + Age +
+ CreditHistory + Employment, data = credit)
head(credit)
##   Checking Duration CreditHistory Purpose CreditAmount Savings Employment
## 1      A11        6           A34     A43         1169     A65        A75
## 2      A12       48           A32     A43         5951     A61        A73
## 3      A14       12           A34     A46         2096     A61        A74
## 4      A11       42           A32     A42         7882     A61        A74
## 5      A11       24           A33     A40         4870     A61        A73
## 6      A14       36           A32     A46         9055     A65        A73
##   InstallmentRate GenderMarital OtherDebtors YearsAtResidence RealEstate
## 1               4           A93         A101                4       A121
## 2               2           A92         A101                2       A121
## 3               2           A93         A101                3       A121
## 4               2           A93         A103                4       A122
## 5               3           A93         A101                4       A124
## 6               2           A93         A101                4       A124
##   Age OtherInstallment Housing ExistingCredits  Job NumLiable Phone
## 1  67             A143    A152               2 A173         1  A192
## 2  22             A143    A152               1 A173         1  A191
## 3  49             A143    A152               1 A172         2  A191
## 4  45             A143    A153               1 A173         2  A191
## 5  53             A143    A153               2 A173         2  A191
## 6  35             A143    A153               1 A172         2  A192
##   Foreign Credit
## 1    A201      1
## 2    A201      2
## 3    A201      1
## 4    A201      1
## 5    A201      2
## 6    A201      1
rpart.plot(creditTree)

5 Random Forest

require(useful)
## Loading required package: useful
require(randomForest)
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:Hmisc':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
# 予測因子と応答変数の matrix を作成する
creditFormula <- Credit ~ CreditHistory + Purpose + Employment +
Duration + Age + CreditAmount
creditX <- build.x(creditFormula, data=credit)
creditY <- build.y(creditFormula, data=credit)
# ランダムフォレストの適用
creditForest <- randomForest(x=creditX, y=creditY)
## Warning in randomForest.default(x = creditX, y = creditY): The response has
## five or fewer unique values. Are you sure you want to do regression?
creditForest
## 
## Call:
##  randomForest(x = creditX, y = creditY) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 7
## 
##           Mean of squared residuals: 0.1850619
##                     % Var explained: 11.88