Simple Circle Example

numSamples = 200 # total number of observations
radius = 10 # radius of the outer circle
noise = 0.0001 # amount of noise to be added to the data
d = matrix(0,ncol = 3, nrow = numSamples) # matrix to store our generated data

# Generate positive points inside the circle.
for (i in 1:(numSamples/2) ) {
  r = runif(1,0, radius * 0.4);
  angle = runif(1,0, 2 * pi);
  x = r * sin(angle);
  y = r * cos(angle);
  noiseX = runif(1,-radius, radius) * noise;
  noiseY = runif(1,-radius, radius) * noise;
  d[i,] = c(0, x + noiseX, y + noiseY)  # store label and noisy coordinates
}

# Generate negative points outside the circle.
for (i in (numSamples/2+1):numSamples ) {
  r = runif(1,radius * 0.8, radius);
  angle = runif(1,0, 2 * pi);
  x = r * sin(angle);
  y = r * cos(angle);
  noiseX = runif(1,-radius, radius) * noise;
  noiseY = runif(1,-radius, radius) * noise;
  d[i,] = c(1, x + noiseX, y + noiseY)
}
colnames(d) = c("label", "x1", "x2")
head(d)
##      label         x1         x2
## [1,]     0  2.1936551  0.9791607
## [2,]     0  1.7829806 -1.9216326
## [3,]     0  1.4988476 -3.3364821
## [4,]     0 -2.4023070  0.8302288
## [5,]     0 -0.7221943  3.4918414
## [6,]     0  1.3623500  0.5882464
plot(d[,2],d[,3], col=d[,1]+2, pch=16, xlab="x1", ylab="x2")
x1 = seq(-11,11,length.out = 100)

# Plot lines that separate one class (red) from the other (green)
lines(x1, -x1 - 6); text(-4,-3,1)
lines(x1, -x1 + 6); text(4,3,2)
lines(x1,  x1 - 6); text(4,-3,3)
lines(x1,  x1 + 6); text(-3,4,4)

sigmoid  = function(z)
  return(exp(z)/(1+exp(z)))
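For the moderate values of z in this example the formula above is fine, but exp(z) overflows for large positive z and the ratio becomes NaN. An algebraically equivalent form avoids this; a minimal sketch (the name sigmoid_stable is only illustrative):

sigmoid_stable = function(z)
  return(1/(1 + exp(-z)))  # same function; avoids Inf/Inf for large positive z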

# Define hidden layer of our neural network
features = function(x1,x2) {
  z1 =  6 + x1 + x2; a1 = sigmoid(z1)
  z2 =  6 - x1 - x2; a2 = sigmoid(z2)
  z3 =  6 - x1 + x2; a3 = sigmoid(z3)
  z4 =  6 + x1 - x2; a4 = sigmoid(z4)
  return(c(a1,a2,a3,a4))
}
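Each hidden unit is a soft indicator for one side of one of the four lines drawn above; for example, z1 = 6 + x1 + x2 is positive, so a1 is close to 1, exactly for points above line 1. A quick check (approximate values noted in the comments):

features(0, 0)    # inside the circle: all four units fire, roughly (0.998, 0.998, 0.998, 0.998)
features(0, 10)   # outside the circle: only two units fire, roughly (1, 0.02, 1, 0.02)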

# Calculate prediction (classification) using our neural network
predict_prob = function(x){
  x1 = x[1]; x2 = x[2]
  z = features(x1,x2)
  # print(z)
  mu = sum(z) - 3.1
  # print(mu)
  sigmoid(mu)
}
# Try our model
predict_prob(c(0,10))
## [1] 0.2565405
predict_prob(c(0,0))
## [1] 0.7089128
# Generate a grid of points
x1 = seq(-11,11,length.out = 100)
x2 = seq(-11,11,length.out = 100)

# Generate all combinations of (x1,x2) values
gr = as.matrix(expand.grid(x1,x2))
dim(gr)
## [1] 10000     2
# Calculate the prediction for each point in the grid
yhat = apply(gr,1,predict_prob)
length(yhat)
## [1] 10000
# Plot everything together
image(x1,x2,matrix(yhat,ncol = 100), col = heat.colors(20,0.7))
points(d[,2], d[,3], col=d[,1]+2, pch=16)
lines(x1, -x1 - 6); text(-4,-3,1)
lines(x1, -x1 + 6); text(4,3,2)
lines(x1,  x1 - 6); text(4,-3,3)
lines(x1,  x1 + 6); text(-3,4,4)
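
As a sanity check, we can score every generated point with the hand-built network. The network outputs a value near 1 inside the circle (label 0) and near 0 outside (label 1), so we threshold at 0.5 and compare against label == 0. A quick sketch:

# Classify each generated point and compute the training accuracy
p_inside = apply(d[, 2:3], 1, predict_prob)
mean((p_inside > 0.5) == (d[, "label"] == 0))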

Zagat

The dataset zagat.csv provides price and quality variables from the 2014 Zagat Guide for restaurants in New York City (http://www.zagat.com/new-york-city). There are n = 114 restaurants in the sample.

zagat = read.csv("./data/zagat.csv")

The four variables are food, decor, service, and price. food is Zagat's food rating (up to a maximum of 30); anything 25 and above is excellent. decor and service are rated on the same 30-point scale, and price is the typical cost of a meal in dollars.

Data summary

knitr::kable(summary(zagat))
         food     decor    service   price
Min.     14.00     2.00    10.00     11.00
1st Qu.  18.00    14.00    16.00     25.00
Median   20.00    16.50    18.00     32.50
Mean     19.61    16.58    17.77     33.32
3rd Qu.  21.00    20.00    20.00     41.00
Max.     27.00    28.00    26.00     65.00
knitr::kable(cor(zagat))
         food       decor      service    price
food     1.0000000  0.2133755  0.7066029  0.5991454
decor    0.2133755  1.0000000  0.5904655  0.6691655
service  0.7066029  0.5904655  1.0000000  0.7527678
price    0.5991454  0.6691655  0.7527678  1.0000000

Plot price versus each of the quality variables

par(mfrow=c(1,3))
plot(price~food,main="Price versus Food",pch=20,col=20,bty='n', data=zagat)
plot(price~decor,main="Price versus Decor",pch=20,col=20,bty='n', data=zagat)
plot(price~service,main="Price versus Service",pch=20,col=20,bty='n', data=zagat)

We build a multiple regression model to assess whether price can be described by the quality measures.

par(mar=c(4,4,2,0),bty='n')
zaga = lm(price ~ food + decor + service, zagat)

# Extract coefficients and model summary

coef(zaga)
## (Intercept)        food       decor     service 
##  -30.663961    1.379510    1.104315    1.048040
summary(zaga)
## 
## Call:
## lm(formula = price ~ food + decor + service, data = zagat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.0442  -4.0530   0.2109   4.6547  13.0864 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -30.6640     4.7872  -6.405 3.82e-09 ***
## food          1.3795     0.3533   3.904 0.000163 ***
## decor         1.1043     0.1761   6.272 7.18e-09 ***
## service       1.0480     0.3811   2.750 0.006969 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.298 on 110 degrees of freedom
## Multiple R-squared:  0.6875, Adjusted R-squared:  0.6789 
## F-statistic: 80.66 on 3 and 110 DF,  p-value: < 2.2e-16
# Extract fitted value for each case

plot(fitted(zaga),zagat$price, xlab="yhat", ylab="y", pch=16)

# 4-in-1 residual diagnostics

layout(matrix(c(1,2,3,4),2,2))   
plot(zaga,pch=20)

newdata = data.frame(food = 27, service = 25, decor = 20)
predict(zaga,newdata)
##        1 
## 54.87014

Approximate prediction interval from the residual standard error

se = summary(zaga)$sigma

# Upper bound
sum(coef(zaga)*c(1,27,20,25))+qnorm(0.975)*se
## [1] 67.21486
# Lower bound
sum(coef(zaga)*c(1,27,20,25))-qnorm(0.975)*se
## [1] 42.52541
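
The bounds above are a rough plug-in interval that only uses the residual standard error. predict.lm can produce a proper 95% prediction interval for the same restaurant, which also accounts for the uncertainty in the estimated coefficients and is therefore slightly wider:

predict(zaga, newdata, interval = "prediction", level = 0.95)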

Deep Learning Model for Zagat

model <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = 3) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 32, activation = "relu") %>%
  layer_dense(units = 1) 
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
##  Layer (type)                       Output Shape                    Param #     
## ================================================================================
##  dense_3 (Dense)                    (None, 64)                      256         
##  dense_2 (Dense)                    (None, 64)                      4160        
##  dense_1 (Dense)                    (None, 32)                      2080        
##  dense (Dense)                      (None, 1)                       33          
## ================================================================================
## Total params: 6,529
## Trainable params: 6,529
## Non-trainable params: 0
## ________________________________________________________________________________
model %>% compile(loss = "mse",optimizer = "RMSprop")

Now let’s train the model

history = model %>% fit(x = as.matrix(scale(zagat[,1:3])), y = as.matrix(zagat$price),verbose = 0, epochs=200)
yhat = model %>% predict(as.matrix(scale(zagat[,1:3])))
plot(history)

par(mar=c(4,4,0,0),bty='n')
plot(zagat$price,yhat, pch=16)
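
To put the network and the linear regression on the same footing, we can compare their in-sample root mean squared errors. A sketch (the network's number will vary from run to run because of random initialization):

# In-sample RMSE of the linear model versus the neural network
sqrt(mean((zagat$price - fitted(zaga))^2))
sqrt(mean((zagat$price - yhat)^2))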

Airbnb

X = read.csv("./data/X5k.txt");
Y = read.csv("./data/Y5k.txt");
sz=dim(X)
dim(Y)
## [1] 4999    1
sz
## [1] 4999  661
x_std <- scale(X)
mu = colMeans(x_std)  # faster version of apply(x_std, 2, mean)
sd = apply(x_std, 2, sd)
mu[1:2]
## X.1.000000000000000000e.00  X2.000000000000000000e.00 
##               1.142604e-15               3.795946e-16
sd[1:2]
## X.1.000000000000000000e.00  X2.000000000000000000e.00 
##                          1                          1
smp_size <- floor(0.75 * nrow(x_std))
set.seed(123)
train_ind <- sample(seq_len(nrow(x_std)), size = smp_size)
X_train <- x_std[train_ind, ]
X_val <- x_std[-train_ind, ]
Y_train <- Y[train_ind,1]
Y_val <- Y[-train_ind,1]
num_class = max(Y)+1
label_train <- to_categorical(Y_train, num_class)
## Loaded Tensorflow version 2.9.2
label_val <- to_categorical(Y_val, num_class)
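to_categorical turns the integer class labels into one-hot indicator rows, which is what the categorical cross-entropy loss below expects. A tiny illustration, assuming labels coded 0, 1, 2:

to_categorical(c(0, 2, 1), 3)  # 3 x 3 matrix with a single 1 per row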
model <- keras_model_sequential() 
model %>% 
  layer_dense(units = 64, activation = "relu", input_shape = c(sz[2])) %>% 
  layer_dropout(rate = 0.4) %>% 
  layer_dense(units = 64, activation = "relu") %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = num_class, activation = "softmax")
model %>% compile(
  loss = "categorical_crossentropy",
  optimizer = optimizer_rmsprop(),
  metrics = c("accuracy")
)
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
##  Layer (type)                       Output Shape                    Param #     
## ================================================================================
##  dense_2 (Dense)                    (None, 64)                      42368       
##  dropout_1 (Dropout)                (None, 64)                      0           
##  dense_1 (Dense)                    (None, 64)                      4160        
##  dropout (Dropout)                  (None, 64)                      0           
##  dense (Dense)                      (None, 12)                      780         
## ================================================================================
## Total params: 47,308
## Trainable params: 47,308
## Non-trainable params: 0
## ________________________________________________________________________________

 
history <- model %>% fit(
  X_train, label_train, 
  epochs = 20, batch_size = 128, 
  validation_split = 0.2
)
plot(history)
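
The validation_split above carves 20% out of the training rows; the 25% we held out at the start has not been touched. Scoring it gives a cleaner estimate of out-of-sample accuracy. A sketch:

model %>% evaluate(X_val, label_val)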

NLP for IMDB

imdb <- keras::dataset_imdb(num_words = 10000)
c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb

# Each review is a sequence of integers
train_data[[1]]
##   [1]    1   14   22   16   43  530  973 1622 1385   65  458 4468   66 3941    4
##  [16]  173   36  256    5   25  100   43  838  112   50  670    2    9   35  480
##  [31]  284    5  150    4  172  112  167    2  336  385   39    4  172 4536 1111
##  [46]   17  546   38   13  447    4  192   50   16    6  147 2025   19   14   22
##  [61]    4 1920 4613  469    4   22   71   87   12   16   43  530   38   76   15
##  [76]   13 1247    4   22   17  515   17   12   16  626   18    2    5   62  386
##  [91]   12    8  316    8  106    5    4 2223 5244   16  480   66 3785   33    4
## [106]  130   12   16   38  619    5   25  124   51   36  135   48   25 1415   33
## [121]    6   22   12  215   28   77   52    5   14  407   16   82    2    8    4
## [136]  107  117 5952   15  256    4    2    7 3766    5  723   36   71   43  530
## [151]  476   26  400  317   46    7    4    2 1029   13  104   88    4  381   15
## [166]  297   98   32 2071   56   26  141    6  194 7486   18    4  226   22   21
## [181]  134  476   26  480    5  144   30 5535   18   51   36   28  224   92   25
## [196]  104    4  226   65   16   38 1334   88   12   16  283    5   16 4472  113
## [211]  103   32   15   16 5345   19  178   32
train_labels[[2]]
## [1] 0
max(sapply(train_data, max))
## [1] 9999
# word_index is a dictionary mapping words to an integer index
word_index <- dataset_imdb_word_index()
# We reverse it, mapping integer indices to words
reverse_word_index <- names(word_index)
names(reverse_word_index) <- word_index
# We decode the review; note that our indices were offset by 3
# because 0, 1 and 2 are reserved indices for "padding", "start of sequence", and "unknown".
decoded_review <- sapply(train_data[[1]], function(index) {
  word <- if (index >= 3) reverse_word_index[[as.character(index - 3)]]
  if (!is.null(word)) word else "?"
})
cat(decoded_review)
## ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all
vectorize_sequences <- function(sequences, dimension = 10000) {
  # Create an all-zero matrix of shape (len(sequences), dimension)
  results <- matrix(0, nrow = length(sequences), ncol = dimension)
  for (i in 1:length(sequences))
    # Sets specific indices of results[i] to 1s
    results[i, sequences[[i]]] <- 1
  results
}
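The result is a multi-hot encoding: each review becomes a row of 0/1 indicators with a 1 in every column whose word index occurs in the review (repeats are not counted). A tiny illustration with two short "reviews" and a 5-word vocabulary:

vectorize_sequences(list(c(1, 3), c(2, 2, 5)), dimension = 5)  # 2 x 5 matrix of 0/1 indicators
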
# Our vectorized training data
x_train <- vectorize_sequences(train_data)
# Our vectorized test data
x_test <- vectorize_sequences(test_data)
str(x_train[1,])
##  num [1:10000] 1 1 0 1 1 1 1 1 1 0 ...
# Our vectorized labels
y_train <- as.numeric(train_labels)
y_test <- as.numeric(test_labels)
model <- keras_model_sequential() %>% 
  layer_dense(units = 16, activation = "relu", input_shape = c(10000)) %>% 
  layer_dense(units = 16, activation = "relu") %>% 
  layer_dense(units = 1, activation = "sigmoid")
model %>% compile(optimizer = "rmsprop",loss = "binary_crossentropy",metrics = c("accuracy"))
val_indices <- 1:10000

x_val <- x_train[val_indices,]
partial_x_train <- x_train[-val_indices,]

y_val <- y_train[val_indices]
partial_y_train <- y_train[-val_indices]

history <- model %>% fit(
  partial_x_train,
  partial_y_train,
  epochs = 20,
  batch_size = 512,
  validation_data = list(x_val, y_val)
)
plot(history)

results <- model %>% evaluate(x_val, y_val)
results
##      loss  accuracy 
## 0.7749447 0.8588001
model %>% predict(x_val[c(1:5),])
##              [,1]
## [1,] 9.999971e-01
## [2,] 2.505232e-05
## [3,] 4.290054e-07
## [4,] 1.000000e+00
## [5,] 6.967937e-06
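
The vectorized test set has not been used anywhere above, so evaluating on it gives an honest estimate of out-of-sample performance. A sketch:

model %>% evaluate(x_test, y_test)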