Multiple Linear Regression on Customer Data

# Go through the data and understand the attributes to get the info of the variables.

Write a code to clear the environment if needed

# Remove every object from the current environment, including hidden ones
# whose names start with a dot. `all.names` is spelled out in full instead of
# relying on partial argument matching of `all`.
rm(list = ls(all.names = TRUE))

Write a code to set the working directory.

# NOTE(review): this is an absolute path on one specific machine. Point it at
# the folder that holds CustomerData_Assignment.csv before running, because
# the file is read with a relative path afterwards.
setwd("C:/Users/C5215696/Desktop/Data Science/Regression-concepts/Multi-Regression")
# Print the working directory to confirm the change took effect.
getwd()

## [1] "C:/Users/C5215696/Desktop/Data Science/Regression-concepts/Multi-Regression"

As a best practice, load every library the analysis needs in this one place, adding to it as new requirements come up.

Write code to read the CSV file into a data frame (named `customer_data` here)

# Read the customer file from the working directory; the first row holds the
# column names. `stringsAsFactors = TRUE` keeps the text columns
# (FavoriteChannelOfTransaction, FavoriteGame) as factors on R >= 4.0, where
# the default became FALSE. `TRUE` is used instead of the reassignable `T`.
customer_data <- read.csv(
  "CustomerData_Assignment.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

Write a code to know the names of the attributes

names(customer_data)

##  [1] "CustomerID"                   "City"                        
##  [3] "NoOfChildren"                 "MinAgeOfChild"               
##  [5] "MaxAgeOfChild"                "Tenure"                      
##  [7] "FrquncyOfPurchase"            "NoOfUnitsPurchased"          
##  [9] "FrequencyOFPlay"              "NoOfGamesPlayed"             
## [11] "NoOfGamesBought"              "FavoriteChannelOfTransaction"
## [13] "FavoriteGame"                 "TotalRevenueGenerated"

colnames(customer_data)

##  [1] "CustomerID"                   "City"                        
##  [3] "NoOfChildren"                 "MinAgeOfChild"               
##  [5] "MaxAgeOfChild"                "Tenure"                      
##  [7] "FrquncyOfPurchase"            "NoOfUnitsPurchased"          
##  [9] "FrequencyOFPlay"              "NoOfGamesPlayed"             
## [11] "NoOfGamesBought"              "FavoriteChannelOfTransaction"
## [13] "FavoriteGame"                 "TotalRevenueGenerated"

Write a code to find the dimensions of the data

dim(customer_data)

## [1] 3209   14

Write code to view the head and tail of the dataset (at least 10 rows each)

tail(customer_data,n = 10)

##      CustomerID City NoOfChildren MinAgeOfChild MaxAgeOfChild Tenure
## 3200       4200    1            1             6             6    352
## 3201       4201    1            2             7             8    313
## 3202       4202    1            3             2             6    424
## 3203       4203    1            2             5             7    424
## 3204       4204    1            1             4             4    431
## 3205       4205    1            2             4             6    365
## 3206       4206    1            2             2             5    348
## 3207       4207    1            2             5             7    341
## 3208       4208    1            2             6             7    368
## 3209       4209    1            3             2             6    389
##      FrquncyOfPurchase NoOfUnitsPurchased FrequencyOFPlay NoOfGamesPlayed
## 3200                27                 24            2042             194
## 3201                15                 15             952             113
## 3202                20                 12            2038             177
## 3203                32                 29            5933             382
## 3204                26                 26            1166              72
## 3205                16                 16             827              78
## 3206                20                 20            2933             294
## 3207                16                 13            1250             126
## 3208                18                 14            1364             122
## 3209                25                 17             475             111
##      NoOfGamesBought FavoriteChannelOfTransaction FavoriteGame
## 3200              23                     Favorite      Uniform
## 3201              10                     Favorite      Uniform
## 3202              20                      Uniform      Uniform
## 3203              29                     Favorite      Uniform
## 3204              26                     Favorite      Uniform
## 3205              16                     Favorite      Uniform
## 3206              13                     Favorite      Uniform
## 3207              16                      Uniform      Uniform
## 3208              18                     Favorite      Uniform
## 3209              25                     Favorite      Uniform
##      TotalRevenueGenerated
## 3200                249.96
## 3201                175.51
## 3202                117.26
## 3203                261.29
## 3204                205.00
## 3205                132.50
## 3206                223.23
## 3207                120.00
## 3208                161.50
## 3209                137.50

head(customer_data,n=10)

##    CustomerID City NoOfChildren MinAgeOfChild MaxAgeOfChild Tenure
## 1        1001    1            2             3             8    210
## 2        1002    1            2             3             6    442
## 3        1003    1            4             3             5    424
## 4        1004    1            1             6             6    261
## 5        1005    1            3             6             9    422
## 6        1006    1            2             3             4    378
## 7        1007    1            3             8            12    369
## 8        1008    1            2             6             8    404
## 9        1009    1            4             6             9    420
## 10       1010    2            3             5             6    333
##    FrquncyOfPurchase NoOfUnitsPurchased FrequencyOFPlay NoOfGamesPlayed
## 1                 11                 11            2344             108
## 2                 20                 20             245              22
## 3                 18                 18            1059             130
## 4                 11                  9             365              34
## 5                 44                 31            1066             102
## 6                 16                 16             228              12
## 7                 25                 15              75               2
## 8                 13                 12            1488             118
## 9                 20                 16            2743             163
## 10                15                 15            1967              56
##    NoOfGamesBought FavoriteChannelOfTransaction FavoriteGame
## 1               10                      Uniform      Uniform
## 2                7                     Favorite      Uniform
## 3               18                     Favorite      Uniform
## 4               11                     Favorite      Uniform
## 5               44                      Uniform      Uniform
## 6               16                     Favorite     Favorite
## 7               25                     Favorite     Favorite
## 8               13                     Favorite      Uniform
## 9               16                      Uniform      Uniform
## 10              15                     Favorite      Uniform
##    TotalRevenueGenerated
## 1                 107.51
## 2                 382.40
## 3                 135.01
## 4                 125.00
## 5                 335.05
## 6                 150.00
## 7                 127.50
## 8                 122.50
## 9                 164.96
## 10                112.62

Write a code to see the data types of the attributes

str(customer_data)

## 'data.frame':    3209 obs. of  14 variables:
##  $ CustomerID                  : int  1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 ...
##  $ City                        : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ NoOfChildren                : int  2 2 4 1 3 2 3 2 4 3 ...
##  $ MinAgeOfChild               : int  3 3 3 6 6 3 8 6 6 5 ...
##  $ MaxAgeOfChild               : int  8 6 5 6 9 4 12 8 9 6 ...
##  $ Tenure                      : int  210 442 424 261 422 378 369 404 420 333 ...
##  $ FrquncyOfPurchase           : int  11 20 18 11 44 16 25 13 20 15 ...
##  $ NoOfUnitsPurchased          : int  11 20 18 9 31 16 15 12 16 15 ...
##  $ FrequencyOFPlay             : int  2344 245 1059 365 1066 228 75 1488 2743 1967 ...
##  $ NoOfGamesPlayed             : int  108 22 130 34 102 12 2 118 163 56 ...
##  $ NoOfGamesBought             : int  10 7 18 11 44 16 25 13 16 15 ...
##  $ FavoriteChannelOfTransaction: Factor w/ 2 levels "Favorite","Uniform": 2 1 1 1 2 1 1 1 2 1 ...
##  $ FavoriteGame                : Factor w/ 3 levels "Favorite","NONE",..: 3 3 3 3 3 1 1 3 3 3 ...
##  $ TotalRevenueGenerated       : num  108 382 135 125 335 ...

Write code to remove the variables that are not useful and store the resulting data

# Keep a copy of the City column, then drop City and CustomerID from the
# working data frame (the remaining columns keep their original order).
city_data <- customer_data[["City"]]
customer_data <- customer_data[
  ,
  setdiff(names(customer_data), c("City", "CustomerID")),
  drop = FALSE
]

Write a code to observe the variables and convert them into the required formats

# Names of the categorical columns; every other column is treated as numeric.
cat_attr <- c("FavoriteGame", "FavoriteChannelOfTransaction")
num_attr <- setdiff(names(customer_data), cat_attr)

# Convert each numeric-group column to double, one column at a time.
# lapply() works on the data frame's columns directly, whereas apply() would
# first coerce the whole data frame to a matrix (and to character if any
# selected column were not numeric).
customer_data[num_attr] <- lapply(customer_data[num_attr], as.numeric)

Check the changes again using str() command

str(customer_data)

## 'data.frame':    3209 obs. of  12 variables:
##  $ NoOfChildren                : num  2 2 4 1 3 2 3 2 4 3 ...
##  $ MinAgeOfChild               : num  3 3 3 6 6 3 8 6 6 5 ...
##  $ MaxAgeOfChild               : num  8 6 5 6 9 4 12 8 9 6 ...
##  $ Tenure                      : num  210 442 424 261 422 378 369 404 420 333 ...
##  $ FrquncyOfPurchase           : num  11 20 18 11 44 16 25 13 20 15 ...
##  $ NoOfUnitsPurchased          : num  11 20 18 9 31 16 15 12 16 15 ...
##  $ FrequencyOFPlay             : num  2344 245 1059 365 1066 ...
##  $ NoOfGamesPlayed             : num  108 22 130 34 102 12 2 118 163 56 ...
##  $ NoOfGamesBought             : num  10 7 18 11 44 16 25 13 16 15 ...
##  $ FavoriteChannelOfTransaction: Factor w/ 2 levels "Favorite","Uniform": 2 1 1 1 2 1 1 1 2 1 ...
##  $ FavoriteGame                : Factor w/ 3 levels "Favorite","NONE",..: 3 3 3 3 3 1 1 3 3 3 ...
##  $ TotalRevenueGenerated       : num  108 382 135 125 335 ...

Write code to extract the numerical attributes into `num_df` and the categorical attributes into a separate data frame.

# Categorical columns go into cate_df; the numeric-group columns (still
# including the target at this point) go into num_df.
cate_df <- customer_data[cat_attr]
num_df <- customer_data[, num_attr, drop = FALSE]

Write code to separate the target

# Keep the target as a one-column data frame and remove it from the
# predictor table so num_df holds predictors only.
target_attr <- customer_data["TotalRevenueGenerated"]
num_df[["TotalRevenueGenerated"]] <- NULL

Write a code for Normalizing the num_df

library(vegan)

## Warning: package 'vegan' was built under R version 3.3.3

## Loading required package: permute

## Warning: package 'permute' was built under R version 3.3.3

## Loading required package: lattice

## This is vegan 2.4-3

str(num_df)

## 'data.frame':    3209 obs. of  9 variables:
##  $ NoOfChildren      : num  2 2 4 1 3 2 3 2 4 3 ...
##  $ MinAgeOfChild     : num  3 3 3 6 6 3 8 6 6 5 ...
##  $ MaxAgeOfChild     : num  8 6 5 6 9 4 12 8 9 6 ...
##  $ Tenure            : num  210 442 424 261 422 378 369 404 420 333 ...
##  $ FrquncyOfPurchase : num  11 20 18 11 44 16 25 13 20 15 ...
##  $ NoOfUnitsPurchased: num  11 20 18 9 31 16 15 12 16 15 ...
##  $ FrequencyOFPlay   : num  2344 245 1059 365 1066 ...
##  $ NoOfGamesPlayed   : num  108 22 130 34 102 12 2 118 163 56 ...
##  $ NoOfGamesBought   : num  10 7 18 11 44 16 25 13 16 15 ...

# Standardize every numeric predictor with vegan's decostand()
# (method = "standardize"), overwriting num_df with the scaled values.
# NOTE(review): this runs on all rows, before the train/test split further
# down, so the test rows contribute to the scaling statistics. Confirm that
# is acceptable for this exercise.
num_df=decostand(num_df,method = "standardize")

Write a code to observe the correlation between the attributes

cor(num_df)

##                    NoOfChildren MinAgeOfChild MaxAgeOfChild      Tenure
## NoOfChildren          1.0000000 -0.1791759583   0.189001961  0.08534060
## MinAgeOfChild        -0.1791760  1.0000000000   0.367127367 -0.05582027
## MaxAgeOfChild         0.1890020  0.3671273667   1.000000000 -0.04032911
## Tenure                0.0853406 -0.0558202691  -0.040329109  1.00000000
## FrquncyOfPurchase     0.1376303 -0.0175678432  -0.006212834  0.19334370
## NoOfUnitsPurchased    0.1370396 -0.0061912434  -0.003925588  0.19241309
## FrequencyOFPlay       0.1650357  0.0066897624   0.012095286  0.24180938
## NoOfGamesPlayed       0.2133424  0.0001039156   0.027946580  0.27369597
## NoOfGamesBought       0.1313984 -0.0077484225  -0.007062492  0.18828481
##                    FrquncyOfPurchase NoOfUnitsPurchased FrequencyOFPlay
## NoOfChildren             0.137630281        0.137039620     0.165035716
## MinAgeOfChild           -0.017567843       -0.006191243     0.006689762
## MaxAgeOfChild           -0.006212834       -0.003925588     0.012095286
## Tenure                   0.193343704        0.192413092     0.241809378
## FrquncyOfPurchase        1.000000000        0.934130532     0.279957433
## NoOfUnitsPurchased       0.934130532        1.000000000     0.311981695
## FrequencyOFPlay          0.279957433        0.311981695     1.000000000
## NoOfGamesPlayed          0.397566861        0.436149179     0.740204301
## NoOfGamesBought          0.947787464        0.868374420     0.286943164
##                    NoOfGamesPlayed NoOfGamesBought
## NoOfChildren          0.2133424389     0.131398358
## MinAgeOfChild         0.0001039156    -0.007748422
## MaxAgeOfChild         0.0279465799    -0.007062492
## Tenure                0.2736959714     0.188284813
## FrquncyOfPurchase     0.3975668611     0.947787464
## NoOfUnitsPurchased    0.4361491791     0.868374420
## FrequencyOFPlay       0.7402043013     0.286943164
## NoOfGamesPlayed       1.0000000000     0.399178320
## NoOfGamesBought       0.3991783197     1.000000000

Write a code to plot the corrplot of correlation between the attributes

library(corrplot)

## Warning: package 'corrplot' was built under R version 3.3.3

corrplot(cor(num_df),method = "number")

Write a code to combine the numerical,categorical data along with the target

str(customer_data)

## 'data.frame':    3209 obs. of  12 variables:
##  $ NoOfChildren                : num  2 2 4 1 3 2 3 2 4 3 ...
##  $ MinAgeOfChild               : num  3 3 3 6 6 3 8 6 6 5 ...
##  $ MaxAgeOfChild               : num  8 6 5 6 9 4 12 8 9 6 ...
##  $ Tenure                      : num  210 442 424 261 422 378 369 404 420 333 ...
##  $ FrquncyOfPurchase           : num  11 20 18 11 44 16 25 13 20 15 ...
##  $ NoOfUnitsPurchased          : num  11 20 18 9 31 16 15 12 16 15 ...
##  $ FrequencyOFPlay             : num  2344 245 1059 365 1066 ...
##  $ NoOfGamesPlayed             : num  108 22 130 34 102 12 2 118 163 56 ...
##  $ NoOfGamesBought             : num  10 7 18 11 44 16 25 13 16 15 ...
##  $ FavoriteChannelOfTransaction: Factor w/ 2 levels "Favorite","Uniform": 2 1 1 1 2 1 1 1 2 1 ...
##  $ FavoriteGame                : Factor w/ 3 levels "Favorite","NONE",..: 3 3 3 3 3 1 1 3 3 3 ...
##  $ TotalRevenueGenerated       : num  108 382 135 125 335 ...

str(target_attr)

## 'data.frame':    3209 obs. of  1 variable:
##  $ TotalRevenueGenerated: num  108 382 135 125 335 ...

str(cate_df)

## 'data.frame':    3209 obs. of  2 variables:
##  $ FavoriteGame                : Factor w/ 3 levels "Favorite","NONE",..: 3 3 3 3 3 1 1 3 3 3 ...
##  $ FavoriteChannelOfTransaction: Factor w/ 2 levels "Favorite","Uniform": 2 1 1 1 2 1 1 1 2 1 ...

str(num_df)

## 'data.frame':    3209 obs. of  9 variables:
##  $ NoOfChildren      : num  -0.124 -0.124 1.808 -1.09 0.842 ...
##  $ MinAgeOfChild     : num  -0.528 -0.528 -0.528 0.28 0.28 ...
##  $ MaxAgeOfChild     : num  0.00106 -0.22662 -0.34046 -0.22662 0.11491 ...
##  $ Tenure            : num  -1.519 1.044 0.845 -0.956 0.823 ...
##  $ FrquncyOfPurchase : num  -0.624 0.442 0.205 -0.624 3.285 ...
##  $ NoOfUnitsPurchased: num  -0.513 0.74 0.462 -0.791 2.272 ...
##  $ FrequencyOFPlay   : num  0.428 -0.731 -0.281 -0.665 -0.277 ...
##  $ NoOfGamesPlayed   : num  0.1616 -0.8054 0.409 -0.6705 0.0941 ...
##  $ NoOfGamesBought   : num  -0.543 -0.885 0.369 -0.429 3.332 ...
##  - attr(*, "decostand")= chr "standardize"

# Reassemble one modelling table: target first, then the standardized
# numeric predictors, then the categorical columns.
combined_data <- cbind(target_attr, num_df, cate_df)

Write a code to set the seed and comment why it is used.

# Fix the random-number seed so the random draw used for the train/test
# split selects the same rows on every run, making results reproducible.
set.seed(29)

Write a code to get the train rows using sample

# Randomly pick 70% of the row indices (rounded down) for training, without
# replacement. seq_len() stays correct for a zero-row table, where 1:nrow()
# would yield c(1, 0), and floor() makes the rounding of the size explicit.
train_rows <- sample(
  x = seq_len(nrow(combined_data)),
  size = floor(0.7 * nrow(combined_data))
)

Write a code to get the train and test

# Rows drawn by the sample form the training set; all remaining rows form
# the test set.
train_data <- combined_data[train_rows, ]
test_data <- combined_data[-train_rows, ]

Write a code to just plot the graphs between attributes and targets

# Scatter plot of the target against each numeric predictor on a 3 x 3 grid.
# One loop over the predictor columns replaces nine hand-written calls, and
# each x-axis label is taken from the column name so it always matches the
# data being plotted.
par(mfrow = c(3, 3))
for (predictor in names(num_df)) {
  plot(
    combined_data[[predictor]],
    combined_data$TotalRevenueGenerated,
    xlab = predictor,
    ylab = "Total Revenue"
  )
}

Write a code to form a linear regression model

# Fit a multiple linear regression of revenue on the nine numeric
# predictors, using the training rows only.
linReg_model <- lm(
  formula = TotalRevenueGenerated ~ NoOfChildren + MinAgeOfChild +
    MaxAgeOfChild + Tenure + FrquncyOfPurchase + NoOfUnitsPurchased +
    FrequencyOFPlay + NoOfGamesPlayed + NoOfGamesBought,
  data = train_data
)

Write a code to plot the model

par(mfrow = c(2,2))
plot(linReg_model)

Write a code to check the summary of the model

summary(linReg_model)

## 
## Call:
## lm(formula = TotalRevenueGenerated ~ NoOfChildren + MinAgeOfChild + 
##     MaxAgeOfChild + Tenure + FrquncyOfPurchase + NoOfUnitsPurchased + 
##     FrequencyOFPlay + NoOfGamesPlayed + NoOfGamesBought, data = train_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -168.47  -27.96   -4.55   22.48  318.81 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        169.7797     0.9409 180.441  < 2e-16 ***
## NoOfChildren         3.3141     1.0401   3.186  0.00146 ** 
## MinAgeOfChild       13.9216     2.2782   6.111 1.17e-09 ***
## MaxAgeOfChild       -1.8467     1.0026  -1.842  0.06563 .  
## Tenure              -0.4760     0.9851  -0.483  0.62896    
## FrquncyOfPurchase   79.9014     4.1560  19.226  < 2e-16 ***
## NoOfUnitsPurchased  73.5796     2.6271  28.008  < 2e-16 ***
## FrequencyOFPlay      4.3019     1.4323   3.003  0.00270 ** 
## NoOfGamesPlayed     -4.5316     1.5318  -2.958  0.00312 ** 
## NoOfGamesBought    -98.4499     3.0501 -32.277  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.39 on 2236 degrees of freedom
## Multiple R-squared:  0.7186, Adjusted R-squared:  0.7175 
## F-statistic: 634.5 on 9 and 2236 DF,  p-value: < 2.2e-16

Write code to predict the target values for the test data using the linear model

# Predictor column names: the numeric group minus the target. The target is
# removed by name so the result does not depend on it being the last entry
# of num_attr.
attr_without_target <- setdiff(num_attr, "TotalRevenueGenerated")

# Predict revenue for the held-out test rows with the full linear model.
pred_values <- predict(
  linReg_model,
  newdata = test_data[attr_without_target]
)

Write a code to use stepAIC

library(MASS)
# Stepwise model selection by AIC starting from the full linear model;
# direction "both" allows terms to be dropped and re-added at each step.
aic_model <- stepAIC(linReg_model, direction = "both")

## Start:  AIC=17047.71
## TotalRevenueGenerated ~ NoOfChildren + MinAgeOfChild + MaxAgeOfChild + 
##     Tenure + FrquncyOfPurchase + NoOfUnitsPurchased + FrequencyOFPlay + 
##     NoOfGamesPlayed + NoOfGamesBought
## 
##                      Df Sum of Sq     RSS   AIC
## - Tenure              1       460 4405482 17046
## <none>                            4405022 17048
## - MaxAgeOfChild       1      6683 4411705 17049
## - NoOfGamesPlayed     1     17242 4422264 17055
## - FrequencyOFPlay     1     17772 4422793 17055
## - NoOfChildren        1     20001 4425023 17056
## - MinAgeOfChild       1     73564 4478586 17083
## - FrquncyOfPurchase   1    728178 5133200 17389
## - NoOfUnitsPurchased  1   1545398 5950420 17721
## - NoOfGamesBought     1   2052433 6457455 17905
## 
## Step:  AIC=17045.95
## TotalRevenueGenerated ~ NoOfChildren + MinAgeOfChild + MaxAgeOfChild + 
##     FrquncyOfPurchase + NoOfUnitsPurchased + FrequencyOFPlay + 
##     NoOfGamesPlayed + NoOfGamesBought
## 
##                      Df Sum of Sq     RSS   AIC
## <none>                            4405482 17046
## - MaxAgeOfChild       1      6505 4411987 17047
## + Tenure              1       460 4405022 17048
## - FrequencyOFPlay     1     17434 4422916 17053
## - NoOfGamesPlayed     1     17924 4423406 17053
## - NoOfChildren        1     20047 4425529 17054
## - MinAgeOfChild       1     76063 4481545 17082
## - FrquncyOfPurchase   1    727756 5133238 17387
## - NoOfUnitsPurchased  1   1547166 5952648 17720
## - NoOfGamesBought     1   2052658 6458140 17903

Write code to predict the target values for the test data using the stepAIC model

# Predict revenue for the held-out test rows with the AIC-selected model.
predict_aic_values <- predict(
  aic_model,
  newdata = test_data[attr_without_target]
)

Write a code to check the multicollinearity in the lm model

library(car)

## Warning: package 'car' was built under R version 3.3.3

vif(linReg_model)

##       NoOfChildren      MinAgeOfChild      MaxAgeOfChild 
##           1.265423           1.201447           1.071255 
##             Tenure  FrquncyOfPurchase NoOfUnitsPurchased 
##           1.119430          19.920974           7.949876 
##    FrequencyOFPlay    NoOfGamesPlayed    NoOfGamesBought 
##           2.216765           2.515500          10.589748

Write a code to check the plots of the models

par(mfrow=c(2,2))
plot(aic_model)

Write a code to check the multicollinearity problem

vif(aic_model)

##       NoOfChildren      MinAgeOfChild      MaxAgeOfChild 
##           1.265354           1.184135           1.067739 
##  FrquncyOfPurchase NoOfUnitsPurchased    FrequencyOFPlay 
##          19.900110           7.946468           2.203238 
##    NoOfGamesPlayed    NoOfGamesBought 
##           2.493808          10.589633

Error Metrics for Regression

# Mean Absolute Error (MAE): the average absolute difference between the
# actual and predicted values.
#   actual    - numeric vector of observed values
#   predicted - numeric vector of model predictions
mae <- function(actual, predicted) {
  mean(abs(actual - predicted))
}

# Mean Squared Error (MSE): the average squared difference between the
# actual and predicted values.
#   actual    - numeric vector of observed values
#   predicted - numeric vector of model predictions
mse <- function(actual, predicted) {
  mean((actual - predicted)^2)
}

# Root Mean Squared Error (RMSE): the square root of the average squared
# difference between the actual and predicted values.
#   actual    - numeric vector of observed values
#   predicted - numeric vector of model predictions
rmse <- function(actual, predicted) {
  squared_error <- (actual - predicted)^2
  sqrt(mean(squared_error))
}

# Mean Absolute Percentage Error (MAPE): the average of |error / actual|,
# returned as a percentage (multiplied by 100).
#   actual    - numeric vector of observed values
#   predicted - numeric vector of model predictions
mape <- function(actual, predicted) {
  relative_error <- (actual - predicted) / actual
  mean(abs(relative_error)) * 100
}

Write code to evaluate the prediction error of the stepAIC model on the test data

mae(test_data$TotalRevenueGenerated, predict_aic_values)

## [1] 32.65343

mse(test_data$TotalRevenueGenerated, predict_aic_values)

## [1] 2181.524

rmse(test_data$TotalRevenueGenerated, predict_aic_values)

## [1] 46.70679

mape(test_data$TotalRevenueGenerated, predict_aic_values)

## [1] 19.96523

# OR

library(DMwR)

## Warning: package 'DMwR' was built under R version 3.3.3

## Loading required package: grid

regr.eval(test_data$TotalRevenueGenerated, predict_aic_values)

##          mae          mse         rmse         mape 
##   32.6534295 2181.5242460   46.7067901    0.1996523

Write code to evaluate the prediction error of the full linear regression model on the test data

mae(test_data$TotalRevenueGenerated, pred_values)

## [1] 32.62928

mse(test_data$TotalRevenueGenerated, pred_values)

## [1] 2170.543

rmse(test_data$TotalRevenueGenerated, pred_values)

## [1] 46.58909

mape(test_data$TotalRevenueGenerated, pred_values)

## [1] 19.9562

# OR

regr.eval(test_data$TotalRevenueGenerated, pred_values)

##         mae         mse        rmse        mape 
##   32.629279 2170.543222   46.589089    0.199562

Multiple Linear Regression on Customer Data

Insofe Lab Session-Venkatesh Inkollu

July 20 , 2017

Error Metrics for Regression