Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") 
## [1] "zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8"
pacman::p_load(dplyr,tidyverse,pROC,caTools,ISLR)
load("UL_all7.rdata")

lm 年資 把有na欄位拿掉

UL_narm = UL %>% select(-c(ID,on,off,job_num,store,section))
UL_narm = UL_narm[,-c(25:77)]
UL_narm$not_off = as.factor(UL_narm$not_off)
UL_narm=mutate_if(UL_narm,is.character,as.factor)
UL_narm = na.omit(UL_narm)
str(UL_narm)
## tibble[,24] [3,053 × 24] (S3: tbl_df/tbl/data.frame)
##  $ sex               : Factor w/ 2 levels "男","女": 1 1 1 1 1 1 1 1 1 1 ...
##  $ job               : Factor w/ 7 levels "出納","服專",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ position          : Factor w/ 33 levels "辦事員","董事長",..: 33 33 33 13 33 19 19 13 15 19 ...
##  $ distributor       : Factor w/ 9 levels "EM","ES","HL",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ mean_car_18_20    : num [1:3053] 0 0 0 0 0 ...
##  $ mean_price_18_20  : num [1:3053] 0 0 0 0 0 ...
##  $ num_car_before    : num [1:3053] 0 0 0 0 0 56 0 159 199 0 ...
##  $ num_car2018       : num [1:3053] 0 0 0 0 0 23 0 0 57 0 ...
##  $ num_car2019       : num [1:3053] 0 0 0 0 0 4 0 0 59 0 ...
##  $ num_car2020       : num [1:3053] 0 0 0 0 0 0 25 0 51 7 ...
##  $ totalprice_before : num [1:3053] 0 0 0 0 0 ...
##  $ totalprice_2018   : num [1:3053] 0 0 0 0 0 ...
##  $ totalprice_2019   : num [1:3053] 0 0 0 0 0 ...
##  $ totalprice_2020   : num [1:3053] 0 0 0 0 0 ...
##  $ lecturer_training : num [1:3053] 0 0 0 0 0 0 0 0 0 0 ...
##  $ performance       : num [1:3053] 0 0 0 0 0 0 0 0 4 0 ...
##  $ internet_marketing: num [1:3053] 0 0 0 0 0 0 0 0 0 0 ...
##  $ management        : num [1:3053] 0 0 0 0 0 0 0 28 0 0 ...
##  $ newproduct        : num [1:3053] 0 0 0 0 0 7 7 39 26 0 ...
##  $ fresh             : num [1:3053] 0 0 0 0 0 0 28 0 0 0 ...
##  $ interview_skill   : num [1:3053] 0 0 0 0 0 0 0 0 0 0 ...
##  $ not_off           : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 2 2 2 ...
##  $ seniority         : num [1:3053] 5.42 5.42 5.42 2.92 5.42 ...
##  $ reward            : num [1:3053] 0 0 0 0 0 0 0 2 0 0 ...
##  - attr(*, "na.action")= 'omit' Named int [1:5] 770 963 1339 2358 2425
##   ..- attr(*, "names")= chr [1:5] "770" "963" "1339" "2358" ...
#colnames(UL_narm)
# Split the data into training and test set
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
set.seed(123)
training.samples <- UL_narm$position %>%
  createDataPartition(p = 0.7, list = FALSE)
## Warning in createDataPartition(., p = 0.7, list = FALSE): Some classes have no
## records ( 董事長 ) and these will be ignored
## Warning in createDataPartition(., p = 0.7, list = FALSE): Some classes have a
## single record ( 副主任, 顧客服務專員, 領牌專員, 引擎技師, 主任, 助理 ) and these
## will be selected for the sample
train.data  <- UL_narm[training.samples, ]
test.data <- UL_narm[-training.samples, ]
lm.fit = lm(seniority ~num_car_before+mean_price_18_20+num_car2018+num_car2019+num_car2020+totalprice_before+totalprice_2018+totalprice_2019+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh,data=train.data)
summary(lm.fit)
## 
## Call:
## lm(formula = seniority ~ num_car_before + mean_price_18_20 + 
##     num_car2018 + num_car2019 + num_car2020 + totalprice_before + 
##     totalprice_2018 + totalprice_2019 + lecturer_training + internet_marketing + 
##     performance + management + interview_skill + reward + newproduct + 
##     fresh, data = train.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5936  -1.4881  -1.0896   0.1737  30.2011 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.380e+00  1.333e-01  10.351  < 2e-16 ***
## num_car_before      3.206e-03  5.227e-03   0.613 0.539766    
## mean_price_18_20    3.118e-04  1.907e-03   0.164 0.870135    
## num_car2018         1.442e-01  3.991e-02   3.612 0.000311 ***
## num_car2019        -1.068e-01  5.364e-02  -1.992 0.046517 *  
## num_car2020        -2.693e-02  5.324e-02  -0.506 0.612993    
## totalprice_before   2.264e-04  7.149e-05   3.166 0.001566 ** 
## totalprice_2018    -1.667e-03  7.899e-04  -2.111 0.034914 *  
## totalprice_2019     1.271e-03  1.155e-03   1.101 0.271213    
## lecturer_training  -2.360e-01  6.224e-02  -3.792 0.000153 ***
## internet_marketing -1.157e-01  3.202e-01  -0.361 0.718014    
## performance         4.731e-01  4.903e-01   0.965 0.334746    
## management          2.587e-01  1.982e-02  13.050  < 2e-16 ***
## interview_skill    -2.236e-01  3.013e-01  -0.742 0.458124    
## reward             -5.941e-01  2.972e-01  -1.999 0.045754 *  
## newproduct          2.113e-01  1.461e-02  14.466  < 2e-16 ***
## fresh              -5.781e-02  1.048e-02  -5.519 3.82e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.538 on 2136 degrees of freedom
## Multiple R-squared:  0.5485, Adjusted R-squared:  0.5451 
## F-statistic: 162.2 on 16 and 2136 DF,  p-value: < 2.2e-16

#預測2018~2020平均業績

lm.fit = lm(mean_price_18_20 ~ position+num_car_before+totalprice_before+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh+seniority,data=train.data)
summary(lm.fit)
## 
## Call:
## lm(formula = mean_price_18_20 ~ position + num_car_before + totalprice_before + 
##     lecturer_training + internet_marketing + performance + management + 
##     interview_skill + reward + newproduct + fresh + seniority, 
##     data = train.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4211.2  -287.7   -41.9   134.0 15456.8 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -83.91700  575.94844  -0.146  0.88417    
## position副理          -435.99759  694.31852  -0.628  0.53010    
## position副主任        -191.61046 1131.59816  -0.169  0.86556    
## position副總經理       132.15959  894.05213   0.148  0.88250    
## position高級顧問       234.13054  620.37780   0.377  0.70591    
## position顧客服務代表   169.52868  912.38357   0.186  0.85261    
## position顧客服務專員    84.06798 1136.10918   0.074  0.94102    
## position管理課長      -295.07692  590.12834  -0.500  0.61711    
## position經理           201.42794  634.03527   0.318  0.75075    
## position課長            91.83932  699.83793   0.131  0.89561    
## position領牌專員      2840.73835 1134.31028   2.504  0.01234 *  
## position所長          -499.26550  593.53444  -0.841  0.40035    
## position銷售副理       810.31707  583.50060   1.389  0.16507    
## position銷售高級專員   372.28115  578.88717   0.643  0.52023    
## position銷售經理      1058.22650  584.40313   1.811  0.07032 .  
## position銷售課長       329.54626  580.82437   0.567  0.57052    
## position銷售主任      1799.89217  656.06996   2.743  0.00613 ** 
## position銷售專員       126.09203  576.08549   0.219  0.82677    
## position協理           653.45660  682.47101   0.957  0.33843    
## position行政會計      -299.89175  805.87085  -0.372  0.70983    
## position業務代表       181.59220  601.46394   0.302  0.76275    
## position業務助理        62.76859  619.66852   0.101  0.91933    
## position引擎技師       -36.27138 1133.40391  -0.032  0.97447    
## position職員           274.08439  806.65502   0.340  0.73406    
## position主任            93.50753 1134.43559   0.082  0.93432    
## position助理            26.32417 1131.04155   0.023  0.98143    
## position專員           473.64136  752.54438   0.629  0.52916    
## position組長           443.02594  800.27053   0.554  0.57991    
## positionNULL            76.92702  605.37448   0.127  0.89889    
## positionSA級服務專員  -123.83674  899.94739  -0.138  0.89057    
## positionSSA級服務專員  -30.47985  804.18836  -0.038  0.96977    
## positionTEST            95.83974  720.30701   0.133  0.89416    
## num_car_before          -9.06826    0.78550 -11.545  < 2e-16 ***
## totalprice_before        0.17511    0.01047  16.727  < 2e-16 ***
## lecturer_training       -6.03708   13.70531  -0.440  0.65963    
## internet_marketing     -15.16987   69.03347  -0.220  0.82609    
## performance            206.56177  106.04164   1.948  0.05156 .  
## management             -58.63034    4.96213 -11.816  < 2e-16 ***
## interview_skill        -82.78536   65.45381  -1.265  0.20609    
## reward                 340.62055   65.01272   5.239 1.77e-07 ***
## newproduct              64.49504    3.10448  20.775  < 2e-16 ***
## fresh                    4.44924    2.28298   1.949  0.05144 .  
## seniority               -2.62410    5.30576  -0.495  0.62095    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 979.3 on 2110 degrees of freedom
## Multiple R-squared:  0.5961, Adjusted R-squared:  0.5881 
## F-statistic: 74.16 on 42 and 2110 DF,  p-value: < 2.2e-16
# Make predictions
predictions <- lm.fit %>% predict(test.data)
# Model performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$mean_price_18_20)
## [1] 912.4191
# (b) R-square
R2(predictions, test.data$mean_price_18_20)
## [1] 0.6208327

與銷售額最有關的有(越多星號代表越顯著):
+ position: 銷售主任>>領牌專員 + totalprice_before,num_car_before + reward, newproduct,management

負相關: + num_car_before + management

(a) Prediction error, RMSE
908.6958 (b) R-square
0.6164879

plot(lm.fit)
## Warning: not plotting observations with leverage one:
##   102, 1808

## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs
## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs

lm.fit = lm(totalprice_2020 ~ num_car_before+totalprice_before+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh+seniority,data=train.data)
summary(lm.fit)
## 
## Call:
## lm(formula = totalprice_2020 ~ num_car_before + totalprice_before + 
##     lecturer_training + internet_marketing + performance + management + 
##     interview_skill + reward + newproduct + fresh + seniority, 
##     data = train.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5120.0  -461.8   -34.1    86.1 17031.0 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          34.89040   37.42495   0.932 0.351299    
## num_car_before       -5.24994    0.94477  -5.557 3.09e-08 ***
## totalprice_before     0.11212    0.01247   8.988  < 2e-16 ***
## lecturer_training   -43.66007   17.16166  -2.544 0.011027 *  
## internet_marketing   78.29300   88.10894   0.889 0.374322    
## performance         244.77600  134.90852   1.814 0.069758 .  
## management          -72.69246    5.15550 -14.100  < 2e-16 ***
## interview_skill    -164.73718   82.90166  -1.987 0.047034 *  
## reward              345.68807   81.10880   4.262 2.11e-05 ***
## newproduct           82.47605    3.64411  22.633  < 2e-16 ***
## fresh                11.02923    2.88995   3.816 0.000139 ***
## seniority            -6.96507    5.92404  -1.176 0.239834    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1250 on 2141 degrees of freedom
## Multiple R-squared:  0.4198, Adjusted R-squared:  0.4168 
## F-statistic: 140.8 on 11 and 2141 DF,  p-value: < 2.2e-16
# Make predictions
predictions <- lm.fit %>% predict(test.data)
# Model performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$mean_price_18_20)
## [1] 942.6994
# (b) R-square
R2(predictions, test.data$mean_price_18_20)
## [1] 0.5947843

與2020銷售額正相關的有(越多星號代表越顯著):
+ totalprice_before,reward,reward,fresh + totalprice_before,num_car_before + reward, newproduct,management

(a) Prediction error, RMSE
940.7542 (b) R-square
0.5901853

預測年資

lm.fit = lm(seniority ~ num_car_before+totalprice_before+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh,data=train.data)
summary(lm.fit)
## 
## Call:
## lm(formula = seniority ~ num_car_before + totalprice_before + 
##     lecturer_training + internet_marketing + performance + management + 
##     interview_skill + reward + newproduct + fresh, data = train.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.4618  -1.4644  -1.0934   0.1612  30.0652 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.4211406  0.1330016  10.685  < 2e-16 ***
## num_car_before      0.0067263  0.0034428   1.954 0.050862 .  
## totalprice_before   0.0001933  0.0000453   4.268 2.06e-05 ***
## lecturer_training  -0.2301006  0.0623961  -3.688 0.000232 ***
## internet_marketing -0.1509863  0.3213437  -0.470 0.638503    
## performance         0.4629422  0.4919510   0.941 0.346794    
## management          0.2489040  0.0180182  13.814  < 2e-16 ***
## interview_skill    -0.2085602  0.3023341  -0.690 0.490374    
## reward             -0.5694691  0.2955726  -1.927 0.054154 .  
## newproduct          0.2103619  0.0124899  16.843  < 2e-16 ***
## fresh              -0.0613739  0.0104568  -5.869 5.06e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.56 on 2142 degrees of freedom
## Multiple R-squared:  0.5429, Adjusted R-squared:  0.5408 
## F-statistic: 254.4 on 10 and 2142 DF,  p-value: < 2.2e-16
# Make predictions
predictions <- lm.fit %>% predict(test.data)
# Model performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$mean_price_18_20)
## [1] 1707.083
# (b) R-square
R2(predictions, test.data$mean_price_18_20)
## [1] 0.2644379