Gehad Gad

April 4th, 2020

Assignment Instruction

In Text Mining with R, https://www.tidytextmining.com/sentiment.html, Chapter 2 looks at Sentiment Analysis. In this assignment, you should start by getting the primary example code from chapter 2 working in an R Markdown document. You should provide a citation to this base code. You’re then asked to extend the code in two ways:

  1. Work with a different corpus of your choosing, and

  2. Incorporate at least one additional sentiment lexicon (possibly from another R package that you’ve found through research).

#Import libraries
library(tidytext)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(NLP)
library(tm)
library(SnowballC)
library(fastDummies)
library(dplyr)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  2.1.3     v purrr   0.3.3
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x plyr::arrange()     masks dplyr::arrange()
## x purrr::compact()    masks plyr::compact()
## x plyr::count()       masks dplyr::count()
## x plyr::failwith()    masks dplyr::failwith()
## x dplyr::filter()     masks stats::filter()
## x plyr::id()          masks dplyr::id()
## x dplyr::lag()        masks stats::lag()
## x plyr::mutate()      masks dplyr::mutate()
## x plyr::rename()      masks dplyr::rename()
## x plyr::summarise()   masks dplyr::summarise()
## x plyr::summarize()   masks dplyr::summarize()
# AFINN lexicon: one row per word with a numeric sentiment `value`.
get_sentiments("afinn")
## # A tibble: 2,477 x 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ... with 2,467 more rows
# Bing lexicon: one row per word with a categorical `sentiment` label.
get_sentiments("bing")
## # A tibble: 6,786 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # ... with 6,776 more rows
# NRC lexicon: a word can appear on several rows, one per `sentiment` category.
get_sentiments("nrc")
## # A tibble: 13,901 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # ... with 13,891 more rows
# Import the data.
# Data source: https://www.kaggle.com/nicapotato/womens-ecommerce-clothing-reviews#Womens%20Clothing%20E-Commerce%20Reviews.csv
# stringsAsFactors = FALSE keeps the text columns as character vectors on every
# R version (read.csv() turned strings into factors by default before R 4.0),
# so the review text reaches the corpus as plain strings.
WomensReviews <- read.csv(
  "WomensE-CommerceReviews.csv",
  stringsAsFactors = FALSE
)

# Preview the first rows to confirm the file was parsed as expected.
head(WomensReviews)
##   X Clothing.ID Age                   Title
## 1 0         767  33                        
## 2 1        1080  34                        
## 3 2        1077  60 Some major design flaws
## 4 3        1049  50        My favorite buy!
## 5 4         847  47        Flattering shirt
## 6 5        1080  49 Not for the very petite
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Review.Text
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                Absolutely wonderful - silky and sexy and comfortable
## 2                                                                                                                                                                                                      Love this dress!  it's sooo pretty.  i happened to find it in a store, and i'm glad i did bc i never would have ordered it online bc it's petite.  i bought a petite and am 5'8".  i love the length on me- hits just a little below the knee.  would definitely be a true midi on someone who is truly petite.
## 3 I had such high hopes for this dress and really wanted it to work for me. i initially ordered the petite small (my usual size) but i found this to be outrageously small. so small in fact that i could not zip it up! i reordered it in petite medium, which was just ok. overall, the top half was comfortable and fit nicely, but the bottom half had a very tight under layer and several somewhat cheap (net) over layers. imo, a major design flaw was the net over layer sewn directly into the zipper - it c
## 4                                                                                                                                                                                                                                                                                                                                                                                         I love, love, love this jumpsuit. it's fun, flirty, and fabulous! every time i wear it, i get nothing but great compliments!
## 5                                                                                                                                                                                                                                                                                                                     This shirt is very flattering to all due to the adjustable front tie. it is the perfect length to wear with leggings and it is sleeveless so it pairs well with any cardigan. love this shirt!!!
## 6             I love tracy reese dresses, but this one is not for the very petite. i am just under 5 feet tall and usually wear a 0p in this brand. this dress was very pretty out of the package but its a lot of dress. the skirt is long and very full so it overwhelmed my small frame. not a stranger to alterations, shortening and narrowing the skirt would take away from the embellishment of the garment. i love the color and the idea of the style but it just did not work on me. i returned this dress.
##   Rating Recommended.IND Positive.Feedback.Count  Division.Name Department.Name
## 1      4               1                       0      Initmates        Intimate
## 2      5               1                       4        General         Dresses
## 3      3               0                       0        General         Dresses
## 4      5               1                       0 General Petite         Bottoms
## 5      5               1                       6        General            Tops
## 6      2               0                       4        General         Dresses
##   Class.Name
## 1  Intimates
## 2    Dresses
## 3    Dresses
## 4      Pants
## 5    Blouses
## 6    Dresses
# Keep only the four columns used in the rest of the analysis; every other
# column of WomensReviews is dropped.
Data <- subset(
  WomensReviews,
  select = c(Age, Review.Text, Recommended.IND, Department.Name)
)

# Preview the reduced data frame.
head(Data)
##   Age
## 1  33
## 2  34
## 3  60
## 4  50
## 5  47
## 6  49
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Review.Text
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                Absolutely wonderful - silky and sexy and comfortable
## 2                                                                                                                                                                                                      Love this dress!  it's sooo pretty.  i happened to find it in a store, and i'm glad i did bc i never would have ordered it online bc it's petite.  i bought a petite and am 5'8".  i love the length on me- hits just a little below the knee.  would definitely be a true midi on someone who is truly petite.
## 3 I had such high hopes for this dress and really wanted it to work for me. i initially ordered the petite small (my usual size) but i found this to be outrageously small. so small in fact that i could not zip it up! i reordered it in petite medium, which was just ok. overall, the top half was comfortable and fit nicely, but the bottom half had a very tight under layer and several somewhat cheap (net) over layers. imo, a major design flaw was the net over layer sewn directly into the zipper - it c
## 4                                                                                                                                                                                                                                                                                                                                                                                         I love, love, love this jumpsuit. it's fun, flirty, and fabulous! every time i wear it, i get nothing but great compliments!
## 5                                                                                                                                                                                                                                                                                                                     This shirt is very flattering to all due to the adjustable front tie. it is the perfect length to wear with leggings and it is sleeveless so it pairs well with any cardigan. love this shirt!!!
## 6             I love tracy reese dresses, but this one is not for the very petite. i am just under 5 feet tall and usually wear a 0p in this brand. this dress was very pretty out of the package but its a lot of dress. the skirt is long and very full so it overwhelmed my small frame. not a stranger to alterations, shortening and narrowing the skirt would take away from the embellishment of the garment. i love the color and the idea of the style but it just did not work on me. i returned this dress.
##   Recommended.IND Department.Name
## 1               1        Intimate
## 2               1         Dresses
## 3               0         Dresses
## 4               1         Bottoms
## 5               1            Tops
## 6               0         Dresses
# Bar chart of the number of reviews per recommendation flag (0 / 1).
# Recommended.IND is a 0/1 indicator, so it is mapped as a factor for both x
# and fill; this gives discrete bar colours instead of a continuous gradient.
# guides(fill = "none") hides the redundant legend; passing FALSE is
# deprecated in current ggplot2 releases.
Data %>%
  ggplot(aes(x = factor(Recommended.IND), fill = factor(Recommended.IND))) +
    geom_bar(alpha = 0.8) +
    guides(fill = "none")

The graph above displays the distribution of positive (1) versus negative (0) reviews in the data.

# Bar chart of the number of reviews per department.
# guides(fill = "none") hides the legend, which would only repeat the x-axis
# labels; passing FALSE is deprecated in current ggplot2 releases.
Data %>%
  ggplot(aes(x = factor(Department.Name), fill = Department.Name)) +
    geom_bar(alpha = 0.8) +
    guides(fill = "none")

The graph above shows the count of purchases in each department. We can see that Tops has the highest count.

# Histogram of the Age column, using base graphics with default settings.
hist(Data$Age)

The graph above shows the frequency of ages among purchasers. Ages between 30 and 40 have the highest frequency.

# Build a tm corpus with one document per entry of Review.Text.
corpus <- Corpus(VectorSource(Data$Review.Text))
# Show the content of the first document.
corpus[[1]][1]
## $content
## [1] "Absolutely wonderful - silky and sexy and comfortable"
# Recommendation flag of the first review. The column is named
# `Recommended.IND` (with a dot); `Recommended_IND` does not exist, and `$`
# silently returns NULL for an unknown column instead of raising an error.
Data$Recommended.IND[1]
## [1] 1
# Convert the documents to plain text, then lower-case every review.
# NOTE(review): both calls emit "transformation drops documents" warnings in
# the recorded run; confirm the corpus still holds one document per review.
corpus <- tm_map(corpus, PlainTextDocument)
## Warning in tm_map.SimpleCorpus(corpus, PlainTextDocument): transformation drops
## documents
corpus <- tm_map(corpus, tolower)
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops documents
# Check the first document after lower-casing.
corpus[[1]][1]
## $content
## [1] "absolutely wonderful - silky and sexy and comfortable"
# Remove stopwords BEFORE stripping punctuation. Stopwords are words that do
# not carry a useful meaning on their own, such as: (and, then, they, or, etc.).
# Many entries of stopwords("english") are contractions that contain an
# apostrophe (e.g. "don't", "doesn't", "i've"). If punctuation is removed first
# they turn into "dont" / "doesnt" / "ive", no longer match the stopword list,
# and leak into the term matrix as features.
corpus <- tm_map(corpus, removeWords, c("cloth", stopwords("english")))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, c("cloth",
## stopwords("english"))): transformation drops documents
corpus[[1]][1]
## $content
## [1] "absolutely wonderful - silky  sexy  comfortable"
# Remove any punctuation marks.
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
corpus[[1]][1]
## $content
## [1] "absolutely wonderful  silky  sexy  comfortable"
# Word stemming: reduce each word to its root form.

corpus <- tm_map(corpus, stemDocument)
## Warning in tm_map.SimpleCorpus(corpus, stemDocument): transformation drops
## documents
# Check the first document after stemming.
corpus[[1]][1]
## $content
## [1] "absolut wonder silki sexi comfort"
# Build the document-term matrix from the cleaned corpus.
frequencies <- DocumentTermMatrix(corpus)
# Drop the sparsest terms (sparsity threshold 0.995) to keep the matrix small.
sparse <- removeSparseTerms(frequencies, 0.995)
tSparse <- as.data.frame(as.matrix(sparse))
# Turn the terms into syntactically valid column names
# (e.g. "125" becomes "X125" and the reserved word "next" becomes "next.").
colnames(tSparse) <- make.names(colnames(tSparse))
# The columns below are attached by position, so the term matrix must still
# have exactly one row per review. Fail loudly if the corpus transformations
# dropped or merged documents instead of silently recycling values.
stopifnot(nrow(tSparse) == nrow(Data))
tSparse$recommended_id <- Data$Recommended.IND
tSparse$Age <- Data$Age
tSparse$Department.Name <- Data$Department.Name
# Class balance: always predicting "recommended" (1) gives a baseline accuracy
# of about 82.2%.
prop.table(table(tSparse$recommended_id))
## 
##         0         1 
## 0.1776377 0.8223623

The proportion of positive to negative reviews is 0.8223623 to 0.1776377 (about 82.2% versus 17.8%).

# Change column Department.Name to dummy variables (one indicator column per
# department). The column is named explicitly so no other column can be
# dummy-coded by accident.
FinalData <- fastDummies::dummy_cols(tSparse, select_columns = "Department.Name")

# Remove the original Department.Name column after the step above.
FinalData <- subset(FinalData, select = -c(Department.Name))

# Get sentiment analysis: sum every remaining column within each
# recommendation group (0 / 1), giving per-group totals for each term.
# summarise_all(sum) replaces summarise_each(funs(sum)); both summarise_each()
# and funs() are deprecated in dplyr. The dplyr:: prefixes make the intended
# package explicit, since plyr is attached after dplyr in this script.
word_sentiment <- FinalData %>%
  dplyr::group_by(recommended_id) %>%
  dplyr::summarise_all(sum)
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
# Transpose the summary so each term is a row and each recommendation group
# (0 / 1) is a column, then print it.
word_sentiment <- t(word_sentiment)
print(word_sentiment)
##                            [,1]   [,2]
## recommended_id                0      1
## absolut                     103    740
## comfort                     193   3084
## sexi                         11    207
## silki                        16    109
## wonder                       56    423
## bought                      302   2679
## definit                     179   1225
## dress                      2117   9944
## find                        130    955
## glad                         23    594
## happen                       55    102
## hit                         143    763
## just                       1243   4329
## knee                         87    575
## length                      326   2303
## littl                       363   3410
## love                       1472   9879
## never                        79    341
## onlin                       377   1272
## order                       991   3992
## petit                       340   2098
## pretti                      428   1758
## someon                      155    252
## store                       300   1744
## true                        149   1515
## truli                        32    138
## bottom                      310   1032
## cheap                       234     98
## design                      451   1313
## fact                         41    176
## fit                        1674   9636
## flaw                         35    103
## found                       174    772
## half                         76    130
## high                        253   1197
## hope                        242    618
## layer                       107    688
## medium                      279   1948
## nice                        478   3322
## overal                      164    667
## realli                      868   3053
## sever                        67    243
## sewn                         46    102
## size                       1573   9024
## small                       882   3682
## somewhat                     38    152
## tight                       341   1380
## top                        1693   6568
## usual                       437   2449
## want                        615   1484
## work                        599   2259
## zip                          52    197
## zipper                      126    266
## compliment                   21   1047
## everi                        80    527
## fabul                         9    161
## fun                          35    672
## get                         405   1915
## great                       532   5552
## jumpsuit                     40    220
## noth                        116    179
## time                        248   1133
## wear                       1023   7024
## adjust                       22    159
## cardigan                     41    451
## due                          65    275
## flatter                     490   3166
## front                       319    988
## leg                         176   1252
## pair                        142   1271
## perfect                     276   4948
## shirt                       685   2108
## tie                          81    442
## well                        431   2806
## alter                        26    123
## away                         78    252
## brand                        83    343
## color                      1047   6144
## frame                       107    337
## full                         97    512
## idea                         92     92
## long                        333   2059
## lot                         163    946
## narrow                       49    135
## one                         653   3041
## overwhelm                    28    105
## packag                       57     72
## return                      898    850
## skirt                       335   1887
## style                       340   1611
## take                        130    529
## tall                        128    463
## baggi                        71    232
## decid                        93    572
## everyth                      82    483
## gorgeous                    130    911
## kept                         70    223
## last                         71    313
## like                       1914   5803
## look                       2393   6883
## pant                        307   1747
## person                      256   1130
## pick                         16    173
## said                        115    484
## see                         362   1324
## though                      233   1063
## tri                         804   2819
## turn                         43    115
## went                        155    816
## alreadi                      82    293
## also                        637   1938
## alway                        54    533
## big                         417   1415
## bit                         309   2565
## doesnt                      203    795
## flare                        82    334
## light                       107    982
## loos                        213   1002
## sinc                        147    737
## sleev                       373   1638
## sold                         35    344
## ton                          18    167
## use                         129    460
## xxs                          62    324
## bust                        262    804
## feminin                      18    470
## retail                      431   1726
## run                         387   2390
## snug                         52    473
## X125                         26    143
## X34b                         20    169
## busti                        48    211
## cup                          66    122
## fabric                     1261   3591
## lbs                         191   1082
## less                         62    222
## longer                      118    702
## make                        429   1915
## parti                        12    167
## regular                     132    995
## smaller                     149    486
## sure                        175    773
## typic                        75    522
## underneath                   93    646
## wasnt                       198    521
## will                        500   3155
## your                         63    317
## area                        192    546
## back                       1023   2157
## disappoint                  516    357
## even                        609   1682
## feel                        346   1902
## materi                      746   2070
## pull                        176    429
## say                         159    793
## year                         73    596
## beauti                      498   2966
## case                         32    161
## fall                        134   1281
## line                        277   1069
## made                        510   1316
## match                        34    188
## old                          50    103
## past                         54    108
## print                       167    840
## product                      83    126
## qualiti                     551   1699
## review                      476   1689
## right                       310   1404
## solid                        17    119
## tts                          45    535
## around                      250    867
## black                       174   1537
## bother                       28     93
## came                        130    386
## chest                       308    872
## didnt                       573   1209
## easili                       70    460
## flowi                        82    549
## lower                        54    194
## portion                      50     96
## strap                        57    344
## delic                        55    249
## enough                      124   1306
## got                         300   2111
## holiday                      13    142
## larger                      105    447
## offic                        13    161
## opinion                      30    139
## perhap                       57    123
## problem                     164    432
## terribl                      83     49
## type                        100    343
## waist                       492   2017
## bad                         220    214
## bodi                        277    903
## figur                       126    489
## happi                        33    671
## hip                         230    825
## hourglass                    50    164
## put                         186    479
## roomi                        27    264
## shape                       327    905
## still                       279   1471
## straight                     72    213
## tell                         44    163
## took                         82    374
## way                         646   1328
## wouldnt                     105    224
## X100                         32    113
## ankl                         30    313
## averag                       30     89
## can                         243   2893
## good                        401   1564
## height                       22    122
## hem                          86    524
## imagin                       74    128
## larg                        534   2257
## least                       105    220
## may                         124    681
## open                        126    385
## pleat                        59    190
## taller                       53    121
## think                       379   2031
## blous                       189    823
## cant                        162    782
## chanc                        25    116
## clean                        49    143
## drape                        81    573
## model                       428    988
## tuck                         38    232
## white                       209   1236
## wrong                        82    132
## X135                         24    163
## coat                        107    414
## cold                         53    191
## cozi                         27    318
## day                         127    988
## dri                         100    282
## jean                        182   2411
## outfit                       19    242
## super                       203   1485
## differ                      149    839
## form                         34    163
## new                          41    346
## someth                      161    506
## fix                          32    103
## pregnant                     48     90
## simpl                        16    271
## tailor                       40    202
## although                     81    340
## except                       68    245
## keep                        186   1026
## price                       329    984
## tad                          36    287
## thought                     360    745
## tini                         41    217
## uniqu                        54    711
## vibrant                      46    262
## wish                        132    819
## awkward                     110     98
## exchang                      40    273
## first                       281    924
## knew                         45    182
## next.                        33    170
## purchas                     257   2078
## sad                         292    229
## second                       84    179
## side                        278   1000
## slip                        109    423
## appear                      127    259
## cute                        531   2493
## dont                        329   1474
## pictur                      453   1222
## button                      229    897
## care                         45    215
## havent                       14    166
## need                        231   1277
## occas                         5    164
## saw                         153    902
## seethrough                   45    127
## vest                         33    322
## wedg                          3    120
## wore                        138    792
## yet                          44    539
## better                      278    953
## fell                         68    287
## felt                        283    482
## flat                         94    379
## much                        720   1982
## wider                        40    100
## arriv                       165    278
## excit                       233    204
## inch                        107    396
## move                         43    132
## short                       501   1870
## sweater                     493   2196
## three                        42    120
## wait                         70    637
## week                         39    183
## weight                       90    788
## wide                        252    465
## anyth                        76    326
## blue                        162   1071
## couldnt                     170    313
## dark                         51    390
## either                       94    307
## end                         144    527
## expect                      260    613
## thick                       114    461
## buy                         141   1021
## especi                      109    411
## hesit                         9    171
## mani                         89   1001
## wont                         37    227
## boxi                        239    459
## camisol                      20    111
## classic                      11    299
## comfi                        50    797
## crop                         57    316
## jacket                      120   1164
## knit                         86    418
## mention                      71    258
## other                        64    245
## part                        152    463
## show                        177    725
## thin                        344    676
## warm                         54    631
## belli                        28    124
## blazer                       28    162
## carri                        17    108
## given                        33     89
## navi                         34    500
## stripe                       67    309
## ago                          19    119
## call                         24     99
## els                          65    162
## final                        40    194
## guess                        45    105
## huge                        298    239
## issu                        104    435
## itchi                        91    188
## month                        56    227
## sale                        187   1174
## wool                         50    174
## worth                       133    491
## boot                         15    595
## necklac                      11    155
## soft                        306   3069
## tank                         85    625
## season                       19    358
## X130                         33    131
## amaz                         39    514
## gray                         34    301
## ill                          86    363
## photo                       295    760
## skinni                       47    559
## snap                         19    120
## winter                       31    444
## stretch                     133    774
## casual                       52   1020
## cut                         438   1390
## grey                         52    432
## necklin                     108    439
## peplum                       43    148
## pilcro                       42    277
## shoulder                    303   1037
## embroideri                   52    386
## heel                          8    394
## reason                       61    197
## worn                         80   1046
## across                       94    245
## babi                         19    156
## cream                        22    181
## roll                         25    181
## spring                       19    452
## along                        43    126
## consid                       63    232
## dot                          22    114
## hous                         28    104
## might                       153    605
## mind                         41    184
## natur                        29    141
## overs                        62    236
## pound                        72    229
## sheer                       136    511
## walk                         27    172
## general                      36    161
## local                        38    296
## probabl                     105    597
## tee                          69    646
## today                        56    349
## item                        123    328
## mine                         36    221
## seem                        302    769
## recommend                   113    989
## start                        74    123
## pop                          21    132
## ador                         92    600
## collar                       46    199
## rather                       89    231
## receiv                      215    799
## best                         54    455
## summer                      108   1366
## transit                      17    217
## wash                        342    841
## X140                         33    143
## favorit                      43    532
## snag                         65    162
## actual                      137    619
## add                          33    557
## creat                        33    109
## detail                      152   1404
## done                         27    116
## help                         71    258
## hide                         26    242
## interest                     44    284
## ive                         130    786
## mean                         50    136
## neck                        145    456
## seam                        141    225
## two                         198    569
## wardrob                      19    330
## armhol                       63    175
## bare                         70    124
## bra                         157    768
## hang                        139    449
## lightweight                  44    469
## now                         123    583
## slight                       94    864
## that                         93    372
## there                        63    172
## extrem                      126    323
## howev                       483    998
## ladi                         36    146
## pass                         31    144
## seen                         31    127
## without                     108    890
## lay                          97    178
## ruffl                        53    175
## belt                         77    435
## elast                        55    200
## plenti                       10    126
## sit                          51    207
## sort                         50    118
## waistband                    38    122
## night                        19    285
## tag                          68    113
## simpli                       31    101
## stun                         22    269
## give                        127    821
## left                         39    151
## thread                       72    130
## pattern                     199    913
## recent                       19    144
## torso                        99    346
## floral                       18    157
## arent                        18    123
## almost                      140    538
## extra                       126    573
## gotten                       29    182
## lace                        116    601
## mail                         64    141
## similar                      46    180
## sometim                      23    259
## stretchi                     46    325
## tshirt                       42    282
## eye                          44    262
## difficult                    31    110
## higher                       39    204
## must                         57    249
## unfortun                    357    298
## pocket                      101    610
## stand                        38    131
## stay                         39    198
## strang                      105     82
## weird                       123    133
## hung                         88     84
## swing                        56    288
## anyon                        58    108
## bigger                       53    213
## cami                         63    389
## X34c                         30    139
## anoth                       138    617
## goe                          72    373
## uncomfort                    53     84
## believ                       40    108
## fan                          41    138
## surpris                      55    481
## comment                      22    117
## descript                     41     93
## immedi                       63    206
## let                          29    122
## read                         93    303
## whole                        48    110
## come                        117    598
## piec                        141   1075
## arm                         397   1061
## fine                        124    512
## normal                      209   1144
## origin                       23    133
## plan                         33    362
## eleg                          8    234
## grab                          9    128
## stylish                      14    301
## appropri                     27    172
## brown                        22    174
## bulki                        73    135
## heavi                       115    383
## option                       26    171
## orang                        46    268
## rest                         51    101
## touch                        34    319
## concern                      16    140
## hole                        179    301
## slit                         39    202
## deep                         29    176
## looser                       16    160
## neutral                      14    209
## room                         56    445
## spot                         44    194
## X115                         22    222
## instead                      65    204
## tunic                        67    413
## curvi                        87    332
## linen                        13    188
## quit                        159    649
## life                         33     89
## otherwis                     64    290
## prettier                      7    121
## real                         52    121
## textur                       43    267
## caught                       16    126
## far                          49    218
## possibl                      36    106
## prefer                       30    293
## versatil                     20    583
## fantast                      14    150
## green                        74    654
## pleas                        32    315
## state                        39    116
## tone                         13    128
## daughter                     18    118
## tent                         82     80
## red                         100    620
## easi                         36    687
## exact                        51    325
## home                         45    155
## near                         45    118
## upper                        59    144
## forward                      35    151
## despit                       36     87
## construct                    34    120
## refer                       114    475
## unflatt                     226    107
## darker                       32    138
## gave                         39    171
## star                         23    203
## cuff                         21    133
## insid                        38    175
## ivori                        32    206
## tend                         17    147
## hot                          26    300
## weather                      17    363
## poor                        132     29
## plus                         62    237
## X120                         38    171
## broad                        27    187
## total                        65    209
## hour                         36     95
## cloth                        90    360
## send                         93    120
## stiff                       102    134
## X34d                         27    112
## subtl                         8    189
## closet                       14    192
## wrinkl                       58    246
## note                         41    183
## thing                       151    725
## previous                     66    214
## girl                         55    218
## place                        95    290
## unless                       57     98
## odd                         149    137
## cover                        60    371
## shrink                       22    122
## yes                          20    163
## basic                        54    245
## point                        66    169
## tummi                        13    145
## wow                          22    105
## close                        79    230
## hard                         76    310
## cotton                       70    393
## mayb                        196    356
## thicker                      32    158
## denim                        55    505
## silhouett                    21    117
## pink                         59    439
## scratchi                     63     88
## kind                        108    347
## abl                          58    365
## compar                       28    101
## justic                        6    186
## pic                          20    137
## rich                         17    191
## shown                        92    301
## chang                        18    117
## stitch                       57    190
## know                        104    412
## low                         118    460
## agre                         73    310
## skin                         53    279
## suit                        113    307
## base                         69    242
## dinner                        8    137
## ran                          96    217
## leather                      11    141
## combin                       25    132
## anyway                       30    173
## realiz                       41    110
## notic                        78    245
## bright                       45    246
## alin                         25    109
## coverag                      18    131
## maev                         41    137
## band                         39    153
## romper                       36    163
## friend                       10    127
## weekend                       7    185
## thank                        16    186
## sweatshirt                   33    136
## curv                         41    225
## depend                       15    213
## middl                        57    114
## structur                     40    181
## flower                       27    162
## pay                          35    120
## shorter                     134    472
## cooler                        1    123
## live                         30    170
## heavier                      39    127
## gone                         32    136
## worri                        21    303
## edg                          32    121
## complaint                    12    212
## special                      25    229
## peopl                        49    180
## suppos                       38    111
## swim                         54    101
## allow                        11    126
## X510                         32    131
## amount                       33    182
## throw                        11    212
## isnt                        129    383
## shade                        17    136
## women                        32    125
## dressi                       17    202
## coupl                        29    181
## yellow                       46    224
## bodic                        32    143
## butt                         24    129
## stick                        43     88
## forgiv                       11    128
## keeper                        6    126
## slim                         63    457
## sandal                        7    241
## hand                         62    161
## event                         8    143
## airi                         13    124
## thigh                        76    203
## addit                        52    225
## athlet                       39    180
## lighter                      25    121
## flow                         62    439
## particular                   43    113
## rise                         19    143
## bead                         33    152
## bum                          42     96
## everywher                    51    125
## hold                         56    256
## rib                          35    102
## theyr                        45    263
## gold                         15    145
## substanti                    25    191
## bunch                        52     90
## stock                        24    110
## head                         40     87
## incred                       51    219
## hug                          17    164
## leav                         43    108
## maxi                         26    115
## wed                          32    256
## ever                         48    271
## miss                         62     90
## stapl                        13    245
## late                         25    104
## yesterday                    17    104
## soon                         35    163
## version                      45    249
## fair                         48    182
## nude                         15    168
## awesom                       18    201
## slender                      31    109
## cool                         45    337
## motif                        22    149
## chic                          9    149
## avail                        35    251
## matern                       93     92
## wrap                         26    110
## clingi                       24    123
## build                        38    148
## excel                        11    141
## attract                      30    114
## mid                          20    111
## bag                          59     90
## complet                     115    170
## booti                        10    274
## husband                      33    127
## classi                        6    166
## offer                        18    108
## stomach                      32     97
## often                        16    212
## relax                        18    147
## deal                         29    140
## mix                          17    123
## togeth                       27    142
## meant                        19    137
## glove                         8    134
## beach                        17    123
## shop                         17    112
## everyon                      14    119
## becom                        14    117
## expens                       27    111
## frumpi                       53     91
## money                        62     71
## suggest                      29    124
## requir                       21    121
## silk                         30    153
## purpl                        21    146
## swingi                       19    101
## Age                      176849 837712
## Department.Name_              0     14
## Department.Name_Bottoms     565   3234
## Department.Name_Dresses    1212   5107
## Department.Name_Intimate    260   1475
## Department.Name_Jackets     169    863
## Department.Name_Tops       1935   8533
## Department.Name_Trend        31     88
# Partition FinalData into training and test rows, then separate the
# predictor columns from the target column (recommended_id) in each part.
library(caTools)

# Fix the RNG seed so the same rows land in each partition on every run.
set.seed(100)

# Logical vector with one entry per row: TRUE rows go to the training set
# (SplitRatio = 0.7), FALSE rows go to the test set.
split <- sample.split(FinalData$recommended_id, SplitRatio = 0.7)

trainSparse <- subset(FinalData, split)
testSparse <- subset(FinalData, !split)

# Predictors: every column except the target.
x_train <- subset(trainSparse, select = -recommended_id)
x_test <- subset(testSparse, select = -recommended_id)

# Target: only recommended_id, selected through subset() so the result keeps
# the container type of FinalData instead of collapsing to a bare vector
# (assumes FinalData is a data frame -- confirm upstream).
y_train <- subset(trainSparse, select = recommended_id)
y_test <- subset(testSparse, select = recommended_id)