# Analizaremos el dataset "Women's E-Commerce Clothing Reviews and Ratings".
# Este dataset contiene más de 23,000 reviews online de ropa de mujer
# de varios retailers. Como se menciona en la sección "Overview" de Kaggle.com,
# el dataset contiene las siguientes variables.
# URL dataset de Kaggle: https://www.kaggle.com/nicapotato/womens-ecommerce-clothing-reviews
# Variables:
# Clothing ID
# Age (of the reviewer)
# Title (of review)
# Review
# Rating (out of 5-stars)
# Recommendation index (i.e. whether customer would recommend this product to others: yes= 1/no = 0 )
# Positive Feedback Count (the number of readers who found the review useful)
# Division name (e.g. General Petite, Intimates)
# Department name (e.g. Jackets, Tops, Bottoms)
# Class name (e.g. Blouses, Casual bottoms, Skirts...)
# Comenzamos cargando los datos en R
# Read the reviews CSV directly from its GitHub mirror into `clothes`
# and print the column types to check that the import looks sane.
url <- "https://raw.githubusercontent.com/msanchez50/Train/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
clothes <- read.csv(file = url)
str(clothes)
## 'data.frame': 23486 obs. of 11 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Clothing.ID : int 767 1080 1077 1049 847 1080 858 858 1077 1077 ...
## $ Age : int 33 34 60 50 47 49 39 39 24 34 ...
## $ Title : Factor w/ 13994 levels "","\"beach business\"",..: 1 1 11451 8055 4365 8769 1973 10671 4299 11765 ...
## $ Review.Text : Factor w/ 22635 levels "","- this really is lovely. the overall design from the arms, front, and back makes this poncho unique. it's not t"| __truncated__,..: 247 13179 5545 8025 20324 7987 3330 8850 7378 2671 ...
## $ Rating : int 4 5 3 5 5 2 5 4 5 5 ...
## $ Recommended.IND : int 1 1 0 1 1 0 1 1 1 1 ...
## $ Positive.Feedback.Count: int 0 4 0 0 6 4 1 4 0 0 ...
## $ Division.Name : Factor w/ 4 levels "","General","General Petite",..: 4 2 2 3 2 2 3 3 2 2 ...
## $ Department.Name : Factor w/ 7 levels "","Bottoms","Dresses",..: 4 3 3 2 6 3 6 6 3 3 ...
## $ Class.Name : Factor w/ 21 levels "","Blouses","Casual bottoms",..: 7 5 5 15 2 5 10 10 5 5 ...
# Remove the exported row-index column `X`, then replace the ten remaining
# column names (in file order) with shorter ones and re-check the structure.
clothes[["X"]] <- NULL
names(clothes) <- c(
  "ID", "Age", "Title", "Review", "Rating",
  "Recommend", "Liked", "Division", "Dept", "Class"
)
str(clothes)
## 'data.frame': 23486 obs. of 10 variables:
## $ ID : int 767 1080 1077 1049 847 1080 858 858 1077 1077 ...
## $ Age : int 33 34 60 50 47 49 39 39 24 34 ...
## $ Title : Factor w/ 13994 levels "","\"beach business\"",..: 1 1 11451 8055 4365 8769 1973 10671 4299 11765 ...
## $ Review : Factor w/ 22635 levels "","- this really is lovely. the overall design from the arms, front, and back makes this poncho unique. it's not t"| __truncated__,..: 247 13179 5545 8025 20324 7987 3330 8850 7378 2671 ...
## $ Rating : int 4 5 3 5 5 2 5 4 5 5 ...
## $ Recommend: int 1 1 0 1 1 0 1 1 1 1 ...
## $ Liked : int 0 4 0 0 6 4 1 4 0 0 ...
## $ Division : Factor w/ 4 levels "","General","General Petite",..: 4 2 2 3 2 2 3 3 2 2 ...
## $ Dept : Factor w/ 7 levels "","Bottoms","Dresses",..: 4 3 3 2 6 3 6 6 3 3 ...
## $ Class : Factor w/ 21 levels "","Blouses","Casual bottoms",..: 7 5 5 15 2 5 10 10 5 5 ...
# Per-column overview of the renamed data frame: quantiles for the numeric
# columns and the most frequent levels for the factor columns, which also
# exposes how many blank titles, reviews and category labels there are.
summary(clothes)
## ID Age Title
## Min. : 0.0 Min. :18.0 : 3810
## 1st Qu.: 861.0 1st Qu.:34.0 Love it! : 136
## Median : 936.0 Median :41.0 Beautiful : 95
## Mean : 918.1 Mean :43.2 Love : 88
## 3rd Qu.:1078.0 3rd Qu.:52.0 Love! : 84
## Max. :1205.0 Max. :99.0 Beautiful!: 72
## (Other) :19201
## Review
## : 845
## Perfect fit and i've gotten so many compliments. i buy all my suits from here now! : 3
## I bought this shirt at the store and after going home and trying it on, i promptly went online and ordered two more! i've gotten multiple compliments anytime i wear any of them. great for looking put together with no fuss. \npeople that have commented there's were destroyed in the wash didn't read the care label which says dry clean. : 2
## I purchased this and another eva franco dress during retailer's recent 20% off sale. i was looking for dresses that were work appropriate, but that would also transition well to happy hour or date night. they both seemed to be just what i was looking for. i ordered a 4 regular and a 6 regular, as i am usually in between sizes. the 4 was definitely too small. the 6 fit, technically, but was very ill fitting. not only is the dress itself short, but it is very short-waisted. i am only 5'3", but it fe: 2
## Lightweight, soft cotton top and shorts. i think it's meant to be a beach cover-up but i'm wearing it as a thin, light-weight summer outfit on these hot hot days. the top has a loose elastic around the bottom which i didn't realize when i ordered it, but i like it and it matches the look in the photos. and the shorts are very low-cut - don't expect them up around your waist. again, i like that. some might want to wear a cami underneath because it's a thin cotton but i'm fine as-is. i bought it i : 2
## Love, love these jeans. being short they come right to my ankle. super soft and don?t require any hemming. i ordered my typical jean size of 26 and they fit like a glove. would love to have these in black and grey. : 2
## (Other) :22630
## Rating Recommend Liked Division
## Min. :1.000 Min. :0.0000 Min. : 0.000 : 14
## 1st Qu.:4.000 1st Qu.:1.0000 1st Qu.: 0.000 General :13850
## Median :5.000 Median :1.0000 Median : 1.000 General Petite: 8120
## Mean :4.196 Mean :0.8224 Mean : 2.536 Initmates : 1502
## 3rd Qu.:5.000 3rd Qu.:1.0000 3rd Qu.: 3.000
## Max. :5.000 Max. :1.0000 Max. :122.000
##
## Dept Class
## : 14 Dresses :6319
## Bottoms : 3799 Knits :4843
## Dresses : 6319 Blouses :3097
## Intimate: 1735 Sweaters:1428
## Jackets : 1032 Pants :1388
## Tops :10468 Jeans :1147
## Trend : 119 (Other) :5264
library(gtrendsR)
# Query Google Trends for four clothing-related search terms; every other
# gtrends() argument is left at its default. The call needs network access.
clothes_trend <- gtrends(
  keyword = c("dresses", "bottoms", "blouses", "casual bottoms")
)
# Default plot method of the returned "gtrends" object.
plot(clothes_trend)

# Inspect the list of data frames returned by the query.
str(clothes_trend)
## List of 7
## $ interest_over_time :'data.frame': 1040 obs. of 6 variables:
## ..$ date : POSIXct[1:1040], format: "2013-12-01" ...
## ..$ hits : chr [1:1040] "67" "65" "57" "59" ...
## ..$ keyword : chr [1:1040] "dresses" "dresses" "dresses" "dresses" ...
## ..$ geo : chr [1:1040] "world" "world" "world" "world" ...
## ..$ gprop : chr [1:1040] "web" "web" "web" "web" ...
## ..$ category: int [1:1040] 0 0 0 0 0 0 0 0 0 0 ...
## $ interest_by_country:'data.frame': 1000 obs. of 5 variables:
## ..$ location: chr [1:1000] "Guyana" "Antigua & Barbuda" "Anguilla" "Botswana" ...
## ..$ hits : chr [1:1000] "" "" "" "" ...
## ..$ keyword : chr [1:1000] "dresses" "dresses" "dresses" "dresses" ...
## ..$ geo : chr [1:1000] "world" "world" "world" "world" ...
## ..$ gprop : chr [1:1000] "web" "web" "web" "web" ...
## $ interest_by_region : NULL
## $ interest_by_dma :'data.frame': 1224 obs. of 5 variables:
## ..$ location: chr [1:1224] "Greenwood-Greenville MS" "Monroe LA-El Dorado AR" "Lafayette LA" "Albany GA" ...
## ..$ hits : chr [1:1224] "100" "81" "79" "79" ...
## ..$ keyword : chr [1:1224] "dresses" "dresses" "dresses" "dresses" ...
## ..$ geo : chr [1:1224] "world" "world" "world" "world" ...
## ..$ gprop : chr [1:1224] "web" "web" "web" "web" ...
## $ interest_by_city :'data.frame': 208 obs. of 5 variables:
## ..$ location: chr [1:208] "Faisalabad" "Rawalpindi" "Belfast" "Lahore" ...
## ..$ hits : int [1:208] NA NA NA 100 97 NA NA 90 88 NA ...
## ..$ keyword : chr [1:208] "dresses" "dresses" "dresses" "dresses" ...
## ..$ geo : chr [1:208] "world" "world" "world" "world" ...
## ..$ gprop : chr [1:208] "web" "web" "web" "web" ...
## $ related_topics : NULL
## $ related_queries :'data.frame': 150 obs. of 5 variables:
## ..$ subject : chr [1:150] "100" "99" "58" "33" ...
## ..$ related_queries: chr [1:150] "top" "top" "top" "top" ...
## ..$ value : chr [1:150] "wedding dresses" "dress" "prom dresses" "bridesmaid dresses" ...
## ..$ keyword : chr [1:150] "dresses" "dresses" "dresses" "dresses" ...
## ..$ category : int [1:150] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "reshapeLong")=List of 4
## .. ..$ varying:List of 1
## .. .. ..$ value: chr "top"
## .. .. ..- attr(*, "v.names")= chr "value"
## .. .. ..- attr(*, "times")= chr "top"
## .. ..$ v.names: chr "value"
## .. ..$ idvar : chr "id"
## .. ..$ timevar: chr "related_queries"
## - attr(*, "class")= chr [1:2] "gtrends" "list"
library(ggplot2)
# Boxplot of search interest per keyword across countries. `hits` is stored
# as text in this table, so it is converted to numeric inside the mapping;
# values that cannot be parsed become NA and are dropped by ggplot2.
sp1 <- ggplot(data = clothes_trend$interest_by_country)
sp1 + geom_boxplot(mapping = aes(x = keyword, y = as.numeric(hits)))
## Warning in FUN(X[[i]], ...): NAs introducidos por coerción
## Warning in FUN(X[[i]], ...): NAs introducidos por coerción
## Warning: Removed 888 rows containing non-finite values (stat_boxplot).

# Boxplot of search interest per keyword across US designated market areas.
# `hits` in interest_by_dma is a character column, so it must be converted
# to numeric before plotting; mapping the raw strings would give the boxplot
# a discrete y axis and meaningless statistics. Values that cannot be parsed
# become NA and are dropped by ggplot2.
sp2 <- ggplot(clothes_trend$interest_by_dma)
sp2 + geom_boxplot(aes(keyword, as.numeric(hits)))

# Boxplot of search interest per city, keeping only rows that have a hit
# value. City names are rotated 90 degrees so the x-axis labels stay legible.
sp3 <- ggplot(
  data = subset(clothes_trend$interest_by_city, subset = !is.na(hits))
)
sp3 +
  geom_boxplot(mapping = aes(x = location, y = hits)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

#Library:
library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
## v tibble 1.4.2 v purrr 0.2.5
## v tidyr 0.8.2 v dplyr 0.7.8
## v readr 1.2.1 v stringr 1.3.1
## v tibble 1.4.2 v forcats 0.3.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidytext)
library(dplyr)
library(purrr)
library(stringr)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggplot2)
library(wordcloud2)
library(ggraph)
library(topicmodels)
#