# proyectoMarcos.R

# Analizaremos el dataset: "Women's E-Commerce Clothing Reviews and Ratings".
# Este dataset contiene más de 23,000 reviews online de ropa de mujeres
# de varios retailers. Como se menciona en la sección "Overview" de Kaggle.com,
# el dataset contiene las siguientes variables:
# URL dataset de Kaggle: https://www.kaggle.com/nicapotato/womens-ecommerce-clothing-reviews

# Variables:
# Clothing ID
# Age (of the reviewer)
# Title (of review)
# Review
# Rating (out of 5-stars)
# Recommendation index (i.e. whether customer would recommend this product to others: yes= 1/no = 0 )
# Positive Feedback Count (the number of readers who found the review useful)
# Division name (e.g. General Petite, Intimates)
# Department name (e.g. Jackets, Tops, Bottoms)
# Class name (e.g. Blouses, Casual bottoms, Skirts...)

# Load the review data into R straight from the raw CSV on GitHub.
#
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() defaults to
# FALSE, which would load Title, Review.Text, Division.Name, Department.Name
# and Class.Name as character vectors. The str()/summary() output recorded
# in this script (factor levels and per-level counts) relies on them being
# factors, so the behaviour is pinned here instead of depending on the R
# version that happens to run the script.
url <- "https://raw.githubusercontent.com/msanchez50/Train/master/Womens%20Clothing%20E-Commerce%20Reviews.csv"
clothes <- read.csv(url, stringsAsFactors = TRUE)
str(clothes)

## 'data.frame':    23486 obs. of  11 variables:
##  $ X                      : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Clothing.ID            : int  767 1080 1077 1049 847 1080 858 858 1077 1077 ...
##  $ Age                    : int  33 34 60 50 47 49 39 39 24 34 ...
##  $ Title                  : Factor w/ 13994 levels "","\"beach business\"",..: 1 1 11451 8055 4365 8769 1973 10671 4299 11765 ...
##  $ Review.Text            : Factor w/ 22635 levels "","- this really is lovely. the overall design from the arms, front, and back makes this poncho unique. it's not t"| __truncated__,..: 247 13179 5545 8025 20324 7987 3330 8850 7378 2671 ...
##  $ Rating                 : int  4 5 3 5 5 2 5 4 5 5 ...
##  $ Recommended.IND        : int  1 1 0 1 1 0 1 1 1 1 ...
##  $ Positive.Feedback.Count: int  0 4 0 0 6 4 1 4 0 0 ...
##  $ Division.Name          : Factor w/ 4 levels "","General","General Petite",..: 4 2 2 3 2 2 3 3 2 2 ...
##  $ Department.Name        : Factor w/ 7 levels "","Bottoms","Dresses",..: 4 3 3 2 6 3 6 6 3 3 ...
##  $ Class.Name             : Factor w/ 21 levels "","Blouses","Casual bottoms",..: 7 5 5 15 2 5 10 10 5 5 ...

# Remove the X column (a 0, 1, 2, ... row counter carried over from the CSV)
# and replace the remaining ten column names, in file order, with short ones.
clothes[["X"]] <- NULL
names(clothes) <- c(
  "ID", "Age", "Title", "Review", "Rating",
  "Recommend", "Liked", "Division", "Dept", "Class"
)
str(clothes)

## 'data.frame':    23486 obs. of  10 variables:
##  $ ID       : int  767 1080 1077 1049 847 1080 858 858 1077 1077 ...
##  $ Age      : int  33 34 60 50 47 49 39 39 24 34 ...
##  $ Title    : Factor w/ 13994 levels "","\"beach business\"",..: 1 1 11451 8055 4365 8769 1973 10671 4299 11765 ...
##  $ Review   : Factor w/ 22635 levels "","- this really is lovely. the overall design from the arms, front, and back makes this poncho unique. it's not t"| __truncated__,..: 247 13179 5545 8025 20324 7987 3330 8850 7378 2671 ...
##  $ Rating   : int  4 5 3 5 5 2 5 4 5 5 ...
##  $ Recommend: int  1 1 0 1 1 0 1 1 1 1 ...
##  $ Liked    : int  0 4 0 0 6 4 1 4 0 0 ...
##  $ Division : Factor w/ 4 levels "","General","General Petite",..: 4 2 2 3 2 2 3 3 2 2 ...
##  $ Dept     : Factor w/ 7 levels "","Bottoms","Dresses",..: 4 3 3 2 6 3 6 6 3 3 ...
##  $ Class    : Factor w/ 21 levels "","Blouses","Casual bottoms",..: 7 5 5 15 2 5 10 10 5 5 ...

# Per-column overview of the renamed data frame: min/quartiles/mean/max for
# the numeric columns and counts of the most frequent levels for the factor
# columns (an empty level name means the field was blank in the CSV).
summary(clothes)

##        ID              Age              Title      
##  Min.   :   0.0   Min.   :18.0             : 3810  
##  1st Qu.: 861.0   1st Qu.:34.0   Love it!  :  136  
##  Median : 936.0   Median :41.0   Beautiful :   95  
##  Mean   : 918.1   Mean   :43.2   Love      :   88  
##  3rd Qu.:1078.0   3rd Qu.:52.0   Love!     :   84  
##  Max.   :1205.0   Max.   :99.0   Beautiful!:   72  
##                                  (Other)   :19201  
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Review     
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        :  845  
##  Perfect fit and i've gotten so many compliments. i buy all my suits from here now!                                                                                                                                                                                                                                                                                                                                                                                                                                    :    3  
##  I bought this shirt at the store and after going home and trying it on, i promptly went online and ordered two more! i've gotten multiple compliments anytime i wear any of them. great for looking put together with no fuss. \npeople that have commented there's were destroyed in the wash didn't read the care label which says dry clean.                                                                                                                                                                       :    2  
##  I purchased this and another eva franco dress during retailer's recent 20% off sale. i was looking for dresses that were work appropriate, but that would also transition well to happy hour or date night. they both seemed to be just what i was looking for. i ordered a 4 regular and a 6 regular, as i am usually in between sizes. the 4 was definitely too small. the 6 fit, technically, but was very ill fitting. not only is the dress itself short, but it is very short-waisted. i am only 5'3", but it fe:    2  
##  Lightweight, soft cotton top and shorts. i think it's meant to be a beach cover-up but i'm wearing it as a thin, light-weight summer outfit on these hot hot days. the top has a loose elastic around the bottom which i didn't realize when i ordered it, but i like it and it matches the look in the photos. and the shorts are very low-cut - don't expect them up around your waist. again, i like that. some might want to wear a cami underneath because it's a thin cotton but i'm fine as-is. i bought it i  :    2  
##  Love, love these jeans. being short they come right to my ankle. super soft and don?t require any hemming. i ordered my typical jean size of 26 and they fit like a glove. would love to have these in black and grey.                                                                                                                                                                                                                                                                                                :    2  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               :22630  
##      Rating        Recommend          Liked                   Division    
##  Min.   :1.000   Min.   :0.0000   Min.   :  0.000                 :   14  
##  1st Qu.:4.000   1st Qu.:1.0000   1st Qu.:  0.000   General       :13850  
##  Median :5.000   Median :1.0000   Median :  1.000   General Petite: 8120  
##  Mean   :4.196   Mean   :0.8224   Mean   :  2.536   Initmates     : 1502  
##  3rd Qu.:5.000   3rd Qu.:1.0000   3rd Qu.:  3.000                         
##  Max.   :5.000   Max.   :1.0000   Max.   :122.000                         
##                                                                           
##        Dept            Class     
##          :   14   Dresses :6319  
##  Bottoms : 3799   Knits   :4843  
##  Dresses : 6319   Blouses :3097  
##  Intimate: 1735   Sweaters:1428  
##  Jackets : 1032   Pants   :1388  
##  Tops    :10468   Jeans   :1147  
##  Trend   :  119   (Other) :5264

library(gtrendsR)
# Ask gtrendsR for search interest in four clothing-related terms, leaving
# every other gtrends() argument at its package default, then draw the
# default plot and inspect the structure of the returned "gtrends" list.
clothes_trend <- gtrends(
  keyword = c("dresses", "bottoms", "blouses", "casual bottoms")
)
plot(clothes_trend)

str(clothes_trend)

## List of 7
##  $ interest_over_time :'data.frame': 1040 obs. of  6 variables:
##   ..$ date    : POSIXct[1:1040], format: "2013-12-01" ...
##   ..$ hits    : chr [1:1040] "67" "65" "57" "59" ...
##   ..$ keyword : chr [1:1040] "dresses" "dresses" "dresses" "dresses" ...
##   ..$ geo     : chr [1:1040] "world" "world" "world" "world" ...
##   ..$ gprop   : chr [1:1040] "web" "web" "web" "web" ...
##   ..$ category: int [1:1040] 0 0 0 0 0 0 0 0 0 0 ...
##  $ interest_by_country:'data.frame': 1000 obs. of  5 variables:
##   ..$ location: chr [1:1000] "Guyana" "Antigua & Barbuda" "Anguilla" "Botswana" ...
##   ..$ hits    : chr [1:1000] "" "" "" "" ...
##   ..$ keyword : chr [1:1000] "dresses" "dresses" "dresses" "dresses" ...
##   ..$ geo     : chr [1:1000] "world" "world" "world" "world" ...
##   ..$ gprop   : chr [1:1000] "web" "web" "web" "web" ...
##  $ interest_by_region : NULL
##  $ interest_by_dma    :'data.frame': 1224 obs. of  5 variables:
##   ..$ location: chr [1:1224] "Greenwood-Greenville MS" "Monroe LA-El Dorado AR" "Lafayette LA" "Albany GA" ...
##   ..$ hits    : chr [1:1224] "100" "81" "79" "79" ...
##   ..$ keyword : chr [1:1224] "dresses" "dresses" "dresses" "dresses" ...
##   ..$ geo     : chr [1:1224] "world" "world" "world" "world" ...
##   ..$ gprop   : chr [1:1224] "web" "web" "web" "web" ...
##  $ interest_by_city   :'data.frame': 208 obs. of  5 variables:
##   ..$ location: chr [1:208] "Faisalabad" "Rawalpindi" "Belfast" "Lahore" ...
##   ..$ hits    : int [1:208] NA NA NA 100 97 NA NA 90 88 NA ...
##   ..$ keyword : chr [1:208] "dresses" "dresses" "dresses" "dresses" ...
##   ..$ geo     : chr [1:208] "world" "world" "world" "world" ...
##   ..$ gprop   : chr [1:208] "web" "web" "web" "web" ...
##  $ related_topics     : NULL
##  $ related_queries    :'data.frame': 150 obs. of  5 variables:
##   ..$ subject        : chr [1:150] "100" "99" "58" "33" ...
##   ..$ related_queries: chr [1:150] "top" "top" "top" "top" ...
##   ..$ value          : chr [1:150] "wedding dresses" "dress" "prom dresses" "bridesmaid dresses" ...
##   ..$ keyword        : chr [1:150] "dresses" "dresses" "dresses" "dresses" ...
##   ..$ category       : int [1:150] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "reshapeLong")=List of 4
##   .. ..$ varying:List of 1
##   .. .. ..$ value: chr "top"
##   .. .. ..- attr(*, "v.names")= chr "value"
##   .. .. ..- attr(*, "times")= chr "top"
##   .. ..$ v.names: chr "value"
##   .. ..$ idvar  : chr "id"
##   .. ..$ timevar: chr "related_queries"
##  - attr(*, "class")= chr [1:2] "gtrends" "list"

library(ggplot2)

# Boxplot of search interest per keyword across countries.
#
# interest_by_country$hits arrives as character and contains values that are
# not plain numbers (at least empty strings). Converting inside aes() made
# ggplot re-run as.numeric() for every evaluation, emitting repeated
# "NAs introduced by coercion" warnings, and then silently dropped the
# resulting NA rows. The conversion is done once here, keeping only rows
# whose hits value is a plain number, so the plotted data is the same but no
# warnings are raised.
# NOTE(review): the coercion warnings suggest there are non-empty,
# non-numeric markers too (presumably "<1" for very low volume) - confirm
# whether those should be dropped, as before, or mapped to a small value.
country_hits <- clothes_trend$interest_by_country
country_hits <- country_hits[
  grepl("^[0-9]+(\\.[0-9]+)?$", country_hits$hits),
]
country_hits$hits <- as.numeric(country_hits$hits)

sp1 <- ggplot(country_hits)
sp1 + geom_boxplot(aes(keyword, hits))

# Boxplot of search interest per keyword across DMA regions.
#
# interest_by_dma$hits is a character column ("100", "81", ...). Mapping it
# to y as-is makes ggplot treat every distinct string as a category, so the
# boxplot has no numeric scale. Convert it to numeric first, keeping only
# rows whose hits value is a plain number (the same rule used for the
# country-level plot), so the y axis is continuous.
dma_hits <- clothes_trend$interest_by_dma
dma_hits <- dma_hits[grepl("^[0-9]+(\\.[0-9]+)?$", dma_hits$hits), ]
dma_hits$hits <- as.numeric(dma_hits$hits)

sp2 <- ggplot(dma_hits)
sp2 + geom_boxplot(aes(keyword, hits))

# Boxplot of search interest per city. Cities without a hits value are left
# out, and the x-axis labels are rotated 90 degrees so the city names stay
# readable.
sp3 <- ggplot(subset(clothes_trend$interest_by_city, !is.na(hits)))
sp3 +
  geom_boxplot(aes(x = location, y = hits)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

#Library:
library(tidyverse)

## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --

## v tibble  1.4.2     v purrr   0.2.5
## v tidyr   0.8.2     v dplyr   0.7.8
## v readr   1.2.1     v stringr 1.3.1
## v tibble  1.4.2     v forcats 0.3.0

## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(tidytext)
library(dplyr)
library(purrr)
library(stringr)
library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union

## The following objects are masked from 'package:purrr':
## 
##     compose, simplify

## The following object is masked from 'package:tidyr':
## 
##     crossing

## The following object is masked from 'package:tibble':
## 
##     as_data_frame

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(ggplot2)
library(wordcloud2)
library(ggraph)
library(topicmodels)
#

# proyectoMarcos.R

# marco

# Tue Nov 27 18:40:43 2018