The following packages have been used to analyse the data set.
library(readr) # Useful for importing data
library(knitr) # Useful for creating nice tables
library(dplyr) # Useful for data manipulation
The Women’s Clothing E-Commerce data set contains the reviews written by customers about apparel products. As this is real commercial data, references to the company in the review text have been anonymized and replaced with “retailer”.
This data has been sourced from the following link:
The dataset consists of 23486 rows and 11 feature variables. Each entry reflects an individual customer’s review.
The read_csv() function from the readr package has been used to import the data from a CSV file stored in the user's Desktop folder.
# Import the reviews CSV into a tibble; the path assumes the file sits in the
# current user's Desktop folder.
Womens_Clothing <- read_csv(
  file = "~/Desktop/Womens Clothing E-Commerce Reviews.csv"
)
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## `Clothing ID` = col_integer(),
## Age = col_integer(),
## Title = col_character(),
## `Review Text` = col_character(),
## Rating = col_integer(),
## `Recommended IND` = col_integer(),
## `Positive Feedback Count` = col_integer(),
## `Division Name` = col_character(),
## `Department Name` = col_character(),
## `Class Name` = col_character()
## )
# Preview the first six rows, then inspect the column types and dimensions.
head(Womens_Clothing, n = 6L)
str(object = Womens_Clothing)
## Classes 'tbl_df', 'tbl' and 'data.frame': 23486 obs. of 11 variables:
## $ X1 : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Clothing ID : int 767 1080 1077 1049 847 1080 858 858 1077 1077 ...
## $ Age : int 33 34 60 50 47 49 39 39 24 34 ...
## $ Title : chr NA NA "Some major design flaws" "My favorite buy!" ...
## $ Review Text : chr "Absolutely wonderful - silky and sexy and comfortable" "Love this dress! it's sooo pretty. i happened to find it in a store, and i'm glad i did bc i never would have"| __truncated__ "I had such high hopes for this dress and really wanted it to work for me. i initially ordered the petite small "| __truncated__ "I love, love, love this jumpsuit. it's fun, flirty, and fabulous! every time i wear it, i get nothing but great compliments!" ...
## $ Rating : int 4 5 3 5 5 2 5 4 5 5 ...
## $ Recommended IND : int 1 1 0 1 1 0 1 1 1 1 ...
## $ Positive Feedback Count: int 0 4 0 0 6 4 1 4 0 0 ...
## $ Division Name : chr "Initmates" "General" "General" "General Petite" ...
## $ Department Name : chr "Intimate" "Dresses" "Dresses" "Bottoms" ...
## $ Class Name : chr "Intimates" "Dresses" "Dresses" "Pants" ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 11
## .. ..$ X1 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Clothing ID : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Age : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Title : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Review Text : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Rating : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Recommended IND : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Positive Feedback Count: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Division Name : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Department Name : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Class Name : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
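In this section we subset the data frame to its first 10 observations, keeping all variables, and convert the subset to a matrix. Because the columns are of mixed types, every value in the matrix is coerced to character.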
# Keep the first 10 reviews (all columns) and display them.
Womens_Clothing10 <- Womens_Clothing[seq_len(10), ]
Womens_Clothing10
# Convert the subset to a matrix and inspect the resulting structure.
Womens_Clothing10_mattrix <- as.matrix(x = Womens_Clothing10)
str(object = Womens_Clothing10_mattrix)
## chr [1:10, 1:11] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" " 767" ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:11] "X1" "Clothing ID" "Age" "Title" ...
# List the attributes (dim and dimnames) carried by the matrix.
attributes(x = Womens_Clothing10_mattrix)
## $dim
## [1] 10 11
##
## $dimnames
## $dimnames[[1]]
## NULL
##
## $dimnames[[2]]
## [1] "X1" "Clothing ID"
## [3] "Age" "Title"
## [5] "Review Text" "Rating"
## [7] "Recommended IND" "Positive Feedback Count"
## [9] "Division Name" "Department Name"
## [11] "Class Name"
A new data frame has been created which consists of only the first and the last variables of the data set; it is then saved to an .RData file.
# Select the first and last columns by position. The last index comes from
# ncol() instead of a hard-coded 11, so the selection keeps meaning "first and
# last" if the number of columns changes; unique() avoids selecting the same
# column twice when there is only one column.
Womens_Clothing_First_Last <-
  Womens_Clothing[unique(c(1L, ncol(Womens_Clothing)))]
Womens_Clothing_First_Last
# Persist the two-column data frame as an .RData file in the working directory.
save(
  Womens_Clothing_First_Last,
  file = "Womens_Clothing_First_Last_frame.RData"
)
A new data frame is created with 2 variables and 4 observations; a third variable is then appended and the columns are renamed.
# Build a 4-row data frame of numbered city names. stringsAsFactors = TRUE is
# set explicitly because the default changed to FALSE in R 4.0.0; without it,
# col2 would be a character vector rather than a factor on current R versions.
Data_frame1 <- data.frame(
  col1 = 1:4,
  col2 = c("Melbourne", "Newyork", "Londan", "Bangalore"),
  stringsAsFactors = TRUE
)
str(Data_frame1)
## 'data.frame': 4 obs. of 2 variables:
## $ col1: int 1 2 3 4
## $ col2: Factor w/ 4 levels "Bangalore","Londan",..: 3 4 2 1
# Append the average-age vector as a third column and give every column a
# descriptive name, then inspect the result.
V3 <- c(37, 36, 41, 29)
Data_frame2 <- setNames(
  cbind(Data_frame1, V3),
  c("Number", "City", "Average age in each city")
)
str(Data_frame2)
## 'data.frame': 4 obs. of 3 variables:
## $ Number : int 1 2 3 4
## $ City : Factor w/ 4 levels "Bangalore","Londan",..: 3 4 2 1
## $ Average age in each city: num 37 36 41 29