Setting Working Directory, Installing Packages, and Read Data


setwd("~/Documents/Documents/Study/Semester 5/PBA")
library(dplyr)
UCData<-read.csv("online_retail.csv",stringsAsFactors = FALSE )


Part A


head(UCData)
##   InvoiceNo StockCode                         Description Quantity  InvoiceDate
## 1    536365    85123A  WHITE HANGING HEART T-LIGHT HOLDER        6 12/1/10 8:26
## 2    536365     71053                 WHITE METAL LANTERN        6 12/1/10 8:26
## 3    536365    84406B      CREAM CUPID HEARTS COAT HANGER        8 12/1/10 8:26
## 4    536365    84029G KNITTED UNION FLAG HOT WATER BOTTLE        6 12/1/10 8:26
## 5    536365    84029E      RED WOOLLY HOTTIE WHITE HEART.        6 12/1/10 8:26
## 6    536365     22752        SET 7 BABUSHKA NESTING BOXES        2 12/1/10 8:26
##   UnitPrice CustomerID        Country
## 1      2.55      17850 United Kingdom
## 2      3.39      17850 United Kingdom
## 3      2.75      17850 United Kingdom
## 4      3.39      17850 United Kingdom
## 5      3.39      17850 United Kingdom
## 6      7.65      17850 United Kingdom
str(UCData)
## 'data.frame':    541909 obs. of  8 variables:
##  $ InvoiceNo  : chr  "536365" "536365" "536365" "536365" ...
##  $ StockCode  : chr  "85123A" "71053" "84406B" "84029G" ...
##  $ Description: chr  "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
##  $ Quantity   : int  6 6 8 6 6 2 6 6 6 32 ...
##  $ InvoiceDate: chr  "12/1/10 8:26" "12/1/10 8:26" "12/1/10 8:26" "12/1/10 8:26" ...
##  $ UnitPrice  : num  2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
##  $ CustomerID : int  17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
##  $ Country    : chr  "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...


Part B


✢ Converting Invoice Date to Date Class
UCData$InvoiceDate<-as.Date(UCData$InvoiceDate,"%m/%d/%y")
✢ Result Head()
head(UCData)
##   InvoiceNo StockCode                         Description Quantity InvoiceDate
## 1    536365    85123A  WHITE HANGING HEART T-LIGHT HOLDER        6  2010-12-01
## 2    536365     71053                 WHITE METAL LANTERN        6  2010-12-01
## 3    536365    84406B      CREAM CUPID HEARTS COAT HANGER        8  2010-12-01
## 4    536365    84029G KNITTED UNION FLAG HOT WATER BOTTLE        6  2010-12-01
## 5    536365    84029E      RED WOOLLY HOTTIE WHITE HEART.        6  2010-12-01
## 6    536365     22752        SET 7 BABUSHKA NESTING BOXES        2  2010-12-01
##   UnitPrice CustomerID        Country
## 1      2.55      17850 United Kingdom
## 2      3.39      17850 United Kingdom
## 3      2.75      17850 United Kingdom
## 4      3.39      17850 United Kingdom
## 5      3.39      17850 United Kingdom
## 6      7.65      17850 United Kingdom


✢ Subsetting Data from 2011/07 to 2011/08
Filtered <- subset(UCData,InvoiceDate>='2011-07-01' & InvoiceDate<'2011-09-01')
✢ Result Head()
head(Filtered)
##        InvoiceNo StockCode                      Description Quantity
## 245904    558638     84836      ZINC METAL HEART DECORATION       12
## 245905    558638     71459   HANGING JAM JAR T-LIGHT HOLDER       24
## 245906    558638     22784            LANTERN CREAM GAZEBO         3
## 245907    558638     23145   ZINC T-LIGHT HOLDER STAR LARGE       12
## 245908    558638     22674    FRENCH TOILET SIGN BLUE METAL       12
## 245909    558638     21174 POTTERING IN THE SHED METAL SIGN       12
##        InvoiceDate UnitPrice CustomerID        Country
## 245904  2011-07-01      1.25      16317 United Kingdom
## 245905  2011-07-01      0.85      16317 United Kingdom
## 245906  2011-07-01      4.95      16317 United Kingdom
## 245907  2011-07-01      0.95      16317 United Kingdom
## 245908  2011-07-01      1.25      16317 United Kingdom
## 245909  2011-07-01      2.08      16317 United Kingdom


✢ Unique rows of InvoiceNo
cat("Unique Rows of InvoiceNo: ",length(unique(Filtered$InvoiceNo)))
## Unique Rows of InvoiceNo:  3664


Part C


✢ Using For-Loops :


1) Quantity Mean & Unit Price Mean
lenrow<- nrow(Filtered)
total<- 0
pricetotal<- 0
for(i in 1:lenrow)
{
  total<-total+Filtered[i,"Quantity"]
  pricetotal<-pricetotal+Filtered[i,"UnitPrice"]
}
qmean <- total/lenrow
pmean <- pricetotal/lenrow
cat("Quantity Mean  : ",qmean)
## Quantity Mean  :  10.65901
cat("Unit Price Mean: ",pmean)
## Unit Price Mean:  4.308608


2) Types of each column
for(i in names(Filtered))
{
  cat(i,"type : ",class(Filtered[,i]),"\n")
}
## InvoiceNo type :  character 
## StockCode type :  character 
## Description type :  character 
## Quantity type :  integer 
## InvoiceDate type :  Date 
## UnitPrice type :  numeric 
## CustomerID type :  integer 
## Country type :  character


3) Number of unique values each column
for(i in names(Filtered))
{
  cat("Unique Values",i," :",length(unique(Filtered[,i])),"\n")
}
## Unique Values InvoiceNo  : 3664 
## Unique Values StockCode  : 2982 
## Unique Values Description  : 2953 
## Unique Values Quantity  : 287 
## Unique Values InvoiceDate  : 52 
## Unique Values UnitPrice  : 447 
## Unique Values CustomerID  : 1541 
## Unique Values Country  : 28


Part D


✢ Subset the data for UK,Netherlands,Australia
countryfiltered <- subset(Filtered,Country=="United Kingdom"|Country=="Australia"|Country=="Netherlands")


4) Report the average and standard deviation of Unit Price (Rounded to 3 Decimal Places)
cat("Average Unit Price : ",round(mean(countryfiltered$UnitPrice),digits = 3))
## Average Unit Price :  4.344
cat("Standard Deviation Unit Price : ",round(sd(countryfiltered$UnitPrice),digits = 3))
## Standard Deviation Unit Price :  98.961


5) The number of unique transactions
cat("Number of unique transactions :",length(unique(countryfiltered$InvoiceNo)))
## Number of unique transactions : 3332


6) The number of customers residing in these countries who made transactions in July and August of 2011 (CustomerID “NA” is included as 1)
cat("Number of Customers :",length(unique(countryfiltered$CustomerID)))
## Number of Customers : 1380


Part E


7) Number of customer who made refunds exclude the NA using date subset
customer<-subset(Filtered,substr(Filtered$InvoiceNo,1,1)=='C')
customer<-na.omit(customer)
cat("Number of customer who made refunds exclude NA :",length(unique(customer$CustomerID)))
## Number of customer who made refunds exclude NA : 381


✢ Assign ID to cust_refund
cust_refund = unique(customer[["CustomerID"]])


Part F


✢ Creating Variable Sales using data from date subset (Quantity x UnitPrice)
unknown<-filter(Filtered,is.na(CustomerID))
sales<-unknown$Quantity*unknown$UnitPrice


8) Total Sales for those without CustomerID
cat("Total Sales for those without CustomerID:",sum(sales))
## Total Sales for those without CustomerID: 173374.1


9) Number of Transaction without CustomerID
cat("Number of Transaction without CustomerID:",length(unique(unknown$InvoiceNo)))
## Number of Transaction without CustomerID: 527


EXTRA CREDITS


EC1) Monthly Aggregate Spending for Each Customer
ec_sales<-Filtered$Quantity*Filtered$UnitPrice
Filtered$Sales = ec_sales
Filtered$Year<-strftime(Filtered$InvoiceDate, "%Y")   
Filtered$Month<-strftime(Filtered$InvoiceDate, "%m")   
monthly_aggregate<-aggregate(Sales ~ CustomerID+Year+Month, data = Filtered, FUN = sum, na.rm = TRUE)
head(monthly_aggregate)
##   CustomerID Year Month   Sales
## 1      12358 2011    07  484.86
## 2      12362 2011    07  303.76
## 3      12379 2011    07   -1.95
## 4      12388 2011    07  902.09
## 5      12397 2011    07 1214.81
## 6      12405 2011    07 1710.39


EC2) Monthly purchase amount of the five customers who have spent the most money in July 2011 (starting from 5th customer)
july_data<-subset(monthly_aggregate,monthly_aggregate$Month == "07")
tail(july_data[order(july_data$Sales),],5)
##     CustomerID Year Month    Sales
## 316      14088 2011    07  9038.69
## 935      17949 2011    07 11590.58
## 459      14911 2011    07 13445.33
## 957      18102 2011    07 19889.16
## 327      14156 2011    07 26464.99