Setting Working Directory, Installing Packages, and Read
Data
setwd("~/Documents/Documents/Study/Semester 5/PBA")
library(dplyr)
UCData<-read.csv("online_retail.csv",stringsAsFactors = FALSE )
Part A
head(UCData)
## InvoiceNo StockCode Description Quantity InvoiceDate
## 1 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 12/1/10 8:26
## 2 536365 71053 WHITE METAL LANTERN 6 12/1/10 8:26
## 3 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 12/1/10 8:26
## 4 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 12/1/10 8:26
## 5 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 12/1/10 8:26
## 6 536365 22752 SET 7 BABUSHKA NESTING BOXES 2 12/1/10 8:26
## UnitPrice CustomerID Country
## 1 2.55 17850 United Kingdom
## 2 3.39 17850 United Kingdom
## 3 2.75 17850 United Kingdom
## 4 3.39 17850 United Kingdom
## 5 3.39 17850 United Kingdom
## 6 7.65 17850 United Kingdom
str(UCData)
## 'data.frame': 541909 obs. of 8 variables:
## $ InvoiceNo : chr "536365" "536365" "536365" "536365" ...
## $ StockCode : chr "85123A" "71053" "84406B" "84029G" ...
## $ Description: chr "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : int 6 6 8 6 6 2 6 6 6 32 ...
## $ InvoiceDate: chr "12/1/10 8:26" "12/1/10 8:26" "12/1/10 8:26" "12/1/10 8:26" ...
## $ UnitPrice : num 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID : int 17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
## $ Country : chr "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
Part B
✢ Converting Invoice Date to Date Class
UCData$InvoiceDate<-as.Date(UCData$InvoiceDate,"%m/%d/%y")
✢ Result Head()
head(UCData)
## InvoiceNo StockCode Description Quantity InvoiceDate
## 1 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01
## 2 536365 71053 WHITE METAL LANTERN 6 2010-12-01
## 3 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 2010-12-01
## 4 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-12-01
## 5 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 2010-12-01
## 6 536365 22752 SET 7 BABUSHKA NESTING BOXES 2 2010-12-01
## UnitPrice CustomerID Country
## 1 2.55 17850 United Kingdom
## 2 3.39 17850 United Kingdom
## 3 2.75 17850 United Kingdom
## 4 3.39 17850 United Kingdom
## 5 3.39 17850 United Kingdom
## 6 7.65 17850 United Kingdom
✢ Subsetting Data from 2011/07 to 2011/08
Filtered <- subset(UCData,InvoiceDate>='2011-07-01' & InvoiceDate<'2011-09-01')
✢ Result Head()
head(Filtered)
## InvoiceNo StockCode Description Quantity
## 245904 558638 84836 ZINC METAL HEART DECORATION 12
## 245905 558638 71459 HANGING JAM JAR T-LIGHT HOLDER 24
## 245906 558638 22784 LANTERN CREAM GAZEBO 3
## 245907 558638 23145 ZINC T-LIGHT HOLDER STAR LARGE 12
## 245908 558638 22674 FRENCH TOILET SIGN BLUE METAL 12
## 245909 558638 21174 POTTERING IN THE SHED METAL SIGN 12
## InvoiceDate UnitPrice CustomerID Country
## 245904 2011-07-01 1.25 16317 United Kingdom
## 245905 2011-07-01 0.85 16317 United Kingdom
## 245906 2011-07-01 4.95 16317 United Kingdom
## 245907 2011-07-01 0.95 16317 United Kingdom
## 245908 2011-07-01 1.25 16317 United Kingdom
## 245909 2011-07-01 2.08 16317 United Kingdom
✢ Unique rows of InvoiceNo
cat("Unique Rows of InvoiceNo: ",length(unique(Filtered$InvoiceNo)))
## Unique Rows of InvoiceNo: 3664
Part C
1) Quantity Mean & Unit Price Mean
lenrow<- nrow(Filtered)
total<- 0
pricetotal<- 0
for(i in 1:lenrow)
{
total<-total+Filtered[i,"Quantity"]
pricetotal<-pricetotal+Filtered[i,"UnitPrice"]
}
qmean <- total/lenrow
pmean <- pricetotal/lenrow
cat("Quantity Mean : ",qmean)
## Quantity Mean : 10.65901
cat("Unit Price Mean: ",pmean)
## Unit Price Mean: 4.308608
2) Types of each column
for(i in names(Filtered))
{
cat(i,"type : ",class(Filtered[,i]),"\n")
}
## InvoiceNo type : character
## StockCode type : character
## Description type : character
## Quantity type : integer
## InvoiceDate type : Date
## UnitPrice type : numeric
## CustomerID type : integer
## Country type : character
3) Number of unique values each column
for(i in names(Filtered))
{
cat("Unique Values",i," :",length(unique(Filtered[,i])),"\n")
}
## Unique Values InvoiceNo : 3664
## Unique Values StockCode : 2982
## Unique Values Description : 2953
## Unique Values Quantity : 287
## Unique Values InvoiceDate : 52
## Unique Values UnitPrice : 447
## Unique Values CustomerID : 1541
## Unique Values Country : 28
Part D
✢ Subset the data for UK,Netherlands,Australia
countryfiltered <- subset(Filtered,Country=="United Kingdom"|Country=="Australia"|Country=="Netherlands")
4) Report the average and standard deviation of Unit Price
(Rounded to 3 Decimal Places)
cat("Average Unit Price : ",round(mean(countryfiltered$UnitPrice),digits = 3))
## Average Unit Price : 4.344
cat("Standard Deviation Unit Price : ",round(sd(countryfiltered$UnitPrice),digits = 3))
## Standard Deviation Unit Price : 98.961
5) The number of unique transactions
cat("Number of unique transactions :",length(unique(countryfiltered$InvoiceNo)))
## Number of unique transactions : 3332
6) The number of customers residing in these countries who made
transactions in July and August of 2011 (CustomerID “NA” is included as
1)
cat("Number of Customers :",length(unique(countryfiltered$CustomerID)))
## Number of Customers : 1380
Part E
7) Number of customer who made refunds exclude the NA using date
subset
customer<-subset(Filtered,substr(Filtered$InvoiceNo,1,1)=='C')
customer<-na.omit(customer)
cat("Number of customer who made refunds exclude NA :",length(unique(customer$CustomerID)))
## Number of customer who made refunds exclude NA : 381
✢ Assign ID to cust_refund
cust_refund = unique(customer[["CustomerID"]])
Part F
✢ Creating Variable Sales using data from date subset (Quantity
x UnitPrice)
unknown<-filter(Filtered,is.na(CustomerID))
sales<-unknown$Quantity*unknown$UnitPrice
8) Total Sales for those without CustomerID
cat("Total Sales for those without CustomerID:",sum(sales))
## Total Sales for those without CustomerID: 173374.1
9) Number of Transaction without CustomerID
cat("Number of Transaction without CustomerID:",length(unique(unknown$InvoiceNo)))
## Number of Transaction without CustomerID: 527