Load Data into R
Problem 1
# Read the tab-separated data file from the project's GitHub repository.
away <- read.csv(
  "https://raw.githubusercontent.com/ntlrs/awayproject/master/CSV",
  header = TRUE,
  sep = "\t"
)
# Replace the imported headers with readable column names. The names contain
# spaces and symbols, so they must be backtick-quoted when accessed with `$`.
names(away) <- c(
  "Publisher ID", "Transaction ID", "Commission %", "Link Type",
  "Sale Amount", "Total Commission", "Affiliate Commission",
  "Vendor Commission", "Click Date", "Sale Date", "Status"
)
# Preview the first rows to confirm the import and the renaming worked.
head(away)
## Publisher ID Transaction ID Commission % Link Type Sale Amount
## 1 2 72368298 0.06 Generic $275.00
## 2 2 72333671 0.06 Generic $275.00
## 3 2 72334498 0.06 Generic $245.00
## 4 5 72334896 0.09 Generic $225.00
## 5 2 72337920 0.06 Generic $470.00
## 6 2 72348691 0.06 Generic $450.00
## Total Commission Affiliate Commission Vendor Commission Click Date
## 1 $23.38 $16.50 $6.88 6/16/17 9:21
## 2 $23.38 $16.50 $6.88 6/16/17 9:21
## 3 $20.83 $14.70 $6.13 6/21/17 20:25
## 4 $25.88 $20.25 $5.63 6/11/17 10:55
## 5 $39.95 $28.20 $11.75 7/1/17 13:07
## 6 $38.25 $27.00 $11.25 6/5/17 12:47
## Sale Date Status
## 1 7/1/17 6:54 Paid
## 2 7/1/17 9:46 Paid
## 3 7/1/17 10:31 Paid
## 4 7/1/17 10:52 Paid
## 5 7/1/17 13:22 Paid
## 6 7/1/17 19:58 Paid
Define Transaction ID
# Transaction ID to look up in the data.
x <- 74398726
Use x to find Sale Date
# Look up the Sale Date of the transaction whose ID equals x.
# %in% never yields NA, so rows with a missing Transaction ID are skipped
# instead of showing up as NA entries in the result.
answer1 <- away$`Sale Date`[away$`Transaction ID` %in% x]
answer1
## [1] 9/4/17 15:47
## 2949 Levels: 10/1/17 11:14 10/1/17 12:20 10/1/17 13:34 ... 9/9/17 8:17
Problem 2
# Convert Total Commission from currency text to numeric. Both the dollar sign
# and any thousands separators are stripped, so a value such as "$1,234.00"
# parses as a number instead of silently becoming NA.
away$`Total Commission` <- as.numeric(
  gsub("[$,]", "", away$`Total Commission`)
)
# Sort the commissions in descending order and keep the three largest values.
top3 <- head(sort(away$`Total Commission`, decreasing = TRUE), 3)
y <- top3[3] # Isolate the 3rd highest commission.
# Find the Transaction ID(s) whose commission equals that value. which() drops
# NA comparisons, so rows with a missing commission are not returned as NA.
answer2 <- away$`Transaction ID`[which(away$`Total Commission` == y)]
# paste0() is vectorised: tied transactions each get their own output string.
print(paste0("Transaction ID ", answer2))
## [1] "Transaction ID 79483698"
Problem 3
# Keep only the rows whose Publisher ID equals 2.
pub2 <- subset(away, `Publisher ID` == 2)
# Threshold = mean commission + 2 standard deviations.
# NOTE(review): the standard deviation is taken over publisher 2 only, while
# the mean is taken over every publisher in `away` -- confirm whether the mean
# should also be restricted to pub2.
w <- mean(away$`Total Commission`) + sd(pub2$`Total Commission`) * 2
w
## [1] 51.28017
# Tally how many publisher-2 commissions are at or above the threshold w
# (mean + 2 * SD). count() here is plyr's frequency table for a vector; it is
# namespace-qualified so the call works without attaching plyr and cannot be
# masked by dplyr::count().
plyr::count(pub2$`Total Commission` >= w)
## x freq
## 1 FALSE 611
## 2 TRUE 41