Load Data into R

Problem 1
# Read the tab-separated export straight from GitHub; the first row holds the
# column headers. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
away <- read.csv(
  "https://raw.githubusercontent.com/ntlrs/awayproject/master/CSV",
  header = TRUE,
  sep = "\t"
)
# Replace the imported headers with the spaced labels used in the rest of the
# analysis (they must be accessed with backticks, e.g. away$`Sale Date`).
names(away) <- c(
  "Publisher ID", "Transaction ID", "Commission %", "Link Type",
  "Sale Amount", "Total Commission", "Affiliate Commission",
  "Vendor Commission", "Click Date", "Sale Date", "Status"
)
# Preview the first six rows.
head(away)
##   Publisher ID Transaction ID Commission % Link Type Sale Amount
## 1            2       72368298         0.06   Generic    $275.00 
## 2            2       72333671         0.06   Generic    $275.00 
## 3            2       72334498         0.06   Generic    $245.00 
## 4            5       72334896         0.09   Generic    $225.00 
## 5            2       72337920         0.06   Generic    $470.00 
## 6            2       72348691         0.06   Generic    $450.00 
##   Total Commission Affiliate Commission Vendor Commission    Click Date
## 1          $23.38               $16.50             $6.88   6/16/17 9:21
## 2          $23.38               $16.50             $6.88   6/16/17 9:21
## 3          $20.83               $14.70             $6.13  6/21/17 20:25
## 4          $25.88               $20.25             $5.63  6/11/17 10:55
## 5          $39.95               $28.20            $11.75   7/1/17 13:07
## 6          $38.25               $27.00            $11.25   6/5/17 12:47
##      Sale Date Status
## 1  7/1/17 6:54   Paid
## 2  7/1/17 9:46   Paid
## 3 7/1/17 10:31   Paid
## 4 7/1/17 10:52   Paid
## 5 7/1/17 13:22   Paid
## 6 7/1/17 19:58   Paid
Define Transaction ID
# Transaction ID to match against the `Transaction ID` column of `away`.
x <- 74398726
Use x to find the Sale Date of that transaction
# Pick the Sale Date entries on the rows whose Transaction ID equals `x`,
# then display the result.
answer1 <- away[["Sale Date"]][away[["Transaction ID"]] == x]
print(answer1)
## [1] 9/4/17 15:47
## 2949 Levels: 10/1/17 11:14 10/1/17 12:20 10/1/17 13:34 ... 9/9/17 8:17
Problem 2
# Convert `Total Commission` from currency text (e.g. "$23.38") to numeric.
# Thousands separators are stripped along with the dollar sign so that a value
# such as "$1,234.56" parses instead of silently becoming NA.
away$`Total Commission` <- as.numeric(
  gsub("[$,]", "", away$`Total Commission`)
)

# Three largest commissions, highest first (sort() drops NA by default).
top3 <- head(sort(away$`Total Commission`, decreasing = TRUE), 3)
y <- top3[3] # the third-highest commission value

# Transaction ID(s) whose commission equals that value; which() discards rows
# where the commission is NA instead of returning NA IDs.
answer2 <- away$`Transaction ID`[which(away$`Total Commission` == y)]

print(paste0("Transaction ID ", answer2))
## [1] "Transaction ID 79483698"
Problem 3
# Restrict the data to rows whose Publisher ID equals 2 (subset() also drops
# rows where the comparison is NA).
pub2 <- subset(away, `Publisher ID` == 2)

# Cut-off: mean commission plus two standard deviations.
# NOTE(review): the standard deviation comes from publisher 2 only, while the
# mean is taken over every publisher in `away`. If the intent is "two SDs above
# publisher 2's own mean", use mean(pub2$`Total Commission`) -- confirm against
# the problem statement before changing, since it alters the reported counts.
w <- mean(away$`Total Commission`) + 2 * sd(pub2$`Total Commission`)
w

# Tally publisher-2 rows at or above the cut-off (TRUE) versus below (FALSE).
# count() is not base R; it is namespace-qualified because nothing in view
# attaches plyr, whose count() yields an x/freq table for a vector.
plyr::count(pub2$`Total Commission` >= w)
##       x freq
## 1 FALSE  611
## 2  TRUE   41