R Markdown

Read the S&P 500 close CSV and get an overview of the data using summary().

Display the mean and median of each column.

# Load the SP500close data set (DAAG) from the Rdatasets repository on GitHub.
# The raw.githubusercontent.com host is used so this call agrees with the
# other read.csv() call on the same file further down in this document.
SP500_close <- read.csv(
  "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/DAAG/SP500close.csv"
)
# Overview of both columns: X (row number) and x (the series values).
summary(SP500_close)
##        X                x         
##  Min.   :   1.0   Min.   : 295.5  
##  1st Qu.: 695.8   1st Qu.: 418.8  
##  Median :1390.5   Median : 548.5  
##  Mean   :1390.5   Mean   : 723.8  
##  3rd Qu.:2085.2   3rd Qu.:1035.7  
##  Max.   :2780.0   Max.   :1527.5
# Mean and median of each column of the full data set. Columns are addressed
# by name ("X" is the row number, "x" the series values); paste() separates
# its arguments with a single space by default.
print(paste("Index mean is", mean(SP500_close[["X"]])))
## [1] "Index mean is 1390.5"
print(paste("Index median is", median(SP500_close[["X"]])))
## [1] "Index median is 1390.5"
print(paste("Stock mean is", mean(SP500_close[["x"]])))
## [1] "Stock mean is 723.789068345326"
print(paste("Stock median is", median(SP500_close[["x"]])))
## [1] "Stock median is 548.485000000003"

Create subset

# Keep the first ten rows and the first two columns as a working subset,
# then show it.
stock_subset <- SP500_close[seq_len(10), seq_len(2)]
print(stock_subset)
##     X      x
## 1   1 358.76
## 2   2 355.67
## 3   3 352.20
## 4   4 353.79
## 5   5 349.62
## 6   6 347.31
## 7   7 348.53
## 8   8 339.93
## 9   9 337.00
## 10 10 340.75

Rename the columns

# Give the two columns descriptive names, then confirm the change.
colnames(stock_subset) <- c("Index", "S")
colnames(stock_subset)
## [1] "Index" "S"

Use summary() to get an overview of the subset, then compare its mean and median with those of the original data

# Per-column summary (min, quartiles, median, mean, max) of the renamed subset.
print(summary(stock_subset))
##      Index             S        
##  Min.   : 1.00   Min.   :337.0  
##  1st Qu.: 3.25   1st Qu.:342.4  
##  Median : 5.50   Median :349.1  
##  Mean   : 5.50   Mean   :348.4  
##  3rd Qu.: 7.75   3rd Qu.:353.4  
##  Max.   :10.00   Max.   :358.8
# Mean and median of each subset column; paste() separates its arguments
# with a single space by default.
print(paste("Index mean for subset is", mean(stock_subset[["Index"]])))
## [1] "Index mean for subset is 5.5"
print(paste("Index median for subset is", median(stock_subset[["Index"]])))
## [1] "Index median for subset is 5.5"
print(paste("Stock mean for subset is", mean(stock_subset[["S"]])))
## [1] "Stock mean for subset is 348.356"
print(paste("Stock median for subset is", median(stock_subset[["S"]])))
## [1] "Stock median for subset is 349.075"
# Compare the subset with the full data set. Both comparisons use the value
# column (S in the subset, column 2 of the original): comparing the row-number
# column would only compare row positions, not the series itself.
if (mean(stock_subset$S) > mean(SP500_close[[2]])) {
  print(" subset mean is bigger")
} else {
  print(" original mean is bigger")
}
## [1] " original mean is bigger"
if (median(stock_subset$S) > median(SP500_close[[2]])) {
  print(" subset median is bigger")
} else {
  print(" original median is bigger")
}
## [1] " original median is bigger"

Add a rank column whose values will be edited in the next step

# Label each value in S: "h" above 350, "m" above 340 (up to 350), else "l".
# ifelse() is vectorised, so the whole column is classified in one call; a
# missing value propagates as NA instead of stopping with an error, as the
# scalar `if` inside sapply() would.
stock_subset$rank <- with(
  stock_subset,
  ifelse(S > 350, "h", ifelse(S > 340, "m", "l"))
)
stock_subset
##    Index      S rank
## 1      1 358.76    h
## 2      2 355.67    h
## 3      3 352.20    h
## 4      4 353.79    h
## 5      5 349.62    m
## 6      6 347.31    m
## 7      7 348.53    m
## 8      8 339.93    l
## 9      9 337.00    l
## 10    10 340.75    m

Change at least three values

# Expand the one-letter codes in the rank column to full labels, one code at
# a time; entries that match none of the codes are left as they are.
stock_subset$rank <- replace(
  stock_subset$rank, stock_subset$rank == "l", "Low"
)
stock_subset$rank <- replace(
  stock_subset$rank, stock_subset$rank == "m", "Medium"
)
stock_subset$rank <- replace(
  stock_subset$rank, stock_subset$rank == "h", "High"
)

Display the result

# Show the subset with the relabelled rank column.
print(stock_subset)
##    Index      S   rank
## 1      1 358.76   High
## 2      2 355.67   High
## 3      3 352.20   High
## 4      4 353.79   High
## 5      5 349.62 Medium
## 6      6 347.31 Medium
## 7      7 348.53 Medium
## 8      8 339.93    Low
## 9      9 337.00    Low
## 10    10 340.75 Medium

Read the CSV from GitHub

# Read the same CSV straight from GitHub and preview the first six rows.
# header = TRUE and n = 6L spell out the defaults of read.csv() and head().
data <- read.csv(
  file = "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/DAAG/SP500close.csv",
  header = TRUE
)
head(data, n = 6L)
##   X      x
## 1 1 358.76
## 2 2 355.67
## 3 3 352.20
## 4 4 353.79
## 5 5 349.62
## 6 6 347.31