Assignment_3_McCabe

The goal of this assignment is to perform exploratory data analysis, create data visualizations, and write a report in R Markdown. I will document my steps, add code blocks, prepare plots, and then write up a simple conclusion based on the analysis.

Step 1: The first step is to tell R where the data file is located, so that the data is read from the correct folder on your computer, and then to prepare the data for use in R. Here we convert the Date column to R's standard date format and remove any rows that contain missing values.

See the code below in the gray area.

# Folder that holds the Stock.data file; edit this path to match your machine.
# The path is passed straight to read.csv() instead of calling setwd(), so
# loading the data does not change the session's working directory.
data_dir <- "C:/Users/PLU5638/Desktop/Business Analytics"
stocksdata <- read.csv(file.path(data_dir, "Stock.data"))
# Show the column types and a few values from each column.
str(stocksdata)
## 'data.frame':    961 obs. of  11 variables:
##  $ Date : Factor w/ 961 levels "01APR91","01AUG88",..: 101 131 163 196 230 325 360 392 425 458 ...
##  $ CompA: num  17.2 17.9 18.4 18.7 17.4 ...
##  $ CompB: num  50.5 51.4 50.9 51.5 49 ...
##  $ CompC: num  18.8 19.6 19.9 20 20 ...
##  $ CompD: num  43 44 43.9 44 41.4 ...
##  $ CompE: num  60.9 62 61.9 62.6 59.8 ...
##  $ CompF: num  26.4 26.1 27.2 27.9 25.9 ...
##  $ CompG: num  67.8 68.1 68.5 69.4 63.2 ...
##  $ CompH: num  19 19.1 18.2 18.4 16.5 ...
##  $ CompI: num  48.8 48.8 49 49.6 47.5 ...
##  $ CompJ: num  34.9 35.6 36.4 36.2 35.5 ...
# Parse the raw day-month-year strings (e.g. "01APR91") into Date values.
# Note that %b matches month abbreviations in the current locale.
stocksdata[["Date"]] <- as.Date(stocksdata[["Date"]], format = "%d%b%y")

# Keep the table as a plain data frame, then check its size and contents.
stocksdata <- as.data.frame(stocksdata)
length(stocksdata[["CompA"]])
## [1] 961
dim(stocksdata)
## [1] 961  11
summary(stocksdata)
##       Date                CompA           CompB           CompC      
##  Min.   :1988-01-04   Min.   :17.22   Min.   :19.25   Min.   :12.75  
##  1st Qu.:1988-12-13   1st Qu.:27.78   1st Qu.:35.44   1st Qu.:16.12  
##  Median :1989-11-24   Median :38.92   Median :49.00   Median :19.38  
##  Mean   :1989-11-25   Mean   :37.94   Mean   :43.96   Mean   :18.71  
##  3rd Qu.:1990-11-06   3rd Qu.:46.88   3rd Qu.:53.25   3rd Qu.:20.88  
##  Max.   :1991-10-18   Max.   :61.50   Max.   :60.25   Max.   :25.12  
##                       NA's   :10      NA's   :10      NA's   :10     
##      CompD           CompE           CompF           CompG      
##  Min.   :34.38   Min.   :27.75   Min.   :14.12   Min.   :58.00  
##  1st Qu.:41.38   1st Qu.:49.66   1st Qu.:18.00   1st Qu.:65.62  
##  Median :44.00   Median :61.75   Median :25.75   Median :68.62  
##  Mean   :45.36   Mean   :60.86   Mean   :24.12   Mean   :70.67  
##  3rd Qu.:48.19   3rd Qu.:71.84   3rd Qu.:28.88   3rd Qu.:76.38  
##  Max.   :60.12   Max.   :94.12   Max.   :35.25   Max.   :87.25  
##  NA's   :10      NA's   :11      NA's   :11      NA's   :11     
##      CompH           CompI           CompJ      
##  Min.   :16.38   Min.   :31.50   Min.   :34.00  
##  1st Qu.:21.25   1st Qu.:41.75   1st Qu.:41.38  
##  Median :22.62   Median :44.75   Median :46.69  
##  Mean   :23.48   Mean   :44.21   Mean   :46.99  
##  3rd Qu.:26.38   3rd Qu.:47.62   3rd Qu.:52.12  
##  Max.   :40.12   Max.   :53.00   Max.   :62.00  
##                  NA's   :11      NA's   :11
# Preview the first rows to confirm the Date column now shows parsed dates.
head(stocksdata)
##         Date  CompA  CompB  CompC  CompD  CompE  CompF  CompG  CompH  CompI
## 1 1988-01-04 17.219 50.500 18.750 43.000 60.875 26.375 67.750 19.000 48.750
## 2 1988-01-05 17.891 51.375 19.625 44.000 62.000 26.125 68.125 19.125 48.750
## 3 1988-01-06 18.438 50.875 19.875 43.875 61.875 27.250 68.500 18.250 49.000
## 4 1988-01-07 18.672 51.500 20.000 44.000 62.625 27.875 69.375 18.375 49.625
## 5 1988-01-08 17.438 49.000 20.000 41.375 59.750 25.875 63.250 16.500 47.500
## 6 1988-01-11 18.109 49.000 19.500 41.875 59.625 26.625 66.250 17.125 47.750
##    CompJ
## 1 34.875
## 2 35.625
## 3 36.375
## 4 36.250
## 5 35.500
## 6 34.375
# Confirm that the Date column really has class "Date".
class(stocksdata[["Date"]])
## [1] "Date"
# Count the missing cells across the whole table.
sum(is.na(stocksdata))
## [1] 95
# Drop every row that has at least one missing value.
stocksdata_clean <- na.omit(stocksdata)
head(stocksdata_clean) # Please make sure you see the dates in the Date column
##         Date  CompA  CompB  CompC  CompD  CompE  CompF  CompG  CompH  CompI
## 1 1988-01-04 17.219 50.500 18.750 43.000 60.875 26.375 67.750 19.000 48.750
## 2 1988-01-05 17.891 51.375 19.625 44.000 62.000 26.125 68.125 19.125 48.750
## 3 1988-01-06 18.438 50.875 19.875 43.875 61.875 27.250 68.500 18.250 49.000
## 4 1988-01-07 18.672 51.500 20.000 44.000 62.625 27.875 69.375 18.375 49.625
## 5 1988-01-08 17.438 49.000 20.000 41.375 59.750 25.875 63.250 16.500 47.500
## 6 1988-01-11 18.109 49.000 19.500 41.875 59.625 26.625 66.250 17.125 47.750
##    CompJ
## 1 34.875
## 2 35.625
## 3 36.375
## 4 36.250
## 5 35.500
## 6 34.375

Create line plots for each company

Step C part 1:

Here we create a line plot for each company individually and arrange the plots together in a single chart. Each plot shows the stock price against the date, so you can compare the companies' lines side by side.

See the code below in the gray area.

# Multiple plots in a chart: one line plot per company on a 2 x 5 grid.
company_cols <- paste0("Comp", LETTERS[1:10]) # "CompA" ... "CompJ"
old_par <- par(mfrow = c(2, 5)) # par() returns the settings it replaced
for (company in company_cols) {
  plot(
    stocksdata_clean$Date, stocksdata_clean[[company]],
    type = "l", main = company, xlab = "Date", ylab = "Stock Prices"
  )
}
# Put the previous layout back so later plots are not drawn into this grid.
par(old_par)

Create line plots for each company

Step C part 2:

Plot all 10 time series on a single chart (i.e. 10 lines, each with a different color, and a legend). First we use the par() command to return to one plot per chart. Then we plot the first company, setting the line type, the x- and y-axis labels, and the line color. After this we use lines() to add the remaining companies to the same chart.

# Multiple lines plotted in a chart: all ten companies, one colour each.
# The named vector is the single source for both the lines and the legend,
# so the legend cannot drift out of sync with the plotted colours.
series_cols <- c(
  CompA = "red", CompB = "green", CompC = "yellow", CompD = "blue",
  CompE = "aquamarine", CompF = "orange", CompG = "purple", CompH = "pink",
  CompI = "cyan", CompJ = "darkblue"
)
par(mfrow = c(1, 1)) # back to one plot per chart

# The first company sets up the axes; the fixed y-range keeps every series
# in view and leaves space at the top for the legend.
first_company <- names(series_cols)[1]
plot(
  stocksdata_clean$Date, stocksdata_clean[[first_company]],
  type = "l", lwd = 2, xlab = "Date", ylab = "Stock Prices",
  col = series_cols[[first_company]], ylim = c(0, 120)
)
# The remaining companies are added to the same axes.
for (company in names(series_cols)[-1]) {
  lines(
    stocksdata_clean$Date, stocksdata_clean[[company]],
    type = "l", lwd = 2, col = series_cols[[company]]
  )
}
legend(
  "topright", legend = names(series_cols), col = unname(series_cols),
  ncol = 2, lwd = 2, lty = 1, cex = 0.75
)

Summarize the data for each company

Step D:

Here we compute the mean stock price for each company and store the result in a variable called mean_stocks. We then draw a bar chart from these means, with the companies on the x-axis and the stock price on the y-axis. This shows the average price of each company's stock and lets us compare the companies side by side. With this information you can determine which companies' stocks sell for a high price and which sell for a relatively low price. Companies C, F, and H all have very low average stock prices, while companies G and E have very high ones. With this information you could group the companies into low, medium, and high stock-price categories.

# Mean of stocks: average price per company over the cleaned data.
# vapply() is used instead of sapply() so the result is always a numeric
# vector with exactly one value per column, named after the columns.
price_cols <- c(
  "CompA", "CompB", "CompC", "CompD", "CompE",
  "CompF", "CompG", "CompH", "CompI", "CompJ"
)
mean_stocks <- vapply(stocksdata_clean[price_cols], mean, numeric(1))

mean_stocks
##    CompA    CompB    CompC    CompD    CompE    CompF    CompG    CompH 
## 37.92559 43.95553 18.70447 45.35289 60.86355 24.12263 70.67342 23.29421 
##    CompI    CompJ 
## 44.21421 46.99408
# Bar chart of the average price per company, one colour per bar.
barplot(
  mean_stocks,
  col = c(
    "red", "green", "yellow", "blue", "aquamarine",
    "orange", "purple", "pink", "cyan", "darkblue"
  ),
  xlab = "Companies",
  ylab = "Stock Price ($)",
  main = "Average Stock Price"
)