attach(mtcars)
# attach() puts the columns of mtcars (wt, mpg, hp, ...) on the search path so
# that they can be used as if they were ordinary variables.
# NOTE: attach() is kept because other parts of this script may refer to the
# columns by bare name. A global variable with the same name as a column masks
# the attached column, so the calls below name the data set explicitly with
# with() instead of depending on the search path.
# ------ BOXPLOT ------
# Below we create 3 boxplots, building them up with more and more arguments.
with(mtcars, boxplot(wt, mpg))
# Let's try to rotate the plot.
with(mtcars, boxplot(wt, mpg, horizontal = TRUE))
# Add more parameters: a label for each box, narrower boxes, and no notches.
with(mtcars, boxplot(
  wt, mpg,
  horizontal = TRUE,
  names = c("Weight", "Miles Per Gal."),
  boxwex = 0.4,
  notch = FALSE
))
## There is a crucial problem with the plots above, however: the data is not suitable for a boxplot comparison.
## We are using the boxplot to compare the weight of the car (in 1000 lbs) with the miles per gallon.
## These two variables are on wildly different scales, so putting them side by side does not really show us anything.
## We need data that is comparable. Usually this means taking one variable and splitting it up into multiple groups.
# A good example of a boxplot: one variable (mpg) is split into groups by a
# second variable (cyl). data = mtcars tells boxplot() where to find both
# columns, so the call does not depend on attach().
# With horizontal = TRUE the mpg values run along the horizontal axis and the
# cylinder groups along the vertical axis, so xlab labels mpg and ylab labels
# the cylinder groups.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  xlab = "Miles Per Gallon",
  ylab = "Number of Cylinders",
  main = "Good Ex. of Boxplot",
  notch = FALSE,
  horizontal = TRUE
)
# Notice the ~
# In a formula, the ~ reads as "explained by" or "as a function of". Here, mpg ~ cyl means: split mpg into groups, one per value of cyl.
# Remember to use "?" whenever you are unsure about a function.
# For example, use ?plot if you need to learn more about the function and the arguments it takes.
# Histograms are another form of data visualization.
# hist() draws the plot and also returns the computed bins (breaks, counts,
# ...); that result is stored in h. with(mtcars, ...) looks mpg up in the data
# set itself, so the call does not depend on attach().
h <- with(mtcars, hist(mpg))
# Then we can add more arguments to the plot: a title, an x-axis label, and a
# suggested number of bins.
with(mtcars, hist(
  mpg,
  main = "Title",
  xlab = "Miles Per Gallon",
  breaks = 12
))
## Scatter Plot
# We use the plot() function. Given two numeric vectors it draws one point per
# observation; type = "p" (points) is also the default. Use type = "l" to
# connect the values with a line instead.
# with(mtcars, ...) looks mpg and wt up in the data set itself, so the calls
# do not depend on attach().
with(mtcars, plot(mpg, wt))
with(mtcars, plot(
  mpg, wt,
  main = "Main Title",
  xlab = "X axis",
  ylab = "Y Axis",
  type = "p",
  col = "blue"
))
# Several curves on one set of axes.
# First build the x values: a grid from -pi to pi in steps of 0.1.
X <- seq(from = -pi, to = pi, by = 0.1)
# plot() opens the figure and draws the first curve; type = "l" joins the
# values with a line.
plot(
  x = X, y = sin(X),
  type = "l", col = "blue",
  main = "Main Title", xlab = "X Label", ylab = "Y Label"
)
# lines() adds a further curve to the existing figure from its own x-y inputs.
lines(x = X, y = cos(X), col = "red")
# Add a legend; the colours are listed in the same order as the labels.
legend(
  x = "topleft",
  legend = c("sin(x)", "cos(x)"),
  fill = c("blue", "red")
)
# Subplots: par(mfrow = c(2, 2)) splits the plotting device into a 2 x 2 grid
# that is filled row by row. par() returns the previous settings; they are
# saved in old_par and restored at the end, otherwise every later plot would
# still be drawn into one cell of the grid.
old_par <- par(mfrow = c(2, 2))
# with(mtcars, ...) looks mpg, wt and hp up in the data set itself; X is the
# sequence of x values defined earlier in the script.
with(mtcars, {
  boxplot(mpg, main = "Box Plot")
  pie(wt, radius = 1, main = "Pie Chart")
  plot(X, sin(X), main = "Line PLot", col = "red")
  lines(X, cos(X), col = "blue")
  # The legend colours must follow the curves: sin(x) is drawn in red and
  # cos(x) in blue.
  legend("topright", c("sin(x)", "cos(x)"), fill = c("red", "blue"))
  plot(wt, hp, main = "Another Line Plot")
})
par(old_par)
# Let's go back to linear regression from last week and break it down.
# I want to look at the relationship between car weight (wt) and horsepower (hp).
# Scatter plot of horsepower against weight; the fitted line is added below.
with(mtcars, plot(wt, hp, main = "Main Title"))
# lm(Y-variable ~ X-variable) fits the linear model. data = mtcars tells lm()
# where to find hp and wt, so the fit does not depend on attach().
LinearRelationship <- lm(hp ~ wt, data = mtcars)
# abline() draws the fitted line on top of the existing scatter plot.
abline(LinearRelationship, col = "red")
#### The function that created the linear model is, naturally, lm()
# Here is what is actually created when you call lm(Y-variable ~ X-variable)
LinearRelationship
##
## Call:
## lm(formula = hp ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept)           wt
##      -1.821       46.160
# coef() returns a named numeric vector. [[ ]] extracts a single bare number;
# selecting by name rather than by position keeps intercept and slope from
# being mixed up.
b <- coef(LinearRelationship)[["(Intercept)"]]
m <- coef(LinearRelationship)[["wt"]]
# y = mx + b
# MyFunction(x): predicted horsepower for a car of weight x (same units as wt,
# i.e. 1000 lbs). It reads the slope m and intercept b defined just above.
MyFunction <- function(x) {
  m * x + b
}
MyFunction(6) # If I had a 6000 lb car, what would be the horsepower?
## [1] 275.1394
# summary() reports the residuals, coefficient estimates with standard errors
# and p-values, and the overall fit (R-squared, F-statistic).
summary(LinearRelationship)
##
## Call:
## lm(formula = hp ~ wt, data = mtcars)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -83.430 -33.596 -13.587   7.913 172.030
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -1.821     32.325  -0.056    0.955
## wt            46.160      9.625   4.796 4.15e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 52.44 on 30 degrees of freedom
## Multiple R-squared: 0.4339, Adjusted R-squared: 0.4151
## F-statistic: 23 on 1 and 30 DF, p-value: 4.146e-05