Task 1: Evaluate the sum of integers between 5 and 55
# Sum of the integers from 5 to 55 inclusive.
# `5:55` is already an integer vector, so it is stored directly in `x`.
x <- 5:55
sum(x)
## [1] 1530
Task 2a: Write a function called sumfun with one input parameter n, that calculates the sum of integers between 5 and n. b: Calculate n=10, n=20, n=100
# Sum of the integers from 5 up to and including n.
#
# n: a single number, the upper end of the range.
# Returns the sum 5 + 6 + ... + n, or 0 when n is below 5 (empty range).
sumfun <- function(n) {
  stopifnot(length(n) == 1, !is.na(n))
  # `5:n` counts DOWN when n < 5 (5:3 is 5, 4, 3), which would sum the wrong
  # numbers, so the empty range is handled explicitly.
  if (n < 5) {
    return(0L)
  }
  sum(5:n)
}
sumfun(10)
## [1] 45
sumfun(20)
## [1] 200
sumfun(100)
## [1] 5040
Task 3: Write an R script that uses a loop to calculate and print out the first 12 entries of the Fibonacci series
# First n entries of the Fibonacci series (1, 1, 2, 3, 5, ...).
#
# n: a single non-negative number, how many entries to produce.
# Returns a numeric vector of length n (empty when n is 0).
fib <- function(n) {
  stopifnot(is.numeric(n), length(n) == 1, !is.na(n), n >= 0)
  series <- numeric(n)
  # Seed at most the first two entries; assigning to position 2 of a shorter
  # vector would silently grow it and return too many values for n < 2.
  series[seq_len(min(n, 2))] <- 1
  if (n >= 3) {
    # Each later entry is the sum of the two entries before it.
    for (i in 3:n) {
      series[i] <- series[i - 1] + series[i - 2]
    }
  }
  series
}
fib(12)
## [1] 1 1 2 3 5 8 13 21 34 55 89 144
Task 4:With the mtcars dataset bundled with R, use ggplot to generate a box plot of miles per gallon (in the variable mpg) as a function of the number of gears (in the variable gear). Use the fill aesthetic to colour bars by number of gears. (5pt)
library(ggplot2)
# Box plot of miles per gallon for each gear count in mtcars.
# `gear` is converted to a factor so it is treated as discrete groups, and the
# same factor drives the fill colour of each box.
ggplot(data = mtcars, aes(x = as.factor(gear), y = mpg)) +
  geom_boxplot(aes(fill = as.factor(gear))) +
  labs(title = " Box plot of Miles per gallon(mpg) as a function of gears")
Task 5: Using the cars dataset and the function lm, fit a linear relationship between speed and braking distance (the variable dist). What are the fitted slope and intercept of the line, and their standard errors? What are the units used for the variables in the dataset?
# Simple linear regression of distance on speed for the cars dataset.
# The predictor and response are copied into plain vectors named x and y,
# which is what the formula below refers to.
x <- cars$speed
y <- cars$dist
model <- lm(formula = "y ~ x")
# Coefficient table with estimates and standard errors.
summary(model)
##
## Call:
## lm(formula = "y ~ x")
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## x 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
# Answers
# Fitted slope = 3.9324
# Intercept = -17.5791
# Standard errors: intercept = 6.7584, slope = 0.4155
# Units: speed = miles per hour (mph); braking (stopping) distance = feet
Task 6. Use ggplot to plot the data points from Task 5 and the linear fit
library(ggplot2)
# Base plot stored in ggplot_1.0: scatter of dist against speed from the cars
# data, with a least-squares straight line added by geom_smooth.
ggplot_1.0 <- ggplot(data = cars, aes(x = speed, y = dist)) +
  geom_point() +
  geom_smooth(method = "lm", formula = "y ~ x")
# ggplot_1.1 is the same plot with a title and axis labels added.
ggplot_1.1 <- ggplot_1.0 +
  labs(
    title = "Linear model of the relationship between breaking distance(dist) and speed",
    x = "speed(milesperhour)",
    y = "dist(feet)"
  )
ggplot_1.1
Task 7. Again using the cars dataset, now use linear regression (lm) to estimate the average reaction time for the driver to start braking (in seconds). To simplify matters you may assume that once braking commences, braking distance is proportional to the square of the speed. Explain the steps in your analysis. Do you get reasonable results? Finally, use ggplot to plot the data points and the fitted relationship.
# Estimate the driver's average reaction time from the cars data.
#
# Model: the recorded distance is the distance covered during the reaction
# time (at constant speed) plus the braking distance, which is assumed to be
# proportional to speed squared:
#
#   dist = t_react * speed + k * speed^2
#
# There is no intercept because a car at zero speed travels zero distance.
# The coefficient of the linear speed term is therefore the reaction time,
# expressed in (distance unit) / (speed unit).

feet_per_mile <- 5280
seconds_per_hour <- 3600

# Distance converted from feet to miles, so that distance / speed is in hours.
dist_m <- cars$dist / feet_per_mile
# Speed in miles per hour, as recorded in the dataset.
speed_m_h <- cars$speed

# I() makes `^` mean arithmetic squaring inside the formula; `0 +` drops the
# intercept.
reaction_model <- lm(dist_m ~ 0 + speed_m_h + I(speed_m_h^2))
summary(reaction_model)

# Coefficient of speed = reaction time in hours (miles / (miles per hour)).
reaction_time <- unname(coef(reaction_model)["speed_m_h"])
# Hours -> seconds: multiply by the number of seconds in an hour.
converted_reaction_time <- reaction_time * seconds_per_hour
converted_reaction_time
# Answer: the estimate is about 0.84 seconds, which is a plausible order of
# magnitude for a driver's reaction time, so the result looks reasonable.
library(ggplot2)
# Plot the cars data with the fitted relationship
#   dist = a * speed + b * speed^2   (no intercept),
# i.e. a term linear in speed plus a term proportional to speed squared.
#
# Only columns of `cars` are mapped, so the plot does not depend on any
# vectors left in the workspace.
plot_1 <- ggplot(data = cars, aes(x = speed, y = dist)) +
  geom_point()
plot_1
# The geom_smooth formula must be written in terms of the aesthetics `x` and
# `y`, not the names of the underlying variables; naming workspace vectors
# here makes stat_smooth fail when it predicts on its own grid of x values.
plot_2 <- plot_1 +
  geom_smooth(method = "lm", formula = y ~ 0 + x + I(x^2))
plot_2
plot_3 <- plot_2 +
  ggtitle("Regression model between breaking distance and speed")
plot_3