Task 1: Evaluate the sum of integers between 5 and 55

# Build the run of integers 5, 6, ..., 55 and add them up.
x <- 5:55
sum(x)
## [1] 1530

Task 2: (a) Write a function called sumfun with one input parameter n that calculates the sum of the integers between 5 and n. (b) Evaluate it for n = 10, n = 20 and n = 100.

# Sum of the consecutive integers running from 5 to `n`.
#
# n: the other end of the range (a single number).
# Returns the total of 5:n. Note that `5:n` counts downwards when n < 5,
# so for example n = 3 adds 5 + 4 + 3.
sumfun <- function(n) {
  bounds <- 5:n
  sum(bounds)
}
sumfun(10)
## [1] 45
sumfun(20)
## [1] 200
sumfun(100)
## [1] 5040

Task 3: Write an R script that uses a loop to calculate and print out the first 12 entries of the Fibonacci series

# Return the first `n` entries of the Fibonacci series (1, 1, 2, 3, 5, ...).
#
# n: how many entries to produce (a single non-negative number).
# Returns a numeric vector with one element per requested entry; it is
# empty when n is 0.
fib <- function(n) {
  series <- numeric(n)
  len <- length(series)

  # Seed only the entries that actually exist. Assigning series[2] when the
  # vector is shorter than 2 silently grows it, which is why fib(0) and
  # fib(1) used to come back as c(1, 1).
  series[seq_len(min(len, 2L))] <- 1

  # Every later entry is the sum of the two entries before it.
  if (len >= 3L) {
    for (i in 3:len) {
      series[i] <- series[i - 1] + series[i - 2]
    }
  }

  series
}

fib(12)
##  [1]   1   1   2   3   5   8  13  21  34  55  89 144

Task 4: With the mtcars dataset bundled with R, use ggplot to generate a box plot of miles per gallon (in the variable mpg) as a function of the number of gears (in the variable gear). Use the fill aesthetic to colour the boxes by number of gears. (5pt)

library(ggplot2)
# Box plot of mpg for each gear count. as.factor() makes ggplot treat `gear`
# as a discrete grouping, giving one box (and one fill colour) per value.
ggplot(data = mtcars, aes(x = as.factor(gear), y = mpg)) +
  geom_boxplot(aes(fill = as.factor(gear))) +
  labs(title = " Box plot of Miles per gallon(mpg) as a function of gears")

Task 5: Using the cars dataset and the function lm, fit a linear relationship between speed and braking distance (the variable dist). What are the fitted slope and intercept of the line, and their standard errors? What are the units used for the variables in the dataset?

# Simple linear regression of distance on speed for the `cars` data.
# The response and predictor are copied into `y` and `x` first because the
# formula below refers to them by those names.
x <- cars$speed
y <- cars$dist
model <- lm(formula = "y ~ x")
# Coefficient table with estimates, standard errors and fit statistics.
summary(model)
## 
## Call:
## lm(formula = "y ~ x")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## x             3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
# Answers
# Fitted slope = 3.9324 (feet of distance per mile per hour of speed)
# Intercept = -17.5791 (feet)
# Standard errors: intercept = 6.7584, slope = 0.4155
# Units: speed is in miles per hour; braking distance is in feet

Task 6. Use ggplot to plot the data points from Task 5 and the linear fit

library(ggplot2) 
# ggplot_1.0: scatter plot of dist against speed from `cars`, with a
# least-squares line added by geom_smooth().
ggplot_1.0 <- ggplot(data = cars, aes(x = speed, y = dist)) +
  geom_point() +
  geom_smooth(method = "lm", formula = "y ~ x")
# ggplot_1.1: the same plot with a title and axis labels attached.
ggplot_1.1 <- ggplot_1.0 +
  labs(
    title = "Linear model of the relationship between breaking distance(dist) and speed",
    x = "speed(milesperhour)",
    y = "dist(feet)"
  )
ggplot_1.1

Task 7. Again using the cars dataset, now use linear regression (lm) to estimate the average reaction time for the driver to start braking (in seconds). To simplify matters you may assume that once braking commences, braking distance is proportional to the square of the speed. Explain the steps in your analysis. Do you get reasonable results? Finally, use ggplot to plot the data points and the fitted relationship.

# Task 7 model. The recorded distance is the distance travelled while the
# driver reacts plus the distance travelled while braking:
#   dist = reaction_time * speed + k * speed^2
# The reaction time is therefore the coefficient of the LINEAR speed term, so
# both speed and speed^2 have to be regressors. Regressing distance on speed^2
# alone leaves no coefficient that represents the reaction time.

# Distance converted from feet to miles (one foot is about 0.000189 miles), so
# that distance divided by speed (miles per hour) comes out in hours.
dist_m <- cars$dist * 0.000189
dist_m
##  [1] 0.000378 0.001890 0.000756 0.004158 0.003024 0.001890 0.003402 0.004914
##  [9] 0.006426 0.003213 0.005292 0.002646 0.003780 0.004536 0.005292 0.004914
## [17] 0.006426 0.006426 0.008694 0.004914 0.006804 0.011340 0.015120 0.003780
## [25] 0.004914 0.010206 0.006048 0.007560 0.006048 0.007560 0.009450 0.007938
## [33] 0.010584 0.014364 0.015876 0.006804 0.008694 0.012852 0.006048 0.009072
## [41] 0.009828 0.010584 0.012096 0.012474 0.010206 0.013230 0.017388 0.017577
## [49] 0.022680 0.016065
# Speed in miles per hour: the regressor for the reaction-distance term.
speed_mph <- cars$speed
# Despite its name, speed_m_h holds the SQUARE of the speed (mph^2): the
# regressor for the braking-distance term.
speed_m_h <- cars$speed^2
speed_m_h
##  [1]  16  16  49  49  64  81 100 100 100 121 121 144 144 144 144 169 169 169 169
## [20] 196 196 196 196 225 225 225 256 256 289 289 289 324 324 324 324 361 361 361
## [39] 400 400 400 400 400 484 529 576 576 576 576 625
# Fit both terms. The intercept is removed (`0 +`) because a car that is not
# moving needs no distance to stop.
reaction_model <- lm(formula = dist_m ~ 0 + speed_mph + speed_m_h)
reaction_model
# The speed_mph coefficient has units miles / (miles per hour) = hours. It is
# read from the fitted model rather than typed in by hand.
reaction_time <- unname(coef(reaction_model)["speed_mph"])
# Hours to seconds: MULTIPLY by 3600 (an hour contains 3600 seconds).
coverted_reaction_time <- reaction_time * 3600
coverted_reaction_time # Answer: roughly 0.84 seconds, a plausible human reaction time
    library(ggplot2)
    # Scatter plot of distance (dist_m) against squared speed (speed_m_h).
    # The two vectors are gathered into their own data frame so that every
    # aesthetic is looked up in the plot data.
    task7_data <- data.frame(speed_m_h = speed_m_h, dist_m = dist_m)
    plot_1 <- ggplot(data = task7_data, aes(x = speed_m_h, y = dist_m)) +
      geom_point()
    plot_1

    # geom_smooth() fits its model on the x and y aesthetics, so the formula
    # has to be written in terms of `y` and `x`. Writing it with the vector
    # names (dist_m ~ speed_m_h) made stat_smooth() fail with
    # "'newdata' had 80 rows but variables found have 50 rows", and no line
    # was drawn. This draws the least-squares line of dist_m on speed_m_h.
    plot_2 <- plot_1 + geom_smooth(method = "lm", formula = y ~ x)
    plot_2

    plot_3 <- plot_2 + ggtitle("Regression model between breaking distance and speed")
    plot_3