Section A

Exercise A.1

### Your code here:
2+2
## [1] 4
## [1] 4

##install.packages("ggplot2")
##install.packages()
library(ggplot2)
?"ggplot2"
## starting httpd help server ... done

Exercise A.2

### Your code here:
typeof(1.333)
## [1] "double"
## double

typeof(2)
## [1] "double"
## double

typeof("5.654")
## [1] "character"
## character 

typeof(TRUE)
## [1] "logical"
## logical 

typeof("Green is a pretty color")
## [1] "character"
## character 

typeof(as.integer(4.5))
## [1] "integer"
## integer 

typeof(FALSE)
## [1] "logical"
## logical

## "5.654"+"5.654"
## printed error

as.double("5.654")+ as.double("5.654")
## [1] 11.308
## 11.308

as.integer("5.654")+ as.integer("5.654")
## [1] 10
## 10

## as.double("TestTestTest")
## Warning message: NAs introduced by coercion. This warning is expected because the characters in quotation marks cannot be converted into a number.

Exercise A.3

### Your code here:
a <- 2.5
b <- 3.0

a+b
## [1] 5.5
## reassign values
a <- 10.0

a*b
## [1] 30

Exercise A.4

### Your code here:
v1 <- c(1,2,3,4,5)
v2 <- c(-1,-2,-3,-4,-5)

## adding the two vectors together
v1+v2
## [1] 0 0 0 0 0
## the result 0 0 0 0 0 is what I expected because the two vectors have the same length and each pair of corresponding values sums to 0.

v3 <- seq(from=1, to=5, by=+1)
v3
## [1] 1 2 3 4 5
s1 <- seq(from=-10, to=100,by=10)

c('a','b','c','d','e')[2]
## [1] "b"
## 'b'

c('a','b','c','d','e')[2:4]
## [1] "b" "c" "d"
## 'b' 'c' 'd'

p <- c(1,2,3,4,5)
p^2
## [1]  1  4  9 16 25
## 1 4 9 16 25
sqrt(p)
## [1] 1.000000 1.414214 1.732051 2.000000 2.236068
## 1.000000 1.414214 1.732051 2.000000 2.236068
exp(p)
## [1]   2.718282   7.389056  20.085537  54.598150 148.413159
## 2.718282   7.389056  20.085537  54.598150 148.413159
?exp
sum(p)
## [1] 15
## 15
mean(p)
## [1] 3
## 3

Exercise A.5

### Your code here:
data <- seq(from=1, to=10,by=+1)
data
##  [1]  1  2  3  4  5  6  7  8  9 10
?matrix
d1 <- matrix(data=data,nrow=2,ncol=5)
d1
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10
d2 <- matrix(data=data,nrow=5,ncol=2)

d1%*%d2
##      [,1] [,2]
## [1,]   95  220
## [2,]  110  260
## 95  220
## 110 260

d2%*%d1
##      [,1] [,2] [,3] [,4] [,5]
## [1,]   13   27   41   55   69
## [2,]   16   34   52   70   88
## [3,]   19   41   63   85  107
## [4,]   22   48   74  100  126
## [5,]   25   55   85  115  145
## 5*5 matrix

## the resulting matrix products are different because d1%*%d2 gives a 2*2 matrix and d2%*%d1 gives a 5*5 matrix

?data.frame()

d3 <- data.frame("col1"=data[1:5],"col2"=data[6:10])

colors_data <- c("green","blue","red","purple","yellow")

cbind(d3,colors_data)
## successfully added colors_data as the third column

d3$colors <- colors_data
d3
## I get to decide what the new column is called

##d3[colors] <- colors_data
## error message

d3["colors"] <- colors_data
## need to put quotation marks around the column name

col2_data <-d3$col2
col2_data
## [1]  6  7  8  9 10
sum(col2_data)
## [1] 40
## 40 
mean(col2_data)
## [1] 8
## 8

Exercise A.6

### Your code here:

## setwd("C:\OneDrive\桌面\Rice University\Spring 2025 -- In Progress\STAT 410\Lab 1")
vapor_df <- read.table(file="Vapor.txt", header=TRUE)
head(vapor_df)
vapor_df$Temp
##  [1] 273 283 293 303 313 323 333 343 353 363 373
## [1] 273 283 293 303 313 323 333 343 353 363 373
vapor_df$vp
##  [1]   4.6   9.2  17.5  31.8  55.3  92.5 149.4 233.7 355.1 525.8 760.0
## [1]   4.6   9.2  17.5  31.8  55.3  92.5 149.4 233.7 355.1 525.8 760.0

##setwd("C:\OneDrive\桌面\Rice University\Spring 2025 -- In Progress\STAT 410\Lab 1")
# Load the athlete events data from the current working directory.
athlete_events_df <- read.csv(file = "athlete_events.csv", header = TRUE)

# View() opens a spreadsheet-style viewer, which only makes sense when a person
# is at the console; skip it in non-interactive runs so the script keeps going.
if (interactive()) {
  View(athlete_events_df)
}
head(athlete_events_df)

Exercise A.7

### Your code here:
# Evaluate the quadratic x^2 - 5x + 2 at x (elementwise when x is a vector).
my_func <- function(x) {
  squared_term <- x^2
  linear_term <- 5 * x
  squared_term - linear_term + 2
}

my_func(2)
## [1] -4
## -4

my_nums <- seq(from=0, to=10, by=+1)
my_nums
##  [1]  0  1  2  3  4  5  6  7  8  9 10
# Hand-rolled arithmetic mean: the total of the values divided by their count.
my_mean_func <- function(nums) {
  total <- sum(nums)
  count <- length(nums)
  total / count
}

my_mean_func(my_nums)
## [1] 5
## 5 

mean(my_nums)
## [1] 5
## 5

## the results are the same because my function is defined correctly

Exercise A.8

### Your code here:
##setwd("C:\OneDrive\桌面\Rice University\Spring 2025 -- In Progress\STAT 410\Lab 1")
vapor_df <- read.table(file="Vapor.txt", header=TRUE, col.names=c("TEMP","VP"))

plot(vapor_df$TEMP,vapor_df$VP,type='b',xlab="Temperature",ylab="Vapor Pressure",col="blue",pch=7)

Section B

Exercise B.1.a

### Your code here:
##setwd("C:\OneDrive\桌面\Rice University\Spring 2025 -- In Progress\STAT 410\Lab 1")
vball <- read.csv(file="fallingballmars.csv", header=TRUE)

head(vball)
## the speedometer records "speed_mps"

plot(vball$time,vball$speed_mps,xlab="Time",ylab="Speed (m/s)")

## the data seems to show a strong negative linear relationship/correlation

Exercise B.1.b

### Your code here:

# Fit the straight line y = v_t0 + g_mars * x by least squares.
#
# Args:
#   x: numeric vector of predictor values (the caller passes time).
#   y: numeric vector of responses (the caller passes speed); must have the
#      same length as x.
#
# Returns:
#   An unnamed numeric vector c(v_t0, g_mars, sigma2): the intercept, the
#   slope, and the residual sum of squares divided by n (not n - 2).
para_estimate <- function(x, y) {
  # Without this guard R silently recycles the shorter vector whenever the
  # lengths divide evenly, which yields a meaningless fit instead of an error.
  if (length(x) != length(y)) {
    stop("x and y must have the same length", call. = FALSE)
  }
  y_mean <- mean(y)
  x_mean <- mean(x)
  x_centered <- x - x_mean
  g_mars <- sum(y * x_centered) / sum(x_centered^2)
  v_t0 <- y_mean - g_mars * x_mean
  fit_errors <- y - v_t0 - g_mars * x
  # Called sigma2 rather than `var` so that base::var() is not shadowed.
  sigma2 <- sum(fit_errors^2) / length(y)
  c(v_t0, g_mars, sigma2)
}

para <- para_estimate(vball$time,vball$speed_mps)
## v_t0 =15.224757
## g_mars = -3.750958
## variance = 10.029875

## the number seems reasonable 
## according to google results, g_mars is 3.73m/s^2, so very reasonable 
## for comparison, the gravitational acceleration on Earth is about 9.8 m/s^2

plot(vball$time,vball$speed_mps,xlab="Time",ylab="Speed (m/s)")
abline(a=para[1], b= para[2])

x <- para[1]/-para[2]
x
## [1] 4.058899
## expected time when speed = 0: about 4.058899 s

Exercise B.1.c

### Your code here:

pred_speed <- para[1]+para[2]*vball$time

resi_error <- vball$speed_mps - pred_speed

plot(pred_speed,resi_error,xlab="Predicted Speed",ylab="Residuals")
abline(a=0,b=0)

## the graph doesn't have a distinct pattern or shape because it is not squared

sum(resi_error)
## [1] 3.608225e-14
## sum is 3.608225e-14

## theoretically, the value of sum of residuals should be 0

qqnorm(resi_error)
qqline(resi_error,col="blue")

# Estimate the error variance of the least-squares line fitted to (x, y).
#
# Args:
#   x: numeric vector of predictor values.
#   y: numeric vector of responses; must have the same length as x.
#
# Returns:
#   A single number: the residual sum of squares of the fitted line divided
#   by n, the number of observations (not n - 2).
calc_sig2 <- function(x, y) {
  # Without this guard R silently recycles the shorter vector whenever the
  # lengths divide evenly, which yields a meaningless estimate.
  if (length(x) != length(y)) {
    stop("x and y must have the same length", call. = FALSE)
  }
  y_mean <- mean(y)
  x_mean <- mean(x)
  x_centered <- x - x_mean
  slope <- sum(y * x_centered) / sum(x_centered^2)
  intercept <- y_mean - slope * x_mean
  fit_errors <- y - intercept - slope * x
  # Computed directly instead of through a local named `var`, which would
  # shadow base::var().
  sum(fit_errors^2) / length(y)
}

calc_sig2(vball$time,vball$speed_mps)
## [1] 10.02987
## 10.02987