Mode Function
my_mode<-function(x){
unique_x<-unique(x)
tabulate_x<-tabulate(match(x,unique_x))
unique_x[tabulate_x==max(tabulate_x)]
}
my_mode(c(10,20,30,40,50,20,2222222,13,342))
[1] 20
Correlation coefficient Function
cor.coeff<-function(x,y){
avg_x<-mean(x)
avg_y<-mean(y)
nom<-sum((x-avg_x)*(y-avg_y))
ssx<-sum((x-avg_x)^2)
ssy<-sum((y-avg_y)^2)
denom<-sqrt(ssx*ssy)
r<-nom/denom
r
}
cor.coeff(c(1,2,3,4,5,6,7,8,9), c(10,20,30,40,50,20,2222222,13,342))
[1] 0.273945
Custom (Range of a vector) Function
own.range <- function(x){
n<-length(x)
for (i in 1:n){
for (j in 1:(n-1)){
if (x[j] > x[j+1]){
temp <- x[j]
x[j] <- x[j+1]
x[j+1] <- temp
}
}
}
range <- x[n]-x[1]
range
}
own.range(c(10,20,30,40,50,20,2222222,13,342))
[1] 2222212
Suppose a box contains 10 identical items and the probability of
having a defective item is 0.2.Then
i)Prob. of having no defective item ii)Find the number of defective
items in 1000 such boxes iii)Estimate the probability of containing
exactly 1 defective item iv)Estimate the probability of containing at
most 1 defective item
# Given
n <- 10 # items per box
p <- 0.2 # probability of defect
# i) Probability of no defective item
prob_0 <- dbinom(0, size = n, prob = p)
prob_0
[1] 0.1073742
# ii) Expected number of defective items in 1000 boxes
expected_defects <- 1000 * (n * p)
expected_defects
[1] 2000
# iii) Probability of exactly 1 defective item
prob_1 <- dbinom(1, size = n, prob = p)
prob_1
[1] 0.2684355
# iv) Probability of at most 1 defective item
prob_at_most_1 <- pbinom(1, size = n, prob = p)
prob_at_most_1
[1] 0.3758096
Plotting PMF and CDF of X~binomial(5,0.6)
x<-0:5
y<-dbinom(x,5,0.6)
z<-pbinom(x,5,0.6)
plot(x,y,type="h",xlab="x",ylab="f(x)",
main="PMF of X")

plot(x,z,type="s",xlab="x",ylab="F(x)",
main="CDF of X")

Log Likelihood of Bernoulli Distribution
LL.Ber<-function(x,p){
sum(x)*log(p)+(length(x)-sum(x))*log(1-p)
}
sample <- rbinom(10, size = 1, prob = 0.3)
LL.Ber(x = sample, p)
[1] -6.390319
Sum function
sum_value<-function(x){
n<-length(x)
s<-0
for(i in 1:n){
s<-s+x[i]
}
s
}
sum_value(c(10,20,30,40,50,20,2222222,13,342))
[1] 2222747
Factorial function
own.factorial<-function(n){
x<-1:n
L<-length(x)
prod<-1
for(i in 1:L){
prod<-prod*x[i]
}
prod
}
own.factorial(5)
[1] 120
Write a function that finds absolute value of a vector
bs_value<-function(x){
ifelse(x<0,-x,x)}
abs_value(c(10,20,30,40,50,20,-2222222,13,-342))
[1] 10 20 30 40 50
[6] 20 2222222 13 342
Ascending(sort) function
asc.sort <- function(x){
n<-length(x)
for (i in 1:n){
for (j in 1:(n-1)){
if (x[j] > x[j+1]){
temp <- x[j]
x[j] <- x[j+1]
x[j+1] <- temp
}
}
}
x
}
asc.sort(c(10,20,30,40,50,20,-2222222,13,-342))
[1] -2222222 -342 10 13
[5] 20 20 30 40
[9] 50
Descending(sort)function
des.sort <- function(x){
n<-length(x)
for (i in 1:n){
for (j in 1:(n-1)){
if (x[j] < x[j+1]){
temp <- x[j]
x[j] <- x[j+1]
x[j+1] <- temp
}
}
}
x
}
des.sort(c(10,20,30,40,50,20,-2222222,13,-342))
[1] 50 40 30 20
[5] 20 13 10 -342
[9] -2222222
Write a function to find maximum value of a function
max_value<-function(x){
n<-length(x)
for(i in 2:n){
if(x[1]<x[i]){
x[1]<-x[i]
}
}
x[1]
}
max_value(c(10,20,30,40,50,20,-2222222,13,-342))
[1] 50
Write a function that counts length without missing value
non.na.count<-function(x){
i=1
count=0
n<- length(x)
while (i<=n){
if (!is.na(x[i])){
count<-count+1
}
i=i+1
}
count
}
non.na.count(c(NA,20,30,40,50,NA,20,-2222222,13,-342))
[1] 8
Let X1,…,Xn be a sequence of n random integers between 1 and 10 and
let Yn=ΣXi.Find n so that max(Yn)<100
max100<- function(x){
set.seed(209)
sum=0
i=1
n=length(x)
while (sum<100){
sum<-sum+x[i]
i<-i+1
}
cat("n =",i,"\n")
cat("Sum =",sum)
}
max100(sample(1:10, 100, replace=TRUE))
n = 19
Sum = 100
Create a user-defined function to compute the mean of numeric
variables only and apply it to the birthwt dataset.
data.mean<-function(x){
if(is.numeric(x)==T){
mean(x)}
else{
NA
}
}
library(MASS)
head(birthwt)
attach(birthwt)
birthwt$low<-as.factor(birthwt$low)
birthwt$race<-as.factor(birthwt$race)
birthwt$smoke<-as.factor(birthwt$smoke)
birthwt$ui<-as.factor(birthwt$ui)
birthwt$ht<-as.factor(birthwt$ht)
sapply(birthwt,data.mean)
low age lwt
NA 23.2380952 129.8148148
race smoke ptl
NA NA 0.1957672
ht ui ftv
NA NA 0.7936508
bwt
2944.5873016
INC_2024-1
Part-1
"(a)"
[1] "(a)"
x <- seq(3, 6, by = 0.1)
v <- exp(x) * cos(x^2)
sum_v <- sum(v)
sum_v
[1] -339.2342
"(b)"
[1] "(b)"
# y<-rnorm(15, 0, 1)
n <- length(y)
avg_y <- mean(y)
for (i in 1:n){
v <- sqrt(abs(y - avg_y))
}
print(v)
[1] 1.1653621 0.8584013 0.9961283 0.9553444
[5] 0.5351151 1.0487096 1.1013675 0.5778597
[9] 0.4415788 1.2865604 0.6413056 0.7041377
[13] 1.0721042 0.8403968 0.4679993
avg_v <- mean(v)
print(avg_v)
[1] 0.8461581
"(c)"
[1] "(c)"
# v <- c(5.2, 7.0, 9.1, 3.8, 7.5, 5.9, 6.5)
picked <- v[v < 6]
indices <- which(v > 6 & v < 8)
print(picked)
[1] 1.1653621 0.8584013 0.9961283 0.9553444
[5] 0.5351151 1.0487096 1.1013675 0.5778597
[9] 0.4415788 1.2865604 0.6413056 0.7041377
[13] 1.0721042 0.8403968 0.4679993
print(indices)
integer(0)
"(d)"
[1] "(d)"
int.v <- round(v)
sum(int.v %% 3 == 0)
[1] 2
"(e)"
[1] "(e)"
my.means <- function(x){
am <- mean(x)
gm <- exp(mean(log(x[x>0]))) # GM requires positive values
hm <- length(x) / sum(1/x[x!=0])
c(AM = am, GM = gm, HM = hm)
}
"(f)"
[1] "(f)"
my.means(v)
AM GM HM
0.8461581 0.8023689 0.7569923
Part-2
# (a)
set.seed(75)
x <- sample(1:100, 60, replace = TRUE)
M <- matrix(x, nrow = 6, ncol = 10, byrow = TRUE)
rowtotals <- rowSums(M)
rowtotals
[1] 565 623 392 506 539 542
# (b)
count.over <- function(vec, k) {
sum(vec > k)
}
# (c)
apply(M, MARGIN = 1, count.over, k = 4) # 1 means by row, 2 means by column
[1] 10 10 10 10 9 9
Part-3
# -----------------------------
# Simulate sample (given)
# -----------------------------
"(a)"
[1] "(a)"
set.seed(123)
Y <- rbinom(2000, size = 15, prob = 0.25)
# (i) Estimated P(2 < Y < 10)
est_prob <- mean(Y > 2 & Y < 10)
est_prob
[1] 0.7665
# (ii)
quantile(Y, 0.75)
75%
5
"(b)"
[1] "(b)"
# (i) Actual P(2 < Y < 10)
actual_prob <- pbinom(9, 15, 0.25) - pbinom(2, 15, 0.25)
actual_prob
[1] 0.7631172
# (ii) Actual Q3 from the exact distribution
actual_Q3 <- qbinom(0.75, 15, 0.25)
actual_Q3
[1] 5
INC_2023-1
Part 1
- Create a vector x that stores the sequence {1.1, 1.5, 1.9, …,
111.1}
x <- seq(1.1,111.1, by=0.4)
- Create a vector y where y=logx
y <- log(x)
- Find the number of observations in y
n <- length(y)
n
[1] 276
- Create vector y12 with every 12th element
y12 <- y[seq(12, length(y), by = 12)]
- Display the positions (indices) of the elements in vector Y that lie
between 0.5 and 2.
indices <- which((y > 0.5) & (y < 2))
print(indices)
[1] 3 4 5 6 7 8 9 10 11 12 13 14 15
[14] 16
- Sum of elements in y that are below 3
sum_below_3 <- sum(y[y < 3])
sum_below_3
[1] 103.0407
Part 2 (regression context)
'(a)'
[1] "(a)"
# Set seed for reproducibility
set.seed(123)
# Generate data
n <- 1000
# X ~ N(0, 4) (variance = 4, so sd = 2)
X <- rnorm(n, mean = 0, sd = sqrt(4))
# ε ~ N(10, 2) (variance = 2, so sd = sqrt(2))
# Note: This error term with mean 10 seems unusual for regression
e <- rnorm(n, mean = 0, sd = sqrt(2))
# Generate Y = 0.5 + 0.8*X + e
Y <- 0.5 + 0.8 * X + e
# Store the values of X as x and Y as y, as requested
x <- X
y <- Y
'(b)'
[1] "(b)"
# 1. Create matrix D:
# First column: constant 1 (1000 rows)
# Second column: vector x from part (a)
D <- matrix(c(rep(1, n), x), ncol = 2)
# 2. Create column matrix M using vector y from part (a)
M <- matrix(y, ncol = 1)
'(C)'
[1] "(C)"
# 1. Calculate b = (D^T D)^{-1} D^T M
D_transpose <- t(D)
D_transpose_D <- D_transpose %*% D
D_transpose_M <- D_transpose %*% M
# Calculate inverse and b
D_transpose_D_inv <- solve(D_transpose_D)
b <- D_transpose_D_inv %*% D_transpose_M
print(b)
[,1]
[1,] 0.5580467
[2,] 0.8622588
# 2. True parameter vector B from part (a)
B <- c(0.5, 0.8)
print(B)
[1] 0.5 0.8
# 3. Calculate absolute differences |b - B|
abs_difference <- abs(b-B)
cat("Absolute differences:\n")
Absolute differences:
print(abs_difference)
[,1]
[1,] 0.05804673
[2,] 0.06225884
---
title: "2Dec"
output: html_notebook
---
### Median Function
```{r}
med_val <- function(x){
  n <- length(x)
  y <- sort(x)
  if (n%%2==1){
    med <- y[(n+1)/2]
  }
  else{
    med <- mean(c(y[n/2], y[(n/2)+1]))
  }
  med
}
```

```{r}
med_val(c(10,20,30,40,50,20,2222222,13,342))
```
### Mode Function
```{r}
my_mode<-function(x){
  unique_x<-unique(x)
  tabulate_x<-tabulate(match(x,unique_x))
  unique_x[tabulate_x==max(tabulate_x)]
}
my_mode(c(10,20,30,40,50,20,2222222,13,342))
```
### Correlation coefficient Function
```{r}
cor.coeff<-function(x,y){
  avg_x<-mean(x)
  avg_y<-mean(y)
  nom<-sum((x-avg_x)*(y-avg_y))
  ssx<-sum((x-avg_x)^2)
  ssy<-sum((y-avg_y)^2)
  denom<-sqrt(ssx*ssy)
  r<-nom/denom
  r
}

cor.coeff(c(1,2,3,4,5,6,7,8,9), c(10,20,30,40,50,20,2222222,13,342))
```
### Custom (Range of a vector) Function
```{r}
own.range <- function(x){
  n<-length(x)
  for (i in 1:n){
    for (j in 1:(n-1)){
      if (x[j] > x[j+1]){
        temp <- x[j]
        x[j] <- x[j+1]
        x[j+1] <- temp
      }
    }
  }
  range <- x[n]-x[1]
  range
}

own.range(c(10,20,30,40,50,20,2222222,13,342))
```
Suppose a box contains 10 identical
items and the probability of having a
defective item is 0.2.Then

i)Prob. of having no defective item
ii)Find the number of defective items
in 1000 such boxes
iii)Estimate the probability of
containing exactly 1 defective item
iv)Estimate the probability of
containing at most 1 defective item
```{r}
# Given
n <- 10        # items per box
p <- 0.2       # probability of defect

# i) Probability of no defective item
prob_0 <- dbinom(0, size = n, prob = p)
prob_0

# ii) Expected number of defective items in 1000 boxes
expected_defects <- 1000 * (n * p)
expected_defects

# iii) Probability of exactly 1 defective item
prob_1 <- dbinom(1, size = n, prob = p)
prob_1

# iv) Probability of at most 1 defective item
prob_at_most_1 <- pbinom(1, size = n, prob = p)
prob_at_most_1

```
### Plotting PMF and CDF of X~binomial(5,0.6)
```{r}
x<-0:5
y<-dbinom(x,5,0.6)
z<-pbinom(x,5,0.6)
plot(x,y,type="h",xlab="x",ylab="f(x)",
     main="PMF of X")
plot(x,z,type="s",xlab="x",ylab="F(x)",
     main="CDF of X")
```
### Log Likelihood of Bernoulli Distribution
```{r}
LL.Ber<-function(x,p){
  sum(x)*log(p)+(length(x)-sum(x))*log(1-p)
}

sample <- rbinom(10, size = 1, prob = 0.3)
LL.Ber(x = sample, p)
```
### Sum function
```{r}
sum_value<-function(x){
  n<-length(x)
  s<-0
  for(i in 1:n){
    s<-s+x[i]
  }
  s
}

sum_value(c(10,20,30,40,50,20,2222222,13,342))
```
### Factorial function
```{r}
own.factorial<-function(n){
x<-1:n
L<-length(x)
prod<-1
for(i in 1:L){
prod<-prod*x[i]
}
prod
}

own.factorial(5)
```
### Write a function that finds absolute value of a vector
```{r}
bs_value<-function(x){
  ifelse(x<0,-x,x)}
abs_value(c(10,20,30,40,50,20,-2222222,13,-342))
```
### Ascending(sort) function
```{r}
asc.sort <- function(x){
  n<-length(x)
  for (i in 1:n){
    for (j in 1:(n-1)){
      if (x[j] > x[j+1]){
        temp <- x[j]
        x[j] <- x[j+1]
        x[j+1] <- temp
      }
    }
  }
  x
}

asc.sort(c(10,20,30,40,50,20,-2222222,13,-342))
```
### Descending(sort)function
```{r}
des.sort <- function(x){
  n<-length(x)
  for (i in 1:n){
    for (j in 1:(n-1)){
      if (x[j] < x[j+1]){
        temp <- x[j]
        x[j] <- x[j+1]
        x[j+1] <- temp
      }
    }
  }
  x
}

des.sort(c(10,20,30,40,50,20,-2222222,13,-342))
```
### Write a function to find maximum value of a function
```{r}
max_value<-function(x){
  n<-length(x)
  for(i in 2:n){
    if(x[1]<x[i]){
      x[1]<-x[i]
    }
  }
  x[1]
}

max_value(c(10,20,30,40,50,20,-2222222,13,-342))
```
### Write a function that counts length without missing value
```{r}
non.na.count<-function(x){
  i=1
  count=0
  n<- length(x)
  while (i<=n){
    if (!is.na(x[i])){
      count<-count+1
    }
    i=i+1
  }
  count 
}

non.na.count(c(NA,20,30,40,50,NA,20,-2222222,13,-342))
```
Let X1,...,Xn be a sequence of n random
integers between 1 and 10 and let
Yn=ΣXi.Find n so that max(Yn)<100
```{r}
max100<- function(x){
  set.seed(209)
  sum=0
  i=1
  n=length(x)
  while (sum<100){
    sum<-sum+x[i]
    i<-i+1
  }
  cat("n =",i,"\n")
  cat("Sum =",sum)
}

max100(sample(1:10, 100, replace=TRUE))
```
Create a user-defined function to compute the mean of numeric variables only and apply it to the birthwt dataset.
```{r}
data.mean<-function(x){
  if(is.numeric(x)==T){
    mean(x)}
  else{
    NA
  }
}
library(MASS)
head(birthwt)
attach(birthwt)
birthwt$low<-as.factor(birthwt$low)
birthwt$race<-as.factor(birthwt$race)
birthwt$smoke<-as.factor(birthwt$smoke)
birthwt$ui<-as.factor(birthwt$ui)
birthwt$ht<-as.factor(birthwt$ht)
sapply(birthwt,data.mean)

```
# INC_2024-1

### Part-1
```{r}
"(a)"
x <- seq(3, 6, by = 0.1)
v <- exp(x) * cos(x^2)
sum_v <- sum(v)
sum_v

"(b)"
# y<-rnorm(15, 0, 1)
n <- length(y)
avg_y <- mean(y)
for (i in 1:n){
  v <- sqrt(abs(y - avg_y))
}
print(v)
avg_v <- mean(v)
print(avg_v)


"(c)"
# v <- c(5.2, 7.0, 9.1, 3.8, 7.5, 5.9, 6.5)
picked <- v[v < 6]
indices <- which(v > 6 & v < 8)
print(picked)
print(indices)

"(d)"
int.v <- round(v)
sum(int.v %% 3 == 0)

"(e)"
my.means <- function(x){
  am <- mean(x)
  gm <- exp(mean(log(x[x>0])))   # GM requires positive values
  hm <- length(x) / sum(1/x[x!=0])
  c(AM = am, GM = gm, HM = hm)
}

"(f)"
my.means(v)
```
### Part-2
```{r}
# (a)
set.seed(75)
x <- sample(1:100, 60, replace = TRUE)
M <- matrix(x, nrow = 6, ncol = 10, byrow = TRUE)
rowtotals <- rowSums(M)
rowtotals

# (b)
count.over <- function(vec, k) {
  sum(vec > k)
}

# (c)
apply(M, MARGIN = 1, count.over, k = 4) # 1 means by row, 2 means by column
```
### Part-3
```{r}
# -----------------------------
# Simulate sample (given)
# -----------------------------
"(a)"
set.seed(123)
Y <- rbinom(2000, size = 15, prob = 0.25)

# (i) Estimated P(2 < Y < 10)
est_prob <- mean(Y > 2 & Y < 10)
est_prob
# (ii)
quantile(Y, 0.75)

"(b)"

# (i) Actual P(2 < Y < 10)
actual_prob <- pbinom(9, 15, 0.25) - pbinom(2, 15, 0.25)
actual_prob

# (ii) Actual Q3 from the exact distribution
actual_Q3 <- qbinom(0.75, 15, 0.25)
actual_Q3

```
# INC_2023-1
### Part 1
(a) Create a vector x that stores the sequence {1.1, 1.5, 1.9, ..., 111.1}
```{r}
x <- seq(1.1,111.1, by=0.4)
```
(b) Create a vector y where y=logx
```{r}
y <- log(x)

```
(c) Find the number of observations in y
```{r}
n <- length(y)
n
```
(d) Create vector y12 with every 12th element
```{r}
y12 <- y[seq(12, length(y), by = 12)]
```
(e) Display the positions (indices) of the elements in vector Y that lie between 0.5 and 2.
```{r}
indices <- which((y > 0.5) & (y < 2))
print(indices)
```
(f) Sum of elements in y that are below 3
```{r}
sum_below_3 <- sum(y[y < 3])
sum_below_3
```
### Part 2 (regression context)
```{r}
'(a)'
# Set seed for reproducibility
set.seed(123)

# Generate data
n <- 1000

# X ~ N(0, 4) (variance = 4, so sd = 2)
X <- rnorm(n, mean = 0, sd = sqrt(4))

# ε ~ N(10, 2) (variance = 2, so sd = sqrt(2))
# Note: This error term with mean 10 seems unusual for regression
e <- rnorm(n, mean = 0, sd = sqrt(2))

# Generate Y = 0.5 + 0.8*X + e
Y <- 0.5 + 0.8 * X + e

# Store the values of X as x and Y as y, as requested
x <- X
y <- Y
```
```{r}
'(b)'
# 1. Create matrix D:
# First column: constant 1 (1000 rows)
# Second column: vector x from part (a)
D <- matrix(c(rep(1, n), x), ncol = 2)

# 2. Create column matrix M using vector y from part (a)
M <- matrix(y, ncol = 1)
```
```{r}
'(C)'
# 1. Calculate b = (D^T D)^{-1} D^T M
D_transpose <- t(D)
D_transpose_D <- D_transpose %*% D
D_transpose_M <- D_transpose %*% M

# Calculate inverse and b
D_transpose_D_inv <- solve(D_transpose_D)
b <- D_transpose_D_inv %*% D_transpose_M
print(b)
# 2. True parameter vector B from part (a)
B <- c(0.5, 0.8)
print(B)

# 3. Calculate absolute differences |b - B|
abs_difference <- abs(b-B)

cat("Absolute differences:\n")
print(abs_difference)
```





