TakeHomeAssignment1

Question 1

# Hours studied per day; the labels used below run Mon through Sun.
hours <- c(2.5, 3, 0, 4, 1.5, 5, 0)
hours

## [1] 2.5 3.0 0.0 4.0 1.5 5.0 0.0

# Total hours across the seven entries.
total_hours <- sum(hours)
total_hours

## [1] 16

# Mean hours per entry.
avg_hours <- mean(hours)
avg_hours

## [1] 2.285714

# Signed deviation of each entry from the mean.
diff <- hours - avg_hours

# One row per day; the scalar `week` value is recycled to every row.
Student_Score <- data.frame(
  student_names = c("Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"),
  week = 1,
  hours_studied = hours,
  difference = diff
)

print(Student_Score)

##   student_names week hours_studied difference
## 1           Mon    1           2.5  0.2142857
## 2           Tue    1           3.0  0.7142857
## 3           Wed    1           0.0 -2.2857143
## 4         Thurs    1           4.0  1.7142857
## 5           Fri    1           1.5 -0.7857143
## 6           Sat    1           5.0  2.7142857
## 7           Sun    1           0.0 -2.2857143

Question 2

# Expense matrix filled row by row: three rows (labelled as categories below)
# and four columns (presumably months, going by `avg_per_month`).
expenses <- matrix(
  c(
    500, 520, 510, 530,
    300, 290, 310, 305,
    200, 220, 210, 215
  ),
  ncol = 4,
  byrow = TRUE
)
expenses

##      [,1] [,2] [,3] [,4]
## [1,]  500  520  510  530
## [2,]  300  290  310  305
## [3,]  200  220  210  215

# Sum of each row.
total_per_category <- rowSums(expenses)
total_per_category

## [1] 2060 1205  845

# Mean of each column.
avg_per_month <- colMeans(expenses)
avg_per_month

## [1] 333.3333 343.3333 343.3333 350.0000

# Look up the label of the row with the largest total.
category_names <- c("Category1", "Category2", "Category3")
highest_total_index <- which.max(total_per_category)
highest_category <- category_names[highest_total_index]

print(highest_category)

## [1] "Category1"

Question 3

# Raw inputs: a usage figure and a household size for each of five households.
usage <- c(18, 25, 12, 30, 20)
household_size <- c(2, 4, 3, 5, 2)

df <- data.frame(usage = usage, size = household_size)
print(df)

##   usage size
## 1    18    2
## 2    25    4
## 3    12    3
## 4    30    5
## 5    20    2

# Normalise usage by household size.
df[["usage_per_person"]] <- df[["usage"]] / df[["size"]]
print(df[["usage_per_person"]])

## [1]  9.00  6.25  4.00  6.00 10.00

# Keep the rows whose per-person usage is strictly above the mean.
avg_per_person <- mean(df[["usage_per_person"]])
above_avg_households <- df[df[["usage_per_person"]] > avg_per_person, ]
print(above_avg_households)

##   usage size usage_per_person
## 1    18    2                9
## 5    20    2               10

Question 4

# Classes held and classes attended, one entry per row of the table below.
classes_held <- rep(40, 4)
classes_attended <- c(38, 30, 35, 28)

attendance_df <- data.frame(
  held = classes_held,
  attended = classes_attended
)
attendance_df

##   held attended
## 1   40       38
## 2   40       30
## 3   40       35
## 4   40       28

# Attendance as a percentage of classes held.
attendance_df[["percentage"]] <- with(attendance_df, (attended / held) * 100)
attendance_df

##   held attended percentage
## 1   40       38       95.0
## 2   40       30       75.0
## 3   40       35       87.5
## 4   40       28       70.0

# Rows strictly below 75%; a row at exactly 75% is not selected.
low_attendance <- attendance_df[attendance_df[["percentage"]] < 75, ]
low_attendance

##   held attended percentage
## 4   40       28         70

Question 5

# Rainfall readings; they become the `mm` column of `rain_df` below.
rainfall <- c(0, 3, 12, 7, 0, 22, 15)

#' Classify rainfall readings into intensity levels.
#'
#' Vectorised: works on a single reading or on a whole vector of readings,
#' so it can be called directly or through `sapply()`/`vapply()`.
#'
#' @param x Numeric vector of rainfall readings.
#' @return Character vector the same length as `x`: "Light" when `x < 5`,
#'   "Moderate" when `5 <= x <= 20`, "Heavy" when `x > 20`. `NA` readings
#'   yield `NA`.
rainfall_level <- function(x) {
  level_names <- c("Light", "Moderate", "Heavy")
  # Each threshold a reading clears adds one to the index: 1 = Light,
  # 2 = Moderate, 3 = Heavy. Indexing keeps the result character-typed
  # even for empty or NA input.
  level_names[1L + (x >= 5) + (x > 20)]
}

# Pair each reading with its level. vapply() pins the result to exactly one
# string per reading, so `status` is always a character column (sapply()
# would silently return a list for empty input).
rain_df <- data.frame(
  mm = rainfall,
  status = vapply(rainfall, rainfall_level, character(1))
)

print(rain_df)

##   mm   status
## 1  0    Light
## 2  3    Light
## 3 12 Moderate
## 4  7 Moderate
## 5  0    Light
## 6 22    Heavy
## 7 15 Moderate

Question 6

#' Compute the total cost of a trip from its distance.
#'
#' A fixed base price is charged on every trip. Distance up to `threshold` is
#' billed at `rate_near` per unit and any distance beyond it at `rate_far` per
#' unit. Vectorised over `distance`. The script reports distances in km and
#' costs in INR.
#'
#' @param distance Numeric vector of trip distances.
#' @param base_price Fixed charge added to every trip.
#' @param threshold Distance up to which `rate_near` applies.
#' @param rate_near Per-unit rate for the first `threshold` units.
#' @param rate_far Per-unit rate for distance beyond `threshold`.
#' @return Numeric vector of total costs, the same length as `distance`.
travel_cost <- function(distance, base_price = 15, threshold = 30,
                        rate_near = 1.50, rate_far = 1.00) {
  # Split each distance into the part within the threshold and the excess.
  near_part <- pmin(distance, threshold)
  far_part <- pmax(distance - threshold, 0)
  base_price + (near_part * rate_near) + (far_part * rate_far)
}

# Cost table for a set of sample distances. vapply() guarantees one numeric
# cost per distance, so `Total_Cost_INR` is always a numeric column.
distances <- c(10, 25, 30, 45, 60)
final_costs <- vapply(distances, travel_cost, numeric(1))
results <- data.frame(Distance_inKM = distances, Total_Cost_INR = final_costs)
print(results)

##   Distance_inKM Total_Cost_INR
## 1            10           30.0
## 2            25           52.5
## 3            30           60.0
## 4            45           75.0
## 5            60           90.0

Question 7

# Load the built-in dataset and add a combined score: the row-wise sum of the
# Murder, Assault and Rape columns.
data("USArrests")
USArrests[["violent_crime"]] <- with(USArrests, Murder + Assault + Rape)
head(USArrests)

##            Murder Assault UrbanPop Rape violent_crime
## Alabama      13.2     236       58 21.2         270.4
## Alaska       10.0     263       48 44.5         317.5
## Arizona       8.1     294       80 31.0         333.1
## Arkansas      8.8     190       50 19.5         218.3
## California    9.0     276       91 40.6         325.6
## Colorado      7.9     204       78 38.7         250.6

# Each row's Murder value as a percentage of the column total (2 d.p.).
total_murders <- sum(USArrests$Murder)
USArrests$murder_share <- round((USArrests$Murder / total_murders) * 100, 2)

# Classify every row in one vectorised pass: above 3% is "High", 2% to 3%
# inclusive is "Medium", anything lower is "Low". This avoids growing a
# vector inside a loop and the 1:nrow() pitfall on a zero-row data frame.
classification <- ifelse(
  USArrests$murder_share > 3,
  "High",
  ifelse(USArrests$murder_share >= 2, "Medium", "Low")
)
USArrests$share_category <- classification

#' Bucket numeric values into "Low", "Medium" or "High".
#'
#' @param data_vector Numeric vector to classify.
#' @param low Lower cut-off; values at or above it are at least "Medium".
#' @param high Upper cut-off; values strictly above it are "High".
#' @return Vector of labels, one per element of `data_vector`.
classify_share <- function(data_vector, low, high) {
  above_high <- data_vector > high
  ifelse(
    above_high,
    "High",
    ifelse(data_vector >= low, "Medium", "Low")
  )
}
# Apply the same classifier to the `sr` column of a second built-in dataset,
# with cut-offs of 10 (low) and 15 (high).
data("LifeCycleSavings")
LifeCycleSavings[["savings_level"]] <- classify_share(
  LifeCycleSavings[["sr"]],
  low = 10,
  high = 15
)
head(LifeCycleSavings)

##              sr pop15 pop75     dpi ddpi savings_level
## Australia 11.43 29.35  2.87 2329.68 2.87        Medium
## Austria   12.07 23.32  4.41 1507.99 3.93        Medium
## Belgium   13.17 23.80  4.43 2108.47 3.82        Medium
## Bolivia    5.75 41.89  1.67  189.13 0.22           Low
## Brazil    12.88 42.19  0.83  728.47 4.56        Medium
## Canada     8.79 31.72  2.85 2982.88 2.43           Low

# Mean violent-crime score within each murder-share category.
aggregate(
  violent_crime ~ share_category,
  data = USArrests,
  FUN = mean
)

##   share_category violent_crime
## 1           High      295.2182
## 2            Low      130.3296
## 3         Medium      268.5583

# Scatter plot of the combined violent-crime score against UrbanPop.
with(
  USArrests,
  plot(
    UrbanPop, violent_crime,
    main = "Urbanization vs Violent Crime",
    xlab = "Urbanization Population (%)",
    ylab = "Violent Crime Score",
    pch = 21,
    col = "red"
  )
)

TakeHomeAssignment1

Anirvin

2026-01-28

Question 1

Question 2

Question 3

Question 4

Question 5

Question 6

Question 7