# Question 1 ----
# Hours logged for each of the seven day labels (Mon..Sun) used below.
hours <- c(2.5, 3, 0, 4, 1.5, 5, 0)
hours
## [1] 2.5 3.0 0.0 4.0 1.5 5.0 0.0

# Weekly total.
total_hours <- sum(hours)
total_hours
## [1] 16

# Daily average over the week.
avg_hours <- mean(hours)
avg_hours
## [1] 2.285714

# Signed deviation of each day from the weekly average.
diff <- hours - avg_hours

# One row per day label; `week` is the constant 1 on every row.
Student_Score <- data.frame(
  student_names = c("Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"),
  week = rep(1, length(hours)),
  hours_studied = hours,
  difference = diff
)
print(Student_Score)
## student_names week hours_studied difference
## 1 Mon 1 2.5 0.2142857
## 2 Tue 1 3.0 0.7142857
## 3 Wed 1 0.0 -2.2857143
## 4 Thurs 1 4.0 1.7142857
## 5 Fri 1 1.5 -0.7857143
## 6 Sat 1 5.0 2.7142857
## 7 Sun 1 0.0 -2.2857143
# Question 2 ----
# Expense matrix: each row is one category, each column one month
# (rows are summed per category and columns averaged per month below).
expenses <- rbind(
  c(500, 520, 510, 530),
  c(300, 290, 310, 305),
  c(200, 220, 210, 215)
)
expenses
## [,1] [,2] [,3] [,4]
## [1,] 500 520 510 530
## [2,] 300 290 310 305
## [3,] 200 220 210 215

# Total spent in each category across all months.
total_per_category <- rowSums(expenses)
total_per_category
## [1] 2060 1205 845

# Average spend in each month across all categories.
avg_per_month <- colMeans(expenses)
avg_per_month
## [1] 333.3333 343.3333 343.3333 350.0000

# Look up the label of the category with the largest total.
category_names <- c("Category1", "Category2", "Category3")
highest_total_index <- which.max(total_per_category)
highest_category <- category_names[highest_total_index]
print(highest_category)
## [1] "Category1"
# Question 3 ----
# Usage figure and household size for five households.
usage <- c(18, 25, 12, 30, 20)
household_size <- c(2, 4, 3, 5, 2)

df <- data.frame(usage = usage, size = household_size)
print(df)
## usage size
## 1 18 2
## 2 25 4
## 3 12 3
## 4 30 5
## 5 20 2

# Normalise usage by the number of people in the household.
df[["usage_per_person"]] <- df[["usage"]] / df[["size"]]
print(df[["usage_per_person"]])
## [1] 9.00 6.25 4.00 6.00 10.00

# Keep only the households whose per-person usage exceeds the mean.
avg_per_person <- mean(df[["usage_per_person"]])
above_avg_households <- df[df[["usage_per_person"]] > avg_per_person, ]
print(above_avg_households)
## usage size usage_per_person
## 1 18 2 9
## 5 20 2 10
# Question 4 ----
# Classes held and classes attended, one entry per record.
classes_held <- c(40, 40, 40, 40)
classes_attended <- c(38, 30, 35, 28)

attendance_df <- data.frame(held = classes_held, attended = classes_attended)
attendance_df
## held attended
## 1 40 38
## 2 40 30
## 3 40 35
## 4 40 28

# Attendance as a percentage of classes held.
attendance_df[["percentage"]] <-
  (attendance_df[["attended"]] / attendance_df[["held"]]) * 100
attendance_df
## held attended percentage
## 1 40 38 95.0
## 2 40 30 75.0
## 3 40 35 87.5
## 4 40 28 70.0

# Strictly below 75%: a record at exactly 75% is not flagged.
low_attendance <- attendance_df[attendance_df[["percentage"]] < 75, ]
low_attendance
## held attended percentage
## 4 40 28 70
# Question 5 ----
# Rainfall readings to classify.
rainfall <- c(0, 3, 12, 7, 0, 22, 15)

#' Classify rainfall amounts as "Light", "Moderate" or "Heavy".
#'
#' Vectorised: accepts a numeric vector of any length (including length 0)
#' and returns a character vector of the same length. `NA` inputs give `NA`
#' instead of raising an error.
#'
#' @param x Numeric vector of rainfall amounts.
#' @return Character vector: "Light" when `x < 5`, "Moderate" when
#'   `5 <= x <= 20`, "Heavy" when `x > 20`.
rainfall_level <- function(x) {
  level_names <- c("Light", "Moderate", "Heavy")
  # Each threshold that is crossed bumps the index by one:
  # 1 = below 5, 2 = from 5 up to and including 20, 3 = above 20.
  level_names[1L + (x >= 5) + (x > 20)]
}
# Pair every rainfall reading with its category label.
# vapply() (rather than sapply()) guarantees `status` is a character vector
# with exactly one label per reading, whatever the input length.
rain_df <- data.frame(
  mm = rainfall,
  status = vapply(rainfall, rainfall_level, character(1))
)
print(rain_df)
## mm status
## 1 0 Light
## 2 3 Light
## 3 12 Moderate
## 4 7 Moderate
## 5 0 Light
## 6 22 Heavy
## 7 15 Moderate
# Question 6 ----
#' Compute the travel cost for one or more trip distances.
#'
#' The fare is a fixed base price of 15, plus 1.50 per unit of distance for
#' the first 30 units and 1.00 per unit beyond that. Vectorised: `distance`
#' may be a vector of any length, and `NA` inputs give `NA`.
#'
#' @param distance Numeric vector of trip distances.
#' @return Numeric vector of total costs, the same length as `distance`.
travel_cost <- function(distance) {
  base_price <- 15
  tier_limit <- 30

  # Split each distance into the part charged at the first-tier rate and the
  # part (if any) charged at the cheaper second-tier rate.
  first_tier <- pmin(distance, tier_limit)
  second_tier <- pmax(distance - tier_limit, 0)

  base_price + (first_tier * 1.50) + (second_tier * 1.00)
}
# Price a set of sample distances.
distances <- c(10, 25, 30, 45, 60)

# vapply() (rather than sapply()) enforces one numeric cost per distance, so
# the result type cannot silently change with the input.
final_costs <- vapply(distances, travel_cost, numeric(1))

results <- data.frame(
  Distance_inKM = distances,
  Total_Cost_INR = final_costs
)
print(results)
## Distance_inKM Total_Cost_INR
## 1 10 30.0
## 2 25 52.5
## 3 30 60.0
## 4 45 75.0
## 5 60 90.0
# Question 7 ----
data("USArrests")

# Combined score: sum of the Murder, Assault and Rape columns.
USArrests$violent_crime <-
  USArrests$Murder + USArrests$Assault + USArrests$Rape
head(USArrests)
## Murder Assault UrbanPop Rape violent_crime
## Alabama 13.2 236 58 21.2 270.4
## Alaska 10.0 263 48 44.5 317.5
## Arizona 8.1 294 80 31.0 333.1
## Arkansas 8.8 190 50 19.5 218.3
## California 9.0 276 91 40.6 325.6
## Colorado 7.9 204 78 38.7 250.6

# Each row's Murder value as a percentage of the column total, rounded to
# two decimal places.
total_murders <- sum(USArrests$Murder)
USArrests$murder_share <- round((USArrests$Murder / total_murders) * 100, 2)

# Classify every row in one vectorised pass (no index loop, no vector grown
# element by element): share > 3 is "High", 2 <= share <= 3 is "Medium",
# anything lower is "Low".
classification <- ifelse(
  USArrests$murder_share > 3,
  "High",
  ifelse(USArrests$murder_share >= 2, "Medium", "Low")
)
USArrests$share_category <- classification
#' Bucket numeric values into "Low", "Medium" or "High".
#'
#' @param data_vector Numeric vector to classify.
#' @param low Lower cut-off; values at or above it (and not above `high`)
#'   are "Medium".
#' @param high Upper cut-off; values strictly above it are "High".
#' @return Vector of labels, one per element of `data_vector`.
classify_share <- function(data_vector, low, high) {
  exceeds_high <- data_vector > high
  ifelse(
    exceeds_high,
    "High",
    ifelse(data_vector >= low, "Medium", "Low")
  )
}
# Apply the same classifier to a second dataset: label the `sr` column of
# LifeCycleSavings with cut-offs 10 (low) and 15 (high).
data("LifeCycleSavings")
LifeCycleSavings[["savings_level"]] <- classify_share(
  LifeCycleSavings[["sr"]],
  10,
  15
)
head(LifeCycleSavings)
## sr pop15 pop75 dpi ddpi savings_level
## Australia 11.43 29.35 2.87 2329.68 2.87 Medium
## Austria 12.07 23.32 4.41 1507.99 3.93 Medium
## Belgium 13.17 23.80 4.43 2108.47 3.82 Medium
## Bolivia 5.75 41.89 1.67 189.13 0.22 Low
## Brazil 12.88 42.19 0.83 728.47 4.56 Medium
## Canada 8.79 31.72 2.85 2982.88 2.43 Low

# Mean violent-crime score within each murder-share category.
aggregate(
  violent_crime ~ share_category,
  data = USArrests,
  FUN = mean
)
## share_category violent_crime
## 1 High 295.2182
## 2 Low 130.3296
## 3 Medium 268.5583

# Scatter plot of the violent-crime score against UrbanPop.
plot(
  USArrests$UrbanPop,
  USArrests$violent_crime,
  main = "Urbanization vs Violent Crime",
  xlab = "Urbanization Population (%)",
  ylab = "Violent Crime Score",
  pch = 21,
  col = "red"
)
