assignment 02

dataframe

# Initial exam data: one row per student, with a numeric ID, the student's
# name, and numeric Age and Score columns.
exam_score <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Name  = c("Alice", "Bob", "David", "John", "Jenny"),
  Age   = c(20, 25, 30, 22, 18),
  Score = c(100, 78, 90, 55, 81)
)
print(exam_score)
##   ID  Name Age Score
## 1  1 Alice  20   100
## 2  2   Bob  25    78
## 3  3 David  30    90
## 4  4  John  22    55
## 5  5 Jenny  18    81

add 2 new rows

# Append two more students to exam_score.
# The new rows are built as a data frame rather than with c(): c() coerces a
# mix of numbers and strings to character, and rbind()-ing such vectors onto
# a data frame silently turns the ID, Age and Score columns into character
# columns, which breaks every numeric summary computed from them afterwards.
new_rows <- data.frame(
  ID    = c(6, 7),
  Name  = c("isa", "asma"),
  Age   = c(25, 20),
  Score = c(10, 20)
)
exam_score <- rbind(exam_score, new_rows)
exam_score
##   ID  Name Age Score
## 1  1 Alice  20   100
## 2  2   Bob  25    78
## 3  3 David  30    90
## 4  4  John  22    55
## 5  5 Jenny  18    81
## 6  6   isa  25    10
## 7  7  asma  20    20

add new column Income

# Add an Income column; the values are given in row order, one per student.
Income <- c(50000, 60000, 70000, 45000, 55000, 65000, 75000)
exam_score[["Income"]] <- Income
print(exam_score)
##   ID  Name Age Score Income
## 1  1 Alice  20   100  50000
## 2  2   Bob  25    78  60000
## 3  3 David  30    90  70000
## 4  4  John  22    55  45000
## 5  5 Jenny  18    81  55000
## 6  6   isa  25    10  65000
## 7  7  asma  20    20  75000

Compute summary statistics (max, min, median, sum, mean, SD, variance, quantile) for Age, Score and Income

# Summary statistics for the Age, Score and Income columns.
#
# The columns are converted with as.numeric() *before* any statistic is
# computed. If they are character vectors, max()/min()/median() compare
# strings lexicographically (so "90" sorts above "100") and return wrong,
# character-typed results.
#
# NOTE: the variable names c, sd, var and table mask base R functions of the
# same name. Calls such as c(...) and sd(...) still resolve to the functions;
# the names are kept unchanged so code that refers to them keeps working.
a <- as.numeric(exam_score$Age)
b <- as.numeric(exam_score$Score)
c <- as.numeric(exam_score$Income)
a
## [1] 20 25 30 22 18 25 20

# Each statistic is a length-3 vector ordered as (Age, Score, Income).
Max <- c(max(a), max(b), max(c))
Max
## [1]    30   100 75000
Min <- c(min(a), min(b), min(c))
Min
## [1]    18    10 45000
Median <- c(median(a), median(b), median(c))
Median
## [1]    22    78 60000

Sum <- c(sum(a), sum(b), sum(c))
Sum
## [1]    160    434 420000
Mean <- c(mean(a), mean(b), mean(c))
Mean
## [1]    22.85714    62.00000 60000.00000
sd <- c(sd(a), sd(b), sd(c))
sd
## [1]     4.099942    35.028560 10801.234497
var <- c(var(a), var(b), var(c))
var
## [1] 1.680952e+01 1.227000e+03 1.166667e+08
# The 0.5 quantile is the median, so it agrees with Median above.
quan <- c(quantile(a, 0.5), quantile(b, 0.5), quantile(c, 0.5))
quan
##   50%   50%   50% 
##    22    78 60000

# One row per column of interest: row 1 = Age, row 2 = Score, row 3 = Income.
table <- data.frame(
  Max = Max,
  Min = Min,
  Median = Median,
  Sum = Sum,
  Mean = Mean,
  SD = sd,
  Variance = var,
  Quantile = quan
)

table
##     Max   Min Median    Sum        Mean           SD     Variance Quantile
## 1    30    18     22    160    22.85714     4.099942 1.680952e+01       22
## 2   100    10     78    434    62.00000    35.028560 1.227000e+03       78
## 3 75000 45000  60000 420000 60000.00000 10801.234497 1.166667e+08    60000

Correlation and row filtering

# Pairwise Pearson correlations (cor() default) between Age, Score and Income.
# The columns are converted with as.numeric() because cor() requires numeric
# input. The vectors a and b are also used by the row filters further down.
a <- as.numeric(exam_score$Age)
b <- as.numeric(exam_score$Score)
c <- as.numeric(exam_score$Income)

age_score <- cor(a, b)
age_score
## [1] 0.05570446
age_income <- cor(a, c)
age_income
## [1] 0.3951727
score_income <- cor(b, c)
# Print the value just computed (previously age_income was printed again here).
score_income
## [1] -0.4316971
# Row filters on exam_score, using the numeric Age (a) and Score (b) vectors.
# The Score filter used to be computed and printed twice in a row; the
# verbatim duplicate has been dropped.

# Students with a score of 80 or more.
x <- exam_score[b >= 80, ]
x
##   ID  Name Age Score Income
## 1  1 Alice  20   100  50000
## 3  3 David  30    90  70000
## 5  5 Jenny  18    81  55000

# Students aged 20 to 30, both ends inclusive.
y <- exam_score[a >= 20 & a <= 30, ]
y
##   ID  Name Age Score Income
## 1  1 Alice  20   100  50000
## 2  2   Bob  25    78  60000
## 3  3 David  30    90  70000
## 4  4  John  22    55  45000
## 6  6   isa  25    10  65000
## 7  7  asma  20    20  75000

# Students whose age is exactly 22, 25 or 35 (nobody is 35 in this data).
# %in% tests set membership and never yields NA, unlike a chain of ==.
z <- exam_score[a %in% c(22, 25, 35), ]
z
##   ID Name Age Score Income
## 2  2  Bob  25    78  60000
## 4  4 John  22    55  45000
## 6  6  isa  25    10  65000

# Row filters on exam_score; a and b are the Age and Score vectors.

# Keep rows where the score is at least 80.
x <- exam_score[80 <= b, ]
print(x)
##   ID  Name Age Score Income
## 1  1 Alice  20   100  50000
## 3  3 David  30    90  70000
## 5  5 Jenny  18    81  55000

# Keep rows where the age lies in the closed range [20, 30].
y <- exam_score[a <= 30 & a >= 20, ]
print(y)
##   ID  Name Age Score Income
## 1  1 Alice  20   100  50000
## 2  2   Bob  25    78  60000
## 3  3 David  30    90  70000
## 4  4  John  22    55  45000
## 6  6   isa  25    10  65000
## 7  7  asma  20    20  75000

# Keep rows where the age is exactly 22, 25 or 35.
z <- exam_score[a == 35 | a == 25 | a == 22, ]
print(z)
##   ID Name Age Score Income
## 2  2  Bob  25    78  60000
## 4  4 John  22    55  45000
## 6  6  isa  25    10  65000