Description: https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/doc/wooldridge/meap93.html
CSV: https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/wooldridge/meap93.csv
408 rows and 17 variables:
Load data as data.frame
# Directory holding the local copy of the MEAP93 CSV file.
meap_data_dir <- "C:/Users/stina/Documents/R programming Bridge Workshop/Week 2 Assignment"
# Load the data into a data frame. The file is read through its full path
# instead of calling setwd(), so the session's working directory is left
# untouched.
data.frame_meap <- read.csv(file.path(meap_data_dir, "Meap93Data.csv"))
# Per-column summary (min, quartiles, median, mean, max) of data.frame_meap
summary(data.frame_meap)
## row lnchprg enroll staff
## Min. : 1.0 Min. : 1.40 Min. : 212 Min. : 65.90
## 1st Qu.:102.8 1st Qu.:14.62 1st Qu.: 1038 1st Qu.: 91.45
## Median :204.5 Median :23.85 Median : 1840 Median : 99.00
## Mean :204.5 Mean :25.20 Mean : 2664 Mean :100.64
## 3rd Qu.:306.2 3rd Qu.:33.83 3rd Qu.: 3085 3rd Qu.:108.03
## Max. :408.0 Max. :79.50 Max. :16793 Max. :166.60
## expend salary benefits droprate
## Min. :3332 Min. :19764 Min. : 0 Min. : 0.000
## 1st Qu.:3821 1st Qu.:28186 1st Qu.: 5536 1st Qu.: 1.900
## Median :4145 Median :31266 Median : 6304 Median : 3.700
## Mean :4377 Mean :31775 Mean : 6463 Mean : 5.066
## 3rd Qu.:4659 3rd Qu.:34500 3rd Qu.: 7228 3rd Qu.: 6.500
## Max. :7419 Max. :52812 Max. :11618 Max. :61.900
## gradrate math10 sci11 totcomp
## Min. : 23.50 Min. : 1.90 Min. : 7.20 Min. :24498
## 1st Qu.: 77.00 1st Qu.:16.62 1st Qu.:41.30 1st Qu.:34032
## Median : 86.30 Median :23.40 Median :49.10 Median :37444
## Mean : 83.65 Mean :24.11 Mean :49.18 Mean :38238
## 3rd Qu.: 93.22 3rd Qu.:30.05 3rd Qu.:57.15 3rd Qu.:41637
## Max. :127.10 Max. :66.70 Max. :85.70 Max. :63518
## ltotcomp lexpend lenroll lstaff
## Min. :10.11 Min. :8.111 Min. :5.357 Min. :4.188
## 1st Qu.:10.44 1st Qu.:8.248 1st Qu.:6.945 1st Qu.:4.516
## Median :10.53 Median :8.330 Median :7.518 Median :4.595
## Mean :10.54 Mean :8.370 Mean :7.510 Mean :4.603
## 3rd Qu.:10.64 3rd Qu.:8.447 3rd Qu.:8.034 3rd Qu.:4.682
## Max. :11.06 Max. :8.912 Max. :9.729 Max. :5.116
## bensal lsalary
## Min. :0.0000 Min. : 9.892
## 1st Qu.:0.1880 1st Qu.:10.247
## Median :0.2024 Median :10.350
## Mean :0.2045 Mean :10.354
## 3rd Qu.:0.2203 3rd Qu.:10.449
## Max. :0.4500 Max. :10.874
# Mean of the enroll column over the whole data set
all.enroll_mean <- mean(data.frame_meap[["enroll"]])
print(all.enroll_mean)
## [1] 2663.806
# Median of the enroll column over the whole data set
all.enroll_median <- median(data.frame_meap[["enroll"]])
print(all.enroll_median)
## [1] 1840.5
# Mean of the math10 column over the whole data set
all.math10_mean <- mean(data.frame_meap[["math10"]])
print(all.math10_mean)
## [1] 24.10686
# Median of the math10 column over the whole data set
all.math10_median <- median(data.frame_meap[["math10"]])
print(all.math10_median)
## [1] 23.4
# Build the working subset: the first 20 rows of four columns.
# Columns are selected by name rather than by position, so the subset does not
# silently change if the column order of the CSV changes. head() returns at
# most 20 rows and never pads with NA rows when fewer are available.
subset_columns <- c("lnchprg", "enroll", "math10", "sci11")
data.frame_MEAPsubset <- head(data.frame_meap[, subset_columns], n = 20)
data.frame_MEAPsubset
## lnchprg enroll math10 sci11
## 1 1.4 1862 56.4 67.9
## 2 2.3 11355 42.7 65.3
## 3 2.7 7685 43.8 54.3
## 4 3.4 1148 25.3 60.0
## 5 3.4 1572 15.3 65.8
## 6 3.4 2496 46.0 60.5
## 7 3.6 3358 33.6 67.4
## 8 3.6 11983 40.1 69.4
## 9 4.2 3499 42.1 71.7
## 10 4.2 5095 39.8 55.0
## 11 4.5 16793 30.8 58.1
## 12 4.5 984 14.6 74.6
## 13 5.1 1116 51.1 48.6
## 14 5.5 4156 29.2 55.0
## 15 5.5 4046 49.7 62.0
## 16 5.6 10695 42.6 53.1
## 17 5.8 3117 35.0 54.1
## 18 6.1 2168 35.9 45.4
## 19 6.2 2317 17.8 44.5
## 20 6.2 1391 25.5 74.4
# Give the four subset columns descriptive names in a single assignment,
# then print the renamed subset.
names(data.frame_MEAPsubset)[1:4] <- c(
  "lunch_program_rate",
  "enrollment_count",
  "math_pass_rate",
  "science_pass_rate"
)
print(data.frame_MEAPsubset)
## lunch_program_rate enrollment_count math_pass_rate science_pass_rate
## 1 1.4 1862 56.4 67.9
## 2 2.3 11355 42.7 65.3
## 3 2.7 7685 43.8 54.3
## 4 3.4 1148 25.3 60.0
## 5 3.4 1572 15.3 65.8
## 6 3.4 2496 46.0 60.5
## 7 3.6 3358 33.6 67.4
## 8 3.6 11983 40.1 69.4
## 9 4.2 3499 42.1 71.7
## 10 4.2 5095 39.8 55.0
## 11 4.5 16793 30.8 58.1
## 12 4.5 984 14.6 74.6
## 13 5.1 1116 51.1 48.6
## 14 5.5 4156 29.2 55.0
## 15 5.5 4046 49.7 62.0
## 16 5.6 10695 42.6 53.1
## 17 5.8 3117 35.0 54.1
## 18 6.1 2168 35.9 45.4
## 19 6.2 2317 17.8 44.5
## 20 6.2 1391 25.5 74.4
# Per-column summary of the renamed 20-row subset
print(summary(data.frame_MEAPsubset))
## lunch_program_rate enrollment_count math_pass_rate science_pass_rate
## Min. :1.400 Min. : 984 Min. :14.60 Min. :44.50
## 1st Qu.:3.400 1st Qu.: 1790 1st Qu.:28.27 1st Qu.:54.25
## Median :4.350 Median : 3238 Median :37.85 Median :60.25
## Mean :4.360 Mean : 4842 Mean :35.87 Mean :60.35
## 3rd Qu.:5.525 3rd Qu.: 5742 3rd Qu.:42.98 3rd Qu.:67.53
## Max. :6.200 Max. :16793 Max. :56.40 Max. :74.60
# Enrollment in the subset: mean first, then median
subset.enroll_mean <- mean(data.frame_MEAPsubset[["enrollment_count"]])
# show the subset's mean enrollment
print(subset.enroll_mean)
## [1] 4841.8
subset.enroll_median <- median(data.frame_MEAPsubset[["enrollment_count"]])
# show the subset's median enrollment
print(subset.enroll_median)
## [1] 3237.5
# Math pass rate in the subset: mean first, then median
subset.math10_mean <- mean(data.frame_MEAPsubset[["math_pass_rate"]])
# show the subset's mean math pass rate
print(subset.math10_mean)
## [1] 35.865
subset.math10_median <- median(data.frame_MEAPsubset[["math_pass_rate"]])
# show the subset's median math pass rate
print(subset.math10_median)
## [1] 37.85
# Report each whole-data statistic next to its subset counterpart.
# paste0() is used because the text fragments already carry their own spaces;
# paste() inserts an extra separator around every value, which doubled the
# spaces and left a stray space before the closing period.
print(paste0(
  "Mean enrollment of main data is ", all.enroll_mean,
  " while mean enrollment of subset is ", subset.enroll_mean, "."
))
## [1] "Mean enrollment of main data is 2663.80637254902 while mean enrollment of subset is 4841.8."
print(paste0(
  "Median enrollment of main data is ", all.enroll_median,
  " while median enrollment of subset is ", subset.enroll_median, "."
))
## [1] "Median enrollment of main data is 1840.5 while median enrollment of subset is 3237.5."
print(paste0(
  "Mean math pass rate of main data is ", all.math10_mean,
  " while mean math pass rate of subset is ", subset.math10_mean, "."
))
## [1] "Mean math pass rate of main data is 24.1068627194877 while mean math pass rate of subset is 35.864999723."
print(paste0(
  "Median math pass rate of main data is ", all.math10_median,
  " while median math pass rate of subset is ", subset.math10_median, "."
))
## [1] "Median math pass rate of main data is 23.39999962 while median math pass rate of subset is 37.850000385."
Next, we apply a salary floor of 25,000: every row whose salary is below 25,000 is updated to 25,000.
library("dplyr")
# Rows whose salary is below 25000. The column is referenced by its bare name
# so filter() resolves it inside the data frame (data masking) instead of
# re-reading it from the global data.frame_meap object.
salaryLess25K <- filter(data.frame_meap, salary < 25000)
# Display only lnchprg, enroll, salary, math10 and sci11, selected by name so
# the view does not depend on column positions.
salaryLess25K[, c("lnchprg", "enroll", "salary", "math10", "sci11")]
## lnchprg enroll salary math10 sci11
## 1 19.0 1260 22778 10.8 65.0
## 2 20.2 740 24887 9.8 42.2
## 3 23.9 297 20951 8.3 57.3
## 4 28.3 1047 24994 16.7 60.4
## 5 31.3 1514 24554 20.6 48.3
## 6 32.5 1067 24739 17.5 39.3
## 7 32.6 471 24768 11.6 40.9
## 8 32.9 1119 24907 24.1 37.2
## 9 34.0 2311 20394 13.8 53.8
## 10 34.6 575 22242 15.4 52.5
## 11 34.8 1173 24134 20.7 51.5
## 12 35.8 757 23969 11.9 85.7
## 13 37.3 2839 24709 27.5 56.5
## 14 38.6 507 24850 10.3 49.0
## 15 38.7 434 23039 10.5 33.3
## 16 38.8 1874 24270 15.3 45.0
## 17 39.0 340 23414 13.8 42.1
## 18 42.7 212 21674 18.8 34.1
## 19 45.7 394 23700 26.8 31.7
## 20 48.5 303 24058 33.3 41.4
## 21 49.9 852 20524 4.4 44.9
## 22 52.9 688 23437 30.0 46.5
## 23 59.9 363 19764 5.9 26.5
# Raise every salary below 25000 to 25000. pmax() is vectorised and keeps an
# NA salary as NA, whereas assigning through a logical row index that contains
# NA is rejected for data frames.
data.frame_meap$salary <- pmax(data.frame_meap$salary, 25000)
# Rows whose salary is now exactly 25000
salary25K <- filter(data.frame_meap, salary == 25000)
# Display only lnchprg, enroll, salary, math10 and sci11, selected by name
salary25K[, c("lnchprg", "enroll", "salary", "math10", "sci11")]
## lnchprg enroll salary math10 sci11
## 1 19.0 1260 25000 10.8 65.0
## 2 20.2 740 25000 9.8 42.2
## 3 23.9 297 25000 8.3 57.3
## 4 28.3 1047 25000 16.7 60.4
## 5 31.3 1514 25000 20.6 48.3
## 6 32.5 1067 25000 17.5 39.3
## 7 32.6 471 25000 11.6 40.9
## 8 32.9 1119 25000 24.1 37.2
## 9 34.0 2311 25000 13.8 53.8
## 10 34.6 575 25000 15.4 52.5
## 11 34.8 1173 25000 20.7 51.5
## 12 35.8 757 25000 11.9 85.7
## 13 37.3 2839 25000 27.5 56.5
## 14 38.6 507 25000 10.3 49.0
## 15 38.7 434 25000 10.5 33.3
## 16 38.8 1874 25000 15.3 45.0
## 17 39.0 340 25000 13.8 42.1
## 18 42.7 212 25000 18.8 34.1
## 19 45.7 394 25000 26.8 31.7
## 20 48.5 303 25000 33.3 41.4
## 21 49.9 852 25000 4.4 44.9
## 22 52.9 688 25000 30.0 46.5
## 23 59.9 363 25000 5.9 26.5
Display the first 50 of the 408 observations for the columns lnchprg, enroll, salary, math10, and sci11.
# First 50 rows of the selected columns. Columns are picked by name instead of
# position, and head() returns at most 50 rows without padding with NA rows.
head(
  data.frame_meap[, c("lnchprg", "enroll", "salary", "math10", "sci11")],
  n = 50
)
## lnchprg enroll salary math10 sci11
## 1 1.4 1862 37498 56.4 67.9
## 2 2.3 11355 48722 42.7 65.3
## 3 2.7 7685 44541 43.8 54.3
## 4 3.4 1148 31566 25.3 60.0
## 5 3.4 1572 29781 15.3 65.8
## 6 3.4 2496 36801 46.0 60.5
## 7 3.6 3358 37863 33.6 67.4
## 8 3.6 11983 40133 40.1 69.4
## 9 4.2 3499 36451 42.1 71.7
## 10 4.2 5095 33449 39.8 55.0
## 11 4.5 16793 40859 30.8 58.1
## 12 4.5 984 42785 14.6 74.6
## 13 5.1 1116 34085 51.1 48.6
## 14 5.5 4156 29700 29.2 55.0
## 15 5.5 4046 47436 49.7 62.0
## 16 5.6 10695 40304 42.6 53.1
## 17 5.8 3117 38873 35.0 54.1
## 18 6.1 2168 35536 35.9 45.4
## 19 6.2 2317 37350 17.8 44.5
## 20 6.2 1391 31076 25.5 74.4
## 21 6.3 3691 35538 33.2 69.2
## 22 6.3 1673 31271 39.6 67.2
## 23 6.7 2671 35547 30.3 57.1
## 24 6.8 650 34194 54.8 72.8
## 25 6.9 2119 33345 62.4 66.7
## 26 7.8 1002 25453 29.7 56.5
## 27 7.8 2549 41451 36.9 56.7
## 28 8.0 5805 33234 22.4 57.9
## 29 8.0 2822 36954 23.9 50.9
## 30 8.1 5491 34499 39.2 45.0
## 31 8.2 2331 37519 22.9 57.1
## 32 8.3 4725 39481 27.8 64.6
## 33 8.4 2629 33867 36.9 67.1
## 34 8.4 2475 34520 37.3 58.4
## 35 8.5 1278 27955 21.3 65.5
## 36 8.6 8016 36461 48.7 57.0
## 37 8.7 913 31204 34.2 45.8
## 38 8.8 891 34105 32.8 51.0
## 39 8.8 5125 34315 33.2 39.4
## 40 9.0 3263 28948 31.7 58.3
## 41 9.1 444 26682 20.0 44.0
## 42 9.1 10591 39982 28.0 39.9
## 43 9.2 13684 52812 23.4 65.4
## 44 9.3 5926 28053 42.9 69.9
## 45 9.3 2100 37112 16.8 71.0
## 46 9.4 669 36183 30.9 47.7
## 47 9.5 1789 36622 16.2 40.4
## 48 9.5 2674 45358 23.6 49.6
## 49 9.6 2405 33341 18.1 60.4
## 50 9.6 891 32197 30.5 61.8