Read in the data from the source CSV on GitHub.
ELAMAT <- readr::read_csv("https://raw.githubusercontent.com/sadia-perveen/Project-2/master/ELAMAT1.csv")
## Parsed with column specification:
## cols(
## .default = col_character(),
## NRC_CODE = col_double(),
## COUNTY_CODE = col_logical(),
## COUNTY_DESC = col_logical(),
## BEDSCODE = col_double(),
## SUBGROUP_CODE = col_double(),
## TOTAL_TESTED = col_double()
## )
## See spec(...) for full column specifications.
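Because readr guessed most columns as character (including MEAN_SCALE_SCORE and the percentage columns), and guessed COUNTY_CODE/COUNTY_DESC as logical only because they are entirely missing, an explicit column specification could be supplied instead. This is a sketch under those assumptions, reusing the column names from the spec printed above; columns left out fall back to character.
# Optional sketch: pin the column types instead of relying on the guessed spec.
ELAMAT <- readr::read_csv(
  "https://raw.githubusercontent.com/sadia-perveen/Project-2/master/ELAMAT1.csv",
  col_types = readr::cols(
    .default      = readr::col_character(),
    NRC_CODE      = readr::col_double(),
    BEDSCODE      = readr::col_double(),
    SUBGROUP_CODE = readr::col_double(),
    TOTAL_TESTED  = readr::col_double()
  )
)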
Order the data by mean scale score in descending order.
AVG_SCORE <- dplyr::arrange(ELAMAT, desc(as.numeric(MEAN_SCALE_SCORE)))
AVG_SCORE
## # A tibble: 3,861 x 23
## SY_END_DATE NRC_CODE NRC_DESC COUNTY_CODE COUNTY_DESC BEDSCODE NAME
## <chr> <dbl> <chr> <lgl> <lgl> <dbl> <chr>
## 1 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 2 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 3 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 4 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 5 6/30/2019 7 Charters NA NA 7.00e 0 NRC ~
## 6 6/30/2019 7 Charters NA NA 7.00e 0 NRC ~
## 7 6/30/2019 7 Charters NA NA 7.00e 0 NRC ~
## 8 6/30/2019 7 Charters NA NA 7.00e 0 NRC ~
## 9 6/30/2019 1 NYC NA NA 1.00e 0 NRC ~
## 10 6/30/2019 NA <NA> TRUE NA 1.00e10 ALBA~
## # ... with 3,851 more rows, and 16 more variables:
## # ITEM_SUBJECT_AREA <chr>, ITEM_DESC <chr>, SUBGROUP_CODE <dbl>,
## # SUBGROUP_NAME <chr>, TOTAL_TESTED <dbl>, L1_COUNT <chr>, L1_PCT <chr>,
## # L2_COUNT <chr>, L2_PCT <chr>, L3_COUNT <chr>, L3_PCT <chr>,
## # L4_COUNT <chr>, L4_PCT <chr>, `L2-L4_PCT` <chr>, `L3-L4_PCT` <chr>,
## # MEAN_SCALE_SCORE <chr>
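Since MEAN_SCALE_SCORE was read as character, each arrange() call has to wrap it in as.numeric(). One alternative, sketched below with my own object and column names (ELAMAT_NUM, MEAN_SCALE_SCORE_NUM), converts the score once so later steps can sort on the numeric column directly; rows whose score cannot be parsed become NA.
# Sketch: convert the score to numeric once and keep the original data frame intact.
ELAMAT_NUM <- dplyr::mutate(
  ELAMAT,
  MEAN_SCALE_SCORE_NUM = suppressWarnings(as.numeric(MEAN_SCALE_SCORE))
)
dplyr::arrange(ELAMAT_NUM, dplyr::desc(MEAN_SCALE_SCORE_NUM))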
Filter for rows that have either a Male or Female listing in the SUBGROUP_NAME column and store the result in the ALL_GENDER data frame.
ALL_GENDER <- dplyr::filter(ELAMAT, SUBGROUP_NAME == "Female" | SUBGROUP_NAME == "Male")
ALL_GENDER
## # A tibble: 368 x 23
## SY_END_DATE NRC_CODE NRC_DESC COUNTY_CODE COUNTY_DESC BEDSCODE NAME
## <chr> <dbl> <chr> <lgl> <lgl> <dbl> <chr>
## 1 6/30/2019 NA <NA> NA NA 0 STAT~
## 2 6/30/2019 NA <NA> NA NA 0 STAT~
## 3 6/30/2019 NA <NA> NA NA 0 STAT~
## 4 6/30/2019 NA <NA> NA NA 0 STAT~
## 5 6/30/2019 NA <NA> NA NA 0 STAT~
## 6 6/30/2019 NA <NA> NA NA 0 STAT~
## 7 6/30/2019 NA <NA> NA NA 0 STAT~
## 8 6/30/2019 NA <NA> NA NA 0 STAT~
## 9 6/30/2019 NA <NA> NA NA 0 STAT~
## 10 6/30/2019 NA <NA> NA NA 0 STAT~
## # ... with 358 more rows, and 16 more variables: ITEM_SUBJECT_AREA <chr>,
## # ITEM_DESC <chr>, SUBGROUP_CODE <dbl>, SUBGROUP_NAME <chr>,
## # TOTAL_TESTED <dbl>, L1_COUNT <chr>, L1_PCT <chr>, L2_COUNT <chr>,
## # L2_PCT <chr>, L3_COUNT <chr>, L3_PCT <chr>, L4_COUNT <chr>,
## # L4_PCT <chr>, `L2-L4_PCT` <chr>, `L3-L4_PCT` <chr>,
## # MEAN_SCALE_SCORE <chr>
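The same filter can be written with %in%, which scales better if more subgroup names were ever added; this sketch returns the same rows as the filter above.
# Equivalent filter using %in% instead of chained == comparisons.
dplyr::filter(ELAMAT, SUBGROUP_NAME %in% c("Female", "Male"))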
Arrange the ALL_GENDER data frame by MEAN_SCALE_SCORE in descending order and store the result in the AVG_SCORE_GEN data frame.
AVG_SCORE_GEN <- dplyr::arrange(ALL_GENDER, desc(as.numeric(MEAN_SCALE_SCORE)))
AVG_SCORE_GEN
## # A tibble: 368 x 23
## SY_END_DATE NRC_CODE NRC_DESC COUNTY_CODE COUNTY_DESC BEDSCODE NAME
## <chr> <dbl> <chr> <lgl> <lgl> <dbl> <chr>
## 1 6/30/2019 3 Urban-S~ TRUE NA 1.01e10 MONT~
## 2 6/30/2019 3 Urban-S~ TRUE NA 1.01e10 MONT~
## 3 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 4 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 5 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 6 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 7 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 8 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 9 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## 10 6/30/2019 6 Low Nee~ NA NA 6.00e 0 NRC ~
## # ... with 358 more rows, and 16 more variables: ITEM_SUBJECT_AREA <chr>,
## # ITEM_DESC <chr>, SUBGROUP_CODE <dbl>, SUBGROUP_NAME <chr>,
## # TOTAL_TESTED <dbl>, L1_COUNT <chr>, L1_PCT <chr>, L2_COUNT <chr>,
## # L2_PCT <chr>, L3_COUNT <chr>, L3_PCT <chr>, L4_COUNT <chr>,
## # L4_PCT <chr>, `L2-L4_PCT` <chr>, `L3-L4_PCT` <chr>,
## # MEAN_SCALE_SCORE <chr>
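As a quick sanity check, the two subgroups could also be compared on their average score. This is a sketch; the summary column names (MEAN_SCORE, N_ROWS) are my own, and scores that cannot be parsed become NA and are dropped by na.rm = TRUE.
# Sketch: average mean scale score by gender subgroup.
dplyr::summarise(
  dplyr::group_by(AVG_SCORE_GEN, SUBGROUP_NAME),
  MEAN_SCORE = mean(suppressWarnings(as.numeric(MEAN_SCALE_SCORE)), na.rm = TRUE),
  N_ROWS = dplyr::n()
)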
Filter for all Asian or Pacific Islander students and store the result in the ALL_ASIAN data frame, then order the data by the MEAN_SCALE_SCORE column in descending order.
ALL_ASIAN <- dplyr::filter(ELAMAT, SUBGROUP_NAME == "Asian or Pacific Islander")
AVG_SCORE_ASN <- dplyr::arrange(ALL_ASIAN, desc(as.numeric(MEAN_SCALE_SCORE)))
AVG_SCORE_ASN
## # A tibble: 181 x 23
## SY_END_DATE NRC_CODE NRC_DESC COUNTY_CODE COUNTY_DESC BEDSCODE NAME
## <chr> <dbl> <chr> <lgl> <lgl> <dbl> <chr>
## 1 6/30/2019 6 Low Nee~ NA NA 6 NRC ~
## 2 6/30/2019 6 Low Nee~ NA NA 6 NRC ~
## 3 6/30/2019 6 Low Nee~ NA NA 6 NRC ~
## 4 6/30/2019 6 Low Nee~ NA NA 6 NRC ~
## 5 6/30/2019 7 Charters NA NA 7 NRC ~
## 6 6/30/2019 7 Charters NA NA 7 NRC ~
## 7 6/30/2019 7 Charters NA NA 7 NRC ~
## 8 6/30/2019 7 Charters NA NA 7 NRC ~
## 9 6/30/2019 1 NYC NA NA 1 NRC ~
## 10 6/30/2019 1 NYC NA NA 1 NRC ~
## # ... with 171 more rows, and 16 more variables: ITEM_SUBJECT_AREA <chr>,
## # ITEM_DESC <chr>, SUBGROUP_CODE <dbl>, SUBGROUP_NAME <chr>,
## # TOTAL_TESTED <dbl>, L1_COUNT <chr>, L1_PCT <chr>, L2_COUNT <chr>,
## # L2_PCT <chr>, L3_COUNT <chr>, L3_PCT <chr>, L4_COUNT <chr>,
## # L4_PCT <chr>, `L2-L4_PCT` <chr>, `L3-L4_PCT` <chr>,
## # MEAN_SCALE_SCORE <chr>
ALL_ASIAN
## # A tibble: 181 x 23
## SY_END_DATE NRC_CODE NRC_DESC COUNTY_CODE COUNTY_DESC BEDSCODE NAME
## <chr> <dbl> <chr> <lgl> <lgl> <dbl> <chr>
## 1 6/30/2019 NA <NA> NA NA 0 STAT~
## 2 6/30/2019 NA <NA> NA NA 0 STAT~
## 3 6/30/2019 NA <NA> NA NA 0 STAT~
## 4 6/30/2019 NA <NA> NA NA 0 STAT~
## 5 6/30/2019 NA <NA> NA NA 0 STAT~
## 6 6/30/2019 NA <NA> NA NA 0 STAT~
## 7 6/30/2019 NA <NA> NA NA 0 STAT~
## 8 6/30/2019 NA <NA> NA NA 0 STAT~
## 9 6/30/2019 NA <NA> NA NA 0 STAT~
## 10 6/30/2019 NA <NA> NA NA 0 STAT~
## # ... with 171 more rows, and 16 more variables: ITEM_SUBJECT_AREA <chr>,
## # ITEM_DESC <chr>, SUBGROUP_CODE <dbl>, SUBGROUP_NAME <chr>,
## # TOTAL_TESTED <dbl>, L1_COUNT <chr>, L1_PCT <chr>, L2_COUNT <chr>,
## # L2_PCT <chr>, L3_COUNT <chr>, L3_PCT <chr>, L4_COUNT <chr>,
## # L4_PCT <chr>, `L2-L4_PCT` <chr>, `L3-L4_PCT` <chr>,
## # MEAN_SCALE_SCORE <chr>
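To see which schools and assessments top the sorted list without the full 23-column print, a few columns can be pulled out; this is a sketch using column names from the tibble printed above.
# Sketch: peek at the ten highest-scoring rows with a handful of columns.
head(dplyr::select(AVG_SCORE_ASN, NAME, ITEM_DESC, MEAN_SCALE_SCORE), 10)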
Filter the data to All Students in NYC based on the SUBGROUP_NAME and NRC_DESC columns.
ALL_STUDENTS <- dplyr::filter(ELAMAT, SUBGROUP_NAME == "All Students" & NRC_DESC == "NYC")
ALL_STUDENTS
## # A tibble: 12 x 23
## SY_END_DATE NRC_CODE NRC_DESC COUNTY_CODE COUNTY_DESC BEDSCODE NAME
## <chr> <dbl> <chr> <lgl> <lgl> <dbl> <chr>
## 1 6/30/2019 1 NYC NA NA 1 NRC ~
## 2 6/30/2019 1 NYC NA NA 1 NRC ~
## 3 6/30/2019 1 NYC NA NA 1 NRC ~
## 4 6/30/2019 1 NYC NA NA 1 NRC ~
## 5 6/30/2019 1 NYC NA NA 1 NRC ~
## 6 6/30/2019 1 NYC NA NA 1 NRC ~
## 7 6/30/2019 1 NYC NA NA 1 NRC ~
## 8 6/30/2019 1 NYC NA NA 1 NRC ~
## 9 6/30/2019 1 NYC NA NA 1 NRC ~
## 10 6/30/2019 1 NYC NA NA 1 NRC ~
## 11 6/30/2019 1 NYC NA NA 1 NRC ~
## 12 6/30/2019 1 NYC NA NA 1 NRC ~
## # ... with 16 more variables: ITEM_SUBJECT_AREA <chr>, ITEM_DESC <chr>,
## # SUBGROUP_CODE <dbl>, SUBGROUP_NAME <chr>, TOTAL_TESTED <dbl>,
## # L1_COUNT <chr>, L1_PCT <chr>, L2_COUNT <chr>, L2_PCT <chr>,
## # L3_COUNT <chr>, L3_PCT <chr>, L4_COUNT <chr>, L4_PCT <chr>,
## # `L2-L4_PCT` <chr>, `L3-L4_PCT` <chr>, MEAN_SCALE_SCORE <chr>
ALL_STUDENTS$MEAN_SCALE_SCORE
## [1] "600" "600" "599" "600" "600" "600" "599" "600" "601" "601" "601"
## [12] "601"
Generate a barplot of L4_PCT by ITEM_DESC.
barplot(as.numeric(gsub("%", "", ALL_STUDENTS$L4_PCT)), names = ALL_STUDENTS$ITEM_DESC,
xlab = "Students", ylab = "Percentage",
main = "PERCENT OF STUDENTS IN LEVEL 4", las=2, cex.names=.5, col=c("darkblue","green"))
Generate a barplot of L1_PCT by ITEM_DESC.
barplot(as.numeric(gsub("%", "", ALL_STUDENTS$L1_PCT)), names = ALL_STUDENTS$ITEM_DESC,
xlab = "Students", ylab = "Percentage",
main = "PERCENT OF STUDENTS IN LEVEL 1", las=2, cex.names=.5, col=c("orange","green"))