1. Load the data

library(readr)
## Warning: package 'readr' was built under R version 3.4.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read hd2016.csv into a tibble, letting readr guess each column's type.
# NOTE(review): the load reports 2 parsing failures (row 68, CHFTITLE);
# inspect them with problems(AU) before relying on that row.
AU <- read_csv(file = "hd2016.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   INSTNM = col_character(),
##   IALIAS = col_character(),
##   ADDR = col_character(),
##   CITY = col_character(),
##   STABBR = col_character(),
##   ZIP = col_character(),
##   CHFNM = col_character(),
##   CHFTITLE = col_character(),
##   GENTELE = col_double(),
##   EIN = col_character(),
##   DUNS = col_character(),
##   OPEID = col_character(),
##   WEBADDR = col_character(),
##   ADMINURL = col_character(),
##   FAIDURL = col_character(),
##   APPLURL = col_character(),
##   NPRICURL = col_character(),
##   VETURL = col_character(),
##   ATHURL = col_character(),
##   DISAURL = col_character()
##   # ... with 6 more columns
## )
## See spec(...) for full column specifications.
## Warning: 2 parsing failures.
## row # A tibble: 2 x 5 col     row      col           expected actual         file expected   <int>    <chr>              <chr>  <chr>        <chr> actual 1    68 CHFTITLE delimiter or quote      A 'hd2016.csv' file 2    68 CHFTITLE delimiter or quote        'hd2016.csv'
# Preview the first six rows of the full table to sanity-check the load.
head(AU, n = 6L)
## # A tibble: 6 x 70
##   UNITID                              INSTNM
##    <int>                               <chr>
## 1 100654            Alabama A & M University
## 2 100663 University of Alabama at Birmingham
## 3 100690                  Amridge University
## 4 100706 University of Alabama in Huntsville
## 5 100724            Alabama State University
## 6 100733 University of Alabama System Office
## # ... with 68 more variables: IALIAS <chr>, ADDR <chr>, CITY <chr>,
## #   STABBR <chr>, ZIP <chr>, FIPS <int>, OBEREG <int>, CHFNM <chr>,
## #   CHFTITLE <chr>, GENTELE <dbl>, EIN <chr>, DUNS <chr>, OPEID <chr>,
## #   OPEFLAG <int>, WEBADDR <chr>, ADMINURL <chr>, FAIDURL <chr>,
## #   APPLURL <chr>, NPRICURL <chr>, VETURL <chr>, ATHURL <chr>,
## #   DISAURL <chr>, SECTOR <int>, ICLEVEL <int>, CONTROL <int>,
## #   HLOFFER <int>, UGOFFER <int>, GROFFER <int>, HDEGOFR1 <int>,
## #   DEGGRANT <int>, HBCU <int>, HOSPITAL <int>, MEDICAL <int>,
## #   TRIBAL <int>, LOCALE <int>, OPENPUBL <int>, ACT <chr>, NEWID <int>,
## #   DEATHYR <int>, CLOSEDAT <chr>, CYACTIVE <int>, POSTSEC <int>,
## #   PSEFLAG <int>, PSET4FLG <int>, RPTMTH <int>, INSTCAT <int>,
## #   C15BASIC <int>, C15IPUG <int>, C15IPGRD <int>, C15UGPRF <int>,
## #   C15ENPRF <int>, C15SZSET <int>, CCBASIC <int>, CARNEGIE <int>,
## #   LANDGRNT <int>, INSTSIZE <int>, F1SYSTYP <int>, F1SYSNAM <chr>,
## #   F1SYSCOD <int>, CBSA <int>, CBSATYPE <int>, CSA <int>, NECTA <int>,
## #   COUNTYCD <int>, COUNTYNM <chr>, CNGDSTCD <int>, LONGITUD <dbl>,
## #   LATITUDE <dbl>
# Per-column overview: five-number summary plus mean for numeric columns,
# length/class/mode for character columns.
# NOTE(review): many integer columns report minima of -2 or -3; these look
# like sentinel codes rather than real measurements, so their means and
# quartiles are probably not meaningful -- confirm against the data dictionary.
summary(AU)
##      UNITID          INSTNM             IALIAS              ADDR          
##  Min.   :100654   Length:7521        Length:7521        Length:7521       
##  1st Qu.:173258   Class :character   Class :character   Class :character  
##  Median :227979   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :291277                                                           
##  3rd Qu.:446224                                                           
##  Max.   :490018                                                           
##                                                                           
##      CITY              STABBR              ZIP                 FIPS      
##  Length:7521        Length:7521        Length:7521        Min.   : 1.00  
##  Class :character   Class :character   Class :character   1st Qu.:13.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :29.00  
##                                                           Mean   :29.18  
##                                                           3rd Qu.:42.00  
##                                                           Max.   :78.00  
##                                                                          
##      OBEREG         CHFNM             CHFTITLE            GENTELE         
##  Min.   :0.000   Length:7521        Length:7521        Min.   :2.012e+09  
##  1st Qu.:3.000   Class :character   Class :character   1st Qu.:4.105e+09  
##  Median :5.000   Mode  :character   Mode  :character   Median :6.508e+09  
##  Mean   :4.621                                         Mean   :5.859e+12  
##  3rd Qu.:6.000                                         3rd Qu.:8.433e+09  
##  Max.   :9.000                                         Max.   :9.785e+14  
##                                                        NA's   :67         
##      EIN                DUNS              OPEID              OPEFLAG     
##  Length:7521        Length:7521        Length:7521        Min.   :1.000  
##  Class :character   Class :character   Class :character   1st Qu.:1.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :1.000  
##                                                           Mean   :1.351  
##                                                           3rd Qu.:1.000  
##                                                           Max.   :7.000  
##                                                                          
##    WEBADDR            ADMINURL           FAIDURL         
##  Length:7521        Length:7521        Length:7521       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##    APPLURL            NPRICURL            VETURL         
##  Length:7521        Length:7521        Length:7521       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##     ATHURL            DISAURL              SECTOR          ICLEVEL      
##  Length:7521        Length:7521        Min.   : 0.000   Min.   :-3.000  
##  Class :character   Class :character   1st Qu.: 2.000   1st Qu.: 1.000  
##  Mode  :character   Mode  :character   Median : 4.000   Median : 2.000  
##                                        Mean   : 5.499   Mean   : 1.806  
##                                        3rd Qu.: 8.000   3rd Qu.: 3.000  
##                                        Max.   :99.000   Max.   : 3.000  
##                                                                         
##     CONTROL          HLOFFER          UGOFFER          GROFFER      
##  Min.   :-3.000   Min.   :-3.000   Min.   :-3.000   Min.   :-3.000  
##  1st Qu.: 1.000   1st Qu.: 2.000   1st Qu.: 1.000   1st Qu.: 1.000  
##  Median : 2.000   Median : 4.000   Median : 1.000   Median : 2.000  
##  Mean   : 2.133   Mean   : 4.543   Mean   : 1.009   Mean   : 1.658  
##  3rd Qu.: 3.000   3rd Qu.: 7.000   3rd Qu.: 1.000   3rd Qu.: 2.000  
##  Max.   : 3.000   Max.   : 9.000   Max.   : 2.000   Max.   : 2.000  
##                                                                     
##     HDEGOFR1        DEGGRANT          HBCU          HOSPITAL      
##  Min.   :-3.00   Min.   :-3.00   Min.   :1.000   Min.   :-2.0000  
##  1st Qu.: 0.00   1st Qu.: 1.00   1st Qu.:2.000   1st Qu.:-2.0000  
##  Median :13.00   Median : 1.00   Median :2.000   Median : 2.0000  
##  Mean   :17.34   Mean   : 1.31   Mean   :1.987   Mean   : 0.3773  
##  3rd Qu.:30.00   3rd Qu.: 2.00   3rd Qu.:2.000   3rd Qu.: 2.0000  
##  Max.   :40.00   Max.   : 2.00   Max.   :2.000   Max.   : 2.0000  
##                                                                   
##     MEDICAL           TRIBAL          LOCALE         OPENPUBL     
##  Min.   :-2.000   Min.   :1.000   Min.   :-3.00   Min.   :0.0000  
##  1st Qu.: 2.000   1st Qu.:2.000   1st Qu.:12.00   1st Qu.:1.0000  
##  Median : 2.000   Median :2.000   Median :21.00   Median :1.0000  
##  Mean   : 1.696   Mean   :1.995   Mean   :19.54   Mean   :0.9996  
##  3rd Qu.: 2.000   3rd Qu.:2.000   3rd Qu.:22.00   3rd Qu.:1.0000  
##  Max.   : 2.000   Max.   :2.000   Max.   :43.00   Max.   :1.0000  
##                                                                   
##      ACT                NEWID           DEATHYR          CLOSEDAT        
##  Length:7521        Min.   :    -2   Min.   :  -2.00   Length:7521       
##  Class :character   1st Qu.:    -2   1st Qu.:  -2.00   Class :character  
##  Mode  :character   Median :    -2   Median :  -2.00   Mode  :character  
##                     Mean   :  2966   Mean   :  79.84                     
##                     3rd Qu.:    -2   3rd Qu.:  -2.00                     
##                     Max.   :489937   Max.   :2016.00                     
##                                                                          
##     CYACTIVE        POSTSEC         PSEFLAG         PSET4FLG    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :1.076   Mean   :1.006   Mean   :1.082   Mean   :1.369  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000  
##  Max.   :3.000   Max.   :2.000   Max.   :3.000   Max.   :9.000  
##                                                                 
##      RPTMTH          INSTCAT          C15BASIC         C15IPUG      
##  Min.   :-2.000   Min.   :-2.000   Min.   :-2.000   Min.   :-2.000  
##  1st Qu.: 1.000   1st Qu.: 2.000   1st Qu.:-2.000   1st Qu.:-2.000  
##  Median : 1.000   Median : 4.000   Median : 7.000   Median : 2.000  
##  Mean   : 1.233   Mean   : 3.502   Mean   : 9.323   Mean   : 4.592  
##  3rd Qu.: 2.000   3rd Qu.: 6.000   3rd Qu.:20.000   3rd Qu.:12.000  
##  Max.   : 3.000   Max.   : 6.000   Max.   :33.000   Max.   :20.000  
##                                                                     
##     C15IPGRD         C15UGPRF         C15ENPRF          C15SZSET     
##  Min.   :-2.000   Min.   :-2.000   Min.   :-2.0000   Min.   :-2.000  
##  1st Qu.:-2.000   1st Qu.:-2.000   1st Qu.:-2.0000   1st Qu.:-2.000  
##  Median : 0.000   Median : 1.000   Median : 1.0000   Median : 2.000  
##  Mean   : 1.546   Mean   : 3.264   Mean   : 0.8827   Mean   : 3.895  
##  3rd Qu.: 2.000   3rd Qu.: 7.000   3rd Qu.: 3.0000   3rd Qu.: 8.000  
##  Max.   :18.000   Max.   :15.000   Max.   : 7.0000   Max.   :18.000  
##                                                                      
##     CCBASIC          CARNEGIE        LANDGRNT        INSTSIZE     
##  Min.   :-3.000   Min.   :-3.00   Min.   :1.000   Min.   :-2.000  
##  1st Qu.:-3.000   1st Qu.:-3.00   1st Qu.:2.000   1st Qu.: 1.000  
##  Median : 3.000   Median :-3.00   Median :2.000   Median : 1.000  
##  Mean   : 7.398   Mean   :14.55   Mean   :1.986   Mean   : 1.539  
##  3rd Qu.:18.000   3rd Qu.:40.00   3rd Qu.:2.000   3rd Qu.: 2.000  
##  Max.   :33.000   Max.   :60.00   Max.   :2.000   Max.   : 5.000  
##                                                                   
##     F1SYSTYP        F1SYSNAM            F1SYSCOD           CBSA      
##  Min.   :-2.000   Length:7521        Min.   :    -2   Min.   :   -2  
##  1st Qu.: 1.000   Class :character   1st Qu.:    -2   1st Qu.:19100  
##  Median : 2.000   Mode  :character   Median :    -2   Median :31080  
##  Mean   : 1.353                      Mean   : 94070   Mean   :29174  
##  3rd Qu.: 2.000                      3rd Qu.:200080   3rd Qu.:38900  
##  Max.   : 2.000                      Max.   :400010   Max.   :49780  
##                                                                      
##     CBSATYPE            CSA            NECTA          COUNTYCD    
##  Min.   :-2.0000   Min.   : -2.0   Min.   :   -2   Min.   :   -2  
##  1st Qu.: 1.0000   1st Qu.:122.0   1st Qu.:   -2   1st Qu.:13121  
##  Median : 1.0000   Median :288.0   Median :   -2   Median :29183  
##  Mean   : 0.9734   Mean   :260.5   Mean   : 3822   Mean   :29235  
##  3rd Qu.: 1.0000   3rd Qu.:408.0   3rd Qu.:   -2   3rd Qu.:42039  
##  Max.   : 2.0000   Max.   :566.0   Max.   :79600   Max.   :78030  
##                                                                   
##    COUNTYNM            CNGDSTCD       LONGITUD          LATITUDE     
##  Length:7521        Min.   :  -2   Min.   :-170.74   Min.   :-14.32  
##  Class :character   1st Qu.:1307   1st Qu.: -97.36   1st Qu.: 33.95  
##  Mode  :character   Median :2906   Median : -86.66   Median : 38.77  
##                     Mean   :2927   Mean   : -90.47   Mean   : 37.37  
##                     3rd Qu.:4206   3rd Qu.: -79.23   3rd Qu.: 41.33  
##                     Max.   :7898   Max.   : 171.38   Max.   : 71.32  
## 

2. Subset the data using dplyr to include five states

# Keep only institutions located in the five states of interest.
# Membership must be tested with %in%: `STABBR == c(...)` recycles the
# five-element vector down the column and compares row i only against the
# state at position ((i - 1) %% 5) + 1, which silently drops most matching
# rows (and triggers a "longer object length" warning).
# NOTE(review): printed results further down in this report were captured
# with the old filter and need to be regenerated.
AU5 <- filter(AU, STABBR %in% c("CA", "TX", "FL", "NY", "PA"))
# Preview the first six rows of the five-state subset.
head(AU5, n = 6L)
## # A tibble: 6 x 70
##   UNITID                                   INSTNM         IALIAS
##    <int>                                    <chr>          <chr>
## 1 108250   ITT Technical Institute-Rancho Cordova           <NA>
## 2 108807                    Allan Hancock College           <NA>
## 3 109040      American Career College-Los Angeles           <NA>
## 4 109721 Associated Technical College-Los Angeles           <NA>
## 5 109934             Bellus Academy-National City Bellus Academy
## 6 110219                         Bryan University           <NA>
## # ... with 67 more variables: ADDR <chr>, CITY <chr>, STABBR <chr>,
## #   ZIP <chr>, FIPS <int>, OBEREG <int>, CHFNM <chr>, CHFTITLE <chr>,
## #   GENTELE <dbl>, EIN <chr>, DUNS <chr>, OPEID <chr>, OPEFLAG <int>,
## #   WEBADDR <chr>, ADMINURL <chr>, FAIDURL <chr>, APPLURL <chr>,
## #   NPRICURL <chr>, VETURL <chr>, ATHURL <chr>, DISAURL <chr>,
## #   SECTOR <int>, ICLEVEL <int>, CONTROL <int>, HLOFFER <int>,
## #   UGOFFER <int>, GROFFER <int>, HDEGOFR1 <int>, DEGGRANT <int>,
## #   HBCU <int>, HOSPITAL <int>, MEDICAL <int>, TRIBAL <int>, LOCALE <int>,
## #   OPENPUBL <int>, ACT <chr>, NEWID <int>, DEATHYR <int>, CLOSEDAT <chr>,
## #   CYACTIVE <int>, POSTSEC <int>, PSEFLAG <int>, PSET4FLG <int>,
## #   RPTMTH <int>, INSTCAT <int>, C15BASIC <int>, C15IPUG <int>,
## #   C15IPGRD <int>, C15UGPRF <int>, C15ENPRF <int>, C15SZSET <int>,
## #   CCBASIC <int>, CARNEGIE <int>, LANDGRNT <int>, INSTSIZE <int>,
## #   F1SYSTYP <int>, F1SYSNAM <chr>, F1SYSCOD <int>, CBSA <int>,
## #   CBSATYPE <int>, CSA <int>, NECTA <int>, COUNTYCD <int>,
## #   COUNTYNM <chr>, CNGDSTCD <int>, LONGITUD <dbl>, LATITUDE <dbl>

3. Using dplyr, count the number of universities by a categorical variable. Note: you may need to change the variable from character to another format first. Provide a tibble output from dplyr.

# Tally the institutions in the five-state subset by SECTOR code and list
# the codes from least to most frequent. The result is a tibble with the
# columns SECTOR and count.
Sector <- AU5 %>%
  count(SECTOR) %>%
  rename(count = n) %>%
  arrange(count)
Sector
## # A tibble: 11 x 2
##    SECTOR count
##     <int> <int>
##  1     99     1
##  2      0     2
##  3      8     6
##  4      5    14
##  5      7    16
##  6      1    43
##  7      3    48
##  8      4    50
##  9      6    73
## 10      2   113
## 11      9   120

4. Make three ggplot graphs of your findings

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
# Bar chart of the number of institutions per state. STABBR drives both the
# x position and the fill colour, so every state gets its own coloured bar.
g <- ggplot(data = AU5, mapping = aes(x = STABBR, fill = STABBR))
g +
  geom_bar() +
  labs(title = "Bar Plot Showing The Count of University by State")

# Pie chart of each state's share of institutions, drawn with ggplot2 so it
# counts towards the three ggplot graphs the task asks for (base pie() is
# not a ggplot graph).
# A single stacked bar (x = "") is wrapped onto polar coordinates; mapping
# theta to y turns each state's row count into its slice angle.
ggplot(AU5, aes(x = "", fill = STABBR)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  labs(
    title = "Pie Chart of Universities in different states",
    x = NULL,
    y = NULL
  )

# Stacked bar chart of ICLEVEL codes, with each bar split by state.
# NOTE(review): the "Lowest Count" label sits at a fixed position
# (x = 2, y = 250); it will not follow the bars if the underlying counts
# change, so re-check its placement whenever AU5 changes.
d <- ggplot(data = AU5, mapping = aes(x = ICLEVEL))
d +
  geom_bar(mapping = aes(fill = STABBR), width = 0.5) +
  ggtitle("Histogram Showing ICLEVEL Across States") +
  annotate(geom = "text", x = 2, y = 250, label = "Lowest Count")

Summary: This report contains a brief analysis of university information in five US states: California (CA), Texas (TX), Florida (FL), Pennsylvania (PA), and New York (NY).

I used the following methods in the analysis: adjusting missing values, ordering states by number of universities, conducting a differentiation analysis between them, and conducting a comparison analysis with the help of data visualization. The results of the analysis show that CA has the greatest number of universities. The largest proportion of institutions have ICLEVEL 1, followed by 3 and then 2.