R Packages

library(tidyverse) #loading all library needed for this assignment
## -- Attaching packages -------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library(dplyr)
library(DBI)
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(rstudioapi)
library(RJDBC)
## Loading required package: rJava
library(odbc)
library(RSQLite)
library(readr)
library(RCurl)
## 
## Attaching package: 'RCurl'
## The following object is masked from 'package:rJava':
## 
##     clone
## The following object is masked from 'package:tidyr':
## 
##     complete
library(stringr)

Loading the dataframe

I can load the data frame from my local drive, or I can pull it directly from GitHub.

For some reason, at the last minute, knitting kept failing with the error “Quitting from lines 37-54 (DATA607_Assg3_String_Manip.Rmd) Error in eval(expr, envir, enclos) : …”. So I commented out the working-directory code and read the file from GitHub instead.

#setwd("~/R/DATA607_Assg3") #set the working directory to access dataframe, major-list
#a173College_majors
#View(a173College_majors)

  # looking at what the data look like
#View(a173College_majors) #another way of looking at the data
#~/R/DATA607_Assg3/a173College_majors
# Reading the file majors-list.csv from the local working directory also works: majors_list <- read_csv("~/R/DATA607_Assg3/majors-list.csv")

# Read the majors list straight from GitHub so that knitting does not depend
# on a local working directory.
My_173Majors <- read.csv(
  "https://raw.githubusercontent.com/asmozo24/data/master/college-majors/majors-list.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
My_173Majors # print the whole table to confirm the expected data was loaded
##     FOD1P                                                             Major
## 1    1100                                               GENERAL AGRICULTURE
## 2    1101                             AGRICULTURE PRODUCTION AND MANAGEMENT
## 3    1102                                            AGRICULTURAL ECONOMICS
## 4    1103                                                   ANIMAL SCIENCES
## 5    1104                                                      FOOD SCIENCE
## 6    1105                                        PLANT SCIENCE AND AGRONOMY
## 7    1106                                                      SOIL SCIENCE
## 8    1199                                         MISCELLANEOUS AGRICULTURE
## 9    1302                                                          FORESTRY
## 10   1303                                      NATURAL RESOURCES MANAGEMENT
## 11   6000                                                         FINE ARTS
## 12   6001                                            DRAMA AND THEATER ARTS
## 13   6002                                                             MUSIC
## 14   6003                                        VISUAL AND PERFORMING ARTS
## 15   6004                                 COMMERCIAL ART AND GRAPHIC DESIGN
## 16   6005                                  FILM VIDEO AND PHOTOGRAPHIC ARTS
## 17   6007                                                       STUDIO ARTS
## 18   6099                                           MISCELLANEOUS FINE ARTS
## 19   1301                                             ENVIRONMENTAL SCIENCE
## 20   3600                                                           BIOLOGY
## 21   3601                                              BIOCHEMICAL SCIENCES
## 22   3602                                                            BOTANY
## 23   3603                                                 MOLECULAR BIOLOGY
## 24   3604                                                           ECOLOGY
## 25   3605                                                          GENETICS
## 26   3606                                                      MICROBIOLOGY
## 27   3607                                                      PHARMACOLOGY
## 28   3608                                                        PHYSIOLOGY
## 29   3609                                                           ZOOLOGY
## 30   3611                                                      NEUROSCIENCE
## 31   3699                                             MISCELLANEOUS BIOLOGY
## 32   4006                               COGNITIVE SCIENCE AND BIOPSYCHOLOGY
## 33   6200                                                  GENERAL BUSINESS
## 34   6201                                                        ACCOUNTING
## 35   6202                                                 ACTUARIAL SCIENCE
## 36   6203                            BUSINESS MANAGEMENT AND ADMINISTRATION
## 37   6204                               OPERATIONS LOGISTICS AND E-COMMERCE
## 38   6205                                                BUSINESS ECONOMICS
## 39   6206                                  MARKETING AND MARKETING RESEARCH
## 40   6207                                                           FINANCE
## 41   6209                          HUMAN RESOURCES AND PERSONNEL MANAGEMENT
## 42   6210                                            INTERNATIONAL BUSINESS
## 43   6211                                            HOSPITALITY MANAGEMENT
## 44   6212                     MANAGEMENT INFORMATION SYSTEMS AND STATISTICS
## 45   6299                   MISCELLANEOUS BUSINESS & MEDICAL ADMINISTRATION
## 46   1901                                                    COMMUNICATIONS
## 47   1902                                                        JOURNALISM
## 48   1903                                                        MASS MEDIA
## 49   1904                                  ADVERTISING AND PUBLIC RELATIONS
## 50   2001                                        COMMUNICATION TECHNOLOGIES
## 51   2100                                  COMPUTER AND INFORMATION SYSTEMS
## 52   2101                          COMPUTER PROGRAMMING AND DATA PROCESSING
## 53   2102                                                  COMPUTER SCIENCE
## 54   2105                                              INFORMATION SCIENCES
## 55   2106                   COMPUTER ADMINISTRATION MANAGEMENT AND SECURITY
## 56   2107                        COMPUTER NETWORKING AND TELECOMMUNICATIONS
## 57   3700                                                       MATHEMATICS
## 58   3701                                               APPLIED MATHEMATICS
## 59   3702                                   STATISTICS AND DECISION SCIENCE
## 60   4005                                  MATHEMATICS AND COMPUTER SCIENCE
## 61   2300                                                 GENERAL EDUCATION
## 62   2301                        EDUCATIONAL ADMINISTRATION AND SUPERVISION
## 63   2303                                         SCHOOL STUDENT COUNSELING
## 64   2304                                              ELEMENTARY EDUCATION
## 65   2305                                     MATHEMATICS TEACHER EDUCATION
## 66   2306                            PHYSICAL AND HEALTH EDUCATION TEACHING
## 67   2307                                         EARLY CHILDHOOD EDUCATION
## 68   2308                            SCIENCE AND COMPUTER TEACHER EDUCATION
## 69   2309                                       SECONDARY TEACHER EDUCATION
## 70   2310                                           SPECIAL NEEDS EDUCATION
## 71   2311                       SOCIAL SCIENCE OR HISTORY TEACHER EDUCATION
## 72   2312                                TEACHER EDUCATION: MULTIPLE LEVELS
## 73   2313                                      LANGUAGE AND DRAMA EDUCATION
## 74   2314                                           ART AND MUSIC EDUCATION
## 75   2399                                           MISCELLANEOUS EDUCATION
## 76   3501                                                   LIBRARY SCIENCE
## 77   1401                                                      ARCHITECTURE
## 78   2400                                               GENERAL ENGINEERING
## 79   2401                                             AEROSPACE ENGINEERING
## 80   2402                                            BIOLOGICAL ENGINEERING
## 81   2403                                         ARCHITECTURAL ENGINEERING
## 82   2404                                            BIOMEDICAL ENGINEERING
## 83   2405                                              CHEMICAL ENGINEERING
## 84   2406                                                 CIVIL ENGINEERING
## 85   2407                                              COMPUTER ENGINEERING
## 86   2408                                            ELECTRICAL ENGINEERING
## 87   2409                         ENGINEERING MECHANICS PHYSICS AND SCIENCE
## 88   2410                                         ENVIRONMENTAL ENGINEERING
## 89   2411                            GEOLOGICAL AND GEOPHYSICAL ENGINEERING
## 90   2412                          INDUSTRIAL AND MANUFACTURING ENGINEERING
## 91   2413                       MATERIALS ENGINEERING AND MATERIALS SCIENCE
## 92   2414                                            MECHANICAL ENGINEERING
## 93   2415                                         METALLURGICAL ENGINEERING
## 94   2416                                    MINING AND MINERAL ENGINEERING
## 95   2417                         NAVAL ARCHITECTURE AND MARINE ENGINEERING
## 96   2418                                               NUCLEAR ENGINEERING
## 97   2419                                             PETROLEUM ENGINEERING
## 98   2499                                         MISCELLANEOUS ENGINEERING
## 99   2500                                          ENGINEERING TECHNOLOGIES
## 100  2501                             ENGINEERING AND INDUSTRIAL MANAGEMENT
## 101  2502                                 ELECTRICAL ENGINEERING TECHNOLOGY
## 102  2503                                INDUSTRIAL PRODUCTION TECHNOLOGIES
## 103  2504                       MECHANICAL ENGINEERING RELATED TECHNOLOGIES
## 104  2599                            MISCELLANEOUS ENGINEERING TECHNOLOGIES
## 105  5008                                                 MATERIALS SCIENCE
## 106  4002                                                NUTRITION SCIENCES
## 107  6100                               GENERAL MEDICAL AND HEALTH SERVICES
## 108  6102                     COMMUNICATION DISORDERS SCIENCES AND SERVICES
## 109  6103                        HEALTH AND MEDICAL ADMINISTRATIVE SERVICES
## 110  6104                                        MEDICAL ASSISTING SERVICES
## 111  6105                                  MEDICAL TECHNOLOGIES TECHNICIANS
## 112  6106                           HEALTH AND MEDICAL PREPARATORY PROGRAMS
## 113  6107                                                           NURSING
## 114  6108               PHARMACY PHARMACEUTICAL SCIENCES AND ADMINISTRATION
## 115  6109                                     TREATMENT THERAPY PROFESSIONS
## 116  6110                                       COMMUNITY AND PUBLIC HEALTH
## 117  6199                          MISCELLANEOUS HEALTH MEDICAL PROFESSIONS
## 118  1501                              AREA ETHNIC AND CIVILIZATION STUDIES
## 119  2601               LINGUISTICS AND COMPARATIVE LANGUAGE AND LITERATURE
## 120  2602     FRENCH GERMAN LATIN AND OTHER COMMON FOREIGN LANGUAGE STUDIES
## 121  2603                                           OTHER FOREIGN LANGUAGES
## 122  3301                                   ENGLISH LANGUAGE AND LITERATURE
## 123  3302                                          COMPOSITION AND RHETORIC
## 124  3401                                                      LIBERAL ARTS
## 125  3402                                                        HUMANITIES
## 126  4001                           INTERCULTURAL AND INTERNATIONAL STUDIES
## 127  4801                                  PHILOSOPHY AND RELIGIOUS STUDIES
## 128  4901                                  THEOLOGY AND RELIGIOUS VOCATIONS
## 129  5502                                       ANTHROPOLOGY AND ARCHEOLOGY
## 130  6006                                         ART HISTORY AND CRITICISM
## 131  6402                                                           HISTORY
## 132  6403                                             UNITED STATES HISTORY
## 133  2201                            COSMETOLOGY SERVICES AND CULINARY ARTS
## 134  2901                                      FAMILY AND CONSUMER SCIENCES
## 135  3801                                             MILITARY TECHNOLOGIES
## 136  4101                     PHYSICAL FITNESS PARKS RECREATION AND LEISURE
## 137  5601                                             CONSTRUCTION SERVICES
## 138  5701 ELECTRICAL, MECHANICAL, AND PRECISION TECHNOLOGIES AND PRODUCTION
## 139  5901                          TRANSPORTATION SCIENCES AND TECHNOLOGIES
## 140  4000                                   MULTI/INTERDISCIPLINARY STUDIES
## 141  3201                                                   COURT REPORTING
## 142  3202                                         PRE-LAW AND LEGAL STUDIES
## 143  5301                              CRIMINAL JUSTICE AND FIRE PROTECTION
## 144  5401                                             PUBLIC ADMINISTRATION
## 145  5402                                                     PUBLIC POLICY
## 146 bbbb                                  N/A (less than bachelor's degree)
## 147  5000                                                 PHYSICAL SCIENCES
## 148  5001                                        ASTRONOMY AND ASTROPHYSICS
## 149  5002                              ATMOSPHERIC SCIENCES AND METEOROLOGY
## 150  5003                                                         CHEMISTRY
## 151  5004                                         GEOLOGY AND EARTH SCIENCE
## 152  5005                                                       GEOSCIENCES
## 153  5006                                                      OCEANOGRAPHY
## 154  5007                                                           PHYSICS
## 155  5098                             MULTI-DISCIPLINARY OR GENERAL SCIENCE
## 156  5102        NUCLEAR, INDUSTRIAL RADIOLOGY, AND BIOLOGICAL TECHNOLOGIES
## 157  5200                                                        PSYCHOLOGY
## 158  5201                                            EDUCATIONAL PSYCHOLOGY
## 159  5202                                               CLINICAL PSYCHOLOGY
## 160  5203                                             COUNSELING PSYCHOLOGY
## 161  5205                          INDUSTRIAL AND ORGANIZATIONAL PSYCHOLOGY
## 162  5206                                                 SOCIAL PSYCHOLOGY
## 163  5299                                          MISCELLANEOUS PSYCHOLOGY
## 164  5403                         HUMAN SERVICES AND COMMUNITY ORGANIZATION
## 165  5404                                                       SOCIAL WORK
## 166  4007                                 INTERDISCIPLINARY SOCIAL SCIENCES
## 167  5500                                           GENERAL SOCIAL SCIENCES
## 168  5501                                                         ECONOMICS
## 169  5503                                                       CRIMINOLOGY
## 170  5504                                                         GEOGRAPHY
## 171  5505                                           INTERNATIONAL RELATIONS
## 172  5506                                  POLITICAL SCIENCE AND GOVERNMENT
## 173  5507                                                         SOCIOLOGY
## 174  5599                                     MISCELLANEOUS SOCIAL SCIENCES
##                          Major_Category
## 1       Agriculture & Natural Resources
## 2       Agriculture & Natural Resources
## 3       Agriculture & Natural Resources
## 4       Agriculture & Natural Resources
## 5       Agriculture & Natural Resources
## 6       Agriculture & Natural Resources
## 7       Agriculture & Natural Resources
## 8       Agriculture & Natural Resources
## 9       Agriculture & Natural Resources
## 10      Agriculture & Natural Resources
## 11                                 Arts
## 12                                 Arts
## 13                                 Arts
## 14                                 Arts
## 15                                 Arts
## 16                                 Arts
## 17                                 Arts
## 18                                 Arts
## 19               Biology & Life Science
## 20               Biology & Life Science
## 21               Biology & Life Science
## 22               Biology & Life Science
## 23               Biology & Life Science
## 24               Biology & Life Science
## 25               Biology & Life Science
## 26               Biology & Life Science
## 27               Biology & Life Science
## 28               Biology & Life Science
## 29               Biology & Life Science
## 30               Biology & Life Science
## 31               Biology & Life Science
## 32               Biology & Life Science
## 33                             Business
## 34                             Business
## 35                             Business
## 36                             Business
## 37                             Business
## 38                             Business
## 39                             Business
## 40                             Business
## 41                             Business
## 42                             Business
## 43                             Business
## 44                             Business
## 45                             Business
## 46          Communications & Journalism
## 47          Communications & Journalism
## 48          Communications & Journalism
## 49          Communications & Journalism
## 50              Computers & Mathematics
## 51              Computers & Mathematics
## 52              Computers & Mathematics
## 53              Computers & Mathematics
## 54              Computers & Mathematics
## 55              Computers & Mathematics
## 56              Computers & Mathematics
## 57              Computers & Mathematics
## 58              Computers & Mathematics
## 59              Computers & Mathematics
## 60              Computers & Mathematics
## 61                            Education
## 62                            Education
## 63                            Education
## 64                            Education
## 65                            Education
## 66                            Education
## 67                            Education
## 68                            Education
## 69                            Education
## 70                            Education
## 71                            Education
## 72                            Education
## 73                            Education
## 74                            Education
## 75                            Education
## 76                            Education
## 77                          Engineering
## 78                          Engineering
## 79                          Engineering
## 80                          Engineering
## 81                          Engineering
## 82                          Engineering
## 83                          Engineering
## 84                          Engineering
## 85                          Engineering
## 86                          Engineering
## 87                          Engineering
## 88                          Engineering
## 89                          Engineering
## 90                          Engineering
## 91                          Engineering
## 92                          Engineering
## 93                          Engineering
## 94                          Engineering
## 95                          Engineering
## 96                          Engineering
## 97                          Engineering
## 98                          Engineering
## 99                          Engineering
## 100                         Engineering
## 101                         Engineering
## 102                         Engineering
## 103                         Engineering
## 104                         Engineering
## 105                         Engineering
## 106                              Health
## 107                              Health
## 108                              Health
## 109                              Health
## 110                              Health
## 111                              Health
## 112                              Health
## 113                              Health
## 114                              Health
## 115                              Health
## 116                              Health
## 117                              Health
## 118           Humanities & Liberal Arts
## 119           Humanities & Liberal Arts
## 120           Humanities & Liberal Arts
## 121           Humanities & Liberal Arts
## 122           Humanities & Liberal Arts
## 123           Humanities & Liberal Arts
## 124           Humanities & Liberal Arts
## 125           Humanities & Liberal Arts
## 126           Humanities & Liberal Arts
## 127           Humanities & Liberal Arts
## 128           Humanities & Liberal Arts
## 129           Humanities & Liberal Arts
## 130           Humanities & Liberal Arts
## 131           Humanities & Liberal Arts
## 132           Humanities & Liberal Arts
## 133 Industrial Arts & Consumer Services
## 134 Industrial Arts & Consumer Services
## 135 Industrial Arts & Consumer Services
## 136 Industrial Arts & Consumer Services
## 137 Industrial Arts & Consumer Services
## 138 Industrial Arts & Consumer Services
## 139 Industrial Arts & Consumer Services
## 140                   Interdisciplinary
## 141                 Law & Public Policy
## 142                 Law & Public Policy
## 143                 Law & Public Policy
## 144                 Law & Public Policy
## 145                 Law & Public Policy
## 146                                <NA>
## 147                   Physical Sciences
## 148                   Physical Sciences
## 149                   Physical Sciences
## 150                   Physical Sciences
## 151                   Physical Sciences
## 152                   Physical Sciences
## 153                   Physical Sciences
## 154                   Physical Sciences
## 155                   Physical Sciences
## 156                   Physical Sciences
## 157            Psychology & Social Work
## 158            Psychology & Social Work
## 159            Psychology & Social Work
## 160            Psychology & Social Work
## 161            Psychology & Social Work
## 162            Psychology & Social Work
## 163            Psychology & Social Work
## 164            Psychology & Social Work
## 165            Psychology & Social Work
## 166                      Social Science
## 167                      Social Science
## 168                      Social Science
## 169                      Social Science
## 170                      Social Science
## 171                      Social Science
## 172                      Social Science
## 173                      Social Science
## 174                      Social Science
#file.rename("~/R/DATA607_Assg3/majors-list.csv", "a173College_majors.csv") # renaming the file dataframe 
# Reading the file majors-list.csv from the local working directory also works: majors_list <- read_csv("~/R/DATA607_Assg3/majors-list.csv")
#a173College_majors

Data manipulation

Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result; one is to check whether the words “DATA” or “STATISTICS” appear in the data and then display the matches.

# Exact-equality test of every cell in the table against "DATA". A cell is TRUE
# only when its entire value is "DATA" (and NA when the cell is NA), so this
# cannot find majors that merely contain the word.
My_173Majors == "DATA"
##        FOD1P Major Major_Category
##   [1,] FALSE FALSE          FALSE
##   [2,] FALSE FALSE          FALSE
##   [3,] FALSE FALSE          FALSE
##   [4,] FALSE FALSE          FALSE
##   [5,] FALSE FALSE          FALSE
##   [6,] FALSE FALSE          FALSE
##   [7,] FALSE FALSE          FALSE
##   [8,] FALSE FALSE          FALSE
##   [9,] FALSE FALSE          FALSE
##  [10,] FALSE FALSE          FALSE
##  [11,] FALSE FALSE          FALSE
##  [12,] FALSE FALSE          FALSE
##  [13,] FALSE FALSE          FALSE
##  [14,] FALSE FALSE          FALSE
##  [15,] FALSE FALSE          FALSE
##  [16,] FALSE FALSE          FALSE
##  [17,] FALSE FALSE          FALSE
##  [18,] FALSE FALSE          FALSE
##  [19,] FALSE FALSE          FALSE
##  [20,] FALSE FALSE          FALSE
##  [21,] FALSE FALSE          FALSE
##  [22,] FALSE FALSE          FALSE
##  [23,] FALSE FALSE          FALSE
##  [24,] FALSE FALSE          FALSE
##  [25,] FALSE FALSE          FALSE
##  [26,] FALSE FALSE          FALSE
##  [27,] FALSE FALSE          FALSE
##  [28,] FALSE FALSE          FALSE
##  [29,] FALSE FALSE          FALSE
##  [30,] FALSE FALSE          FALSE
##  [31,] FALSE FALSE          FALSE
##  [32,] FALSE FALSE          FALSE
##  [33,] FALSE FALSE          FALSE
##  [34,] FALSE FALSE          FALSE
##  [35,] FALSE FALSE          FALSE
##  [36,] FALSE FALSE          FALSE
##  [37,] FALSE FALSE          FALSE
##  [38,] FALSE FALSE          FALSE
##  [39,] FALSE FALSE          FALSE
##  [40,] FALSE FALSE          FALSE
##  [41,] FALSE FALSE          FALSE
##  [42,] FALSE FALSE          FALSE
##  [43,] FALSE FALSE          FALSE
##  [44,] FALSE FALSE          FALSE
##  [45,] FALSE FALSE          FALSE
##  [46,] FALSE FALSE          FALSE
##  [47,] FALSE FALSE          FALSE
##  [48,] FALSE FALSE          FALSE
##  [49,] FALSE FALSE          FALSE
##  [50,] FALSE FALSE          FALSE
##  [51,] FALSE FALSE          FALSE
##  [52,] FALSE FALSE          FALSE
##  [53,] FALSE FALSE          FALSE
##  [54,] FALSE FALSE          FALSE
##  [55,] FALSE FALSE          FALSE
##  [56,] FALSE FALSE          FALSE
##  [57,] FALSE FALSE          FALSE
##  [58,] FALSE FALSE          FALSE
##  [59,] FALSE FALSE          FALSE
##  [60,] FALSE FALSE          FALSE
##  [61,] FALSE FALSE          FALSE
##  [62,] FALSE FALSE          FALSE
##  [63,] FALSE FALSE          FALSE
##  [64,] FALSE FALSE          FALSE
##  [65,] FALSE FALSE          FALSE
##  [66,] FALSE FALSE          FALSE
##  [67,] FALSE FALSE          FALSE
##  [68,] FALSE FALSE          FALSE
##  [69,] FALSE FALSE          FALSE
##  [70,] FALSE FALSE          FALSE
##  [71,] FALSE FALSE          FALSE
##  [72,] FALSE FALSE          FALSE
##  [73,] FALSE FALSE          FALSE
##  [74,] FALSE FALSE          FALSE
##  [75,] FALSE FALSE          FALSE
##  [76,] FALSE FALSE          FALSE
##  [77,] FALSE FALSE          FALSE
##  [78,] FALSE FALSE          FALSE
##  [79,] FALSE FALSE          FALSE
##  [80,] FALSE FALSE          FALSE
##  [81,] FALSE FALSE          FALSE
##  [82,] FALSE FALSE          FALSE
##  [83,] FALSE FALSE          FALSE
##  [84,] FALSE FALSE          FALSE
##  [85,] FALSE FALSE          FALSE
##  [86,] FALSE FALSE          FALSE
##  [87,] FALSE FALSE          FALSE
##  [88,] FALSE FALSE          FALSE
##  [89,] FALSE FALSE          FALSE
##  [90,] FALSE FALSE          FALSE
##  [91,] FALSE FALSE          FALSE
##  [92,] FALSE FALSE          FALSE
##  [93,] FALSE FALSE          FALSE
##  [94,] FALSE FALSE          FALSE
##  [95,] FALSE FALSE          FALSE
##  [96,] FALSE FALSE          FALSE
##  [97,] FALSE FALSE          FALSE
##  [98,] FALSE FALSE          FALSE
##  [99,] FALSE FALSE          FALSE
## [100,] FALSE FALSE          FALSE
## [101,] FALSE FALSE          FALSE
## [102,] FALSE FALSE          FALSE
## [103,] FALSE FALSE          FALSE
## [104,] FALSE FALSE          FALSE
## [105,] FALSE FALSE          FALSE
## [106,] FALSE FALSE          FALSE
## [107,] FALSE FALSE          FALSE
## [108,] FALSE FALSE          FALSE
## [109,] FALSE FALSE          FALSE
## [110,] FALSE FALSE          FALSE
## [111,] FALSE FALSE          FALSE
## [112,] FALSE FALSE          FALSE
## [113,] FALSE FALSE          FALSE
## [114,] FALSE FALSE          FALSE
## [115,] FALSE FALSE          FALSE
## [116,] FALSE FALSE          FALSE
## [117,] FALSE FALSE          FALSE
## [118,] FALSE FALSE          FALSE
## [119,] FALSE FALSE          FALSE
## [120,] FALSE FALSE          FALSE
## [121,] FALSE FALSE          FALSE
## [122,] FALSE FALSE          FALSE
## [123,] FALSE FALSE          FALSE
## [124,] FALSE FALSE          FALSE
## [125,] FALSE FALSE          FALSE
## [126,] FALSE FALSE          FALSE
## [127,] FALSE FALSE          FALSE
## [128,] FALSE FALSE          FALSE
## [129,] FALSE FALSE          FALSE
## [130,] FALSE FALSE          FALSE
## [131,] FALSE FALSE          FALSE
## [132,] FALSE FALSE          FALSE
## [133,] FALSE FALSE          FALSE
## [134,] FALSE FALSE          FALSE
## [135,] FALSE FALSE          FALSE
## [136,] FALSE FALSE          FALSE
## [137,] FALSE FALSE          FALSE
## [138,] FALSE FALSE          FALSE
## [139,] FALSE FALSE          FALSE
## [140,] FALSE FALSE          FALSE
## [141,] FALSE FALSE          FALSE
## [142,] FALSE FALSE          FALSE
## [143,] FALSE FALSE          FALSE
## [144,] FALSE FALSE          FALSE
## [145,] FALSE FALSE          FALSE
## [146,] FALSE FALSE             NA
## [147,] FALSE FALSE          FALSE
## [148,] FALSE FALSE          FALSE
## [149,] FALSE FALSE          FALSE
## [150,] FALSE FALSE          FALSE
## [151,] FALSE FALSE          FALSE
## [152,] FALSE FALSE          FALSE
## [153,] FALSE FALSE          FALSE
## [154,] FALSE FALSE          FALSE
## [155,] FALSE FALSE          FALSE
## [156,] FALSE FALSE          FALSE
## [157,] FALSE FALSE          FALSE
## [158,] FALSE FALSE          FALSE
## [159,] FALSE FALSE          FALSE
## [160,] FALSE FALSE          FALSE
## [161,] FALSE FALSE          FALSE
## [162,] FALSE FALSE          FALSE
## [163,] FALSE FALSE          FALSE
## [164,] FALSE FALSE          FALSE
## [165,] FALSE FALSE          FALSE
## [166,] FALSE FALSE          FALSE
## [167,] FALSE FALSE          FALSE
## [168,] FALSE FALSE          FALSE
## [169,] FALSE FALSE          FALSE
## [170,] FALSE FALSE          FALSE
## [171,] FALSE FALSE          FALSE
## [172,] FALSE FALSE          FALSE
## [173,] FALSE FALSE          FALSE
## [174,] FALSE FALSE          FALSE
# Same exact-equality test restricted to the Major column. It is still a
# whole-string comparison, so a major that only contains "DATA" is FALSE.
My_173Majors$Major == "DATA"
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE
# Case-insensitive regular-expression search of the Major column.
# value = TRUE returns the matching strings themselves rather than positions.
grep("DATA", My_173Majors[["Major"]], ignore.case = TRUE, value = TRUE)
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
# Answer = COMPUTER PROGRAMMING AND DATA PROCESSING

#select(a173College_majors) where a173College_majors$Major == "COMPUTER PROGRAMMING AND DATA PROCESSING  Not sure about this search
result1 <- filter(My_173Majors, Major == "COMPUTER PROGRAMMING AND DATA PROCESSING")
result1 # display the major and major category
##   FOD1P                                    Major          Major_Category
## 1  2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics

Data manipulation

Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether these words, “DATA” or “STATISTICS”, exist, and then display the result.

# Case-insensitive search for the second keyword. Both keywords could also be
# matched in a single call with regex alternation, e.g. "DATA|STATISTICS".
grep("STATISTICS", My_173Majors$Major, ignore.case = TRUE, value = TRUE)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
# Answer = "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS" , "STATISTICS AND DECISION SCIENCE"

# Fetch the complete row for the major that contains "DATA".
result1 <- My_173Majors %>%
  dplyr::filter(Major == "COMPUTER PROGRAMMING AND DATA PROCESSING")
result1 # display the major and major category
##   FOD1P                                    Major          Major_Category
## 1  2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics

Data manipulation

Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether these words, “DATA” or “STATISTICS”, exist, and then display the result.

# Keep only the row whose Major is exactly the "DATA" major found earlier.
result1 <- My_173Majors %>%
  dplyr::filter(Major == "COMPUTER PROGRAMMING AND DATA PROCESSING")
result1 # display the major and major category
##   FOD1P                                    Major          Major_Category
## 1  2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics

Data manipulation

Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether these words, “DATA” or “STATISTICS”, exist, and then display the result.

# Keep only the row for the first major that contains "STATISTICS".
result1 <- My_173Majors %>%
  dplyr::filter(Major == "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS")
result1 # display the major and major category
##   FOD1P                                         Major Major_Category
## 1  6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS       Business

Data manipulation

Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether these words, “DATA” or “STATISTICS”, exist, and then display the result.

# Keep only the row for the second major that contains "STATISTICS".
result1 <- My_173Majors %>%
  dplyr::filter(Major == "STATISTICS AND DECISION SCIENCE")
result1 # display the major and major category
##   FOD1P                           Major          Major_Category
## 1  3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics

Data transformation

[1] “bell pepper” “bilberry” “blackberry” “blood orange”

[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”

[9] “elderberry” “lime” “lychee” “mulberry”

[13] “olive” “salal berry”

Let’s transform the above data into a new format like the one below

c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)

# Store the printed vector shown above as raw text: a one-element list holding a
# single string. The four printed rows are joined by a blank line ("\n\n"), and
# the trailing spaces of the second and third rows are kept inside the quotes.
vegies <- list(paste(
  '[1] "bell pepper"  "bilberry"     "blackberry"   "blood orange"',
  '[5] "blueberry"    "cantaloupe"   "chili pepper" "cloudberry"  ',
  '[9] "elderberry"   "lime"         "lychee"       "mulberry"    ',
  '[13] "olive"        "salal berry"',
  sep = "\n\n"
))

vegies
## [[1]]
## [1] "[1] \"bell pepper\"  \"bilberry\"     \"blackberry\"   \"blood orange\"\n\n[5] \"blueberry\"    \"cantaloupe\"   \"chili pepper\" \"cloudberry\"  \n\n[9] \"elderberry\"   \"lime\"         \"lychee\"       \"mulberry\"    \n\n[13] \"olive\"        \"salal berry\""
# The backslashes in the printed value are only print()'s escaping of the quotes,
# not characters stored in the text. Extract every name from the raw text: a run
# of letters, optionally one further character (e.g. the space in "bell pepper"),
# then another run of letters.
pattern <- "[A-Za-z]+.?[A-Za-z]+"
vegies <- str_extract_all(string = vegies, pattern = pattern)
vegies
## [[1]]
##  [1] "bell pepper"  "bilberry"     "blackberry"   "blood orange" "blueberry"   
##  [6] "cantaloupe"   "chili pepper" "cloudberry"   "elderberry"   "lime"        
## [11] "lychee"       "mulberry"     "olive"        "salal berry"
# The names are now extracted; join them into one string that reads like a
# c("...", "...") call.
# str_extract_all() returns a list, so take its first element (the character
# vector of names), wrap each name in double quotes and join with ", ".
# Building the text explicitly avoids passing a list to str_c(), which only
# produced this text by coercing the list and raised a warning while doing so.
vegies1 <- str_c("c(", str_c('"', vegies[[1]], '"', collapse = ", "), ")")
vegies1
## [1] "c(\"bell pepper\", \"bilberry\", \"blackberry\", \"blood orange\", \"blueberry\", \"cantaloupe\", \"chili pepper\", \"cloudberry\", \"elderberry\", \"lime\", \"lychee\", \"mulberry\", \"olive\", \"salal berry\")"
# writeLines() prints the text without the escaping backslashes. Its return
# value (NULL, returned invisibly) is what ends up in vegies2; the text itself
# stays in vegies1.
vegies2 <- writeLines(text = vegies1)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")

#3 Describe, in words, what these expressions will match:

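(.)\1\1                ==   the same character three times in a row, e.g. "aaa"
"(.)(.)\\2\\1"         ==   a pair of characters followed by the same pair in reverse order, e.g. "abba"
(..)\1                 ==   any two characters repeated immediately, e.g. "a1a1"
"(.).\\1.\\1"          ==   a character, any character, the first character again, any character, then the first character again, e.g. "abaca"
"(.)(.)(.).*\\3\\2\\1" ==   three characters, then zero or more of any characters, then the same three characters in reverse order, e.g. "abcxyzcba"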
# Try the last pattern (three characters, anything, then the same three
# characters in reverse order) against a few sample strings.
str_view(
  string = c("lollolol", "binibin", "dada", "babadabadw", "ggoo", "fff"),
  pattern = "(.)(.)(.).*\\3\\2\\1"
)

#4 Construct regular expressions to match words that:

Start and end with the same character.     === "^(.)(.*\\1)?$"  (e.g. "ada", "lollolol", "nibin"; the ^ and $ anchors make the pattern cover the whole word)
Contain a repeated pair of letters (e.g. "church" contains "ch" repeated twice.)    === "(..).*\\1"
Contain one letter repeated in at least three places (e.g. "eleven" contains three "e"s.)    === "([A-Za-z]).*\\1.*\\1"
# Check the "start and end with the same character" pattern on sample strings.
# The leading ^ anchors the captured character to the start of the string;
# without it the pattern also matches inner substrings such as "ada" in "dada".
# The optional group lets a single-character string match as well.
str_view(
  string = c("lollolol", "binibin", "dada", "babadabadw", "ggoo", "fff"),
  pattern = "^(.)(.*\\1)?$"
)