library(tidyverse) #loading all library needed for this assignment
## -- Attaching packages -------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(dplyr)
library(DBI)
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(rstudioapi)
library(RJDBC)
## Loading required package: rJava
library(odbc)
library(RSQLite)
library(readr)
library(RCurl)
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:rJava':
##
## clone
## The following object is masked from 'package:tidyr':
##
## complete
library(stringr)
I can load the data frame from my local drive, or I can make a call to GitHub to pull up the same data frame.
For some reason, at the last minute, I kept getting an error when knitting: "Quitting from lines 37-54 (DATA607_Assg3_String_Manip.Rmd) Error in eval(expr, envir, enclos) : …". So I commented out the working-directory call and read the file from GitHub instead.
#setwd("~/R/DATA607_Assg3") #set the working directory to access dataframe, major-list
#a173College_majors
#View(a173College_majors)
# looking at what the data look like
#View(a173College_majors) #another way of looking at the data
#~/R/DATA607_Assg3/a173College_majors
# This access also work as call is made in the working directory to pull up file name = major-list.csv ....majors_list <- read_csv("~/R/DATA607_Assg3/majors-list.csv")
# This access from github to Rstudio also work
# Read the majors list straight from the raw GitHub URL. The first row holds
# the column names, and text columns stay character vectors (no factors).
My_173Majors <- read.csv(
  file = "https://raw.githubusercontent.com/asmozo24/data/master/college-majors/majors-list.csv",
  stringsAsFactors = FALSE,
  header = TRUE
)
# Print the whole data frame to confirm that the expected table was loaded.
My_173Majors
## FOD1P Major
## 1 1100 GENERAL AGRICULTURE
## 2 1101 AGRICULTURE PRODUCTION AND MANAGEMENT
## 3 1102 AGRICULTURAL ECONOMICS
## 4 1103 ANIMAL SCIENCES
## 5 1104 FOOD SCIENCE
## 6 1105 PLANT SCIENCE AND AGRONOMY
## 7 1106 SOIL SCIENCE
## 8 1199 MISCELLANEOUS AGRICULTURE
## 9 1302 FORESTRY
## 10 1303 NATURAL RESOURCES MANAGEMENT
## 11 6000 FINE ARTS
## 12 6001 DRAMA AND THEATER ARTS
## 13 6002 MUSIC
## 14 6003 VISUAL AND PERFORMING ARTS
## 15 6004 COMMERCIAL ART AND GRAPHIC DESIGN
## 16 6005 FILM VIDEO AND PHOTOGRAPHIC ARTS
## 17 6007 STUDIO ARTS
## 18 6099 MISCELLANEOUS FINE ARTS
## 19 1301 ENVIRONMENTAL SCIENCE
## 20 3600 BIOLOGY
## 21 3601 BIOCHEMICAL SCIENCES
## 22 3602 BOTANY
## 23 3603 MOLECULAR BIOLOGY
## 24 3604 ECOLOGY
## 25 3605 GENETICS
## 26 3606 MICROBIOLOGY
## 27 3607 PHARMACOLOGY
## 28 3608 PHYSIOLOGY
## 29 3609 ZOOLOGY
## 30 3611 NEUROSCIENCE
## 31 3699 MISCELLANEOUS BIOLOGY
## 32 4006 COGNITIVE SCIENCE AND BIOPSYCHOLOGY
## 33 6200 GENERAL BUSINESS
## 34 6201 ACCOUNTING
## 35 6202 ACTUARIAL SCIENCE
## 36 6203 BUSINESS MANAGEMENT AND ADMINISTRATION
## 37 6204 OPERATIONS LOGISTICS AND E-COMMERCE
## 38 6205 BUSINESS ECONOMICS
## 39 6206 MARKETING AND MARKETING RESEARCH
## 40 6207 FINANCE
## 41 6209 HUMAN RESOURCES AND PERSONNEL MANAGEMENT
## 42 6210 INTERNATIONAL BUSINESS
## 43 6211 HOSPITALITY MANAGEMENT
## 44 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS
## 45 6299 MISCELLANEOUS BUSINESS & MEDICAL ADMINISTRATION
## 46 1901 COMMUNICATIONS
## 47 1902 JOURNALISM
## 48 1903 MASS MEDIA
## 49 1904 ADVERTISING AND PUBLIC RELATIONS
## 50 2001 COMMUNICATION TECHNOLOGIES
## 51 2100 COMPUTER AND INFORMATION SYSTEMS
## 52 2101 COMPUTER PROGRAMMING AND DATA PROCESSING
## 53 2102 COMPUTER SCIENCE
## 54 2105 INFORMATION SCIENCES
## 55 2106 COMPUTER ADMINISTRATION MANAGEMENT AND SECURITY
## 56 2107 COMPUTER NETWORKING AND TELECOMMUNICATIONS
## 57 3700 MATHEMATICS
## 58 3701 APPLIED MATHEMATICS
## 59 3702 STATISTICS AND DECISION SCIENCE
## 60 4005 MATHEMATICS AND COMPUTER SCIENCE
## 61 2300 GENERAL EDUCATION
## 62 2301 EDUCATIONAL ADMINISTRATION AND SUPERVISION
## 63 2303 SCHOOL STUDENT COUNSELING
## 64 2304 ELEMENTARY EDUCATION
## 65 2305 MATHEMATICS TEACHER EDUCATION
## 66 2306 PHYSICAL AND HEALTH EDUCATION TEACHING
## 67 2307 EARLY CHILDHOOD EDUCATION
## 68 2308 SCIENCE AND COMPUTER TEACHER EDUCATION
## 69 2309 SECONDARY TEACHER EDUCATION
## 70 2310 SPECIAL NEEDS EDUCATION
## 71 2311 SOCIAL SCIENCE OR HISTORY TEACHER EDUCATION
## 72 2312 TEACHER EDUCATION: MULTIPLE LEVELS
## 73 2313 LANGUAGE AND DRAMA EDUCATION
## 74 2314 ART AND MUSIC EDUCATION
## 75 2399 MISCELLANEOUS EDUCATION
## 76 3501 LIBRARY SCIENCE
## 77 1401 ARCHITECTURE
## 78 2400 GENERAL ENGINEERING
## 79 2401 AEROSPACE ENGINEERING
## 80 2402 BIOLOGICAL ENGINEERING
## 81 2403 ARCHITECTURAL ENGINEERING
## 82 2404 BIOMEDICAL ENGINEERING
## 83 2405 CHEMICAL ENGINEERING
## 84 2406 CIVIL ENGINEERING
## 85 2407 COMPUTER ENGINEERING
## 86 2408 ELECTRICAL ENGINEERING
## 87 2409 ENGINEERING MECHANICS PHYSICS AND SCIENCE
## 88 2410 ENVIRONMENTAL ENGINEERING
## 89 2411 GEOLOGICAL AND GEOPHYSICAL ENGINEERING
## 90 2412 INDUSTRIAL AND MANUFACTURING ENGINEERING
## 91 2413 MATERIALS ENGINEERING AND MATERIALS SCIENCE
## 92 2414 MECHANICAL ENGINEERING
## 93 2415 METALLURGICAL ENGINEERING
## 94 2416 MINING AND MINERAL ENGINEERING
## 95 2417 NAVAL ARCHITECTURE AND MARINE ENGINEERING
## 96 2418 NUCLEAR ENGINEERING
## 97 2419 PETROLEUM ENGINEERING
## 98 2499 MISCELLANEOUS ENGINEERING
## 99 2500 ENGINEERING TECHNOLOGIES
## 100 2501 ENGINEERING AND INDUSTRIAL MANAGEMENT
## 101 2502 ELECTRICAL ENGINEERING TECHNOLOGY
## 102 2503 INDUSTRIAL PRODUCTION TECHNOLOGIES
## 103 2504 MECHANICAL ENGINEERING RELATED TECHNOLOGIES
## 104 2599 MISCELLANEOUS ENGINEERING TECHNOLOGIES
## 105 5008 MATERIALS SCIENCE
## 106 4002 NUTRITION SCIENCES
## 107 6100 GENERAL MEDICAL AND HEALTH SERVICES
## 108 6102 COMMUNICATION DISORDERS SCIENCES AND SERVICES
## 109 6103 HEALTH AND MEDICAL ADMINISTRATIVE SERVICES
## 110 6104 MEDICAL ASSISTING SERVICES
## 111 6105 MEDICAL TECHNOLOGIES TECHNICIANS
## 112 6106 HEALTH AND MEDICAL PREPARATORY PROGRAMS
## 113 6107 NURSING
## 114 6108 PHARMACY PHARMACEUTICAL SCIENCES AND ADMINISTRATION
## 115 6109 TREATMENT THERAPY PROFESSIONS
## 116 6110 COMMUNITY AND PUBLIC HEALTH
## 117 6199 MISCELLANEOUS HEALTH MEDICAL PROFESSIONS
## 118 1501 AREA ETHNIC AND CIVILIZATION STUDIES
## 119 2601 LINGUISTICS AND COMPARATIVE LANGUAGE AND LITERATURE
## 120 2602 FRENCH GERMAN LATIN AND OTHER COMMON FOREIGN LANGUAGE STUDIES
## 121 2603 OTHER FOREIGN LANGUAGES
## 122 3301 ENGLISH LANGUAGE AND LITERATURE
## 123 3302 COMPOSITION AND RHETORIC
## 124 3401 LIBERAL ARTS
## 125 3402 HUMANITIES
## 126 4001 INTERCULTURAL AND INTERNATIONAL STUDIES
## 127 4801 PHILOSOPHY AND RELIGIOUS STUDIES
## 128 4901 THEOLOGY AND RELIGIOUS VOCATIONS
## 129 5502 ANTHROPOLOGY AND ARCHEOLOGY
## 130 6006 ART HISTORY AND CRITICISM
## 131 6402 HISTORY
## 132 6403 UNITED STATES HISTORY
## 133 2201 COSMETOLOGY SERVICES AND CULINARY ARTS
## 134 2901 FAMILY AND CONSUMER SCIENCES
## 135 3801 MILITARY TECHNOLOGIES
## 136 4101 PHYSICAL FITNESS PARKS RECREATION AND LEISURE
## 137 5601 CONSTRUCTION SERVICES
## 138 5701 ELECTRICAL, MECHANICAL, AND PRECISION TECHNOLOGIES AND PRODUCTION
## 139 5901 TRANSPORTATION SCIENCES AND TECHNOLOGIES
## 140 4000 MULTI/INTERDISCIPLINARY STUDIES
## 141 3201 COURT REPORTING
## 142 3202 PRE-LAW AND LEGAL STUDIES
## 143 5301 CRIMINAL JUSTICE AND FIRE PROTECTION
## 144 5401 PUBLIC ADMINISTRATION
## 145 5402 PUBLIC POLICY
## 146 bbbb N/A (less than bachelor's degree)
## 147 5000 PHYSICAL SCIENCES
## 148 5001 ASTRONOMY AND ASTROPHYSICS
## 149 5002 ATMOSPHERIC SCIENCES AND METEOROLOGY
## 150 5003 CHEMISTRY
## 151 5004 GEOLOGY AND EARTH SCIENCE
## 152 5005 GEOSCIENCES
## 153 5006 OCEANOGRAPHY
## 154 5007 PHYSICS
## 155 5098 MULTI-DISCIPLINARY OR GENERAL SCIENCE
## 156 5102 NUCLEAR, INDUSTRIAL RADIOLOGY, AND BIOLOGICAL TECHNOLOGIES
## 157 5200 PSYCHOLOGY
## 158 5201 EDUCATIONAL PSYCHOLOGY
## 159 5202 CLINICAL PSYCHOLOGY
## 160 5203 COUNSELING PSYCHOLOGY
## 161 5205 INDUSTRIAL AND ORGANIZATIONAL PSYCHOLOGY
## 162 5206 SOCIAL PSYCHOLOGY
## 163 5299 MISCELLANEOUS PSYCHOLOGY
## 164 5403 HUMAN SERVICES AND COMMUNITY ORGANIZATION
## 165 5404 SOCIAL WORK
## 166 4007 INTERDISCIPLINARY SOCIAL SCIENCES
## 167 5500 GENERAL SOCIAL SCIENCES
## 168 5501 ECONOMICS
## 169 5503 CRIMINOLOGY
## 170 5504 GEOGRAPHY
## 171 5505 INTERNATIONAL RELATIONS
## 172 5506 POLITICAL SCIENCE AND GOVERNMENT
## 173 5507 SOCIOLOGY
## 174 5599 MISCELLANEOUS SOCIAL SCIENCES
## Major_Category
## 1 Agriculture & Natural Resources
## 2 Agriculture & Natural Resources
## 3 Agriculture & Natural Resources
## 4 Agriculture & Natural Resources
## 5 Agriculture & Natural Resources
## 6 Agriculture & Natural Resources
## 7 Agriculture & Natural Resources
## 8 Agriculture & Natural Resources
## 9 Agriculture & Natural Resources
## 10 Agriculture & Natural Resources
## 11 Arts
## 12 Arts
## 13 Arts
## 14 Arts
## 15 Arts
## 16 Arts
## 17 Arts
## 18 Arts
## 19 Biology & Life Science
## 20 Biology & Life Science
## 21 Biology & Life Science
## 22 Biology & Life Science
## 23 Biology & Life Science
## 24 Biology & Life Science
## 25 Biology & Life Science
## 26 Biology & Life Science
## 27 Biology & Life Science
## 28 Biology & Life Science
## 29 Biology & Life Science
## 30 Biology & Life Science
## 31 Biology & Life Science
## 32 Biology & Life Science
## 33 Business
## 34 Business
## 35 Business
## 36 Business
## 37 Business
## 38 Business
## 39 Business
## 40 Business
## 41 Business
## 42 Business
## 43 Business
## 44 Business
## 45 Business
## 46 Communications & Journalism
## 47 Communications & Journalism
## 48 Communications & Journalism
## 49 Communications & Journalism
## 50 Computers & Mathematics
## 51 Computers & Mathematics
## 52 Computers & Mathematics
## 53 Computers & Mathematics
## 54 Computers & Mathematics
## 55 Computers & Mathematics
## 56 Computers & Mathematics
## 57 Computers & Mathematics
## 58 Computers & Mathematics
## 59 Computers & Mathematics
## 60 Computers & Mathematics
## 61 Education
## 62 Education
## 63 Education
## 64 Education
## 65 Education
## 66 Education
## 67 Education
## 68 Education
## 69 Education
## 70 Education
## 71 Education
## 72 Education
## 73 Education
## 74 Education
## 75 Education
## 76 Education
## 77 Engineering
## 78 Engineering
## 79 Engineering
## 80 Engineering
## 81 Engineering
## 82 Engineering
## 83 Engineering
## 84 Engineering
## 85 Engineering
## 86 Engineering
## 87 Engineering
## 88 Engineering
## 89 Engineering
## 90 Engineering
## 91 Engineering
## 92 Engineering
## 93 Engineering
## 94 Engineering
## 95 Engineering
## 96 Engineering
## 97 Engineering
## 98 Engineering
## 99 Engineering
## 100 Engineering
## 101 Engineering
## 102 Engineering
## 103 Engineering
## 104 Engineering
## 105 Engineering
## 106 Health
## 107 Health
## 108 Health
## 109 Health
## 110 Health
## 111 Health
## 112 Health
## 113 Health
## 114 Health
## 115 Health
## 116 Health
## 117 Health
## 118 Humanities & Liberal Arts
## 119 Humanities & Liberal Arts
## 120 Humanities & Liberal Arts
## 121 Humanities & Liberal Arts
## 122 Humanities & Liberal Arts
## 123 Humanities & Liberal Arts
## 124 Humanities & Liberal Arts
## 125 Humanities & Liberal Arts
## 126 Humanities & Liberal Arts
## 127 Humanities & Liberal Arts
## 128 Humanities & Liberal Arts
## 129 Humanities & Liberal Arts
## 130 Humanities & Liberal Arts
## 131 Humanities & Liberal Arts
## 132 Humanities & Liberal Arts
## 133 Industrial Arts & Consumer Services
## 134 Industrial Arts & Consumer Services
## 135 Industrial Arts & Consumer Services
## 136 Industrial Arts & Consumer Services
## 137 Industrial Arts & Consumer Services
## 138 Industrial Arts & Consumer Services
## 139 Industrial Arts & Consumer Services
## 140 Interdisciplinary
## 141 Law & Public Policy
## 142 Law & Public Policy
## 143 Law & Public Policy
## 144 Law & Public Policy
## 145 Law & Public Policy
## 146 <NA>
## 147 Physical Sciences
## 148 Physical Sciences
## 149 Physical Sciences
## 150 Physical Sciences
## 151 Physical Sciences
## 152 Physical Sciences
## 153 Physical Sciences
## 154 Physical Sciences
## 155 Physical Sciences
## 156 Physical Sciences
## 157 Psychology & Social Work
## 158 Psychology & Social Work
## 159 Psychology & Social Work
## 160 Psychology & Social Work
## 161 Psychology & Social Work
## 162 Psychology & Social Work
## 163 Psychology & Social Work
## 164 Psychology & Social Work
## 165 Psychology & Social Work
## 166 Social Science
## 167 Social Science
## 168 Social Science
## 169 Social Science
## 170 Social Science
## 171 Social Science
## 172 Social Science
## 173 Social Science
## 174 Social Science
#file.rename("~/R/DATA607_Assg3/majors-list.csv", "a173College_majors.csv") # renaming the file dataframe
# This access also work as call is made in the working directory to pull up file name = major-list.csv ....majors_list <- read_csv("~/R/DATA607_Assg3/majors-list.csv")
#a173College_majors
Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether the words “DATA” or “STATISTICS” appear in the major names, then display the matching rows.
# Exact-equality test of every cell in the table against "DATA". A cell is TRUE
# only when its whole value is "DATA", so this cannot find majors that merely
# contain the word (every result below is FALSE or NA).
My_173Majors == "DATA" # compares the whole table cell by cell; not a substring search
## FOD1P Major Major_Category
## [1,] FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE
## [20,] FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE
## [41,] FALSE FALSE FALSE
## [42,] FALSE FALSE FALSE
## [43,] FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE
## [45,] FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE
## [47,] FALSE FALSE FALSE
## [48,] FALSE FALSE FALSE
## [49,] FALSE FALSE FALSE
## [50,] FALSE FALSE FALSE
## [51,] FALSE FALSE FALSE
## [52,] FALSE FALSE FALSE
## [53,] FALSE FALSE FALSE
## [54,] FALSE FALSE FALSE
## [55,] FALSE FALSE FALSE
## [56,] FALSE FALSE FALSE
## [57,] FALSE FALSE FALSE
## [58,] FALSE FALSE FALSE
## [59,] FALSE FALSE FALSE
## [60,] FALSE FALSE FALSE
## [61,] FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE
## [63,] FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE
## [65,] FALSE FALSE FALSE
## [66,] FALSE FALSE FALSE
## [67,] FALSE FALSE FALSE
## [68,] FALSE FALSE FALSE
## [69,] FALSE FALSE FALSE
## [70,] FALSE FALSE FALSE
## [71,] FALSE FALSE FALSE
## [72,] FALSE FALSE FALSE
## [73,] FALSE FALSE FALSE
## [74,] FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE
## [76,] FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE
## [78,] FALSE FALSE FALSE
## [79,] FALSE FALSE FALSE
## [80,] FALSE FALSE FALSE
## [81,] FALSE FALSE FALSE
## [82,] FALSE FALSE FALSE
## [83,] FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE
## [85,] FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE
## [87,] FALSE FALSE FALSE
## [88,] FALSE FALSE FALSE
## [89,] FALSE FALSE FALSE
## [90,] FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE
## [92,] FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE
## [94,] FALSE FALSE FALSE
## [95,] FALSE FALSE FALSE
## [96,] FALSE FALSE FALSE
## [97,] FALSE FALSE FALSE
## [98,] FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE
## [100,] FALSE FALSE FALSE
## [101,] FALSE FALSE FALSE
## [102,] FALSE FALSE FALSE
## [103,] FALSE FALSE FALSE
## [104,] FALSE FALSE FALSE
## [105,] FALSE FALSE FALSE
## [106,] FALSE FALSE FALSE
## [107,] FALSE FALSE FALSE
## [108,] FALSE FALSE FALSE
## [109,] FALSE FALSE FALSE
## [110,] FALSE FALSE FALSE
## [111,] FALSE FALSE FALSE
## [112,] FALSE FALSE FALSE
## [113,] FALSE FALSE FALSE
## [114,] FALSE FALSE FALSE
## [115,] FALSE FALSE FALSE
## [116,] FALSE FALSE FALSE
## [117,] FALSE FALSE FALSE
## [118,] FALSE FALSE FALSE
## [119,] FALSE FALSE FALSE
## [120,] FALSE FALSE FALSE
## [121,] FALSE FALSE FALSE
## [122,] FALSE FALSE FALSE
## [123,] FALSE FALSE FALSE
## [124,] FALSE FALSE FALSE
## [125,] FALSE FALSE FALSE
## [126,] FALSE FALSE FALSE
## [127,] FALSE FALSE FALSE
## [128,] FALSE FALSE FALSE
## [129,] FALSE FALSE FALSE
## [130,] FALSE FALSE FALSE
## [131,] FALSE FALSE FALSE
## [132,] FALSE FALSE FALSE
## [133,] FALSE FALSE FALSE
## [134,] FALSE FALSE FALSE
## [135,] FALSE FALSE FALSE
## [136,] FALSE FALSE FALSE
## [137,] FALSE FALSE FALSE
## [138,] FALSE FALSE FALSE
## [139,] FALSE FALSE FALSE
## [140,] FALSE FALSE FALSE
## [141,] FALSE FALSE FALSE
## [142,] FALSE FALSE FALSE
## [143,] FALSE FALSE FALSE
## [144,] FALSE FALSE FALSE
## [145,] FALSE FALSE FALSE
## [146,] FALSE FALSE NA
## [147,] FALSE FALSE FALSE
## [148,] FALSE FALSE FALSE
## [149,] FALSE FALSE FALSE
## [150,] FALSE FALSE FALSE
## [151,] FALSE FALSE FALSE
## [152,] FALSE FALSE FALSE
## [153,] FALSE FALSE FALSE
## [154,] FALSE FALSE FALSE
## [155,] FALSE FALSE FALSE
## [156,] FALSE FALSE FALSE
## [157,] FALSE FALSE FALSE
## [158,] FALSE FALSE FALSE
## [159,] FALSE FALSE FALSE
## [160,] FALSE FALSE FALSE
## [161,] FALSE FALSE FALSE
## [162,] FALSE FALSE FALSE
## [163,] FALSE FALSE FALSE
## [164,] FALSE FALSE FALSE
## [165,] FALSE FALSE FALSE
## [166,] FALSE FALSE FALSE
## [167,] FALSE FALSE FALSE
## [168,] FALSE FALSE FALSE
## [169,] FALSE FALSE FALSE
## [170,] FALSE FALSE FALSE
## [171,] FALSE FALSE FALSE
## [172,] FALSE FALSE FALSE
## [173,] FALSE FALSE FALSE
## [174,] FALSE FALSE FALSE
# Same exact-equality test, restricted to the Major column. It is still not a
# substring search, so a major that only contains "DATA" yields FALSE.
My_173Majors$Major == "DATA" # compares each major name as a whole; not a substring search
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE
# Case-insensitive pattern search over the Major column; value = TRUE returns
# the matching major names themselves instead of their positions.
grep("DATA", My_173Majors$Major, ignore.case = TRUE, value = TRUE)
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"
# Answer = COMPUTER PROGRAMMING AND DATA PROCESSING
#select(a173College_majors) where a173College_majors$Major == "COMPUTER PROGRAMMING AND DATA PROCESSING Not sure about this search
# Keep only the row whose Major is exactly the name returned by the DATA search,
# then print it to show its code, major and major category.
result1 <- My_173Majors %>%
  filter(Major == "COMPUTER PROGRAMMING AND DATA PROCESSING")
result1
## FOD1P Major Major_Category
## 1 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether the words “DATA” or “STATISTICS” appear in the major names, then display the matching rows.
# Case-insensitive search for the second keyword; value = TRUE returns the
# matching major names. Both keywords could also be matched in a single call
# with regex alternation, e.g. the pattern "DATA|STATISTICS".
grep("STATISTICS", My_173Majors$Major, ignore.case = TRUE, value = TRUE)
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "STATISTICS AND DECISION SCIENCE"
# Answer = "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS" , "STATISTICS AND DECISION SCIENCE"
#select(a173College_majors) where a173College_majors$Major == "COMPUTER PROGRAMMING AND DATA PROCESSING Not sure about this search
# Look up the row for the DATA major again (same filter as used for the DATA
# search result) and print its code, major and major category.
result1 <- My_173Majors %>%
  filter(Major == "COMPUTER PROGRAMMING AND DATA PROCESSING")
result1
## FOD1P Major Major_Category
## 1 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether the words “DATA” or “STATISTICS” appear in the major names, then display the matching rows.
# Row for the single major whose name contains "DATA": keep the exact-name
# match and print its code, major and major category.
result1 <- My_173Majors %>%
  filter(Major == "COMPUTER PROGRAMMING AND DATA PROCESSING")
result1
## FOD1P Major Major_Category
## 1 2101 COMPUTER PROGRAMMING AND DATA PROCESSING Computers & Mathematics
Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether the words “DATA” or “STATISTICS” appear in the major names, then display the matching rows.
# Row for the first major whose name contains "STATISTICS": keep the exact-name
# match and print its code, major and major category.
result1 <- My_173Majors %>%
  filter(Major == "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS")
result1
## FOD1P Major Major_Category
## 1 6212 MANAGEMENT INFORMATION SYSTEMS AND STATISTICS Business
Looking at the data to identify the majors that contain either “DATA” or “STATISTICS”. There are probably many ways of getting the result. One is to check whether the words “DATA” or “STATISTICS” appear in the major names, then display the matching rows.
# Row for the second major whose name contains "STATISTICS": keep the
# exact-name match and print its code, major and major category.
result1 <- My_173Majors %>%
  filter(Major == "STATISTICS AND DECISION SCIENCE")
result1
## FOD1P Major Major_Category
## 1 3702 STATISTICS AND DECISION SCIENCE Computers & Mathematics
[1] “bell pepper” “bilberry” “blackberry” “blood orange”
[5] “blueberry” “cantaloupe” “chili pepper” “cloudberry”
[9] “elderberry” “lime” “lychee” “mulberry”
[13] “olive” “salal berry”
Let’s transform the above data into a new format like the one below
c(“bell pepper”, “bilberry”, “blackberry”, “blood orange”, “blueberry”, “cantaloupe”, “chili pepper”, “cloudberry”, “elderberry”, “lime”, “lychee”, “mulberry”, “olive”, “salal berry”)
# Store the printed console output (index markers such as [1] and the quote
# characters included) as one raw string, wrapped in a one-element list named vegies.
vegies = list ('[1] "bell pepper" "bilberry" "blackberry" "blood orange"
[5] "blueberry" "cantaloupe" "chili pepper" "cloudberry"
[9] "elderberry" "lime" "lychee" "mulberry"
[13] "olive" "salal berry"')
vegies
## [[1]]
## [1] "[1] \"bell pepper\" \"bilberry\" \"blackberry\" \"blood orange\"\n\n[5] \"blueberry\" \"cantaloupe\" \"chili pepper\" \"cloudberry\" \n\n[9] \"elderberry\" \"lime\" \"lychee\" \"mulberry\" \n\n[13] \"olive\" \"salal berry\""
# The backslashes above are only how R displays the embedded double quotes.
# Extract each item name: a run of letters, optionally one arbitrary character
# (e.g. the space in "bell pepper"), then another run of letters. This leaves
# out the [n] index markers and the quote characters. Note that the pattern
# needs at least two letters per item.
pattern <-"[A-Za-z]+.?[A-Za-z]+"
# str_extract_all() returns a list; its first element holds the item names.
vegies <- str_extract_all(vegies, pattern)
vegies
## [[1]]
## [1] "bell pepper" "bilberry" "blackberry" "blood orange" "blueberry"
## [6] "cantaloupe" "chili pepper" "cloudberry" "elderberry" "lime"
## [11] "lychee" "mulberry" "olive" "salal berry"
# Build the text of a c(...) call from the extracted names: wrap each name in
# double quotes, join the quoted names with ", ", then add "c(" and ")".
# vegies is the one-element list returned by str_extract_all(), so take its
# character vector with [[1]] instead of handing the list itself to str_c(),
# which only produced this text through a coercion warning.
vegies1 <- str_c("c(", str_c('"', vegies[[1]], '"', collapse = ", "), ")")
vegies1
## [1] "c(\"bell pepper\", \"bilberry\", \"blackberry\", \"blood orange\", \"blueberry\", \"cantaloupe\", \"chili pepper\", \"cloudberry\", \"elderberry\", \"lime\", \"lychee\", \"mulberry\", \"olive\", \"salal berry\")"
# writeLines() prints its argument and returns NULL, so keep the string itself
# in vegies2 and print it as a separate step.
vegies2 <- vegies1
writeLines(vegies2)
## c("bell pepper", "bilberry", "blackberry", "blood orange", "blueberry", "cantaloupe", "chili pepper", "cloudberry", "elderberry", "lime", "lychee", "mulberry", "olive", "salal berry")
#3 Describe, in words, what these expressions will match:
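(.)\1\1 == the same character three times in a row (e.g. "aaa"); written as an R string the pattern is "(.)\\1\\1"
"(.)(.)\\2\\1" == a pair of characters followed by the same pair in reverse order (e.g. "abba")
(..)\1 == any two characters immediately repeated (e.g. "abab"); written as an R string the pattern is "(..)\\1"
"(.).\\1.\\1" == a character, then any character, the same character again, any character, and the same character a third time (e.g. "abaca")
"(.)(.)(.).*\\3\\2\\1" == three characters, then zero or more characters of any kind, then the same three characters in reverse order (e.g. "abcxyzcba")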
# Try the pattern on a few sample strings: a string is highlighted only where
# three characters are later followed by the same three in reverse order.
str_view(
  c("lollolol", "binibin", "dada", "babadabadw", "ggoo", "fff"),
  "(.)(.)(.).*\\3\\2\\1"
)
#4 Construct regular expressions to match words that:
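Start and end with the same character, like "ada", "lollolol" or "nibin" === "^(.)(.*\\1)?$" (the ^ anchor forces the captured character to be the first one; the optional group also accepts one-letter words)
Contain a repeated pair of letters (e.g. "church" contains "ch" repeated twice.) === "(..).*\\1"
Contain one letter repeated in at least three places (e.g. "eleven" contains three "e"s.) === "(.).*\\1.*\\1"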
# Try "starts and ends with the same character" on a few sample strings.
# The pattern is anchored at both ends: ^ makes the captured character the
# first one and $ makes the backreference the last one. Without ^ a string such
# as "dada" would match on its "ada" part. The optional group lets a
# one-character string count as well.
str_view(
  c("lollolol", "binibin", "dada", "babadabadw", "ggoo", "fff"),
  "^(.)(.*\\1)?$"
)