library(tidyverse, quietly=TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Read the FiveThirtyEight list of college majors directly from GitHub.
major_data <- read.csv("https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/majors-list.csv")

# NOTE(review): no object named `data` has been assigned at this point, so this
# line prints the source of the utils::data() function (see the output below);
# `major_data` was presumably intended - confirm.
data
## function (..., list = character(), package = NULL, lib.loc = NULL, 
##     verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE) 
## {
##     fileExt <- function(x) {
##         db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
##         ans <- sub(".*\\.", "", x)
##         ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", 
##             x[db])
##         ans
##     }
##     my_read_table <- function(...) {
##         lcc <- Sys.getlocale("LC_COLLATE")
##         on.exit(Sys.setlocale("LC_COLLATE", lcc))
##         Sys.setlocale("LC_COLLATE", "C")
##         read.table(...)
##     }
##     stopifnot(is.character(list))
##     names <- c(as.character(substitute(list(...))[-1L]), list)
##     if (!is.null(package)) {
##         if (!is.character(package)) 
##             stop("'package' must be a character vector or NULL")
##     }
##     paths <- find.package(package, lib.loc, verbose = verbose)
##     if (is.null(lib.loc)) 
##         paths <- c(path.package(package, TRUE), if (!length(package)) getwd(), 
##             paths)
##     paths <- unique(normalizePath(paths[file.exists(paths)]))
##     paths <- paths[dir.exists(file.path(paths, "data"))]
##     dataExts <- tools:::.make_file_exts("data")
##     if (length(names) == 0L) {
##         db <- matrix(character(), nrow = 0L, ncol = 4L)
##         for (path in paths) {
##             entries <- NULL
##             packageName <- if (file_test("-f", file.path(path, 
##                 "DESCRIPTION"))) 
##                 basename(path)
##             else "."
##             if (file_test("-f", INDEX <- file.path(path, "Meta", 
##                 "data.rds"))) {
##                 entries <- readRDS(INDEX)
##             }
##             else {
##                 dataDir <- file.path(path, "data")
##                 entries <- tools::list_files_with_type(dataDir, 
##                   "data")
##                 if (length(entries)) {
##                   entries <- unique(tools::file_path_sans_ext(basename(entries)))
##                   entries <- cbind(entries, "")
##                 }
##             }
##             if (NROW(entries)) {
##                 if (is.matrix(entries) && ncol(entries) == 2L) 
##                   db <- rbind(db, cbind(packageName, dirname(path), 
##                     entries))
##                 else warning(gettextf("data index for package %s is invalid and will be ignored", 
##                   sQuote(packageName)), domain = NA, call. = FALSE)
##             }
##         }
##         colnames(db) <- c("Package", "LibPath", "Item", "Title")
##         footer <- if (missing(package)) 
##             paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")), 
##                 "\n", "to list the data sets in all *available* packages.")
##         else NULL
##         y <- list(title = "Data sets", header = NULL, results = db, 
##             footer = footer)
##         class(y) <- "packageIQR"
##         return(y)
##     }
##     paths <- file.path(paths, "data")
##     for (name in names) {
##         found <- FALSE
##         for (p in paths) {
##             tmp_env <- if (overwrite) 
##                 envir
##             else new.env()
##             if (file_test("-f", file.path(p, "Rdata.rds"))) {
##                 rds <- readRDS(file.path(p, "Rdata.rds"))
##                 if (name %in% names(rds)) {
##                   found <- TRUE
##                   if (verbose) 
##                     message(sprintf("name=%s:\t found in Rdata.rds", 
##                       name), domain = NA)
##                   thispkg <- sub(".*/([^/]*)/data$", "\\1", p)
##                   thispkg <- sub("_.*$", "", thispkg)
##                   thispkg <- paste0("package:", thispkg)
##                   objs <- rds[[name]]
##                   lazyLoad(file.path(p, "Rdata"), envir = tmp_env, 
##                     filter = function(x) x %in% objs)
##                   break
##                 }
##                 else if (verbose) 
##                   message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n", 
##                     name, paste(names(rds), collapse = ",")), 
##                     domain = NA)
##             }
##             files <- list.files(p, full.names = TRUE)
##             files <- files[grep(name, files, fixed = TRUE)]
##             if (length(files) > 1L) {
##                 o <- match(fileExt(files), dataExts, nomatch = 100L)
##                 paths0 <- dirname(files)
##                 paths0 <- factor(paths0, levels = unique(paths0))
##                 files <- files[order(paths0, o)]
##             }
##             if (length(files)) {
##                 for (file in files) {
##                   if (verbose) 
##                     message("name=", name, ":\t file= ...", .Platform$file.sep, 
##                       basename(file), "::\t", appendLF = FALSE, 
##                       domain = NA)
##                   ext <- fileExt(file)
##                   if (basename(file) != paste0(name, ".", ext)) 
##                     found <- FALSE
##                   else {
##                     found <- TRUE
##                     switch(ext, R = , r = {
##                       library("utils")
##                       sys.source(file, chdir = TRUE, envir = tmp_env)
##                     }, RData = , rdata = , rda = load(file, envir = tmp_env), 
##                       TXT = , txt = , tab = , tab.gz = , tab.bz2 = , 
##                       tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name, 
##                         my_read_table(file, header = TRUE, as.is = FALSE), 
##                         envir = tmp_env), CSV = , csv = , csv.gz = , 
##                       csv.bz2 = , csv.xz = assign(name, my_read_table(file, 
##                         header = TRUE, sep = ";", as.is = FALSE), 
##                         envir = tmp_env), found <- FALSE)
##                   }
##                   if (found) 
##                     break
##                 }
##                 if (verbose) 
##                   message(if (!found) 
##                     "*NOT* ", "found", domain = NA)
##             }
##             if (found) 
##                 break
##         }
##         if (!found) {
##             warning(gettextf("data set %s not found", sQuote(name)), 
##                 domain = NA)
##         }
##         else if (!overwrite) {
##             for (o in ls(envir = tmp_env, all.names = TRUE)) {
##                 if (exists(o, envir = envir, inherits = FALSE)) 
##                   warning(gettextf("an object named %s already exists and will not be overwritten", 
##                     sQuote(o)))
##                 else assign(o, get(o, envir = tmp_env, inherits = FALSE), 
##                   envir = envir)
##             }
##             rm(tmp_env)
##         }
##     }
##     invisible(names)
## }
## <bytecode: 0x0000018cacf33740>
## <environment: namespace:utils>
# Load the FiveThirtyEight "all-ages" college-majors table from GitHub.
# NOTE(review): the object name `data` is the same as utils::data(); a more
# specific name would be clearer - confirm before renaming, later code uses it.
data <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/all-ages.csv"
)
# Min-max rescale a numeric vector onto the [0, 1] interval.
#
# Args:
#   x: Numeric vector. NA entries are ignored when locating the minimum and
#      maximum, and remain NA in the result.
# Returns:
#   A vector the same length as x in which the smallest non-missing value maps
#   to 0 and the largest to 1. When all non-missing values are identical the
#   range is zero, so those values map to 0 instead of NaN (0 / 0).
normalize_fun <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  span <- bounds[2] - bounds[1]
  if (isTRUE(span == 0)) {
    # Constant input: skip the division by zero; NA positions stay NA.
    return(x - bounds[1])
  }
  (x - bounds[1]) / span
}
# Rescale the three head-count columns to [0, 1], one column at a time.
# lapply() keeps the input as a data frame instead of coercing it to a matrix
# the way apply() does, and the columns are picked by name rather than by
# position so a reordered file cannot silently select the wrong ones.
normalize_data <- as.data.frame(
  lapply(
    data[c("Total", "Employed", "Employed_full_time_year_round")],
    normalize_fun
  )
)

normalize_data
##            Total     Employed Employed_full_time_year_round
## 1   0.0402907423 0.0377205889                  0.0376543047
## 2   0.0297746253 0.0320340039                  0.0325786995
## 3   0.0101114538 0.0105524828                  0.0112042000
## 4   0.0324092616 0.0338666313                  0.0329382946
## 5   0.0070115991 0.0067104253                  0.0059996151
## 6   0.0246748437 0.0261595661                  0.0257876655
## 7   0.0013424694 0.0014594718                  0.0015214434
## 8   0.0019714115 0.0020825311                  0.0020538712
## 9   0.0332285203 0.0365972971                  0.0330935861
## 10  0.0214830346 0.0198630970                  0.0198731769
## 11  0.0258856293 0.0273895345                  0.0255389929
## 12  0.0936511771 0.0914945178                  0.0835411195
## 13  0.0324704577 0.0315805221                  0.0255054582
## 14  0.3156821571 0.3354167145                  0.3067888155
## 15  0.1331921871 0.1330040384                  0.1208869050
## 16  0.0669046373 0.0718184237                  0.0641781858
## 17  0.0590920421 0.0620258523                  0.0569878310
## 18  0.0191422037 0.0204500307                  0.0186597369
## 19  0.0805436777 0.0921226772                  0.0974348021
## 20  0.0086254459 0.0090679356                  0.0091080235
## 21  0.2501978460 0.2783281610                  0.2888931538
## 22  0.0241609246 0.0275833374                  0.0291550650
## 23  0.0118438481 0.0131216462                  0.0139622998
## 24  0.0158196721 0.0180963455                  0.0179854315
## 25  0.0127931886 0.0135560027                  0.0127364776
## 26  0.4602430414 0.3579407762                  0.3047891158
## 27  0.0005257738 0.0006889353                  0.0007093878
## 28  0.0000000000 0.0000000000                  0.0000000000
## 29  0.4627530427 0.3476131218                  0.2583167337
## 30  0.0212783000 0.0194274654                  0.0146525986
## 31  0.0894760653 0.0816224703                  0.0697779642
## 32  0.0495601891 0.0475871114                  0.0361349250
## 33  0.0173274671 0.0147613207                  0.0122396482
## 34  0.0710855163 0.0543982633                  0.0453100179
## 35  0.0471924447 0.0453821785                  0.0363835977
## 36  0.0399299737 0.0328500161                  0.0260740002
## 37  0.0274488532 0.0243923897                  0.0189852814
## 38  0.0573670170 0.0466890730                  0.0343384972
## 39  0.0735202239 0.0653094514                  0.0483224655
## 40  0.0714991506 0.0529396414                  0.0465508017
## 41  0.1604183634 0.1520162726                  0.1604145095
## 42  0.0202933952 0.0184673761                  0.0192943165
## 43  0.0097247329 0.0096807947                  0.0090430178
## 44  0.0055079693 0.0051940027                  0.0052040689
## 45  0.0051106752 0.0048382723                  0.0041835823
## 46  0.0594819670 0.0553379523                  0.0558806701
## 47  0.1141249567 0.1110707355                  0.1132105551
## 48  0.0486249461 0.0540820585                  0.0567159420
## 49  0.2144269642 0.2076041287                  0.2173172140
## 50  0.0058267657 0.0057023102                  0.0057597131
## 51  0.0034026312 0.0035517781                  0.0036171039
## 52  0.0012393011 0.0011169167                  0.0011644278
## 53  0.0435645734 0.0424075590                  0.0432963884
## 54  0.0060984636 0.0056079588                  0.0055605686
## 55  0.1855532992 0.1788065482                  0.1862259073
## 56  0.0033391924 0.0023150096                  0.0022540475
## 57  0.0026753268 0.0025177376                  0.0027477814
## 58  0.0043888176 0.0039092084                  0.0041959644
## 59  0.0023805603 0.0024769370                  0.0027761569
## 60  0.0055220668 0.0053168295                  0.0054393277
## 61  0.0174969578 0.0180262195                  0.0186251703
## 62  0.0112094592 0.0121594318                  0.0126699242
## 63  0.0143224503 0.0109579388                  0.0108399616
## 64  0.0295730947 0.0307045840                  0.0325358783
## 65  0.0255504925 0.0271617311                  0.0289806845
## 66  0.0086353783 0.0096467942                  0.0104112334
## 67  0.0198006225 0.0219324529                  0.0232627609
## 68  0.0235156422 0.0187704056                  0.0151344664
## 69  0.0749559292 0.0646698168                  0.0507488298
## 70  0.0177491114 0.0141119110                  0.0113259567
## 71  0.1280446661 0.1020410505                  0.0789736938
## 72  0.0022216427 0.0024556867                  0.0026053879
## 73  0.0207108744 0.0203012785                  0.0186561254
## 74  0.3512370903 0.3006452446                  0.2482269174
## 75  0.0182034363 0.0184542009                  0.0147217317
## 76  0.1918625850 0.1714645634                  0.1525565563
## 77  0.0140308877 0.0121037560                  0.0094758733
## 78  0.0044205370 0.0023796106                  0.0016700279
## 79  0.2681920622 0.2471781703                  0.2175602115
## 80  0.0233653753 0.0217186747                  0.0185782217
## 81  0.0037611571 0.0033116495                  0.0027034124
## 82  0.0082665997 0.0079599440                  0.0063318666
## 83  0.0137681610 0.0149670238                  0.0126833381
## 84  0.0012707001 0.0013833957                  0.0012407838
## 85  0.0213029707 0.0186705291                  0.0169721677
## 86  0.0008391235 0.0008453376                  0.0007666547
## 87  0.0133247296 0.0127085400                  0.0098612644
## 88  0.0169807960 0.0145445674                  0.0129283993
## 89  0.0036140942 0.0031854226                  0.0022457928
## 90  0.0086485146 0.0088426822                  0.0079528822
## 91  0.1379026847 0.1187510253                  0.1076953873
## 92  0.0053557800 0.0057987867                  0.0056833571
## 93  0.0071801286 0.0073594100                  0.0069004087
## 94  0.0006148446 0.0000671510                  0.0003172898
## 95  0.0137140137 0.0145411674                  0.0128695846
## 96  0.0173604681 0.0176896145                  0.0136233414
## 97  0.0199089171 0.0180143193                  0.0139395994
## 98  0.0015340676 0.0018623778                  0.0020358140
## 99  0.0014424337 0.0017149006                  0.0013135283
## 100 0.0190556961 0.0177737657                  0.0154698134
## 101 0.1115028160 0.1212079871                  0.1048083079
## 102 0.0651584659 0.0583287220                  0.0486696786
## 103 0.0738419039 0.0694184128                  0.0621748747
## 104 0.0020697738 0.0018615278                  0.0019955724
## 105 0.0007381980 0.0008109121                  0.0006985535
## 106 0.0037342436 0.0041480620                  0.0041299268
## 107 0.0979349040 0.0835490241                  0.0785305199
## 108 0.0338039559 0.0315380215                  0.0300104577
## 109 0.0018810591 0.0019707545                  0.0020260116
## 110 0.0026737248 0.0027157906                  0.0027936982
## 111 0.0385195799 0.0337051289                  0.0308101312
## 112 0.0015417572 0.0018589778                  0.0017603136
## 113 0.1363477912 0.1304637754                  0.1227060333
## 114 0.0031302926 0.0034289513                  0.0031703186
## 115 0.4747276133 0.4481105493                  0.3795735522
## 116 0.0037310396 0.0030851211                  0.0024093389
## 117 0.0016795285 0.0015453231                  0.0011370842
## 118 0.0048819108 0.0049211486                  0.0040215840
## 119 0.0049895646 0.0044141160                  0.0038889929
## 120 0.0027153766 0.0022971593                  0.0021322908
## 121 0.0101585524 0.0095324675                  0.0075298291
## 122 0.2418191069 0.2600516128                  0.2664749514
## 123 0.0167376136 0.0154647062                  0.0155647423
## 124 0.0039684549 0.0041034363                  0.0036645684
## 125 0.0254364307 0.0254621307                  0.0226312767
## 126 0.1014916469 0.0950267456                  0.0840570379
## 127 0.0400392296 0.0334365249                  0.0288620233
## 128 0.2419712961 0.2269338427                  0.2215802478
## 129 0.0450771744 0.0428861161                  0.0335104481
## 130 0.0232894409 0.0246682188                  0.0245938303
## 131 0.0362136724 0.0349265972                  0.0319750750
## 132 0.0240218717 0.0234059499                  0.0211516227
## 133 0.2391966458 0.2295620820                  0.2170303633
## 134 0.2153596440 0.1945177580                  0.1730503830
## 135 0.0043208931 0.0045964437                  0.0043084346
## 136 0.0288198380 0.0329647678                  0.0334433787
## 137 0.0042709110 0.0047239456                  0.0048289963
## 138 0.0398072611 0.0413624684                  0.0425250904
## 139 0.1824877271 0.1638267742                  0.1318966038
## 140 0.0552434163 0.0567719237                  0.0414932536
## 141 0.0877462342 0.0812663149                  0.0593558965
## 142 0.0168994148 0.0168328017                  0.0115493494
## 143 0.1609236318 0.1608598049                  0.1370165780
## 144 0.0420080779 0.0451182495                  0.0351907944
## 145 0.0283411628 0.0254166550                  0.0195512439
## 146 0.0251871607 0.0243558391                  0.0184956748
## 147 0.0019592363 0.0020991064                  0.0013976230
## 148 0.0327190868 0.0326005374                  0.0271646517
## 149 0.0232548379 0.0203582294                  0.0128938328
## 150 0.0339987581 0.0356444329                  0.0341543143
## 151 0.0198390703 0.0211597913                  0.0123949397
## 152 0.0520948610 0.0509952374                  0.0469666319
## 153 0.0096497597 0.0074448363                  0.0062188804
## 154 0.5663029290 0.5628014889                  0.4882925216
## 155 0.0569309548 0.0520913288                  0.0454735641
## 156 0.0800169427 0.0840161060                  0.0655329876
## 157 0.0174120522 0.0174469358                  0.0143523341
## 158 0.0241103016 0.0217254748                  0.0178420062
## 159 0.6876762592 0.6712915858                  0.6725269838
## 160 0.5692912851 0.5671000031                  0.5643806838
## 161 0.0023603752 0.0027004904                  0.0029856198
## 162 1.0000000000 1.0000000000                  1.0000000000
## 163 0.0175591151 0.0194861163                  0.0206424113
## 164 0.0234374650 0.0240090339                  0.0244431822
## 165 0.3563560959 0.3776746712                  0.3631131755
## 166 0.2608530159 0.2844095769                  0.2889039881
## 167 0.0592346194 0.0600903734                  0.0595230541
## 168 0.0268070952 0.0276088378                  0.0257541308
## 169 0.0635856300 0.0688089537                  0.0626355898
## 170 0.0494301073 0.0565198950                  0.0604429366
## 171 0.0321542244 0.0322915578                  0.0312182227
## 172 0.2275190845 0.2026957303                  0.1821553110
## 173 0.0049181158 0.0044179410                  0.0036686958
# Upper-case the major names so the upper-case keywords below match regardless
# of how the names were originally capitalised.
major_data[["Major"]] <- toupper(major_data[["Major"]])
# Keep only the majors whose name contains one of the two keywords.
data_mjrs <- str_subset(
  string = major_data[["Major"]],
  pattern = "(DATA)|(STATISTICS)"
)
data_mjrs
## [1] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"
## [2] "COMPUTER PROGRAMMING AND DATA PROCESSING"     
## [3] "STATISTICS AND DECISION SCIENCE"
# Same keyword search on the all-ages table; upper-case the names first so the
# upper-case keywords match regardless of original capitalisation.
data[["Major"]] <- toupper(data[["Major"]])
# data_mjrs is reassigned here with the matches from this table.
data_mjrs <- str_subset(
  string = data[["Major"]],
  pattern = "(DATA)|(STATISTICS)"
)
data_mjrs
## [1] "COMPUTER PROGRAMMING AND DATA PROCESSING"     
## [2] "STATISTICS AND DECISION SCIENCE"              
## [3] "MANAGEMENT INFORMATION SYSTEMS AND STATISTICS"

“(.)\1\1” = It matches any single character that appears three times in a row (e.g. “aaa”).

library("stringr")

# Sample string containing a run of consecutive "a" characters.
testData <- "fdraaaaaoijh"
# A plain character pattern is treated as a regular expression by stringr, so
# no explicit regex() wrapper is needed.
str_extract_all(string = testData, pattern = "(.)\\1\\1")
## [[1]]
## [1] "aaa"

“(.)(.)\2\1” = It matches a pair of characters immediately followed by the same pair in reversed order (e.g. “abba”).

# Sample string containing "abba": two characters followed by the same two
# characters in reverse order.
Search_in <- c("cdsabbavdf")
# Search the sample defined on the line above, not the earlier testData.
str_extract_all(Search_in, regex("(.)(.)\\2\\1"))
## [[1]]
## [1] "abba"

“(..)\1” = It matches any two characters immediately followed by the same two characters (e.g. “abab”).

# Sample string in which the pair "ab" occurs twice in a row.
Search_in <- "scfababthlb"
# Extract every two-character sequence that is immediately followed by itself.
str_extract_all(string = Search_in, pattern = "(..)\\1")
## [[1]]
## [1] "abab"

“(.).\1.\1” = It matches a character that occurs three times with exactly one arbitrary character between each occurrence (e.g. “babab”).

# Sample string containing "babab": the same character three times with one
# other character between each occurrence.
Data <- "scfacbababthlb"
# The pattern is a plain string; stringr interprets it as a regular expression.
str_extract_all(string = Data, pattern = "(.).\\1.\\1")
## [[1]]
## [1] "babab"

“(.)(.)(.).*\3\2\1” = It matches any three characters, followed by zero or more characters, followed by the same three characters in reversed order (e.g. “abcxyzcba”).

# Sample string that begins with "app" and ends with the reversed "ppa".
Data <- "applebccfdscbaelppa"
# Three captured characters, anything in between, then the same three reversed.
str_extract_all(string = Data, pattern = "(.)(.)(.).*\\3\\2\\1")
## [[1]]
## [1] "applebccfdscbaelppa"

Start and end with the same character.

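Anchor the pattern with ^ at the start and $ at the end, capture the first character, and use a back-reference so that the last character must be the same one.

(“^.*$”)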

# Anchored at both ends. The group holding the back-reference is optional so
# that a one-character string also matches: its only character is both the
# first and the last.
str_view("churchc", "^(.)(.*\\1)?$")
## [1] │ <churchc>

Contain a repeated pair of letters (e.g. “church” contains “ch” repeated twice).

Capture two consecutive letters with ([A-Za-z][A-Za-z]) and require the same pair to appear again later in the string.

# Highlight the span from a two-letter pair through a later repeat of that pair.
str_view(
  string = "church",
  pattern = "([A-Za-z][A-Za-z]).*\\1"
)
## [1] │ <church>

Contain one letter repeated in at least three places (e.g. “eleven” contains three “e”s.)

# One captured letter followed by two later occurrences of the same letter.
# There is no trailing "." after the last back-reference, so words whose third
# occurrence is the final character (e.g. "banana") match as well.
str_view("eleven", "([A-Za-z]).*\\1.*\\1")
## [1] │ <eleve>n

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.