- Please write a program to compute how many words (separated by spaces) are in the “product_title” of each product, and create a new column “numofwords” to store the results.
library(readr)
# Read the training data and put the data frame on the search path. The
# snippets below refer to product_title, search_term and relevance by bare
# name (presumably columns of train.csv), which only works after attach().
train <- read_csv(file = "train.csv")
attach(what = train)
# Loop solution: count the whitespace-separated words in every product title.
# The result is preallocated and indexed with seq_along() so an empty input
# yields an empty result instead of iterating over c(1, 0).
numofwords <- integer(length(product_title))
for (i in seq_along(product_title)) {
  hits <- gregexpr("\\S+", product_title[i])[[1L]]
  # gregexpr() reports "no match" as a single -1 (and NA for an NA input), so
  # count the positive match starts rather than the length of the result;
  # an empty title then gives 0 and an NA title gives NA.
  numofwords[i] <- sum(hits > 0L)
}
# Store the counts as the new column requested by the task.
train$numofwords <- numofwords
# Vectorised solution: gregexpr() is called once on the whole vector and
# vapply() guarantees one integer per title, even for empty input.
# Positive match starts are counted because gregexpr() returns a single -1
# when nothing matches (and NA for an NA title); taking length() would report
# one word for an empty title.
numofwords <- vapply(
  gregexpr("\\S+", product_title),
  function(hits) sum(hits > 0L),
  integer(1)
)
# Store the counts as the new column requested by the task.
train$numofwords <- numofwords
# Matching runs of non-whitespace ("\\S+") instead of splitting on a single
# space (e.g. strsplit(x, " ")) keeps repeated spaces between words from
# inflating the count.
# Benchmark of the element-by-element approach: the result starts empty and
# grows by one entry per title, which is the pattern being timed here.
numofwords <- c()
system.time(
  for (i in 1:length(product_title)) {
    # One gregexpr() call per title; lengths() gives the size of its single
    # match vector.
    numofwords[i] <- lengths(gregexpr(pattern = "\\S+", text = product_title[i]))
  }
)
## user system elapsed
## 6.58 0.02 6.61
# Benchmarks of three vectorised variants. Each one runs gregexpr() once over
# the whole product_title vector and differs only in the function that takes
# the length of every per-title match vector. The "##" lines are the recorded
# console output.

# Variant 1: sapply()
system.time(
  sapply(X = gregexpr(pattern = "\\S+", text = product_title), FUN = length)
)
## user system elapsed
## 1.26 0.00 1.26

# Variant 2: mapply()
system.time(
  mapply(FUN = length, gregexpr(pattern = "\\S+", text = product_title))
)
## user system elapsed
## 1.34 0.00 1.35

# Variant 3: vapply(), with the result declared as one number per title
system.time(
  vapply(
    X = gregexpr(pattern = "\\S+", text = product_title),
    FUN = length,
    FUN.VALUE = numeric(1)
  )
)
## user system elapsed
## 1.34 0.00 1.34
library(stringi)
# For every row, count the distinct words ("\\w+" runs) that occur in both the
# product title and the search term, then average that count per relevance
# score.
# NOTE(review): the comparison is case-sensitive, so "Drill" and "drill" do not
# count as shared; lower-casing both sides would change the means recorded
# below -- confirm which behaviour is intended.

# Extract the words once per column instead of once per row inside a loop.
title_words <- stri_extract_all_regex(product_title, "\\w+")
term_words <- stri_extract_all_regex(search_term, "\\w+")

# vapply() over seq_along() replaces the growing vector and the 1:length()
# index, so empty input yields integer(0).
count <- vapply(
  seq_along(relevance),
  function(row) {
    shared <- intersect(title_words[[row]], term_words[[row]])
    # stri_extract_all_regex() yields NA for a string without any word (or an
    # NA string); drop it so two word-less strings are not counted as sharing
    # one word.
    sum(!is.na(shared))
  },
  integer(1)
)

# Mean number of shared words for each relevance score.
tapply(count, relevance, mean)
## 1 1.25 1.33 1.5 1.67 1.75 2
## 0.2517815 0.0000000 0.2867598 0.0000000 0.2464602 0.5555556 0.2734868
## 2.25 2.33 2.5 2.67 2.75 3
## 0.2727273 0.2846202 0.1578947 0.2927246 0.0000000 0.2786928