# Stacked bar chart of the built-in VADeaths matrix: one bar per population
# category (matrix columns), one coloured segment per age group (matrix rows).
categorias <- c(
  "Rural Male",
  "Rural Female",
  "Urban Male",
  "Urban Female"
)
grupo <- c("50-54", "55-59", "60-64", "65-69", "70-74")
barplot(
  height = VADeaths,
  main = "VADeaths",
  names.arg = categorias,
  col = rainbow(length(grupo)),
  xlab = "Categorias",
  ylab = "Mortes"
)
# The legend maps each segment colour to its age group.
legend(
  x = "topright",
  legend = grupo,
  col = rainbow(length(grupo)),
  pch = 15
)
# Tabulate the disease stage recorded for each of the 20 patients.
estagio <- table(c(
  "moderado", "leve", "leve", "severo", "leve",
  "moderado", "moderado", "moderado", "leve", "leve",
  "severo", "leve", "moderado", "moderado", "leve",
  "severo", "moderado", "moderado", "moderado", "leve"
))
# Convert the counts into percentages of the total.
estagio <- (estagio / sum(estagio)) * 100
# Pie chart: each slice is labelled with its percentage.
pie(
  x = estagio,
  labels = paste(estagio, "%"),
  col = rainbow(length(estagio)),
  main = "Estagio/Paciente"
)
# The legend maps each slice colour to its stage name.
legend(
  x = "topright",
  legend = names(estagio),
  col = rainbow(length(estagio)),
  pch = 15
)
## Loading required package: NLP
## Loading required package: RColorBrewer
## [1] "Using direct authentication"
#' Clean a tm corpus before building a term matrix.
#'
#' Applies, in order: lower-casing, URL removal, punctuation removal, number
#' removal, stop-word removal, removal of every remaining character that is
#' neither a letter nor whitespace, and finally whitespace collapsing.
#'
#' @param corpus A tm corpus, e.g. `Corpus(VectorSource(x))`.
#' @param lang Language passed to `stopwords()` to select the stop-word list.
#' @return The transformed corpus.
preprocessing <- function(corpus, lang) {
  # Drops any whitespace-delimited token that starts with "http".
  remove_url <- function(x) gsub("http[^[:space:]]*", "", x)
  # Drops every run of characters that are neither letters nor whitespace.
  remove_non_alpha <- function(x) gsub("[^[:alpha:][:space:]]+", "", x)

  # Plain string functions are wrapped in content_transformer() so that every
  # step is applied the same way and returns documents, not bare strings.
  corpus <- tm_map(corpus, content_transformer(tolower))
  # Remove URLs while they are still intact, before punctuation is stripped.
  corpus <- tm_map(corpus, content_transformer(remove_url))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeNumbers)
  # Remove stop words for the requested language.
  corpus <- tm_map(corpus, removeWords, stopwords(lang))
  corpus <- tm_map(corpus, content_transformer(remove_non_alpha))
  # Collapse whitespace last: the removals above leave gaps behind.
  tm_map(corpus, stripWhitespace)
}
# Request up to 1000 Portuguese-language tweets matching "#racismo".
tweets <- searchTwitter("#racismo", n = 1000, lang = "pt")
## Warning in doRppAPICall("search/tweets", n, params = params, retryOnRateLimit =
## retryOnRateLimit, : 1000 tweets were requested but the API can only return 528
# Build the corpus and preprocess it
tweets <- twListToDF(tweets)
tweets_t <- tweets$text
tweets_S <- VectorSource(tweets_t)
# The tweets were requested in Portuguese, so the Portuguese stop-word list
# must be used; the English list would leave Portuguese stop words in place.
corpus <- preprocessing(Corpus(tweets_S), lang = "portuguese")
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords(lang)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removeURL): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(removeNumPunct)):
## transformation drops documents
# Build the term-document matrix and convert it to a dense matrix
dtm <- TermDocumentMatrix(corpus)
dtm <- as.matrix(dtm)
# Total frequency of each term, most frequent first
fre <- sort(rowSums(dtm), decreasing = TRUE)
# Draw the word cloud. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
wordcloud(
  words = names(fre),
  freq = fre,
  min.freq = 1,
  max.words = 200,
  scale = c(4, 0.5),
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
# Score each tweet against the NRC emotion lexicon. The tweets were requested
# in Portuguese, so the Portuguese lexicon is selected explicitly; the default
# is English.
s <- get_nrc_sentiment(tweets$text, language = "portuguese")
## Warning: `filter_()` is deprecated as of dplyr 0.7.0.
## Please use `filter()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Bar chart of the column totals of `s`, one bar per column, with the axis
# labels drawn perpendicular to the axes (las = 2).
barplot(
  height = colSums(s),
  main = "Sentimentos sobre o Movimento #racismo",
  ylab = "Quantidade",
  col = rainbow(10),
  las = 2
)
# Load the flu dataset from the working directory.
flu <- read.csv("flu.csv", header = TRUE)
# Histogram of the `age` column. probability = TRUE plots densities, not
# counts, so the y axis is labelled as a density.
hist(
  flu$age,
  col = "grey",
  probability = TRUE,
  main = "Histograma do Dataset Flu",
  ylab = "Densidade",
  xlab = "Idade"
)
# Overlay the kernel density estimate. The result is passed straight to
# lines() so that no variable named `density` shadows stats::density().
lines(density(flu$age))
# Sampling distribution of the mean: draw 200 samples of 35 ages each, without
# replacement, and keep the mean of each sample. replicate() draws in the same
# order as an explicit loop but avoids growing the vector one element at a time.
nova_amostra <- replicate(200, mean(sample(flu$age, size = 35)))
# Histogram of the sample means. probability = TRUE plots densities, not
# counts, so the y axis is labelled as a density.
hist(
  nova_amostra,
  col = "grey",
  probability = TRUE,
  main = "Histograma da Nova Amostra",
  ylab = "Densidade",
  xlab = "Média"
)
# Overlay the kernel density estimate without shadowing stats::density().
lines(density(nova_amostra))