Text Frequency Using R

0
119
views

Export sentiment values, word frequencies, and positive, negative, and neutral words

PropellerAds

Word frequency is one of the most useful measures in text analysis. It shows which sentiment words appear most often across the overall communication, which makes analyzing the full text much easier.


# Load required packages ----
# NOTE(review): the original used curly "smart" quotes, which are syntax
# errors in R; all quotes below are plain ASCII. The original also loaded
# "sentimentr" twice and called library("sink") -- sink() is a base R
# function, not a package, so that call would fail.
library("tm")           # text mining: Corpus, tm_map, TermDocumentMatrix
library("SnowballC")    # stemming support used by tm
library("wordcloud")    # wordcloud()
library("RColorBrewer") # brewer.pal() color palettes
library("sentimentr")   # sentence-level sentiment scoring
library("xlsx")         # Excel export
library("data.table")   # fast data manipulation

# Read the text file (a local CSV; readLines() gives one document per line)
text <- readLines("data.csv")
 
# Load the data as a corpus (each line of `text` becomes one document)
docs <- Corpus(VectorSource(text))

inspect(docs)

# Replace a matched pattern with a single space, preserving word boundaries.
# NOTE(review): the original replacement string used curly quotes (syntax
# error); it must be a plain " ".
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|")

# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove English common stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stop words
# (specify your stopwords as a character vector)
docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
# Eliminate extra white space
docs <- tm_map(docs, stripWhitespace)
# Text stemming (optional; uncomment to reduce words to their stems)
# docs <- tm_map(docs, stemDocument)
 
# Word-frequency table ----
# Rows of the term-document matrix are terms; summing across columns
# gives each word's total count over all documents.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
# Preview the ten most frequent words
head(d, 10)

# Word cloud ----
# Fixed seed so the random word layout is reproducible across runs.
# NOTE(review): "Dark2" was written with curly quotes in the original,
# which is a syntax error in R.
set.seed(1234)
wordcloud(words = d$word, freq = d$freq, min.freq = 1,
          max.words = 200, random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
 


# Bar plot of the 15 most frequent words ----
# NOTE(review): the three string literals used curly quotes in the
# original (syntax errors); replaced with plain ASCII quotes.
barplot(d[1:15, ]$freq, las = 2, names.arg = d[1:15, ]$word,
        col = "lightblue", main = "Most frequent words",
        ylab = "Word frequencies")

# Export results ----
# NOTE(review): avoid setwd() in scripts; build explicit output paths
# instead. The original write.table() referenced an undefined object
# `xo`; the frequency table `d` is what exists at this point, so export
# that. Curly quotes in the original are replaced with ASCII quotes.
out_dir <- "E:/"

write.csv(d, file.path(out_dir, "wordfrequency.csv"))
write.table(d, file = file.path(out_dir, "tibble_matrix.csv"),
            sep = ",", row.names = FALSE)