# Hashtag topic model for tweets matching "#bigdata" ----
#
# Steps performed by this script:
#   1. Authenticate against the Twitter API and download up to 1000
#      English-language tweets matching "#bigdata".
#   2. Build a document-feature matrix that keeps only hashtags, excluding
#      the search term itself.
#   3. Fit a 5-topic LDA model (Gibbs sampling) on the hashtag counts and
#      show the 5 top-ranked hashtags of every topic.
#   4. Draw a word cloud of the 50 highest-probability hashtags of topic 3.
#
# NOTE: no seed is passed to LDA() here, so which theme ends up as
# "topic 3" may differ between runs.

library(twitteR)
library(plyr)
library(quanteda)
library(topicmodels)
library(wordcloud)

# Authenticate ----
# Presumably "twitter_auth.rda" defines consumer_key, consumer_secret, token
# and token_secret; they are not created anywhere else in this script.
load("twitter_auth.rda")
setup_twitter_oauth(consumer_key, consumer_secret, token, token_secret)

# Download tweets ----
# searchTwitter() returns a list of status objects; ldply() converts each one
# with as.data.frame() and row-binds the results into a single data frame.
tweets <- plyr::ldply(
  searchTwitter("#bigdata", n = 1000, lang = "en"),
  as.data.frame
)

# Build the hashtag document-feature matrix ----
# Tokenise explicitly before calling dfm(): passing a character vector straight
# to dfm() is deprecated in recent quanteda releases.
toks <- tokens(tweets$text, remove_punct = TRUE)
# The variable deliberately keeps its original name `dfm`; R still resolves
# dfm(...) calls to the quanteda function because the variable is not a
# function.
dfm <- dfm(toks)
dfm_tags <- dfm_select(dfm, "#*")
# The search term occurs in (nearly) every tweet and carries no information.
dfm_tags <- dfm_select(dfm_tags, "#bigdata", selection = "remove")

# Tweets whose only hashtag was the search term are now all-zero rows, which
# LDA() cannot handle, so drop them before converting.
dfm_nonempty <- dfm_subset(dfm_tags, ntoken(dfm_tags) > 0)
if (ndoc(dfm_nonempty) == 0) {
  stop("No tweets contain a hashtag other than #bigdata.", call. = FALSE)
}

# Fit the topic model ----
dtm <- convert(dfm_nonempty, to = "topicmodels")
m <- LDA(dtm, method = "Gibbs", k = 5, control = list(alpha = 0.1))
terms(m, 5)

# Word cloud for topic 3 ----
# posterior(m)$terms is indexed by topic in its rows; row 3 holds the
# per-hashtag probabilities of topic 3.
words <- posterior(m)$terms[3, ]
topwords <- head(sort(words, decreasing = TRUE), n = 50)
wordcloud(names(topwords), topwords)