This is a Twitter word cloud, constructed daily, for the phrase “climate change”. The 500 most recent tweets containing the phrase “climate change” at the time of the run (2 pm) are collected into a corpus and then converted into a sparse term-document matrix.

We then compute word frequency counts and display them as a word cloud on a daily basis. We take the geographic location of the tweets into account by selecting a radius of 100 miles around the University of Idaho in Moscow, ID.

library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(SnowballC)

# ---- Twitter authentication ----

# Enable httr's on-disk OAuth token cache up front so that authentication
# does not stop to ask about caching during an unattended run.
options(httr_oauth_cache = TRUE)

# Placeholder credentials. Real values must be supplied before running and
# should not be committed to version control.
api_key <- "redacted"
api_secret <- "redacted"
access_token <- "redacted"
access_token_secret <- "redacted"

# Authenticate with all four credentials; twitteR reports
# "Using direct authentication" on the console when this path is taken.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)

# ---- Tweet search ----

# Ask the Twitter search API (via twitteR) for up to 500 of the most recent
# English-language tweets matching the query "climate+change".
# NOTE(review): the description at the top of this file mentions a 100-mile
# radius around Moscow, ID, but no `geocode` argument is passed here --
# confirm whether a geographic filter is intended.
mach_tweets <- searchTwitter(
  "climate+change",
  n = 500,
  lang = "en",
  resultType = "recent"
)

# ---- Tweet text and corpus ----

# Fail early with a clear message when the search came back empty, instead
# of letting an obscure error surface further down the pipeline.
if (length(mach_tweets) == 0L) {
  stop(
    "No tweets were returned by the search; cannot build a word cloud.",
    call. = FALSE
  )
}

# Extract the text of every tweet as a character vector. vapply() enforces
# exactly one string per tweet, whereas sapply() silently changes its return
# type depending on the input.
mach_text <- vapply(mach_tweets, function(x) x$getText(), character(1))

# Convert to UTF-8. With `sub = ""`, bytes that cannot be converted are
# dropped; without it, iconv() replaces the entire tweet with NA.
mach_text <- iconv(mach_text, to = "UTF-8", sub = "")

# Build a tm corpus with one document per tweet.
mach_corpus <- Corpus(VectorSource(mach_text))

# ---- Term-document matrix and word frequencies ----

# Text-cleaning options passed to TermDocumentMatrix(). The search terms
# "climate" and "change" are excluded together with the English stop words.
# NOTE: tm processes several of these options in the order they appear in
# the list (see ?termFreq), so the order is kept exactly as it was.
control <- list(
  removePunctuation = TRUE,
  stopwords = c("climate", "change", stopwords("english")),
  removeNumbers = TRUE,
  tolower = TRUE
)

# Term-document matrix: one row per term, one column per tweet.
tdm <- TermDocumentMatrix(mach_corpus, control = control)

# Dense copy of the matrix so that rowSums() can be applied to it.
m <- as.matrix(tdm)

# Total count of each term across all tweets, most frequent first.
word_freqs <- sort(rowSums(m), decreasing = TRUE)

# Words and their frequencies as a data frame. stringsAsFactors = FALSE keeps
# `word` a character column on every R version (R < 4.0 would otherwise
# convert it to a factor).
dm <- data.frame(
  word = names(word_freqs),
  freq = word_freqs,
  stringsAsFactors = FALSE
)

# ---- Render the word cloud ----

# Disabled: switch to the output directory before writing any files.
#setwd("/dmine/data/wordclouds")

# Timestamp of this run; it is only referenced by the disabled PNG export
# below. NOTE: ":" is not a valid file-name character on Windows, so adjust
# the format before re-enabling the export there.
string <- format(Sys.time(), "%Y-%m-%d_%H:%M:%S")

# Disabled: open a timestamped 12 x 8 inch, 300 dpi PNG device for the cloud.
#cloudname <- "climate.png"
#cloudpng <- png(paste0(string, "_", cloudname), width=12, height=8, units="in", res=300)

# Draw the cloud on the current graphics device. With random.order = FALSE
# the words are plotted in decreasing order of frequency; colours come from
# the 8-colour "Dark2" ColorBrewer palette. invisible() suppresses printing
# of the call's return value.
invisible(
  wordcloud(
    words = dm$word,
    freq = dm$freq,
    random.order = FALSE,
    colors = brewer.pal(8, "Dark2")
  )
)