In [ ]:


Today's topic is term frequency inverse document frequency, which is a statistic for estimating the importance of words and phrases in a set of documents.


As an example, I've been tracking ...

In [164]:
%matplotlib inline
from matplotlib import pyplot as plt
import os
import random
import numpy as np
import pandas as pd
import twython
import time
import ConfigParser
from collections import defaultdict
In [165]:
propertiesFile = ""
cp = ConfigParser.ConfigParser()

APP_KEY            = cp.get('Params', 'app_key')
APP_SECRET         = cp.get('Params', 'app_secret')
OAUTH_TOKEN        = cp.get('Params', 'oauth_token')
OAUTH_TOKEN_SECRET = cp.get('Params', 'oauth_token_secret')

In [166]:
files = os.listdir('.')
followers = {}
for file in files:
    i = file.find('_followers.txt')
    if i > 0:
        f = open(file, 'r')
        followers[file[0:i]] = set('\n'))
In [167]:
keys = followers.keys()
unfollowers = []
allfollowers = set()
for i in range(len(keys)-1):
    today = followers[keys[i]]
    allfollowers = allfollowers.union(today)
    tomorrow = followers[keys[i+1]]
    unfollow = today.difference(tomorrow)
In [5]:
fol = random.sample(allfollowers, len(unfollowers))
nfol = unfollowers
In [6]:
follower_tweets = {}
unfollower_tweets = {}
In [21]:
errors = defaultdict(int)
for u in fol:
    except KeyError:
            tweets = twitter.get_user_timeline(screen_name=u)
            all = []
            for tweet in tweets:
            follower_tweets[u] = all
        except twython.TwythonError as e:
            if e.msg.find('Twitter API returned a 404 (Not Found)') == 0:
                ignoreDeletedAccount = True
                errors[e.msg] += 1
print errors
defaultdict(<type 'int'>, {'Twitter API returned a 401 (Unauthorized), An error occurred processing your request.': 9})
In [23]:
errors = defaultdict(int)
for u in nfol:
    except KeyError:
            tweets = twitter.get_user_timeline(screen_name=u)
            all = []
            for tweet in tweets:
            unfollower_tweets[u] = all
        except twython.TwythonError as e:
            if e.msg.find('Twitter API returned a 404 (Not Found)') == 0:
                ignoreDeletedAccount = True
                errors[e.msg] += 1
print errors
defaultdict(<type 'int'>, {'Twitter API returned a 401 (Unauthorized), An error occurred processing your request.': 30})
In [26]:
import pickle
pickle.dump({'unfollower_tweets': unfollower_tweets, 'follower_tweets': follower_tweets}, open('tfidfbackup.pkl', 'wb'))
In [168]:
import pickle
x = pickle.load(open('tfidfbackup.pkl', 'rb'))
unfollower_tweets = x['unfollower_tweets']
follower_tweets = x['follower_tweets']
In [ ]:
In [177]:
In [180]:
from gensim import corpora, models, similarities
from collections import defaultdict
In [183]:
list_of_lists = unfollower_tweets.values()
documents = [val for sublist in list_of_lists for val in sublist]

texts = [[word for word in document.lower().split()]
          for document in documents]
frequency = defaultdict(int)
for text in texts:
    for token in text:
        frequency[token] += 1
        texts = [[token for token in text if frequency[token] > 1] for text in texts]

from pprint import pprint
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-183-2b1ed78513bb> in <module>()
      9     for token in text:
     10         frequency[token] += 1
---> 11         texts = [[token for token in text if frequency[token] > 1] for text in texts]
     13 from pprint import pprint

In [169]:
import nltk
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import PorterStemmer

def stem_tokens(tokens, stemmer):
    stemmed = []
    for item in tokens:
    return stemmed

def tokenize(text):
    tokens = nltk.word_tokenize(text)
    stems = stem_tokens(tokens, stemmer)
    return stems

docs = unfollower_tweets.values()

tokens = set()
for tdocs in docs:
    for doc in tdocs:
        doc = doc.lower()
        dtokens = tokenize(doc)
        tokens = tokens.union(dtokens)
LookupError                               Traceback (most recent call last)
<ipython-input-169-bf229277150e> in <module>()
     22     for doc in tdocs:
     23         doc = doc.lower()
---> 24         dtokens = tokenize(doc)
     25         tokens = tokens.union(dtokens)

<ipython-input-169-bf229277150e> in tokenize(text)
     12 def tokenize(text):
---> 13     tokens = nltk.word_tokenize(text)
     14     stems = stem_tokens(tokens, stemmer)
     15     return stems

/usr/local/lib/python2.7/dist-packages/nltk/tokenize/__init__.pyc in word_tokenize(text, language)
     99     :param language: the model name in the Punkt corpus
    100     """
--> 101     return [token for sent in sent_tokenize(text, language)
    102             for token in _treebank_word_tokenize(sent)]

/usr/local/lib/python2.7/dist-packages/nltk/tokenize/__init__.pyc in sent_tokenize(text, language)
     83     :param language: the model name in the Punkt corpus
     84     """
---> 85     tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
     86     return tokenizer.tokenize(text)

/usr/local/lib/python2.7/dist-packages/nltk/data.pyc in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
    780     # Load the resource.
--> 781     opened_resource = _open(resource_url)
    783     if format == 'raw':

/usr/local/lib/python2.7/dist-packages/nltk/data.pyc in _open(resource_url)
    894     if protocol is None or protocol.lower() == 'nltk':
--> 895         return find(path_, path + ['']).open()
    896     elif protocol.lower() == 'file':
    897         # urllib might not use mode='rb', so handle this one ourselves:

/usr/local/lib/python2.7/dist-packages/nltk/data.pyc in find(resource_name, paths)
    622     sep = '*'*70
    623     resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
--> 624     raise LookupError(resource_not_found)
    626 def retrieve(resource_url, filename=None, verbose=True):

  Resource u'tokenizers/punkt/english.pickle' not found.  Please
  use the NLTK Downloader to obtain the resource:  >>>
  Searched in:
    - '/home/deploy/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - u''
In [6]:
import gensim
In [ ]:
dictionary = gensim.corpora.Dictionary.load(os.path.join(MODELS_DIR, 
corpus = gensim.corpora.MmCorpus(os.path.join(MODELS_DIR, ""))

tfidf = gensim.models.TfidfModel(corpus, normalize=True)
corpus_tfidf = tfidf[corpus]

# project to 2 dimensions for visualization
lsi = gensim.models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=2)

# write out coordinates to file
fcoords = open(os.path.join(MODELS_DIR, "coords.csv"), 'wb')
for vector in lsi[corpus]:
    if len(vector) != 2:
    fcoords.write("%6.4f\t%6.4f\n" % (vector[0][1], vector[1][1]))
In [40]:
[u'@faithandfaders oh gordon.',
 u'@peterrylands @wundershqip ha!!',
 u"we're recording a christmas episode tonight, what should we talk about? #podcast",
 u"rt @oliversnelling: @pip_wall do you listen to my podcast?! second to @serial i'd say @talking_podcast is the next addictive hyped podcast \u2026",
 u"here's a little video from us for today...",
 u'sadly there is no episode today but ollie has an update for you... \n\n',
 u"sorry we forgot to let you know yesterday - but sadly this week's #talkingfact was false. @lexlowmusic was not in s club juniors.",
 u'have a look at our afternoon with @mattvbray',
 u"our guest @lexlowmusic was an absolute delight, you'll love him and his music. \n\nenjoy.\n\n",
 u'joseph listened to @lexlowmusic for the first time during the tea break. \n\nyou should too. \n\n',
 u'@peterrylands impressive!',
 u"music, songs, guitars, writing, creativity, @lexlowmusic. it's a good listen. \n\n",
 u'have you ever written a song? @lexlowmusic has. you should check him out on our podcast! \n\n',
 u"this week is live! we chat to @lexlowmusic about songwriting. it's a cracker. enjoy.\n\n",
 u'rt @baggeeboy: twenty is my favourite episode of @talking_podcast yet. many lols.',
 u"@baggeeboy ha! we were a little worried it was terrible but if you like it then we're happy! thank you sir!",
 u'twenty questions. is it a podcast? yes. is it the talking podcast? yes.\n\nexcellent. have a listen.\n\n',
 u'the perfect combination. get the episode on, put your feet up and have a cuppa. \n\n',
 u'we are twentysomething. how old are you?\n\n\n\n#talking #podcast',
 u"we're in our twenties and count ourselves experts on the matter. get genius in your ears.\n\n",
 u'rt @matkim_: #huggable new #wearable for #sick #kids #healthit #iot #bigdata #mit #digitalhealth #tech\u2026',
 u'big data and market research myths and missteps #bigdata #businessintelligence #insights #data',
 u'40 epic resources for investing in startups',
 u'rt @rlsegundo: 3 ways #predictive #analytics help you hack #customer #acquisition',
 u"i've just registered for @texata 2015 :) #bigdata #analytics",
 u'@nikoneugebauer obrigada :) encontramo-nos l\xe1! ;)',
 u'rt @farbodsaraf: "it\'s imagination that counts. not skill." \u2014 a letter from lego, circa 1970s',
 u"rt @kirkdborne: get $500 credit on google cloud platform services with @mapr's free hadoop on-demand training:\n #big\u2026",
 u'behind the numbers: growth in the internet of things #iot #bigdata',
 u'rt @kirkdborne: give outliers some love\u2014not all data distributions are normal: #bigdata #datascience @mapr http://t\u2026',
 u"the big 'big data' question: hadoop or spark? #bigdata #analytics #hadoop #spark",
 u'anyone looking for #oneplus2 invitations? ;)',
 u'9 causes of data misinterpretation #bigdata #analytics #data #datascience',
 u'rt @bobfridley: read statistical inference for data science via @leanpub',
 u'ol\xe1 @justinesherry ! \xe9 sempre bom receber boas-vindas em portugu\xeas :) have an amazing day! (here is 7 a.m. :p )',
 u'monetizing big data for big profits #bigdata #analytics #datamining #business #data',
 u'rt @becomingdatasci: i wonder how much data analysis coke does to decide which names to send to which areas/stores.',
 u'@satya_majhi i also advise you to use a more powerful interface for r, rstudio:',
 u'@satya_majhi start with datacamp and this specialization on coursera :)',
 u"@satya_majhi \xed've used sas and i liked it but i would advice learning r for data science because it's open source and has a large community",
 u'"music is the silence between the notes." ~ claude debussy #quote #music',
 u'the art of improv, part 2: listen #improv #music [replay]',
 u'apple music fixes \u2018songwriter\u2019 information within its metadata (a bit)',
 u'you are the #music while the music lasts. ~ t.s. eliot #quote',
 u'music is forever; #music should grow and mature with you, following you right on up until you die. ~ paul simon #quote',
 u'fl studio 12.2 released - - several important new features',
 u"imogen heap inks 'forward-thinking' publishing deal, planning 'fair trade' payment system",
 u'live celtic music!',
 u'#poem - "up" #inspiration [replay]',
 u'"the purpose of life is a life of purpose." ~ robert byrne via @mankindproject',
 u'sarcastic side - a yuletide poem. sort of.',
 u'this is not a galaxy far, far away. it\u2019s @hawaiivolcanoesnps. pictured here are the\u2026',
 u'dancing can reveal all the mystery that #music conceals ~charles baudelaire #quote #dance',
 u'payoff #nowplaying #quest2016',
 u'heart leaps - an #improv #poem - \n',
 u'sarcastic side - an #improv #poem',
 u'the art of #music above all the other arts is the expression of the soul of a nation. ~ ralph vaughan williams #quote',
 u'rt @tracetv: the freelance life: hitting the reset button',
 u"setting up your musicians' monitor mix for better recording sessions",
 u"'you will only fail to learn if you do not learn from failing.' ~ stella adler \n",
 u"that devil's chord will do it every time! @dwallm @blacktapespod",
 u'thanks @cjordahl',
 u'seventh and ninth chords in adele\u2019s \u201chello\u201d by @musictheorygeek\n\n',
 u'everyone deserves someone who loves them as much as kanye loves kanye',
 u"rt @the_audiofiles: @aburtch @saralistens @btwnlinernotes true, 'tuning wars' was also great!",
 u'@alittledusty one more reason to switch companies',
 u'@alittledusty they clearly need to bone up on section 105 of the copyright act. that speech is public domain:',
 u'@alittledusty why are they reviewing it?',
 u'rt @timbretweet: podcast producers &amp; listeners: we want to hear from you! what was your favorite podcast episode of the past year? email ro\u2026',
 u"rt @saralistens: what were your favorite podcast episodes of 2015? i'm collecting the stories you loved most for a special holiday edition \u2026",
 u'grammy nominations 2016: kendrick lamar, taylor swift, the weeknd lead nominees:',
 u'@brianrbarone @tunershow in a sense, "lose yourself" also fills the "i want" song function in 8 mile too. again, great episode.',
 u'female buyers are still fueling pop music sales:',
 u'@mrbenbrown if you like music you could try @btwnlinernotes',
 u'@lindsaygoldwert @podcasttrending if you like music &amp; history you can try: @btwnlinernotes',
 u'@tunershow hearing the bassline of "my shot" by itself made me think of "lose yourself" nice breakdown of the song!',
 u'@bigdannysmack @rbdpodcast there is always room for another!',
 u"@barrymusic that's a lot of cash for a song 120 years old",
 u'thanks so much @pickledblonde',
 u"rt @goatrodeodc: that's so funny we recommend, @btwnlinernotes #realrecognizereal",
 u'.@learnfromerror @mattstat *`"prv{\'a\'}"/from-s` is true* un-provable in s if *"con{\'s\'}"* true: proof-theoretic ordinal of s not s-definable',
 u".@gravity_v20 #mathematics is autonomous: its '#ontology' and its 'objects' #explain the #laws of every #science.",
 u'rt @learnfromerror: @georgeshiber all 0?',
 u'there are no rules to good philosophizing: unfortunately, every philosopher knows all of them!',
 u"saying 'math is the language of science' is like saying physics is the language of chemistry: misleading &amp; doesn't explain its onto-nomology",
 u'#antilinearity rather than #hermiticity as a guiding principle for #quantum theory #philosophy of #physics',
 u"#music is unique among the #arts: why a #math-connection: a #neuro-selectivity found, like @harvard's w/ #math/1998",
 u'some examples of contextuality in #physics: implications to #quantum #cognition - g. oas et al. #bell #neurology',
 u"great support for #gaifman's claim that #g\xf6del\u2019s #incompleteness theorem does not show what people think it does!",
 u'in a kripkean sense!',
 u'yet more evidence that the #brain has a #universal #grammar go #chomsky #v2 via @cambridge_uni',
 u".@lapsuslima @oupphilosophy we must have an '#occam's #machete' when in #meinong's jungle &amp; keep our #ontic-economy '#federalreserve' free!",
 u".@lapsuslima @oupphilosophy keep in mind c. parsons' incompleteness point about fictional ontology: meinongian entities can be complete!",
 u".@lapsuslima @oupphilosophy yes: higher-order nomological modal logic quantifies de-re over meinongian entities w/o equivocating on 'truth'",
 u"keep c. #parsons in mind: 'the mark of #fictional entities is their #ontological #incompleteness w.r.t. #truth!",
 u"rt @thebjps: ntms, paris: amanda bryant (cuny) - keep the chickens cooped: the methodological inadequacy of 'free range' metaphysics",
 u'rt @thebjps: ntms, paris: bryant\nnegative: we ought not to engage in free range metaphysics\npositive: metaphysics ought to be scientificall\u2026',
 u'trapped-ion quantum #logic is possible with the #precision necessary for #faulttolerant #quantum #computer',
 u'#brains categorize/handle #data using less than 1% of #information with #algorithm that explains #learning as well',
 u'rt @jonathansampson: first attempt at async programming.',
 u'rt @monkchips: \u201cthe rise of conversational user interfaces\u201d  this is the biggest thing in tech',
 u'really thought provoking, what can a technologist do about climate change? #climatechange',
 u'rt @nandodf: [1511.06279] neural programmer-interpreters',
 u'rt @greatdismal: the asymmetry, in asymmetrical warfare, is that the little guy can only really damage the big guy by getting him angry eno\u2026',
 u'rt @ylecun: a new paper by christopher manning on the coming deep learning tsunami over natural language understanding.\u2026',
 u'rt @ch402: my team at google just open sourced tensorflow, our library for training large-scale neural networks.',
 u'rt @codemeshio: .@bltroutwine gives an inspiring talk on "the charming genius of the apollo guidance computer" enjo\u2026',
 u'machine learning &amp; deep neural networks explained - #natandlo ep 7 via @youtube',
 u"rt @internetofshit: can't wait for robotic arms to come to the kitchen",
 u'rt @xoxo: \u201cyou can put the pieces back together. you can adapt to chaos.\u201d the brilliant @ncasenmare on killing your dreams:\u2026',
 u'rt @oculus: watch @id_aa_carmack\'s "live coding session" from #oc2! available on our @youtube channel:\u2026',
 u'rt @wizdee: the challenges that startups face to build #ai based apps for businesses &amp; how to overcome them @techcr\u2026',
 u'rt @futureaware: yann lecun: the path to ai requires us to solve the unsupervised learning question (video) #future',
 u'rt @neonleif: "it\'s easy to hate code you didn\'t write, without an understanding of the context in which it was written"\u2026',
 u"rt @ch402: ever wonder what information entropy or mutual information is? i've written a visual explanation!",
 u'rt @luiciolfi: her code got humans on the moon\u2014and invented software itself via @wired',
 u'rt @wizdee: guess who is nominated for most promising technology innovation on #upawards.. vote for @wizdee @pt_star\u2026',
 u'@mtaoliveira @bemantunes tony stark has programmed several, enough said...',
 u'nice stanford course with syllabus and resources available: cs224d: deep learning for natural language processing',
 u'would open apis benefit u.s. banks? @amerbanker #bankondata',
 u'the 10 most #innovative ceos in #banking 2015! @bankinnovation #bankondata \n@mastercard @svb_financial',
 u'rt @sureshshankar: @crayonbigdata bringing 3 of these #bankondata innovations to market',
 u'2015 #fintech forward award winners discuss how banks can compete with startups #bankondata',
 u'commercial #innovations #banks should copy\n @amerbanker #bankondata',
 u"sometimes, it's nice to say thank you.\nsometimes, some banks do.\n#tdthanksyou #bankondata\n",
 u'.#innovation demands risks and failure until you get it right. @jpnicols #bankondata',
 u'introducing, the @forbestech #fintech 50 list! #bankondata',
 u'.@coinbase unveils america\u2019s first #bitcoin debit card! #blockchain #banking  #bankondata',
 u'.#bankondata week kicks off: 10 #banking #innovation strategies for 2016!\n @financialbrand',
 u'check it out, and spread the word: number 13 is on its way across the world!',
 u"check out @rntata2000's 2015 startup shopping list!\nnumber 13 sounds oddly familiar..\n",
 u"conversation is rarely driven by the fin side of #fintech.\na cynic (and financial expert's) guide: @dsquareddigest",
 u"@spokesoftware thank you for including us in your database! \nwe've raised about  sgd 5.7 mil though. could you please update that?",
 u'@etbrandequity @groupmindia @prithaet thank you for sharing!',
 u'@groupmworldwide @cvlsrinivas very soon @zentosh, very soon!\nearly next year is when we plan to launch for consumers :d',
 u"with @groupmworldwide &amp; @mindshare by our side, we're all set to sweep india off its feet!\n",
 u'@infoblazeind @rntata2000 thank you for sharing!',
 u"we're so excited about this opportunity! @groupmindia",
 u'#chennairains \na list of volunteers by area - if you need supplies, or if you have them- please reach out &amp; retweet\n',
 u'rt @biznewscom: zuma\u2019s speech was dubbed by esteemed sa writer rian malan as mugabe 2.0.',
 u'rt @dailymaverick: what does the working class stand to gain from the #zumamustfall campaign?\n a column by karl clo\u2026',
 u'rt @dailymaverick: recall president zuma! a column by tshepo motsepe @tshepom22',
 u'rt @biznewscom: the south african finance minister fiasco has international investors on edge.\u2026',
 u'rt @tferriss: how to lose 100 pounds on the slow-carb diet \u2013 real pics and stories',
 u'rt @time: watch the republican debate in 4 minutes',
 u'rt @lifehackorg: 10 efficient workout tips for 2016\n\n#lifehack',
 u'rt @onnit: lower back in pain? here is how to help fix low back pain with one kettlebell exercise.\u2026',
 u'rt @ideas: philosophy you can use: the ancient, practical wisdom in stoic thinking:',
 u'@myanc_ words from a true leader',
 u'rt @bvb: #np jingle bells! #bvbsge',
 u'rt @analyticbridge: 7 free machine learning courses',
 u'rt @theminimalists: what the buddhists can teach us about household chores via @organiclifemag\u2026',
 u'rt @tferriss: exclusive warren buffett \u2013 a few lessons for investors and managers',
 u'tweeting just because.#zumamuststillfall',
 u'rt @mental_floss: this app teaches students ancient japanese calligraphy \u2014',
 u'rt @tferriss: the perfect posterior: kettlebell swings and cheap alternatives',
 u'rt @tomeatonsa: these are the south africans who make sure the wrecking ball keeps swinging. i will never trust any of them again. https://\u2026',
 u'rt @ideas: stephen hawking\u2019s theory of everything, animated in 150 seconds:',
 u'"... and that, in no other country on earth, is my story even possible." \nbarack obama. completely true!!! \U0001f44f\U0001f44f\U0001f44f\u270c\U0001f60a\U0001f60d',
 u'my followers live in usa (57%), spain(15%)... get your free map:',
 u'rt @socncharlotte: "a great many people think that polysyllables are a sign of intelligence." - barbara walters',
 u'rt @dr_morton: "i\'m convinced that about half of what separates successful entrepreneurs from the non-successful ones is pure perseverance.\u2026',
 u'rt @oracle: safra catz, #oracle ceo on 3 key characteristics of successful teams &amp; businesses. #oraclefinance #oow15',
 u'why children shouldn\u2019t be allowed to head a soccer ball.\n',
 u'rt @dreambox_learn: q&amp;a w/our ceo @jessieww "don\u2019t underestimate the power of passion" @twentytenclub @octaviagoredema\u2026',
 u'rt @nealshelton: @keysignature @matthewsalivenc thank you key signature, mayor jim taylor and matthews, nc! http://t\u2026',
 u'rt @keysignature: aeromyth is rocking @matthewsalivenc #matthewsalivenc @nealshelton',
 u'rt @keysignature: aeromyth is on stage now at @matthewsalivenc! @nealshelton #matthewsalivenc',
 u'rt @johnwross: aeromyth live on the main stage at #matthewsalive right now!',
 u"rt @matthewsalivenc: aeromyth (aerosmith tribute) will walk this way onto the #wellsfargo stage at 7:30pm. we're cryin' tears of joy! can't\u2026",
 u'rt @nealshelton: @keysignature @matthewsalivenc @blackhawklive @nealshelton \nlooking forward to the aeromyth show this weekend!\u2026',
 u"rt @keysignature: we're just a few days from @matthewsalivenc ! this year's headliners are @blackhawklive and aeromyth! @nealshelton http:/\u2026",
 u'\xbfquieres encontrar trabajo antes que nadie? estudia matem\xe1ticas o ingenier\xeda \U0001f60d\u263a\U0001f60a\u270c\U0001f44c',
 u'at hard rock cafe in the evening in broadway at the beach  @hrcmyrtlebeach #hardrockcafemyrtlebeach',
 u'at hard rock cafe in myrtle beach @hrcmyrtlebeach #hardrockcafemyrtlebeach',
 u'#artoflove at bechtler museum of modern art in charlotte heart emoticon\n#family #artmuseum @thebechtler \U0001f60d\U0001f31e\U0001f60a\U0001f49d\U0001f497\U0001f496',
 u'#epicentre #nightlife #downtowncharlotte #perfectweather \U0001f60d\U0001f31e\U0001f60a\U0001f49d\U0001f497\U0001f496\U0001f498',
 u'beautiful flowering trees in charlotte #flowers #trees #floweringtrees #ctl #perfectweather',
 u'middle of december so time to start thinking of next year. be prepared for cloud computing in 2016',
 u'have you heard about the latest update to #qlikview yet?',
 u'even low-tech product companies can use big data to stay ahead of the competition.',
 u'celebrating christmas with coffee and mince pies in #nexusucd this morning! feeling very festive round here!',
 u'how leadership is best fuelled by cloud computing and enterprise collaboration',
 u'project management &amp; cloud computing | smartdata collective',
 u'data sells - data enabling a new kind of journalism',
 u'14 ways iot will change big data and business forever - informationweek',
 u'as in many aspects of business, the cloud is a big trend in managed services.',
 u'save money and the planet with the cloud!',
 u'lots of new enhancements with the arrival of #qlikview 12',
 u'#bigdata helps manage crises effectively. just one more example of its power.',
 u'if you are still uncertain about the security of the cloud, these tips might put your mind at ease.',
 u'#netsuite ceo @zachnelson explains how one short elevator conversation brought huge changes to the company.',
 u'if you want your #bigdata projects to succeed, you need to have your ceo on board.',
 u'where is is the cloud headed in 2016? 7 cloud trends to watch in 2016',
 u'you can be thankful for these cloud advances any time of year!',
 u'with analytics becoming more widespread, you need the talent to get the most out of it.',
 u'\U0001f3b5"all i want for christmas is a new website!"\U0001f3b5 get your free website analysis to see how you can improve yours.',
 u"data visualization: understand what's working and what's not in your business | smartdata collective",
 u'get strategic! create actionable metrics that take your #business from where you are to where you want to be.',
 u"it's not what is under the christmas is who is around it that counts!",
 u'"if you take a sincere interest in others, they\'ll take a real interest in you." #quote',
 u'40 things you wish you knew before you started your blog -',
 u'love how this came together craig. relay is amazing!',
 u'know the legal risks of blab for brand marketing via @markwschaefer',
 u'social media can be a huge benefit or a reputation killer. don\u2019t leave public perception to chance. #influencerchat',
 u'smart ways to manage time on social media -',
 u'want to make better #socialmedia connections? be a relationship collector! #networking',
 u"want to make better #socialmedia connections? create better conversations. when you focus on that 1 thing, you'll attract the right people.",
 u'if you want to earn respect, don\u2019t be someone or something you\u2019re not. build your brand around you! #influencerchat',
 u'want to know what matters to your #socialmedia audience? ask them and then listen (really listen) to their answers. be the solution!',
 u"social media isn't a set it and forget it activity. if you want to succeed, you must track, measure and adjust - often! #influencerchat",
 u'7 smart #smallbusiness strategies to get noticed on social media -',
 u'rt @postplanner: how to set facebook goals w/ @rebekahradice @adamsconsulting @scottayres #blab\u2026',
 u'how to set facebook goals for the new year w/ @postplanner @adamsconsulting #blab',
 u'getting ready to go live! join me for "how to set facebook goals for the new year today" subscribe now \u27bc #blab',
 u'internet marketing predictions for 2016 via @erikemanuelli',
 u"7 fatal facebook mistakes you're making (and how to quickly overcome them) #socialmedia",
 u'6 habits of highly successful #socialmedia marketers -',
 u'rt @jensjta: endelig kom opdateringen af ms reporting services - l\xe6s dommen her! #ssrs #inspari #msbi #mssql #reporting\u2026',
 u'rt @philiphoyos: how much #darkdata does your organization have? with @gruset. #inspari #ikd15 #bi',
 u'rt @philiphoyos: how do you make data understandable? what is interesting about you #data? #storytelling  #inspari #ikd15 #bi\u2026',
 u'rt @philiphoyos: excited about #iot. #inspari #ikd15',
 u'rt @philiphoyos: how #powerbi can be utilized to show real-time #data from #iot. #tabular data. #inspari #ikd15 #bi',
 u'rt @tmv95: #inspari  #ikd2015 #ikd15 \ntableau as a game. \U0001f633',
 u'rt @philiphoyos: #bipowerhour #inspari #ikd15',
 u'rt @jensjta: martin kolding giver den gas med alt det nye fede i #microsoft #excel 2016 - #ikd15 #inspari @\u2026',
 u'rt @philiphoyos: hvad siger gartner  @jens_jacob_aarup #tivoli #inspari #ikd15 #bi',
 u'rt @dauledk: sql reports med html5 #sql2016 #ikd15',
 u'@jensjta b',
 u'@jensjta a',
 u'rt @jensjta: the train is packed with #inspari on the road to copenhagen. ready to share knowledge about #bi #ikd15',
 u'@jensjta e',
 u'\xe5rets vinder #eoydk entrepreneur of the year k\xe5ret af @kristian_jensen og christian frigast. stort tillykke til casa',
 u'@oestergaard og @kristian_jensen i h\xe5rd musik battle p\xe5 #eoydk entrepreneur of the year',
 u'900 samlet i bella center til entrepreneur of the year #eoydk',
 u'@sitecore on stage at #ey #entrepreneuroftheyear in copenhagen',
 u'rt @ivaekst: #succesonline summit er slutningen p\xe5 vores dk tour i \xe5r: 43 kommuner, 1700 kursister, 193 moduler i 2015\u2026',
 u'rt @maxmjsoderlund: some may feel dwarfed. #alibaba',
 u'a voice of christian sanity: "why standing against islamic violence forces you to rethink biblical inerrancy" l',
 u'on suddenly pretending to care about homeless veterans for just as long as it allows you to oppose helping refugees',
 u'rt @hapi_drinks: first time seeing 20/20 \U0001f60a',
 u'rt @petersinger: kill one to save many?  a question for designers of self-drive cars:\n',
 u'rt @specgram: sg editor\u2019s credo\u2014do as little as possible &amp; that unwillingly. \u2018tis better to receive a slight reprimand than to perform an a\u2026',
 u'rt @specgram: argument schemata\u2014the putdown: that\u2019s true but uninteresting in the ______ sense.',
 u"rt @languagelog: grammarians, whores, buffoons: from an anonymous colleague: i'm currently auditing jennifer houseman wegner's ... http://t\u2026",
 u'rt @academicssay: fractal wrongness\nvia b. augustine',
 u'rt @eff: an "enhanced driver license" puts insecure technology in your wallet. tell @jerrybrowngov to veto sb 249',
 u'rt @briankrebs: krebsonsecurity exclusive: sources: credit card breach at michaels stores',
 u'rt @briankrebs: tale of how feds infiltrated, busted a counterfeit credit card shop is full of opsec fail. get popcorn\u2026',
 u'only takes one of these episodes to destroy trust. just deleted chrome.',
 u'rt @ggreenwald: mcclatchy on how sen. martin heinrich became an nsa critic - &amp; only learned about its programs from newspapers\u2026',
 u'rt @evgenymorozov: "ford exec: \'we know everyone who breaks the law\' thanks to our gps in your car"',
 u"rt @briankrebs: very thought-provoking quote re: rsa &amp; nsa in jeff carr's blog post today on why he's pulling out of rsa conference http://\u2026",
 u"target customers' personal information hacked (new breach)",
 u"rt @schneierblog: security risks of embedded systems: we're at a crisis point now with regard to the security of embedded system... http://\u2026",
 u"@cspatatbo you're late",
 u"rt @jamilsmith: instead of watching the #gopdebate, listen to the new @intersectiontnr on trump's fear campaign. i'm trying to help. https:\u2026",
 u"rt @culturejedi: is trump taking it to the next level so we'll think carson and rand are sane? or is everybody just cray? #gopdebate https:\u2026",
 u'rt @schnabeybaby: "i don\'t like star wars because i\'m not into period pieces" - @cesposito',
 u'test for workkk',
 u'test for work',
 u'best southern albums of 2015 via @bittersouth',
 u'rt @egotripland: kamasi washington goes shopping at amoeba records, hollywood (video).',
 u'rt @jamilsmith: every doj civil rights investigation of a law enforcement agency since 1994, catalogued by @sarah_childress.\u2026',
 u'rt @ronanburtenshaw: shackling the masses with drastic capitalist tactics // @jacobinmag on martin shkreli and wu tang\u2026',
 u'also, martin shkreli da real terrorist.',
 u"guess who has hair, a respiratory system, loves mealy apples, and doesn't give a shit about star wars? me.",
 u"check out @thisamerlife episode 574: 'sinatra's 100th birthday.'",
 u'one of the most important pieces of journalism this year. an essential read. - white debt',
 u'bills add up when someone is shot, and costs spill over to taxpayers',
 u'rt @jessethorn: people have figured out that san franciscans don\'t like "frisco" but they haven\'t yet noticed the cringes at "san fran."',
 u'rt @luckypeach: trust us: seaweed is the key to an incredible burger.',
 u'your new medical team: algorithms and physicians',
 u"rt @djdodgerstadium: new york \nnew york \nnew york\n\nmusic hall of williamsburg w/ @venusxgg &amp; @suicideyear \n\ntonight's the night \n\ndoors at 8",
 u'rt @statnews: "food is still our weapon of choice." drug reps\u2019 secret to wooing doctors:',
 u'#flyelephant team is open to collaboration. contact dmitry spodarets (@m31_rootua), founder and ceo, to set up a meeting at #sc15. please rt',
 u'#kdnuggets about #flyelephant and new project opportunities #r #python #bigdata',
 u'thanks for the recent follow @thawabhazmi @theteacherteam @mimi91022 happy to connect :) have a great saturday. (by',
 u'@se_railway thanks for sharing southeastern, have a great saturday :) (insight by',
 u'thanks for the recent follow @microlease @bruinnguyen @davidmkyalo happy to connect :) have a great saturday. (via',
 u'@xplorestarstuff thanks for sharing srinivas anupoju, have a great friday :) (insight by',
 u'hello everyone, have a great friday! looking forward to reading your tweets :) (insight by',
 u'thanks for the recent follow @sheriatseagate @nimbussearch @sabersf happy to connect :) have a great friday. (via',
 u'thanks for the recent follow @alvaromangual @torontosj @jorgearomo happy to connect :) have a great friday. (via',
 u'@wagsocialcare thanks for sharing wagsocialcare, have a great thursday :) (insight by',
 u'thanks for the recent follow @poller @molinafm @straun happy to connect :) have a great thursday. (insight by',
 u'hello everyone, have a great thursday! looking forward to reading your tweets :) (insight by',
 u'thanks for the recent follow @elima166 @govindrajann @moabkasaby happy to connect :) have a great thursday. (via',
 u'thanks for the recent follow @nice_hpc @isezaki150416 @noriyukixy happy to connect :) have a great wednesday. (via',
 u'@ridepatco thanks for sharing patco, have a great wednesday :) (insight by',
 u'thanks for the retweets this week @jmhal @o_i_w @webcampodessa @rosenbergerd much appreciated! (insight by',
 u'thanks for the recent follow @mr_prbkr @mullingkintyre happy to connect :) have a great wednesday. (insight by',
 u'hello everyone, have a great wednesday! looking forward to reading your tweets :) (insight by',
 u'rt @insidehpc: flyelephant startup announces support for r, python, and public\xa0api',
 u'thanks for the recent follow @purecognition @jimacmcdermott @lamb003 happy to connect :) have a great tuesday. (via',
 u'rt @dsetrakyan: come to see my @apacheignite presentation at in-memory computing with apache ignite meetup -',
 u'#apacheignite allows storing data in on-heap as well as off-heap memory #fastdata',
 u'#apacheignite allows  multiple backup copies to guarantee data resiliency in case of failures #fastdata',
 u'service grid allows for deployments of arbitrary user-defined services on the cluster #apacheignite',
 u'ignite supports several protocols for client connectivity to clusters-ignite native clients, rest/http &amp; more',
 u'rt @24prbot: samaitra just opened a pull request to apache/ignite:',
 u'rt @24prbot: samaitra just opened a pull request to apache/ignite:',
 u'rt @misagh84: new contributions to @apereoorg #cas: @apacheignite &amp; @couchbase ticket registries &amp; a @couchbase service registry. thanks! #\u2026',
 u"#apacheignite ensures fault tolerance by replicating or partitioning the sessions' data across all cluster nodes",
 u'running sql queries w igniterdd - orders of magnitude faster than sql queries w spark native rdds or data frame apis',
 u'#apacheignite is capable of caching web sessions of all java servlet containers that follow java servlet 3.0 spec',
 u'#apacheignite provides high-perf cluster-wide messaging functionality to exchange data via publish-subscribe',
 u'unique capability of #apacheignite is a distributed #inmemory file system called ignite file system (igfs)',
 u'#apacheignite ordered messages - guarantees that messages will be received in the same order they were sent',
 u'in-memory data grid built from the ground up to linearly scale to 100s of nodes - read more #apacheignite',
 u'with jcache support you get in-memory key value store, concurrentmap apis, \npluggable persistence &amp; more',
 u'rt @worldofpiggy: the main problem of gemfire is that it has no support for ansi99 sql. apache ignite does. #distributed #data #bigdata',
 u'rt @mgarcia_cetax: apache ignite "in-memory data fabric"',
 u'#apacheignite - learn about shared memory layer for spark #inmemorycomputing #fastdata',
 u'rt @worldofpiggy: apache ignite vs gemfire #datagrid #bigdata #inmemory',
 u'bbc uses #artificialintelligence to track down new audiences for \u2018sherlock\u2019\n #datascience #ai',
 u'mit\u2019s new #deeplearning tool aims to predicts the memorability of your photos\n #datascience',
 u'deep learning in a nutshell: history &amp; training\n #datascience #machinelearning #deeplearning',
 u'the future of wind energy: seamlessly powering urban areas #future #bigdata #wind #tech',
 u' microsoft project oxford machine learning speaker and video apis available #machinelearning',
 u"machine learning frees up data scientists' time, simplifies smart applications #datascience",
 u' us military reveals it hopes to use artificial intelligence to create cybersoldiers #ai',
 u' ensuring developing countries benefit from big data #bigdata #developing #countries #benefit',
 u'have you seen our latest infographic on the fiction and fact surrounding big data? #bigdata #datascience #bigcloud',
 u'the coffee machines are taking over... @kdnuggets #machinelearning #deeplearning #datascience #iot #bigdata',
 u'rt @webefacts: who does what in the #datascience industry [#infographic] | #bigdata',
 u'rt @ie_bigdata: how to make big data work for franchises #articles #bigdata',
 u'rt @latinia: wearable banking to hit 2bn users by 2020 #wearbanking #wearable',
 u'rt @bryonshannon: 15 data science influencers you should be following on twitter and why #bigdata #datascience',
 u'rt @courtneydeinert: improving efficiencies and adding new innovative solutions are only as good as the revenue they can bring in. #iiot \nh\u2026',
 u'rt @danwithrowitpro: how analytics are transforming b2b sales - insidebigdata (via',
 u'rt @jus_wilson: what we all need - connected alcohol: absolut sets up internet of things testing lab',
 u'rt @waspkilluk: linkedin revamps its jobs listings with big data analytics',
 u"@trigentsoftware @bigdatatweetbot you're most welcome! have a great day! #bigcloud",
 u"@ecurasllc @luxuryhacks @ideatec_en @iloveaishahaadi @_moonwinx_ @milogulias @mooringsbbar @teluolunfu @josemolerm you're most welcome!",
 u'emc elastic cloud storage now certified on hdp',
 u'the case for a data science lab',
 u'vespa uses big data to design scooter for the 21st century',
 u'grow: build the ultimate internet marketing dashboard',
 u'meet the avant-garde of new relational databases',
 u'clearing the air around cloud computing',
 u'aadhaar: the world\u2019s largest biometric database',
 u'how historians and detectives can benefit from a semantic graph database',
 u'71 new tweeps followed me in the last week. i find relevant people to follow with the #copyfollowers feature of',
 u'data science to change the world or scratch an itch',
 u'neustar at the 2015 grace hopper celebration of women in computing',
 u'a big data success story made in ireland - irish times',
 u'wpi receives u.s. dept. of education funding to address shortage in big data computing professionals',
 u'compliance could be your selling point',
 u'rt @patricetruong: 4 agile #startup hacks that can transform your business @neilpatel',
 u'data: the new coal and\xa0steel',
 u'rt @tunguz: microsoft beats google, intel, tencent, and qualcomm in image recognition competition | #bigdata #ai #deeplearning\u2026',
 u"rt @tableau: data is big. so is visualizing it. read when it's best to use 13 common charts:",
 u'rt @insightsibm: scopri il valore aziendale della #security intelligence\n',
 u'rt @kirkdborne: how to use #dataviz and #analytics to improve customer satisfaction: #abdsc #bigdata #datascience #\u2026',
 u'winning elected office (head of government) knocks 2.7 years off life expectancy.',
 u'politics is a set of lies agreed upon. history is not.',
 u'are robots zombies?',
 u'@schoppik my analogy is spike sorting by threshold. it is less likely. regression to mean due to threshold of effects. #edges #space',
 u'bechdel test for atheism: two atheists meeting. talking about something other than god or religion.',
 u"@bryanconklin great! just email me and i'll pass it on to the publisher",
 u'the turnover rate of popular music:',
 u'the mary problem, resolved.',
 u'diversity in popular music peaked in the 60s, 70s and late 80s.',
 u'@cblatts @jhaushofer i wrote about this:',
 u'why primates like to look at data through their visual cortex, illustrated.',
 u'i wonder if they use matlab in matlab. they do seem to have the data. i should visit more often. \n',
 u'@jayvanbavel illumination priors',
 u'anova: cooking anything but data',
 u'strong effect of bifidobacterium on tumor suppression. #gut #beyondmentalhealth',
 u'"...the brain is just a big bit mixer" - the most quotable neuroscientist? @kordinglab?',
 u"map all the roads and you still don't know who is driving, with what cars, when or why. #connectome",
 u'reddit to start producing original content on cognitive neuroscience. a game changer? #cyborgnation',
 u'seminal paper: number of neurons and glia in the brain of a large number of mammalian species. #opensource too',
 u'@vskillingsworth @jayvanbavel yes, a priori. but read the paper. #experiment',
 u'@joerafel is that someone sliding down a staircase :)',
 u'rt @f_kremmer: improving child safety in motor vehicles - a safe communities approach #publichealth\u2026',
 u'rt @uniofoxford: testing the tolerance of the nuclear fusion reactors of the future',
 u"rt @mmmmmrob: if you have friends studying medicine, please do send them a link to meducation's new android app :)",
 u"@vincentdignan loads of stuff. some projects to pay the bills and a load of code that i'm trying to get into private alpha.",
 u"what's with the @twitter hearts. that's new! #stargazing",
 u'lovely work @ihid &amp; @meducation on the new #meducation app. all looks very clean. hope your servers keep cool and your cdn stays warm.',
 u'rt @speccollbrad: .@ueaarchives @uniofeastanglia launch the british archive for contemporary writing #bacw http://t.\u2026',
 u'rt @petergross11: this ones a cracker',
 u'rt @meriemlaifa: hello! yes #ilooklikeanengineer and i am proud (^_^)',
 u'@charlescare @meducation @strategyeye great stuff guys! keep it up :)',
 u"rt @charlescare: nice to see @meducation getting a mention as one of #birmingham's many exciting start-ups  @strate\u2026",
 u'@charlescare i just found this very helpful.',
 u'@pattishank ah, thanks for the tips :)',
 u"@pattishank interesting... i'm just looking at my social tooling. do you find useful?",
 u"upside to running own #startup. can relocate your office to wherever sells ice cream. downside: isn't a lot of spare cash for ice cream.",
 u'learning mean today... just read setting up a router with node.js',
 u"@charlescare how's meducation going?",
 u'never thought about this... but it makes complete sense. the uk telelphone numbers kept-back for tv',
 u"oh: 'how do you charge for hot water ', 'easy, just put through an extra shot of coffee and a sachet of ketchup.' #wesellketchup",
 u'rt @sophiepenrose: icelandic lullaby',
 u'rt @pattyarquette: i feel kind of sick inside 4 #ashleymadison hacked people. i am into fidelity &amp; honesty but public shaming is like bring\u2026',
 u'rt @viticci: the sad truth of 2015 tech twitter: 70% of my timeline is people complaining about something, every day. optimism is rare thes\u2026',
 u'rt @rbudavari: #vmwarensx nsx 6.2 has just gone live !',
 u'rt @zettagrid: are you an apra supervised organisation? our secondsite dr as a service is safeguarding your peers!',
 u'rt @ramirahim: excited about the launch of our new #juniper design &amp; arch center for the data center',
 u'rt @imaxava: follow-up: 9 september free international i-power workshop #vr4smallbiz',
 u'rt @ibmaustralia: the world #health organisation first precisely mapped outbreaks of smallpox on an ibm system 370 in 1976 #innovation http\u2026',
 u"rt @robincharlton8: - it's all about the numbers",
 u'rt @storagesport: george boole inspired sherlock holmes baddie moriarty, professor claims -',
 u'rt @lvbackup: new e-paper for #ibmi about the differences between using #lto tape &amp; tapeless backup.\u2026',
 u'rt @vrdmn: just blogged: modify sharepoint regional and language settings with jsom and javascript #sharepoint #offi\u2026',
 u'rt @mattein: awesome panel session from ignite - transforming your #sharepoint full trust code to the office app model\u2026',
 u'rt @officemechanics: three important upcoming changes to #office365 presented by julia white',
 u'rt @officemechanics: how to use #azure ad for unified and secure login to thousands of #cloud apps and services',
 u'rt @shinjikuwayama: \u2018the onion\u2019 has a \u2018vice\u2019 parody in the works called \u2018edge\u2019',
 u"rt @shinjikuwayama: a 2004 email from jeff bezos explains why powerpoint presentations aren't allowed at amazon",
 u'rt @bradjensen: sending impromptu messages on the ibm i #ibmi',
 u"rt @sunfoundation: we're looking for an intrepid software developer to help expand our work with government #opendata. apply today! http://\u2026",
 u'eating habits are contagious.',
 u'booking shows for next year and my irish friend, ian cleary, is on the list.  if you do not know who this is then...',
 u'i posted a new photo to facebook',
 u'content should ask people to do something and reward them for it. \u2013 lee odden #content #marketing #podcasting',
 u'be where the world is going.  beth comstock  #podcast',
 u'@recruiterguy81 @carolesanek and the fact that a former partner actually had the flipping nerve to call me obnoxious.  me????',
 u'make your marketing so useful people would pay for it. - via @jaybaer #podcast',
 u'i recorded my last podcast of the year today - actually it was a redo of one that failed due to a recording...',
 u'your brand is a story unfolding across all customer touch points. \u2013 jonah sachs #podcast',
 u'social media is about the people. not about your business. provide for the people and the people will provide for you.\u2013 matt goulart',
 u'content: there is no easy button. - scott abel #podcast',
 u'carole here to say my company (and i) recently were awarded being in the top 10 of marketing/pr companies in the...',
 u'a dynamic \u201cbutterfly\u201d -  over coffee\xae  celebrates #podcastday with "crazy marketing lady" @crazymktgladies',
 u'we have a closed group on fb for marketing conversations no self-promos pls srch &amp; ask to join the padded room #entrepreneurs',
 u'we\u2019re all learning here; the best listeners will end up the smartest. \u2013 josh bernoff #podcast',
 u'content should ask people to do something and reward them for it. \u2013 lee odden #content #marketing #podcasting',
 u'we need to stop interrupting what people are interested in and be what people are interested in.  craig davis #podcast',
 u'i posted a new photo to facebook',
 u'inspiration doesn\u2019t respond to meeting requests. you can\u2019t schedule greatness.  jay baer #podcast',
 u'social media is a contact sport.  \u2013 margaret molloy #podcast',
 u'social media is about the people. not about your business. provide for the people and the people will provide for you.\u2013 matt goulart',
 u"just discovered john rowley &amp; francis rubin's great book on #intdev #consulting is free at thanks @oxfamgb !",
 u'loved the hc3 innovation webinar 11: transmedia and entertainment education. thanks to all the presenters: it was really useful.',
 u"amazing. take a look at this, anyone who's interested in #london and #housing",
 u'rt @ovalhouse: 1966: and so it begins with people show 1 @peopleshowltd #50tweetsfor50years',
 u'rt @un_satisfactory: in expatistan it\'s cirrhosis. mt \u201c@amazing_maps: the disease most likely to kill you by country."\n-\u2026',
 u"wow. @begumnadiya, anne-marie duff and @cerysmatthews all in one spot for @wateraiduk's #deliverlife appeal",
 u'fascinating talk from rob burnett of @awelltoldstory talking about shujazz on  #transmedia #storytelling #c4d #bcc',
 u"didn't feel like wrapping the christmas presents. realised have no decorations.",
 u'@neureekie if only! will be watching out for your 2016 festivities.',
 u'my kind of motivational quote: success consists of going from failure to failure without loss of enthusiasm. w churchill #mondaymotivation',
 u'oh to be in scotland on december 17th!',
 u'thanks for sharing @solemu !',
 u"surely isn't a variation on anti-homeless spikes outside @mariecurieuk in islington? people used to sleep there.",
 u'rt @doyle_kevin: softball piece by @forbes on cambodia commerce minister sun chanthol (so soft it should be flagged as advertorial) https:/\u2026',
 u'@hollyferg  #benjaminclementine ticket is on twickets at',
 u"@melsanson  hi mel, i've just put one on twickets -",
 u'rt @versobooks: isis expert patrick cockburn on air strikes in #syria: wishful thinking &amp; poor information https://\u2026',
 u'@hollyferg the ticket is yours! can you pm me your phone and we can arrange how to meet?',
 u"@hollyferg if my boyfriend doesn't recover from his sickbed it will be yours - cover price \xa318.50.",
 u'@hollyferg i might have one. will know tomorrow. are you still looking?',
 u'not happy w/ what they did w/ #redtornado in #supergirl tv show (appearance, lack of speech, a villain punching bag to hot head #supergirl',
 u"it's crazy how the #legofriends sets are so darn good while #legocity (which is mostly police, firefighting, &amp; construction) lack variety \U0001f615",
 u'why am i so complacent about petty chores?',
 u"not all degrees r created equal so don't compare the value of #stem degrees 2 that a #politicalscience's\n#nationaloffendacollegestudentday",
 u'#afol #legocity release date of 60117 (van + caravan) january 1st, 2016',
 u'#lego set 60117 (van + caravan) is officially listed on their website. it will be available on january 1st, 2016. #legocity',
 u'#mickeyrourke &amp; and the #bogdanov twins are *not* the same person',
 u'a #genderbender #cosplay of #scorpion or #subzero please! \U0001f600',
 u'the more i look at the pic of #lego60117 the more i have my doubts about purchasing it. i will wait until i see a review of it on youtube',
 u'i am in a \U0001f363\U0001f371 kinda mood.\U0001f600',
 u'@the_aznphenom it had to happen at one point \U0001f62d',
 u'3z 3z 3z',
 u'#draymondgreen = 2016 #nbaallstar ?',
 u'devising a #legomoc of my childhood apartment.',
 u'curry career 3 count = 1,316 #nba #3isgreaterthan2',
 u'close game between #dubnation &amp; #celtics',
 u'high likelihood #dubnation goes 24-0 after beating #bostonceltics 2nite',
 u'congrats to #kevindurant &amp; #steveblake for passing the 1,000th mark for career three pointers made. #nba #3isgreaterthan2',
 u"oh yes it's been a glorious day!",
 u'#tmnt2 coming out in 2016 \n\ni will probably not see it \U0001f612',
 u"l&amp;#x27;observatoire du nanomonde est sorti de presse! les unes d'aujourd'hui via @_veilleactu",
 u'rendre l\u2019intelligence artificielle accessible \xe0 tous, \xab\xa0une fa\xe7on de se blanchir\xa0\xbb pour les entrepr... | @scoopit',
 u"l&amp;#x27;observatoire du nanomonde est sorti de presse! les unes d'aujourd'hui via @elcp72 @bigmitchagency",
 u'robotique : l\u2019av\xe8nement des drones collaboratifs | @scoopit',
 u'les led flexibles \xe0 nanofils ouvrent la voie vers des \xe9crans pliables | @scoopit',
 u'la nasa et google battent un record de vitesse avec leur ordinateur quantique - le monde informatique | @scoopit',
 u"l&amp;#x27;observatoire du nanomonde est sorti de presse! les unes d'aujourd'hui via @nanotech_news",
 u'offrez un toit aux migrants avec la vie - diaporamas - la vie | @scoopit',
 u'l&amp;#x27;observatoire du nanomonde est sorti de presse!',
 u"l&amp;#x27;observatoire du nanomonde est sorti de presse! les unes d'aujourd'hui via @_veilleactu @neuinfo",
 u'l&amp;#x27;observatoire du nanomonde est sorti de presse!',
 u'des robots qui apprennent en imaginant gr\xe2ce aux r\xe9seaux de neurones - h+ magazine | @scoopit',
 u'rdc : th\xe9r\xe8se izay et son robot 100 % \xab made in kin \xbb - | @scoopit',
 u'troubles de la m\xe9moire : les robots de compagnie au secours des patients | @scoopit',
 u'des robots voient \xe0 travers les murs en se servant du wi-fi | @scoopit',
 u'dans 4 ans, la disruption viendra de la biosynth\xe8se et de l\u2019intelligence artificielle | @scoopit',
 u'robot : il peut \xeatre votre ami. a certaines conditions | @scoopit',
 u"les humains peuvent ressentir de l'empathie pour les robots | @scoopit",
 u"l'universit\xe9 de t\xe9h\xe9ran pr\xe9sente son nouveau robot humano\xefde - h+ magazine | @scoopit",
 u"l&amp;#x27;observatoire du nanomonde est sorti de presse! les unes d'aujourd'hui via @neuinfo",
 u'rt @personalboards: sea shells cutting board ~ bottom text | words with boards',
 u'rt @customwoodboard: happy hour cutting board and serving board',
 u'rt @personalboards: trivet - petula the pig | words with boards',
 u'rt @personalboards: trivet - pippin the pelican | words with boards',
 u'rt @customwoodboard: personalized seashells cutting and serving board',
 u'rt @personalboards: large horizontal cutting board ~ top text | words with boards',
 u'rt @musiccloudpromo: i will give you 10,000 soundcloud plays  for $5 via @mycheapjobs_',
 u'rt @musiccloudpromo: 10,000 soundcloud plays  for $5 via @mycheapjobs_',
 u'rt @customwoodboard: pet leash board',
 u'rt @personalboards: trivet - rojo the rooster | words with boards',
 u'rt @teamcarma: motorists turn to ride-sharing apps: (via @nbc12)',
 u'rt @susanshaheen1: could self-driving cars spell the end of ownership? #dmo2015 #sharedmobility #sharingeconomy htt\u2026',
 u'@dimejosh ahhhh josh, so sorry dude. i have no words',
 u'rt @susanshaheen1: "a holistic guide to shared mobility" on @linkedin #sharedmobility #dmo #carsharing\u2026',
 u"rt @sosventures: we\u2019re now sosv\u2014the accelerator vc! follow us at @sosvvc and sean o'sullivan at @sean_sosv.",
 u'@sean_sosv @sosventures @sosvvc nice one s',
 u'rt @jebbush: donald trump is unhinged. his "policy" proposals are not serious.',
 u'the most active vcs in the internet of things and their investments in one infographic via @cbinsights',
 u'@sugrue @garyvee pretty good questions',
 u'rt @susanshaheen1: carsharing services take advantage of mobile tech says new report; #dmo2015 #sharedmobility http\u2026',
 u'how does your leadership team rate? \u2013',
 u'@ryanbethencourt can you dm me your e-mail address can seem to find you anywhere ;(',
 u"a former wall street rainmaker says running a bank is a 'breeze' compared with a startup via @sfgate",
 u'rt @susanshaheen1: .@blablacarbr moves into brazil; #dmo2015 #ridesharing #p2p #sharedmobility #sharingeconomy http\u2026',
 u'let\u2019s #go100percent clean energy at the #paris climate summit. sign &amp; join the global #climatemarch',
 u'rt @susanshaheen1: carsharing service membership will grow to 26 million worldwide in 2020 says new report; https:/\u2026',
 u'rt @susanshaheen1: how transportation planning is stuck in the past + new report; #sharedmobility #dmo2015 #p2p htt\u2026',
 u'rt @psteinberg: 10 carpooling apps to make you jump with joy this weekend in san francisco that aren\u2019t\xa0uber via @te\u2026',
 u'rt @leahthehunter: the revols team presents today at @haxlr8r. their kickstarter launched yesterday. in 15 hours,  $300k+ raised. yep. http\u2026',
 u'interesting thing?  out of 1059 shares not a single twitter share',
 u'datazenit beta v0.9.26: the biggest update so far',
 u'a scala view of rust',
 u'we have added selectable rows and a few handy keyboard shortcuts to sensei grid v0.3.14',
 u'new beta release - datazenit v0.9.25. sortable data grid, improved row duplication and more',
 u'rt @mnbbrown: check out @datazenit for a psql and mysql query tool. looks very promising @pjlaurie @smitec.',
 u'sensei grid v0.3.11 released',
 u'rt @mnbbrown: @lauriswat @datazenit loving it so far. fav thing: keyboard shortcuts. ctrl+enter to run query. csv export + ability to cance\u2026',
 u'datazenit now has a separate windows package - download from datazenit store',
 u'latest datazenit beta v0.9.24 \u2013 team view, user invites, option to execute text selection in query editor',
 u'@rabbitonweb @purelyfunc yeah, we wanted that too, but sadly twitter polls only allow two choices.',
 u'rt @livet0ski: @datazenit looks interesting. will the tool be open source?',
 u'rt @ja_tomasz: @datazenit depends. if i am starting a project from scratch and i see a non-relational way to implement my models i tend to \u2026',
 u'poll: sql or nosql?',
 u'chaos tomb: visualizing gameplay with d3.js and sql',
 u'@livet0ski some parts of datazenit are already open sourced, e.g., the data grid component is available on github. more coming later on.',
 u'introducing datazenit community',
 u'postgresql locking revealed',
 u'datazenit beta release v0.9.23 \u2013 fixed layout, function support in query builder and more',
 u'you can now use an integrated feedback button inside datazenit to submit bugs, issues and feature requests #beta',
 u'rt @tilakn: @datazenit looks certainly interesting. is connecting oracle on the cards ? just subscribed for beta.',
 u'\u201cfuture: that period of time in which our affairs prosper, our friends are true &amp; our happiness is assured.\u201d #quote',
 u'the #sec will allow #stocks to be issued and traded on internet via #blockchain #finance\n',
 u'#fireeye flamed: single email allows total network access #cybersecurity #vulnerability\n',
 u"police found a suspect in the hack of millions of kids' toys #hack #iot #cybersecurity\n",
 u'do i need to register my drone w/ the faa? check this list @wired #drone #law #tech\n',
 u'vdi comes to the raspberry pi w/ 1080p video #pi #tech #vdi  -',
 u"@mhasib @evanderburg let's hope passwords are gone even faster!",
 u'eighty percent of success is showing up. \u2013woody allen #quote',
 u'ho ho hosed: asian biz malware pwns air-gaps, thousands of #android s #malware #mobile\n',
 u"beware pandora jewelry 'clearance' offers #phish #cybersecurity\n",
 u'survey: passwords will be eliminated within 10 years #cybersecurity #infosec\n',
 u'mackeeper breach exposes 13m apple users #breach #cyberscecurity\n',
 u'toyota corolla hybrid car hacked via smartphone #hacked #cybersecurity #cars #mobile\n',
 u'european space agency hacked. staff and subscribers\u2019 data exposed #hacked #eu #cybersecurity\n',
 u'@lmbrownlee1 iot has and will open up many more opportunities for compromise!',
 u'@lmbrownlee1 lisa, sure. the holes are numerous. one of many. some are easy to take advantage of, others require more skill.',
 u'@lmbrownlee1 when it stops happening, i will be thinking, something else must happening behind the scenes, just currently hidden from sight',
 u'eighty percent of success is showing up. \u2013woody allen #quote',
 u'ho ho hosed: asian biz malware pwns air-gaps, thousands of #android s #malware #mobile\n',
 u"beware pandora jewelry 'clearance' offers #phish #cybersecurity\n",
 u"rt @mikael_moeslund: exciting times ahead for easi'r, busy day today in prague... #startup",
 u'rt @mikael_moeslund: as an experienced night owl, i know for a fact that my productivity peaks both early mornings and if i work at night h\u2026',
 u'rt @mikael_moeslund: #ready, #set and #go. multiple phases defined and phase 1 due date set. let the #project commence. #autotech #crm http\u2026',
 u'rt @mikael_moeslund: yet another exciting day waiting ahead... this magic monday is travel day! #startuplife #entrepreneur #excitingday htt\u2026',
 u"rt @mikael_moeslund: making it easy to compare the #capabilities of easi'r and how it has been designed to help build and grow #relations h\u2026",
 u'rt @mikael_moeslund: we need to have the right people -and the right number of people - to keep our #business #growing. happy to welcome ye\u2026',
 u'rt @mikael_moeslund: the first half-year 2016 #roadmap of awesome #features are scoped and sprints have started. #crm #autotech #ontime htt\u2026',
 u"rt @mikael_moeslund: planning. we're considering to integrate easi'r into a world leading dealer sales tool. 100 importers. 30 countries. h\u2026",
 u"rt @mikael_moeslund: #awesome. 570 new dealers and 3500 users on #crm. only 0.08% have inquired for 1to1 training.\nthe #power of easi'r ! h\u2026",
 u'rt @mikael_moeslund: i reflect on years of milestones, for myself, my family, and my #startup. my kids made me a better #entrepreneur https\u2026',
 u'rt @mikael_moeslund: spending time with friends in new offices, to jointly create a game changer for the automotive industry #autotech http\u2026',
 u"rt @mikael_moeslund: getting #ready. an essential guide to understanding the dynamics of a startup's board of directors #startuplife https:\u2026",
 u'rt @mikael_moeslund: cold starting myself.  -11 degrees celcius, 05.31am, me and my mountainbike in the forrest heading for work #startup h\u2026',
 u"rt @mikael_moeslund: understanding the true power of easi'r. comparing dealersocket with easi'r. #crm #funday #disruptivechange https://t.c\u2026",
 u'rt @mikael_moeslund: today we signed a data security agreement to manage &gt;3 million customers for a new client of ours. ready, steady, go h\u2026',
 u"rt @mikael_moeslund: our easi'r built-in artificial intelligence feature changes the way dealers interact with consumers #disrupt #crm http\u2026",
 u'rt @mikael_moeslund: with autonation - the largest u.s. retailer of new cars, trucks and suvs. 285 dealers sell &gt;335.000 #autotech #crm htt\u2026',
 u'rt @mikael_moeslund: amazing day with ford in peoria, phoenix az #autotech #crm',
 u'rt @mikael_moeslund: looking forward to meet top-performing dealerships in the u.s. #autotech #crm #gowest #automotive\u2026',
 u'rt @mikael_moeslund: this week people were brought together around one table. with one aim. winning the uk market for #automotive #crm http\u2026',
 u'the art of data science: the skills you need and how to get them\n',
 u'\U0001f4ddwe need bloggers. shoot an email if interested and tell us how truly data-driven are you. any questions, just ask!',
 u'rt @inc: to grow its platform, slack is handing out $80 million to developers',
 u'how #data is vital to ensuring effective #engagement',
 u'the adoption of data science in companies like uber, netflix and amazon \n',
 u'with governments collecting so much data, at least we should get useful apps\n',
 u'insights you weren\u2019t expecting from big data. how do you gain for exponential growth?\n',
 u'rt samuel_wong_: still test driving ibm watson analytics - take 3',
 u'rt marketingmag: the star wars effect in full force this christmas',
 u'comparing 7 python data #visualisation tools',
 u'we need a little help for a simple experiment. could you please "google datafying digital"? thanks :)',
 u'how you can use big data in your small business. yes, it is personalised email marketing.',
 u'10 tips for beginning your big #data journey - an analytics culture is very important.',
 u'how you should explain big data to your ceo \n',
 u'programmatic ad spend in japan reaches mobile tipping\n',
 u'rt slideshare: chief data scientist chrishwiggins discusses his role and the importance of data at the nytimes:',
 u'rt @inc: to grow its platform, slack is handing out $80 million to developers',
 u'rt teradata: 5 ways to create and nurture customer loyalty',
 u'rt datafloq: how big data &amp; advanced analytics will change transportation - by gocloudmoyo \u2026',
 u'join us on #facebook for more data-driven #insights without the 140 character limit.',
 u"rt @alexheid: the university that broke the dark web is still running tor nodes - but it's not what it appears via \u2026",
 u'rt @ep_justice: #eudatap: @ep_justice to vote on #dataprotection reform (regulation &amp; directive) on 17 december, strasbourg. agenda https:/\u2026',
 u'rt @weltwoche: aktuelle ausgabe: \xabich k\xe4mpfe bis zum schluss\xbb - svp-s\xfcnneli \xfcber der romandie - \xabdie galaxie z\xe4hlt auf uns!\xbb\u2026',
 u"rt @androidpolice: google pulls nowsecure's android vulnerability scanner from the play store",
 u'rt @eucourtpress: #generalcourt annuls \u20ac790m fines imposed on airline companies for air cargo cartel',
 u'rt @malmstromeu: eu and @wto partners finalise high tech trade deal #ita #mc10nairobi /jl',
 u"rt @electoralreform: rt if you think mps were wrong to block @jreynoldsmp's proportional representation bill in the commons today",
 u"rt @ernestmaragall: we've sent a letter to @junckereu demanding transparency in the distribution of the \u20ac3bn aid package to #turkey https:/\u2026",
 u'how nice of europol help #teamjunker clean up luxembourg of stolen cars #luxcar #opblueamber',
 u'rt @akamai_soti: the @akamai q3 2015 state of the internet report is now available - get the latest info on connection speeds &amp; more: https\u2026',
 u"rt @torservers: our @tor2web gateway is back online now. hopefully the ddos won't return. can you sponsor a location for additiona\u2026",
 u'rt @torservers: our @tor2web gateway is under ddos and has to be nullrouted for a while (12 gbit/s). sorry for the downtime.',
 u'rt @twittereng: the design, architecture, and implementation of twitter\u2019s failure testing framework:',
 u'rt @icomofficiel: "we are standing by libya to make the future of this country better." #icomredlist @statedept representative now speaking\u2026',
 u'rt @cbwevents: report on first day at the biological weapons convention meeting of states parties posted\n #bwcmsp201\u2026',
 u'rt @bgpstream: bgp,hj,hijacked prefix as1200, amsterdam internet excha,-,by as18403 the corporation for fina,\u2026',
 u'rt @dannyshawbbc: computer virus hits a number of bbc reporters (including me) originating it seems from a hack attack on pr team behind ma\u2026',
 u'rt @jpmens: \u201ccell phone based tampon monitoring system\u201d, us patent #internetoftampons',
 u'rt @supertrack_it: nice meteor echoes from graves radar.   #geminidsmeteorshower #meteorshower #geminids #hamradio',
 u'rt @sgi_corp: our latest blog looks at how sgi is supporting the dod\u2019s hpcmp initiative #hpc #afrl\u2026',
 u'rt @exasolag: #career #opportunity at exasol: \u2013 we are looking for a #senior operations #engineer. #itjob https://t\u2026',
 u'rt @mrcfabbri: .@birstbi and @exasolag partner to deliver enterprise bi &amp; discovery at unprecedented speeds',
 u'rt @michaelvanlaar: learn everything about the technology of the world\u2019s fastest in-memory #database:\u2026',
 u'rt @exasolag: .@mjasay explains four ways #data visualization makes #bigdata easier:',
 u"rt @martha_bennett: and yesterday's key announcement: @birstbi &amp; @exasolag partnership. good move for both. #analyt\u2026",
 u'rt @humwin: birst and exasol partner to deliver enterprise bi and governed discovery at unprecedented speeds\n\n',
 u'rt @exasolag: oracle says forget about benchmarks. @danwoodsearly tells you why you shouldn\u2019t:\u2026',
 u'rt @jasminesatdell: exasol produces &amp; distributes its imdb database appliance globally by partnering w dell oem services #iwork4del https:/\u2026',
 u'rt @exasolag: customers opt for dedicated #analytics as they tire of big vendors. interview with @exagolo https://t\u2026',
 u'rt @costello66: exasol distributes its imdb database appliance globally by partnering w dell  #iwork4dell https://t\u2026',
 u'rt @bigdataprofiles: birst and exasol partner to deliver enterprise bi and governed discovery at unprecedented speeds\u2026',
 u'rt @exasolag: 10 technologies shaping the future of manufacturing: predictive #analytics leads the way \u2013 \u2013 by @loui\u2026',
 u'rt @carstenbange: .@birstbi and @exasolag partner to complement cloud data source with a fast in-memory database',
 u'rt @kesbutters: big data and business intelligence:\xa0 #bigdata #businessintelligence',
 u'rt @birstbi: .@birstbi to embed @exasolag in-memory #analytic database w/its pioneering networked #bi\u2026',
 u'rt @automationblues: automationblues is out! stories via @andreasrades @exasolag @jabawack',
 u'rt @nuevalor: rt aleonardrau: inspired by exasolag #supplychain #management #infologis #expertforum\xa0\u2026',
 u'webinar: how to guarantee business success with fast data analytics',
 u'rt @birstbi: .@birstbi partnership with @exasolog delivers enterprise-caliber performance &amp; scalability. https://t.\u2026',
 u'rt @vivekranjandixi: exasol produces &amp; distributes its imdb database appliance globally #iwork4dell\u2026',
 u"'tis the season. it's nearing that time in december. consider something nifty from gmo free usa. you can help...",
 u'mark ruffalo goes hulk on monsanto ceo, hugh grant. after the interview that hugh grant gave to cbs this morning...',
 u'could gmos be behind your digestive problems? the incidence of leaky gut is on the rise since gmos were...',
 u"action alert: you're making a difference! we've reviewed the omnibus spending bill published last night, and it...",
 u'yet another report - this one from spain - proves that gmos are a fraud. yields are no better, insect resistance...',
 u'$10 million challenge to monsanto: \u201cif monsanto can disprove the fact that there are no safety assessment...',
 u'syngenta has sued top u.s. grain exporters cargill inc. and archer daniels midland co. over losses that u.s....',
 u'bernie sanders had something to say about monsanto at a recent town hall meeting: "monsanto is a very, very...',
 u'breaking: taiwan\u2019s government has passed amendments to the school health act that ban the use of genetically...',
 u"if you're looking for something for your elementary school age child's teacher, look no further. kk's cupcakes is...",
 u'action alert: call your u.s. senators and representatives and tell them: no sneaky riders in spending bills. we...',
 u'breaking news: city of montr\xe9al, canada, bans neonicotinoid\ninsecticides linked to mass bee decline. montr\xe9al has...',
 u'danish pig farmer, ib pedersen, found that the incidence of birth defects &amp; other health problems went way down...',
 u'henry i. miller - another sneaky gmo shill and operative that works to defame public scientists in the interest...',
 u'weed killing robot developed by bosch could put an end to the use of roundup and other carcinogenic herbicides in...',
 u'while these gmo chickens were not approved for human consumption and we applaud the treatment of lal deficiency,...',
 u'australian women strip to protest roundup spraying: a group of north coast women have put their bodies on the...',
 u'scientists warn of dangers of multi-toxin gmo bt crops: hazards to non-target insects and mammals from...',
 u'after we saw the demise of a potential buyout of syngenta by monsanto, a new merger deal between agrichemical...',
 u"join gmo free usa on ts\u016b... a growing social media platform that shares 90% of its revenue with users. here's...",
 u"the predict conference 2016 is on the 4-5 october. early, early-bird tickets are on sale now! no's are limited, so book early! #predictconf",
 u'the predict book has arrived! \n2015 attendees: check your email for the ebook link. \n#predictconf',
 u'our ceo @cronanmcnamara is featured in @siliconrepublic\'s "10 experts unlocking the secrets of big data" article',
 u'register for free for @brighttalk webinar: democratisation of data analytics by @cronanmcnamara on 20th jan',
 u"don't forget to register for our free predict webinar hosted by acia @aon_plc at 2pm using this link: #dataanalytics",
 u"our delorean went down well at #predictconf, unfortunately we couldn't get a hoverboard this year.\n#backtothefuture",
 u'we created some great videos for you from predict 2015. check them out here: #predictconf',
 u'register for our free predict webinar, hosted by @aon_plc. the webinar takes place at 2pm today: #dataanayltics',
 u'register for our free webinar on predictive analytics, hosted by @aon_plc: the webinar takes place at 2pm tomorrow!',
 u'@porcelinad shares tips for people working with data visualisation at predict 2015: #predictconf',
 u'dr. nora khaldi @nuritasresearch gives tips to new companies on making data driven decisions: #predictconf #data',
 u'check out the latest videos featuring some of the amazing speakers at predict 2015: #predictconf',
 u'mark brosnan @xanadu_ie shares his data tips at predict 2015: #predictconf',
 u'rt @sandrinepigat: fascinating talk on ibm watson: it can read and understand 200 mio pages in 3 seconds. #efsaexpo2015 #openra @cremegloba\u2026',
 u"will data journalism replace traditional journalism? @porcelinad from @irishtimes says no &amp; here's why: #predictconf",
 u'watch this short video &amp; discover how @nuritasresearch uses data to help their customers make decisions #predictconf',
 u'at #efsaexpo2015 - should we run a poster session like this at #predictconf 2016?',
 u'check out our new #predictconf media section featuring pictures and articles from predict 2015:',
 u'read the latest @cremeglobal newsletter featuring highlights &amp; pictures from the #predictconf:',
 u"rt @cremeglobal: ''data analytics is at the forefront of the third industrial revolution''. read our highlights from the #predictconf http:\u2026",
 u'@monstercat96 fashion is beautifull! i wish to participated in it!',
 u'@c_thoughts fashion is beautifull! i wish to participated in it!',
 u'rt @fashionmusic: unbelievable! this beauty goes on sale \u203c\ufe0f amazing, couture, handmade gown ! absolutely one of a kind fashion... https://t\u2026',
 u'"we are all part of the ultimate statistic -- ten out of ten die."',
 u'@catwalkmodels_ fashion is beautifull! i wish to participated in it!',
 u'@dennisbasso fashion is beautifull! i wish to participated in it!',
 u'"warning: you have entered a tact free zone."',
 u'@shessopoised fashion is beautifull! i wish to participated in it!',
 u'rt @mpdesignerwear: #uksopro #flockbn #xmas #gifts #party #dresses #handbags unbelievable prices #queenof #fashion',
 u'"everybody wants to go to heaven, but nobody wants to die."',
 u'@sergeo_london fashion is beautifull! i wish to participated in it!',
 u'"last night i lay in bed looking up at the stars in the sky and i thought to myself, "where the heck is the ceiling?!""',
 u'@andersenhenrik5 fashion is beautifull! i wish to participated in it!',
 u'@carlgriggsv9kt fashion is beautifull! i wish to participated in it!',
 u"rt @lukeyw10: my love for @burton_menswear is unbelievable can't wait to go shopping there next week\U0001f60d\U0001f44c\U0001f3fc\U0001f6cd! #burtonforlife #fashion #love",
 u'"going to church doesn\ufffdt make you a christian any more than standing in a garage makes you a car."',
 u'@asktnd fashion is beautifull! i wish to participated in it!',
 u'"the only way to keep your health is to eat what you don\ufffdt want, drink what you don\ufffdt like, and do what you\ufffdd rather not."mark twain"',
 u'@joanna__simpson fashion is beautifull! i wish to participated in it!',
 u'rt @diamondstone133: unbelievable!!! ~african fashion, ankara, kitenge, african women dresses, african prints, african me\u2026',
 u'experts doubt google\u2019s claim about its quantum computer\u2019s speed',
 u'50 years of #datascience \n #machinelearning #bigdata',
 u'the university of cambridge to build a new artificial intelligence research centre - biztekmojo',
 u'#bigdata, #iot and #artificialintelligence will drive digital transformation for the balance of this decade.',
 u'mom/daughter mstrpice. wonder if #deeplearning cld cr8 smthng sensible on top of scribbles?',
 u'ibmvoice: how new technology helps blind people explore the world #robotics',
 u'how to analyze a nuclear bomb #physics #nuclear',
 u'@noelinel glad to be connected!',
 u'the big boys open source their #machinelearning frameworks @google @facebook @microsoft @intel @ibm @linkedin',
 u"18 rsrchrs rvl the mst imprssve thng they've seen. #artificialintelligence #machinelearning",
 u'#artificialintelligence aims to make @wikipedia friendlier #datascience',
 u'#artificialintelligence takes to the racetrack: driverless cars get their own racing series\n',
 u'@ibm with @apachespark support. \n #datascience #analytics',
 u'@phaseit absolutely. bringing that education to the masses is my primary passion.',
 u'@phaseit agreed. the point is to not confuse large scale data collection and management (engineering) with discovery (science).',
 u'how is #datascience different from #analytics in general?\n @quora #machinelearning',
 u'my @quora answer to how is #datascience different from #analytics in general?',
 u'engineers create droid that could replace firefighters, soldiers and bomb disposal experts. \n',
 u'#iot won\u2019t work without ai. let the machines reveal the opportunities. #machinelearning',
 u"finishing touches being put on the world's biggest telescope #space #science",
 u'@nate_lacombe sure, what is your involvement?',
 u'@doodlebug0 is it easier to believe something when people put up million$ for you to believe it?',
 u'every era needs its myth cycle. star wars is ours | matthew d\u2019ancona',
 u'hello everyone, have a great wednesday! you and your tweets are important :) (insight by',
 u'@squirestocks thanks for sharing squire stocks, have a great wednesday :) (insight by',
 u'cruz campaign credits psychological data and analytics for its rising success',
 u'thanks for the retweets this week @shakamunyi @blog4forall @rad_reza much appreciated! (insight by',
 u'@mostafaelzoghbi thanks for sharing mostafa elzoghbi, have a great wednesday :) (insight by',
 u'how data analytics is shifting politics',
 u'@mspowerbi thanks for sharing power bi, have a great wednesday :) (insight by',
 u'thanks @morningstarusmc @paul_turley @goappsforce for being top engaged community members this week :) (insight by',
 u'@microsoftwomen thanks for sharing microsoft women, have a great wednesday :) (insight by',
 u'@_fereydouni thanks for sharing mahmoud fereydouni, have a great wednesday :) (insight by',
 u'thanks for the retweets this week @homeaiinfo @faruk_ne @dl_ally @hwm_tech much appreciated! (insight by',
 u'dinosaur blood vessels survived 80 million years without fossilizing',
 u'christmas 2020. will big data and iot change things for father christmas? part ii | splunk \u2026',
 u'@_fereydouni thanks for sharing mahmoud fereydouni, have a great monday :) (insight by',
 u'thanks @kekline @thebattlelist @blog4forall @jamie_dixon for being top engaged community members this week :) (via',
 u'@azure thanks for sharing microsoft azure, have a great monday :) (insight by',
 u'@windows thanks for sharing windows, have a great monday :) (insight by',
 u'follow our new handle @odsc for all your #odsc information!',
 u'@marvivilla_b thanks for following me marvi! hope you like my tweets!',
 u"@mrpaulbola thank you paul for following me! any cool projects you're working on?",
 u'@debruning thanks for following me david! hope you like my tweets!',
 u"@vadim_efimov943 nice to e-meet you, free, how's life?  :)",
 u'google vs. uber and the race to self-driving taxis | the verge',
 u"@christi_mays nice to e-meet you, christi, how's life?  :)",
 u"@fu6jq thanks for following me!  hope you're having a great week :)",
 u'my little mojave bee is lookin so pretty &lt;3 just the best pet rock ever \n#snake #ballpython\u2026',
 u'@socialmatterz thanks for the follow, socialmatterz, nice to e-meet you!',
 u'@mcbreenmedia thanks for the follow - nice to e-meet you!',
 u"@valeotranslates thanks for following me!  hope you're having a great week :)",
 u"@michael81082 thank you for following me - hope you've been having a good day!",
 u'@jamespero thanks for retweeting me, james!',
 u'strange but true - #marketing lessons i learned from lizards: by @tyrannosaurjen',
 u"facebook's instant articles arrive on android by @jonfingas",
 u'10 successful women share their morning routines | mydomaine',
 u'@travel_thelife thanks for the follow - nice to e-meet you!',
 u"@rileymunns thanks for following me!  hope you're having a great week :)",
 u'microsoft rebrands bing pulse and integrates with skype, azure and onenote by @tweets_amanda',
 u"@latinomogul nice to e-meet you, mex, how's life?  :)",
 u'delivering a strong #ipcomm service starts off with the right technology! &gt;&gt; #customersatisfaction',
 u'overcome the 5 main objections many #smbs have to upgrading their #communications systems&gt;&gt;',
 u"here's why everyone can benefit from #semisynthetics, but there are disadvantages as well &gt;&gt; #cnc",
 u'[stat of the day] #humanerror is the cause of over 52% of #securitybreaches and you have to be ready &gt;&gt;',
 u"here's why #adobeflash is one of the biggest sources of #cyberattacks &gt;&gt;",
 u"news flash: #sbcs provide benefits that you can't afford to ignore. here's why&gt;&gt;",
 u'check out some big corporations who had major #databreaches in 2015 &gt;&gt; #ashleymadisonhack',
 u'as #smbs are upgrading aging tdm systems, should #vars adapt accordingly?',
 u'do your #salesreps convey a sense that a high #pricepoint speaks to high value?',
 u"if you don\u2019t have a #telecom reporting system, you\u2019re playing with fire. here's why &gt;&gt;",
 u'#cpq solutions come into their own | #sales',
 u"70% of #smbs expect to embrace #ipcomm over the next couple of years. here's why&gt;&gt;",
 u'#coolant is essential to maintaining high surface quality &amp; extending #toollife &gt;&gt;',
 u'times have changed: why is it suddenly so easy to buy #mobilephones online? &gt;&gt;',
 u'how to ensure the most cost-effective process for winning deals \xbb',
 u'tips for #machining with confidence: keeping your #coolant clean',
 u'here are just a few problems that #sbcs can fix &gt;&gt;',
 u"here's how to prepare for the growing #cyberthreat landscape #datasec",
 u'what mobile offerings best drive #mobileapp usage &amp; engagement during the #holidayshopping bonanza?',
 u'check out the major #databreaches of 2015 &gt;&gt; #cybersecurity',
 u'rt @sengineland: apple shuts down topsy &amp; redirects it to ios search help page by @rustybrick',
 u"watch the #twittersmarter hashtag daily. i'm always posting helpful articles &amp; info about twitter.",
 u'@divablueband omg that would be awesome!!',
 u"i love when this song comes on #pandora, brandy (you're a fine girl) by looking glass.",
 u'10 key design trends for 2016 (and how to make the most of them)',
 u'don\u2019t try to be original, just try to be good. \u2014 paul rand #quote #startup',
 u'book a session with me and save $$! my holiday special to you when you use coupon code: holiday2015',
 u'the 6 types of social media content that will give you the greatest value via @neilpatel',
 u'i love the new "your twitter activity!" do you see this on your yet? #twittersmarter',
 u'@cindyosbourne great poll idea!!',
 u'i share twitter tips in the @guidedgoals podcast. #twittersmarter',
 u'remember: rewards come in action, not in discussion.- tony robbins',
 u'understanding the psychology of social networks: secret ingredient to social media success',
 u'yay!! #ggchat',
 u'you may never know what results come of your actions, but if you do nothing, there will be no results. - gandhi #quote',
 u"@andymac71 @getsocial_io thank you for sharing! i'd love for you to join our blab sometime. we talk about twitter marketing. it's fun!",
 u'@andymac71 what a cutie!!',
 u'twitter looks to make money from ads aimed at logged-out users #twittersmarter',
 u'#twittertip: have a great looking twitter profile. nice branded profile image. cool header image. and a compelling bio. #twittersmarter',
 u'7 small-business owners share their best productivity tips (infographic) by @lashandrow',
 u'@nuppolina bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@shaiyanai bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@benpeyrache bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@yuriymamontov64 bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@aando_realtors bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@domains_express bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@rfinney bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@salihsarikaya bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@benrakov bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@jmcbain bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@lifecoach_va bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@ecocagne bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@transformdgtl bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@gabrielobr2 bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@1stworldcomms bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@continuumio #bigdata: 517 cos, $24.9b funding, 10k+ people. stay on top of it"',
 u'@convertro in our #bigdata landscape with 516 others! track them in 5min a day.',
 u'@wijayaliu bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@ananthv9 bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'@neuvoosalcal bigdata: 517 cos, $24.9b invested, 10k+ people. stay on top of it!',
 u'easy way to make money by recycling without any effort + helping our world in the same time \n',
 u"when people say they'd like to be in your shoes, it's usually after the difficult journey is finished. - tim fargo",
 u'declare your independence from worry! get \'s new, life-changing book "worry no more!"',
 u'we need to get our priorities straight thinks',
 u'the only real failure is not trying... jeff clavier',
 u'people demand freedom of speech as a compensation for the freedom of thought which they seldom use. - kierkegaard',
 u'social media blogs jukebox. now available free in the  store.',
 u'we are so busy judging others that we are unable to detect our own flaws.',
 u"what are you doing today to be inclusive of your human family? we're more alike than different. -",
 u'happiness is a decision, you are as happy as you decide to be.',
 u"don't give up...\n\nmt",
 u'you can never be overdressed or over educated. \n\u2015 oscar wilde',
 u'you have your way. i have my way. as for the right way, the correct way, and the only way, it does not exist. \n\u2015friedrich nietzsche',
 u'eggs are for omelets.',
 u'nothing in the world is ever completely wrong. even a stopped clock is right twice a day. \n\u2015 paulo coelho',
 u"don't live down to expectations. go out there and do something remarkable. \u2013 wendy wasserstein",
 u'tim fargo.-',
 u'celebrity quotes jukebox. now available free in the  store.',
 u'how to go from absolute disaster to absolute master in minutes. a chat with  &gt;',
 u'if you can build trust then you can build relations. -',
 u'150+ data science blogs #analytics #datascience #machinelearning',
 u'how amazon redshift opened up #bigdata to #growthhackers, marketers, and other light #quants',
 u'customer success and the evolution of saas metrics: churn, ltv, and cac',
 u'how to increase growth through retention analysis | amplitude blog',
 u'answer 1 question about ecomm insights and get entered into a cupcake giveaway',
 u'cool new site from @becomingdatasci, featuring tutorials, blogs, books, meetups, and tons of #datascience content.',
 u'22 big data &amp; data science experts predictions for 2016',
 u'how to write a content marketing strategy step-by-step [with template]',
 u'how well does nps predict rebooking? - airbnb engineering #datascience #analytics',
 u"the week's most interesting #datascience stories distilled and delivered to your inbox every sunday.",
 u'top companies employing data scientists are microsoft, facebook, ibm, and gsk',
 u'the 7 conferences data nerds shouldn\u2019t miss #datascience #analytics',
 u'from @modeanalytics -- smooth out the noise of daily data with moving averages in sql',
 u'no free hunch, countbayesie, mlwave, and other blogs that data scientists love #datascience',
 u'7 experts weigh in on how you can keep learning #datascience long after college',
 u'customer success and the evolution of saas metrics: #churn, #ltv, and #cac',
 u'18 brilliant examples of holiday marketing campaigns',
 u'3 things to consider for your 2016 it strategy',
 u'16 creative email subject lines to help you restart stalled conversations',
 u'the latest trends in e-commerce technology',
 u"@snafuworld, @jebbush could've prosecuted case against trump, but spent most of his time insisting he was serious/trump not #showdonttell",
 u"@jebbush one response: it is not a serious proposal to say that..that's not a serious proposal. we need a serious leader to deal with this.",
 u'2.2/2.2 @jebbush seriously overran: serious undertaking- serious challenge- serious administration- serious about protecting- serious part',
 u'2/2 @jebbush continued, seriously: lack of seriousness-lack of seriousness-not a serious kind of candidate- serious strategy...',
 u'1/2 @jebbush had a serious problem last night. in order: serious times-not a serious proposal-not a serious proposal-serious leader...',
 u'@journalismjudge many thanks!',
 u'@zuluninerseven many thanks!',
 u"is there a transcript available of the main rep debate last night? can't seem to find one . . .",
 u'@kwcollins well, you do want hillary as commander in chief, yes?',
 u'@kwcollins years ago, had an "editor" change the correct form to the incorrect one before publication.',
 u'@ajungherr @kwcollins creative development great, but there\'s a ton of fg use for "message testing," which is research malpractice.',
 u'@kwcollins @thepollsters nor can focus groups be used to model results for an actual population of interest.',
 u'@kwcollins uh, you mean the activists decide?',
 u'rt @kwcollins: few genres of campaign journalism are worse than "focus groups staged for journalists"',
 u'called it here:',
 u'rt @fbihop: right to rise pac attorneys respond to trump.',
 u'digital to overtake tv ad spending:',
 u'rt @emayfarris: how d.c. spent $200 million over a decade on a streetcar you still can\u2019t ride @txbornviking',
 u'@kwcollins @voxdotcom @dlind indeed, but the "america\'s gun owners" part is confusing . . . definitely need to adjust how that\'s presented.',
 u'maybe because we were arming "good rebels" and ended up arming isis? not hard to see how they make the leap of logic',
 u'rt @kirkdborne: click, zoom and pan in beautiful #d3js dashboards for r: #rstats #dataviz by @plotlygraphs https://\u2026',
 u'rt @kevinaggett1: nuance mix brings voice control to any app, internet of things device - slashgear #iot',
 u'rt @rasbt: "5 important #python data science advancements of 2015" -- was definitely a great year for the python sci-stack :)\n\u2026',
 u"rt @unglobalpulse: we're recruiting! #dataviz #datascience #privacy #datainnovation #unjobs #data4good #nptech http\u2026",
 u'rt @scritchley: crimes against #dataviz - why this @nro global temperature graph is so misleading: #climate #data',
 u'rt @evansinar: great 8-minute pbs video on data visualization, art, and revelation via aeon #dataviz #art https://t\u2026',
 u'rt @loop_creative: interesting article on digital signage #animation #dataviz',
 u'rt @0fjrm0: honeystation attacks today. #dataviz #honeypots #infosec #ciberseguridad',
 u'rt @anchrpoint: a little something from @wonderlusthfx to start your week. #dataviz #infographics #animation https:\u2026',
 u'rt @steffenschmigi: #devices pull data from services or #sensors \u2013 but how to pay for those #data? #iot @balajis ht\u2026',
 u'rt @wtvox: as sensors shrink, wearables will disappear #iot #wearables',
 u'rt @judoclubmestre: rt nalimr stemettes masschallengeuk laurieainley donativeuk #dataviz #patterns #bigdata #siesta #cats #pets #anima\u2026 htt\u2026',
 u'rt @evansinar: 6 powerful reasons why your business should visualize data | maptive #dataviz',
 u'rt @albertocairo: reading: the evolution of a scientific american infographic #dataviz #infographics\u2026',
 u'rt @dataviznews: rise of breweries in america \u2022 /r/dataisbeautiful #dataviz',
 u'rt @epromisglobal: #epromis #erp enables richer #datapresentation by transforming raw #data into #businessinsights &gt;#bigdata #dataviz #busi\u2026',
 u'rt @parthona98: valaafshar : rt cbinsights: the biggest bets on #iot over the past 5 years in one graphic \u2026 https:\u2026',
 u'rt @iotmed: new #webinar : intro to #tigerconnect: secure #messaging apis and use cases for #iot -\u2026',
 u'rt @rpi2bot: my current cpu temperature is 36.9 c - wow i am hot. #uselessfacts are the best facts! #iot',
 u'rt @wtvox: the first exascale computer #iot #wearables',
 u'data warehousing automation adoption becoming mainstream',
 u'the bi conundrum: delivering trust and transparency at speed\r',
 u'rt @mateus__carlos: tous les emplois de demain n\xe9cessiteront d\u2019\xeatre en mesure de d\xe9bloquer l\u2019intelligence des donn\xe9es @satyanadella https:/\u2026',
 u'rt @cmendibl3: top 10 blogs of 2015: machine intelligence to grow your\u2026 #bigdataandanalytics #machineintelligence v\u2026',
 u'evolution of data storage devices 1956-2015 [infographic]\r',
 u'rt @flashtweet: 4\u20e3[#bigdata] #microsoft sur les pas de #google avec son #graph v/@7wdata #flashtweet\u2026',
 u'simplified analytics: machine learning a key to digital transformation !!!\r',
 u'rt @convergytics: big data, business intelligence still top it concerns in 2016\n\n',
 u'microsoft\u2019s graph wants to turn user data into business intelligence it can sell\r',
 u'rt @sas_southafrica: a report on #bigdata evolution from the economist intelligence unit  #sasinsights',
 u'rt @startup124: rt: rt startup124: rt: rt openagio: rt iot_bulk: top 8 trends fo\u2026 https://t\u2026',
 u'4 big reasons why healthcare needs data science\r',
 u'3 things that can stall innovation (and how to overcome them)',
 u'rt @startup124: rt: rt openagio: rt iot_bulk: top 8 trends for #bigdata !!\n#iot #machinelearning #datasci\u2026 https://\u2026',
 u'the 37 best tools for data visualization\r',
 u'4care leverages teradatas social capabilities',
 u'rt @skrobola: 4 reasons why leaders should emulate military intelligence training when dealing with big data',
 u'rt @lpventure: data or intelligence without context isn\u2019t useful - harness the power of your network to understand the context behi\u2026https:/\u2026',
 u'5 key roadblocks to data-driven healthcare\r',
 u'rt @technmanagement: top 8 trends in #bigdata for 2016\n #analytics #iot #machinelearning #datascience #leadership #s\u2026',
 u'#mckinsey buys #formulaone #dataanalytics firm quantumblack:',
 u'how i stopped worrying and found balance in #bigdata:',
 u'what #bigdata was like before the #web: #businessintelligence',
 u'how dish network innovates with #analytics and #uber like #customerservice',
 u'how #bigdata is helping fight #aids in africa: #businessintelligence',
 u'how #businessintelligence has evolved into simplified and accessible platforms:',
 u'cloudy #security: what your advisor doesn\u2019t know about cloud computing could hurt you:',
 u'how to prove the business value of #bigdata:',
 u'tech billionaire michael dell says #bigdata is the next trillion dollar tech industry:',
 u'lack of #bigdata talent hampers corporate analytics:',
 u'hiring top #analytics talent: how leading companies do it:',
 u'fraud #analytics firm #rippleshot raises $1.2 million in funding:',
 u'the continuing evolution of #cloudcomputing:',
 u'ge #cmo: "i\u2019m done advertising on prime-time tv":',
 u'talking #analytics with frank armour of american university\u2019s kogod school of business:',
 u'how to create a culture of #analytics:',
 u'how #cloudcomputing platforms are becoming a catalyst of #globalmanufacturing growth:',
 u"#microsoft integrates #cortana into it's power bi #businessintelligence service:",
 u'#cloudcomputing to spread like the web, says hpe:',
 u"ever wonder who's visit your website? #analytics can help:",
 u'wish list app from target springs a major personal data leak',
 u'rt @jonyiveparody: if you\u2019re buying someone a windows phone for christmas, the best accessory is a gift receipt to make returns easier. htt\u2026',
 u'rt @msvirtacademy: resource manager = a logical way to manage your #azure deployment. learn more: #msmva https://t.\u2026',
 u'progress report: open container initiative via @docker',
 u'mesosphere is proud to support the open container initiative #mesosphere',
 u'is microsoft trying to show value of azure with new windows server 2016 licensing?',
 u'announcing azure resource manager support in azure automation runbooks | microsoft azure blog via @azure',
 u'rt @bcdady: #powershell "pester is an important windows tech going forward" - @jsnover',
 u'azure service fabric and the microservices architecture',
 u'azure security center now available | microsoft azure blog via @azure',
 u"rt @becomingdatasci: for those of you eagerly awaiting the becoming a data scientist podcast, i'm doing the first interview tonight!",
 u'rt @msdev: all the essential dev tools you need\u2026.for free. rt &amp; share!',
 u'introducing u-sql \u2013 a language that makes big data processing easy -',
 u'steve ballmer has some harsh words for the new microsoft',
 u'@mskathyv have a great trip. post lots of pictures.',
 u'"deep lessons from google and ebay on building ecosystems of microservices"',
 u'announcing azure portal general availability | microsoft azure blog via @azure',
 u'if your enterprise it org is still taking orders for servers and 3 week lead time is not enough, your behind the curve. #iac #devops',
 u'rt @powershellmag: deploy custom azure automation integration modules using arm\xa0templates',
 u'rt @powershellmag: pester explained: introduction and\xa0assertions',
 u'#twitter fact: #tweets with images get 200% more engagement \n\n5 tools to make great images',
 u'13 unique ways to use lists on #twitter right now:',
 u'limited time! increase your followers, engagement &amp; leads from #twitter \n\nfree 14-day trial:',
 u'see #twitter growth right now with advice from 5 #socialmedia influencers',
 u'all we can say is @thesocialquant is "cloud based"...\n\nwe\'ll make you famous on #twitter &amp; then we tell you why! ;-)\n',
 u'proper etiquette 101: how to send a direct message on #twitter:',
 u'are you a hashhole on #twitter:\n\nyou may be and not even know it (yikes)!!',
 u'serious #twittertips: \n\nhere are 8 #twitter lists you should be using today!!! #socialmedia',
 u'6 spicy steps for #authors on #twitter to sell more books to your followers:',
 u'ultimate cheat sheet: 21 #lead generation ideas you need to know:',
 u'watch your #twitter follower count, retweets, visibility &amp; profits take off: #socialmedia',
 u'#periscope #marketing: the 5 things your #business needs to know:',
 u'did we just help connect 2,500,000+ people &amp; brands?\n\nyes we did !!!\n\ncome join the party at',
 u"here's what 3 of the best brands on #twitter are doing:\n\n(copy this for your #business)",
 u'ng: #twitter is considering changing the algorithm like #facebook. \n\nwhat you need to know..',
 u'free 14-day trial:\n\nexperience a greater #marketing impact for on #twitter',
 u'new #twitter update: \n\nhere are 3 ways to take advantage of this big change:',
 u'here are 5 ways for you to start using #twitter to market your #business:',
 u'shhh.... 5 reasons why you should be using #twitter to market your #business in 2016:',
 u'let us help your #startup get the same results on #twitter\n\nfree 14-day trial: #entrepreneur',
 u'having toothpaste and orange juice is only cool when you use them in quick succession.',
 u"i don't even know atlassian's revenue model but @experienceteam exists and that is awesome.",
 u'rt @sgoo_nz: star wars atlassian style, thanks @experienceteam #badnexus5photo',
 u"if 20 people say yes,\n\ni'll start putting videos on youtube again",
 u'both are cool ideas and you know it',
 u'rt @cctvnews: president #xijinping: all villages in china expected to be connected to internet by 2020 #wic #wic2015',
 u"three\ntwo\none\n\nhe's awake",
 u'@emilykjohnston hello',
 u'@larsenchung hello',
In [ ]:
token_dict = {}
stemmer = PorterStemmer()

#this can take some time
tfidf = TfidfVectorizer(tokenizer=tokenize, stop_words='english')
tfs = tfidf.fit_transform(token_dict.values())
In [ ]:
In [ ]:
In [21]:
import random
In [110]:
import random
import math
tokens = ['adf', 'klj', 'iuaod', 'ssfle', 'edfel', 'egfel', 'efhlef', 'efjlef', 'sokeof', 'adf', 'adffd', 'f', 'ff', 'fff', 'ffff', 'fffff', 'fa', 'faa', 'faaa']
w = []
for t in tokens:
    r = math.pow(random.random(), 3)
w = np.array(w)
w = w / sum(w)
df = pd.DataFrame({'token': tokens, 'w': w})
df.sort('w', inplace=True, ascending=True)
df.index = np.arange(df.shape[0]) + 1
In [130]:
if df.shape[0] > 10:
    tail = df.ix[:10, 'w'].sum()
    rem = df.iloc[10:].copy()
    rem.sort('w', inplace=True, ascending=False)
    rem.ix[rem.shape[0]] = pd.Series({'token': '--[other terms]--', 'w': tail})
    rem.index = np.arange(rem.shape[0])
In [131]:
token w
0 fa 0.182045
1 faa 0.173771
2 adf 0.148007
3 ssfle 0.139892
4 ff 0.101003
5 klj 0.058918
6 ffff 0.055602
7 egfel 0.032608
8 fffff 0.029047
9 --[other terms]-- 0.079108
In [163]:
colors = []
for c in range(rem.shape[0]-1):
plt.barh(rem.index * -1, rem['w'], color=colors)
plt.yticks(rem.index * -1 + 0.4, rem['token'])
plt.ylim(-1 * rem.shape[0] + 0.8, 1)
plt.ylabel('phrase weight')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Enjoy this post? Sign up for our mailing list and don't miss any updates.

Have a word to say? Propose a specific change to the blog post.