This post covers sentiment analysis of tweets collected from Twitter, with the results stored in a database.
What is sentiment analysis?
Sentiment analysis is the computational process of determining whether a given statement is positive or negative.
Where is it useful?
- Marketing - predicting a product's success or failure based on people's feedback
- Politics
- People's actions
Here we are going to perform sentiment analysis on Twitter data.
Prerequisites:
1. Java 1.8 - required to run the Stanford CoreNLP server
2. Tweepy - required to pull / crawl data from Twitter
3. pycorenlp - required to call the Stanford CoreNLP server from Python
Please follow this URL https://stanfordnlp.github.io/CoreNLP/corenlp-server.html to install the CoreNLP server on your local system; the typical start command is shown below.
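From the unpacked CoreNLP directory, the server is usually started with a command like the following (the memory size and timeout are common defaults from the CoreNLP documentation; adjust them for your machine):

java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000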
We could also use a third-party library for sentiment analysis; TextBlob is one such Python library, as the sketch below shows.
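A minimal sketch of the TextBlob alternative, assuming TextBlob is installed via pip install textblob (the sample sentence is only an illustration):

from textblob import TextBlob

blob = TextBlob("The World Cup final was fantastic!")
# polarity ranges from -1.0 (most negative) to 1.0 (most positive)
print(blob.sentiment.polarity)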
Authentication:
In order to fetch tweets through the Twitter API, you need to register an app with your Twitter account. Follow these steps:
- Open the Twitter application management page and click the ‘Create New App’ button.
- Fill in the application details. You can leave the callback URL field empty.
- Once the app is created, you will be redirected to the app page.
- Open the ‘Keys and Access Tokens’ tab.
- Copy the ‘Consumer Key’, ‘Consumer Secret’, ‘Access Token’ and ‘Access Token Secret’.
After creating the app on Twitter, use those keys in Python to authenticate; the crawler then pulls the data. A minimal authentication sketch follows.
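A minimal sketch of Tweepy authentication (the key values are placeholders; use the credentials copied from your own app):

import tweepy

auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
api = tweepy.API(auth)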
After that, you have to start the sentiment analysis server locally; it listens on localhost:9000 by default. A quick way to confirm it is reachable is sketched below.
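A quick sanity check, assuming pycorenlp is installed and the server is running on the default port (the sample sentence is only an illustration):

from pycorenlp import StanfordCoreNLP

nlp = StanfordCoreNLP('http://localhost:9000')
resp = nlp.annotate('Stanford CoreNLP is great!',
                    properties={'annotators': 'sentiment',
                                'outputFormat': 'json',
                                'timeout': 1000})
for sentence in resp['sentences']:
    print(sentence['sentiment'], sentence['sentimentValue'])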
The sample code below wraps Tweepy to pull data from Twitter:
"""
This class provides a wrapper around Tweepy to access Twitter data.
@author ramakavanan
"""
import tweepy


class TwitterApi(object):

    def __init__(self, auth=None, twitter_api=None):
        self._auth = auth
        self._twitter_api = twitter_api

    def authenticate(self, api_token, api_secret, customer_key, customer_secret):
        """Build the OAuth handler from the app credentials."""
        try:
            self._auth = tweepy.OAuthHandler(customer_key, customer_secret)
            self._auth.set_access_token(api_token, api_secret)
        except Exception as ex:
            print(ex)
            self._auth = None

    def twitter_api(self):
        """Create the API client; authenticate() must be called first."""
        if self._auth is None:
            raise Exception('Authentication object was null')
        self._twitter_api = tweepy.API(self._auth)

    def search(self, search_term, count):
        if search_term is None or self._twitter_api is None:
            raise Exception('Twitter API / search term should not be empty')
        return self._twitter_api.search(q=search_term, count=count)

    def get_home_timeline(self, count):
        if self._twitter_api is None:
            raise Exception('Twitter API should not be empty')
        return tweepy.Cursor(self._twitter_api.home_timeline).items(count)
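A hypothetical usage of the wrapper above (the credential strings are placeholders):

twitter = TwitterApi()
twitter.authenticate('ACCESS_TOKEN', 'ACCESS_SECRET', 'CONSUMER_KEY', 'CONSUMER_SECRET')
twitter.twitter_api()
for tweet in twitter.search('World Cup', 5):
    print(tweet.text)

The SentenceAnalyzer class below ties everything together: it fetches tweets through this wrapper, runs them through the CoreNLP sentiment annotator, and stores the results in SQLite.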
"""
This class analyzes the sentiment of tweets pulled from Twitter.
@author honeyvig
"""
import re

import pandas as pd
import matplotlib.pyplot as plt
from pycorenlp import StanfordCoreNLP

from TwitterApi import TwitterApi
from DBConnector import DBConnector
from DBManipulation import DBManipulation


class SentenceAnalyzer(object):

    CONSUMER_KEY = "################"
    CONSUMER_SECRET = "######################"
    ACCESS_TOKEN = "##########"
    ACCESS_SECRET = "##########"
    CREATE_TABLE_QUERY = ("CREATE TABLE sentiment_analysis( "
                          "ID INTEGER PRIMARY KEY AUTOINCREMENT, "
                          "sentence TEXT, sentimentValue INTEGER, sentiment TEXT )")
    INSERT_QUERY = "INSERT INTO sentiment_analysis (sentence, sentimentValue, sentiment) VALUES(?,?,?)"
    SELECT_ALL_QUERY = "SELECT * FROM sentiment_analysis"

    def __init__(self, server_host):
        if server_host is None:
            raise Exception('Stanford server not running ....')
        self.nlp = StanfordCoreNLP(server_host)

    def analysis(self, is_table_exist, is_pandas_enabled, is_tweet_need):
        """Fetch tweets, run sentiment analysis on them and store the results."""
        try:
            if is_tweet_need:
                tweets = self.get_tweets()
                if tweets is None:
                    raise Exception('Exception while fetching twitter data')
                if not is_table_exist:
                    self.create_table(self.CREATE_TABLE_QUERY)
                list_data = []
                for tweet in tweets:
                    txt = self.clean_tweet(tweet.text)
                    tweet_txt = self.convert_str_utf8(txt)
                    resp = self.sentiment_analyzer(tweet_txt)
                    if resp is not None:
                        for sentence in resp["sentences"]:
                            data = (" ".join(t["word"] for t in sentence["tokens"]),
                                    sentence["sentimentValue"],
                                    str(sentence["sentiment"]))
                            list_data.append(data)
                if list_data:
                    self.insert_mass_data(self.INSERT_QUERY, list_data)
            if is_pandas_enabled:
                self.pandas_analysis()
        except Exception as ex:
            print(str(ex))
            raise

    def create_table(self, table_string):
        connector = DBConnector('', "SentenceAnalyzer.db")
        conn = connector.create_schema()
        db_cmt = DBManipulation(conn)
        db_cmt.create_table(table_string)

    def get_all_data(self):
        connector = DBConnector('', "SentenceAnalyzer.db")
        conn = connector.create_schema()
        db_cmt = DBManipulation(conn)
        return db_cmt.select_all_data(self.SELECT_ALL_QUERY)

    def insert_mass_data(self, query, query_data):
        """Insert the analyzed sentences in bulk."""
        connector = DBConnector('', "SentenceAnalyzer.db")
        conn = connector.create_schema()
        db_cmt = DBManipulation(conn)
        db_cmt.many_insert_query_executor(query, query_data)

    def get_tweets(self):
        try:
            twitter = TwitterApi()
            twitter.authenticate(self.ACCESS_TOKEN, self.ACCESS_SECRET,
                                 self.CONSUMER_KEY, self.CONSUMER_SECRET)
            twitter.twitter_api()
            return twitter.search('World Cup', 5)
        except Exception:
            return None

    def clean_tweet(self, tweet):
        # Strip @mentions, special characters and URLs from the tweet text.
        return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)",
                               " ", tweet).split())

    def convert_str_utf8(self, txt):
        if not txt:
            return None
        try:
            # Drop non-ASCII characters; decode back so a str is returned.
            return txt.encode('ascii', 'ignore').decode('ascii')
        except Exception:
            return None

    def sentiment_analyzer(self, tweet):
        if isinstance(tweet, str) and tweet:
            return self.nlp.annotate(tweet, properties={
                'annotators': 'sentiment',
                'outputFormat': 'json',
                'timeout': 1000,
            })
        return None

    def pandas_analysis(self):
        connector = DBConnector('', "SentenceAnalyzer.db")
        conn = connector.create_schema()
        if conn is not None:
            df = pd.read_sql(self.SELECT_ALL_QUERY, conn)
            plt.scatter(x=df['ID'], y=df['sentimentValue'])
            # Other views worth trying:
            # df.pivot_table('sentimentValue', index='sentiment', aggfunc='sum') \
            #   .plot(kind='barh', stacked=True, title="Sum of sentiment value")
            # df.pivot_table('sentimentValue', index='sentiment', aggfunc='count') \
            #   .plot(kind='barh', title="Count of sentiment in sentences")
            plt.show()
        else:
            raise Exception("The data can't be retrieved from SQLite")


if __name__ == "__main__":
    analyzer = SentenceAnalyzer('http://localhost:9000')
    analyzer.analysis(True, True, False)
We used an SQLite database to store the values. The DBConnector and DBManipulation helpers used above are part of the project and are not shown in this post; a sketch of what they might look like follows.
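A minimal sketch of the two helper classes, assuming they are thin wrappers around Python's built-in sqlite3 module (the real implementations may differ; only the methods called above are sketched):

import sqlite3


class DBConnector(object):
    """Hypothetical sketch: opens an SQLite connection at path + db_name."""

    def __init__(self, path, db_name):
        self._db_path = path + db_name

    def create_schema(self):
        try:
            return sqlite3.connect(self._db_path)
        except Exception as ex:
            print(ex)
            return None


class DBManipulation(object):
    """Hypothetical sketch: runs queries against an open connection."""

    def __init__(self, conn):
        self._conn = conn

    def create_table(self, query):
        self._conn.execute(query)
        self._conn.commit()

    def many_insert_query_executor(self, query, query_data):
        self._conn.executemany(query, query_data)
        self._conn.commit()

    def select_all_data(self, query):
        return self._conn.execute(query).fetchall()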