import os.path
import shlex
import subprocess
import sys
from os import getcwd

import pandas as pd


class PySentiStr:
    """Thin wrapper that scores text by piping it through ``SentiStrength.jar``.

    The jar and its language data folder are looked up in the current working
    directory by default; use the two setter methods to point elsewhere.
    """

    def __init__(self):
        """Set the default jar / data-folder paths and report any that are missing.

        A missing path is only reported with ``print`` (no exception), so the
        caller can still fix it afterwards through the setter methods.
        """
        self.SentiStrengthLocation = os.path.join(getcwd(), "SentiStrength.jar")
        self.SentiStrengthLanguageFolder = os.path.join(getcwd(), "SentiStrengthData/")

        # The attributes must be read through ``self``; the bare names do not
        # exist and would raise NameError exactly when a path is missing.
        if not os.path.isfile(self.SentiStrengthLocation):
            print("SentiStrength not found at: ", self.SentiStrengthLocation,
                  '\nSet path using setSentiStrengthPath(path) function.')
        if not os.path.isdir(self.SentiStrengthLanguageFolder):
            print("SentiStrength data folder not found at: ", self.SentiStrengthLanguageFolder,
                  '\nSet path using setSentiStrengthLanguageFolderPath(path) function.')

    def setSentiStrengthPath(self, ss_Path):
        """Set the path of the SentiStrength jar file."""
        self.SentiStrengthLocation = ss_Path

    def setSentiStrengthLanguageFolderPath(self, sslf_Path):
        """Set the path of the SentiStrength language data folder."""
        self.SentiStrengthLanguageFolder = sslf_Path

    def getSentiment(self, df_text, score='scale'):
        """Return one sentiment score per input text.

        Args:
            df_text: A single string, a list of strings, or a pandas Series
                of strings.
            score: ``'scale'`` returns, per text, the sum of its two scores
                divided by 4; ``'binary'`` returns, per text, a tuple of its
                two scores (positive and negative rating).

        Returns:
            A list with one entry per text (empty for empty input). For any
            other ``score`` value the usage message is returned as a string,
            as this method has always done, instead of raising.

        Raises:
            ValueError: If the Java process produced no output, or produced
                tokens that are not integers.
        """
        # Validate first so a typo in ``score`` does not cost a JVM start-up.
        if score not in ('scale', 'binary'):
            return "Argument 'score' takes in either 'scale' (between -1 to 1) or 'binary' (two scores, positive and negative rating)"

        # Accept a single string or any list-like by normalising to a Series.
        if not isinstance(df_text, pd.Series):
            df_text = pd.Series(df_text)
        if df_text.empty:
            return []

        # Texts are sent one per line, so embedded line breaks are removed.
        cleaned = df_text.str.replace('\n', '').str.replace('\r', '')
        # Spaces are sent as '+'.
        # NOTE(review): presumably the encoding SentiStrength's stdin mode
        # expects -- confirm against the SentiStrength manual.
        payload = '\n'.join(cleaned).replace(" ", "+").encode('utf-8')

        # An argument list needs no quoting, so paths containing spaces or
        # quote characters are passed through untouched.
        command = [
            "java", "-jar", self.SentiStrengthLocation,
            "stdin", "sentidata", self.SentiStrengthLanguageFolder,
        ]
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout_bytes, stderr_bytes = process.communicate(payload)

        # Splitting on any whitespace copes with tabs, spaces and either
        # line-ending convention in the process output.
        tokens = stdout_bytes.decode("utf-8").split()
        if not tokens:
            raise ValueError(
                "SentiStrength returned no scores. stderr: "
                + stderr_bytes.decode("utf-8", errors="replace").strip()
            )

        values = [int(token) for token in tokens]
        # Scores are consumed two at a time, one pair per input text.
        pairs = [values[i:i + 2] for i in range(0, len(values), 2)]
        if score == 'scale':
            return [sum(pair) / 4 for pair in pairs]
        return [tuple(pair) for pair in pairs]