Files
PySenti/sentistrength/__init__.py
T
2019-04-05 13:07:06 +08:00

50 lines
2.2 KiB
Python

import subprocess
import shlex
import os.path
import sys
import pandas as pd
from os import getcwd
class PySentiStr:
    """Thin wrapper that runs the SentiStrength Java jar on a batch of texts.

    Both the jar location and the language-data folder must be configured
    through the setter methods before ``getSentiment`` is called; nothing is
    configured by default.
    """

    def __init__(self):
        """Create an unconfigured wrapper (no paths are set here)."""

    def setSentiStrengthPath(self, ss_Path):
        """Store the path of the SentiStrength jar file passed to ``java -jar``."""
        self.SentiStrengthLocation = ss_Path

    def setSentiStrengthLanguageFolderPath(self, sslf_Path):
        """Store the path of the language-data folder passed as ``sentidata``."""
        self.SentiStrengthLanguageFolder = sslf_Path

    @staticmethod
    def _parse_scores(stdout_text, score):
        """Convert raw SentiStrength output into a list of scores.

        The output is read as a flat, whitespace-separated sequence of
        integers that is consumed two at a time (one pair per input text).

        Args:
            stdout_text: Decoded standard output of the Java process.
            score: ``'scale'`` to get ``sum(pair) / 4`` for each pair, or
                ``'binary'`` to get each pair as a tuple.

        Returns:
            A list of floats (``'scale'``) or a list of tuples (``'binary'``).

        Raises:
            ValueError: If a token in the output is not an integer.
        """
        # split() with no argument tolerates tabs, spaces, "\n" and "\r\n"
        # alike, so the parse does not depend on the platform's line endings
        # or on whether each line ends with a trailing tab.
        values = [int(token) for token in stdout_text.split()]
        pairs = [values[i:i + 2] for i in range(0, len(values), 2)]
        if score == 'scale':
            return [sum(pair) / 4 for pair in pairs]
        return [tuple(pair) for pair in pairs]

    def getSentiment(self, df_text, score='scale'):
        """Run SentiStrength over the given texts and return their scores.

        Args:
            df_text: A ``pandas.Series`` of strings, or anything accepted by
                the ``pandas.Series`` constructor (it is wrapped in a Series).
            score: ``'scale'`` (default) returns one number per text, the sum
                of the two reported integers divided by 4; ``'binary'``
                returns one tuple of the two reported integers per text.

        Returns:
            A list of scores, or, for an unrecognised ``score`` value, an
            explanatory message string (kept for backward compatibility).

        Raises:
            AssertionError: If either path has not been set.
            ValueError: If the Java process produced no output or output that
                cannot be parsed as integers.
        """
        # Raised explicitly rather than via `assert`, so the check still runs
        # when Python is started with -O; the exception type is unchanged.
        if not hasattr(self, 'SentiStrengthLocation'):
            raise AssertionError("Set path using setSentiStrengthPath(path) function.")
        if not hasattr(self, 'SentiStrengthLanguageFolder'):
            raise AssertionError("Set path using setSentiStrengthLanguageFolderPath(path) function.")

        # Reject an unknown mode before paying for a Java process launch.
        if score not in ('scale', 'binary'):
            return "Argument 'score' takes in either 'scale' (between -1 to 1) or 'binary' (two scores, positive and negative rating)"

        if not isinstance(df_text, pd.Series):
            df_text = pd.Series(df_text)

        # Each text must occupy exactly one line of the process's stdin, so
        # embedded line breaks are removed before joining with "\n".
        df_text = df_text.str.replace('\n', '')
        df_text = df_text.str.replace('\r', '')
        conc_text = '\n'.join(df_text)

        # An argument list (instead of a quoted string re-parsed by shlex)
        # keeps paths containing quotes or spaces intact.
        command = [
            "java", "-jar", self.SentiStrengthLocation,
            "stdin", "sentidata", self.SentiStrengthLanguageFolder,
        ]
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Spaces are sent as "+" -- presumably the encoding SentiStrength's
        # stdin mode expects; TODO confirm against the SentiStrength manual.
        payload = conc_text.replace(" ", "+").encode('utf-8')
        stdout_byte, stderr_byte = process.communicate(payload)

        stdout_text = stdout_byte.decode("utf-8")
        if not stdout_text.strip():
            # Surface whatever Java wrote to stderr instead of failing later
            # with an opaque int('') conversion error.
            raise ValueError(
                "SentiStrength produced no output. stderr: "
                + stderr_byte.decode("utf-8", errors="replace").strip()
            )
        return self._parse_scores(stdout_text, score)