Sentiment analysis is now more conclusive.

This commit is contained in:
Bhavanvir Rai
2022-09-20 07:57:17 -07:00
parent 4152cf7b40
commit 075e51d8d4

View File

@@ -4,11 +4,13 @@ from bs4 import BeautifulSoup
# Sentiment Analysis
import nltk
nltk.download()
#nltk.download()
import nltk.corpus
nltk.download('stopwords')
#nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
# Pattern Matching
import re
@@ -16,14 +18,13 @@ import re
def sentiment_analysis(text):
    """Summarise the sentiment of *text* as an emoji plus a percentage.

    The text is scored with NLTK's ``SentimentIntensityAnalyzer``. The
    ``compound`` score picks the label, and the matching ``pos`` / ``neg`` /
    ``neu`` score is reported as the "confidence" figure.

    Args:
        text: The text to score (e.g. a listing description).

    Returns:
        A string of the form ``"<emoji> with NN.NN% confidence"`` where the
        emoji is 🙂 (compound >= 0.05), 🙁 (compound <= -0.05) or 😐
        (anything in between).
    """
    sia = SentimentIntensityAnalyzer()
    sentiment = sia.polarity_scores(text)

    # Classify on the compound score only; comparing the raw neg/neu/pos
    # scores against each other almost always lets "neu" win.
    if sentiment["compound"] >= 0.05:
        return "🙂 with {:.2f}% confidence".format(sentiment["pos"] * 100)
    if sentiment["compound"] <= -0.05:
        return "🙁 with {:.2f}% confidence".format(sentiment["neg"] * 100)
    # NOTE(review): the percentage is the analyzer's score for the chosen
    # class scaled by 100, not a calibrated probability.
    return "😐 with {:.2f}% confidence".format(sentiment["neu"] * 100)
def html_debug(soup):
f = open("index.html", "w")
@@ -31,12 +32,17 @@ def html_debug(soup):
f.close()
def clean_text(text):
    """Normalise *text* into a space-separated string of lemmatised words.

    Steps: tokenise with a regular expression, lower-case every token, drop
    English stop words and any token that is not purely alphabetic, then
    lemmatise what is left with ``WordNetLemmatizer``.

    Args:
        text: Raw text to clean.

    Returns:
        The surviving lemmatised tokens joined by single spaces (an empty
        string when nothing survives the filters).
    """
    # Raw string: the pattern contains backslash sequences (\w, \$, \d, \S)
    # that are not valid escapes in a normal Python string literal.
    tokenizer = RegexpTokenizer(r'\w+|\$[\d\.]+|http\S+')
    tokens = [word.lower() for word in tokenizer.tokenize(text)]

    # Build the stop-word set once so each membership test is O(1).
    stop_words = set(stopwords.words('english'))

    # isalpha() also discards tokens containing digits or punctuation,
    # e.g. the "$..." price tokens the pattern can produce.
    filtered = [
        word for word in tokens
        if word not in stop_words and word.isalpha()
    ]

    lemmatizer = WordNetLemmatizer()
    return " ".join(lemmatizer.lemmatize(word) for word in filtered)
def get_title(soup):
title = soup.find("meta", {"name": "DC.title"})
@@ -68,7 +74,7 @@ def main():
mobile_url = shortened_url.replace("www", "m")
print("\nHow we feel about this listing: {}".format(sentiment_analysis(get_description(create_soup(url)))))
print("Vehicle: {}".format(get_title(create_soup(url))))
print("Title: {}".format(get_title(create_soup(url))))
print("Price: {}".format(get_price(create_soup(mobile_url))))
if __name__ == "__main__":