From d49e3e7251709e5e201d6af11b06452175e344ff Mon Sep 17 00:00:00 2001
From: Bhavanvir Rai <bhavanvir.r@gmail.com>
Date: Sat, 22 Apr 2023 13:09:11 -0700
Subject: [PATCH] Removed usage/reference to nltk + added Python and GitHub
 Copilot as default extensions.

---
 .devcontainer/Dockerfile        |  1 -
 .devcontainer/devcontainer.json | 10 ++++++-
 .devcontainer/requirements.txt  |  3 +--
 scraper/scraper_class.py        |  2 +-
 scraper/utils.py                | 48 ---------------------------------
 scraper/views.py                |  3 ---
 6 files changed, 11 insertions(+), 56 deletions(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 1057e02..1f0cc60 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -9,7 +9,6 @@ COPY requirements.txt .
 
 RUN pip install --upgrade pip
 RUN pip install -r requirements.txt
-RUN python3 -c "import nltk; nltk.download('all')"
 
 COPY . .
 
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 36d88d7..ed96220 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -7,11 +7,19 @@
 		"context": ".",
 		// Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
 		"dockerfile": "./Dockerfile"
+	},
+	"customizations": {
+		"vscode": {
+			"extensions": [
+				"GitHub.copilot",
+				"ms-python.python"
+			]
+		}
 	}
 
 	// Features to add to the dev container. More info: https://containers.dev/features.
 	// "features": {},
-
+	
 	// Use 'forwardPorts' to make a list of ports inside the container available locally.
 	// "forwardPorts": [],
 
diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt
index b94ffd1..c812234 100644
--- a/.devcontainer/requirements.txt
+++ b/.devcontainer/requirements.txt
@@ -21,5 +21,4 @@ tzdata==2023.3
 urllib3==1.26.15
 fontawesomefree==5.15.4
 plotly-express==0.4.1
-pandas==2.0.0
-django-bootstrap-v5==1.0.11
\ No newline at end of file
+pandas==2.0.0
\ No newline at end of file
diff --git a/scraper/scraper_class.py b/scraper/scraper_class.py
index 2e11d2a..b261d15 100644
--- a/scraper/scraper_class.py
+++ b/scraper/scraper_class.py
@@ -68,7 +68,7 @@ class FacebookScraper:
         description = self.base_soup.find("meta", {"name": "DC.description"})
         description_content = description["content"]
 
-        return clean_text(description_content)
+        return description_content
 
     def is_listing_missing(self) -> bool:
         title_element = self.mobile_soup.find("title")
diff --git a/scraper/utils.py b/scraper/utils.py
index a35b753..9289301 100644
--- a/scraper/utils.py
+++ b/scraper/utils.py
@@ -1,35 +1,9 @@
-from nltk.corpus import stopwords
-from nltk.sentiment import SentimentIntensityAnalyzer
-from nltk.tokenize import RegexpTokenizer
-from nltk.stem import WordNetLemmatizer
 from bs4 import BeautifulSoup
 from difflib import SequenceMatcher
 import numpy as np
 import requests
 import re
 
-def clean_text(text: str) -> str:
-    """
-    Cleans a string of text by removing punctuation and extra whitespace.
-
-    Args:
-        text: The string of text to clean.
-
-    Returns:
-        The cleaned string of text.
-    """
-    tokenizer = RegexpTokenizer('\w+|\$[\d\.]+|http\S+')
-    tokenized = tokenizer.tokenize(text)
-    tokenized = [word.lower() for word in tokenized]
-
-    stop_words = stopwords.words('english')
-    filtered = [word for word in tokenized if word not in stop_words and word.isalpha()]
-
-    lemmatizer = WordNetLemmatizer()
-    lemmatized = [lemmatizer.lemmatize(word) for word in filtered]
-    
-    return " ".join(lemmatized)
-
 def clean_listing_title(title: str) -> str:
     """
     Clean a listing title by removing punctuation and converting to lowercase.
@@ -119,28 +93,6 @@ def get_product_price(soup: BeautifulSoup) -> np.ndarray:
 
     return outlierless
 
-def sentiment_analysis(text: str) -> float:
-    """
-    Returns the sentiment score of the text, with higher values indicating a more positive sentiment.
-
-    Args:
-        text (str): The text to analyze.
-    Returns:
-        float: The sentiment score, with higher values indicating a more positive sentiment.
-    """
-    sia = SentimentIntensityAnalyzer()
-    sentiment = sia.polarity_scores(text)
-    neg, neu, pos, compound = sentiment["neg"], sentiment["neu"], sentiment["pos"], sentiment["compound"]
-
-    if compound > 0.0:
-        rating = 5 * max(pos, compound)
-    elif compound < 0.0:
-        rating = 5 * min(neg, compound)
-    else:
-        rating = 5 * neu
-
-    return abs(rating)
-
 def create_soup(url: str, headers: dict) -> BeautifulSoup:
     """
     Create a BeautifulSoup object from a URL.
diff --git a/scraper/views.py b/scraper/views.py
index b5a1fb5..0eb8473 100644
--- a/scraper/views.py
+++ b/scraper/views.py
@@ -34,8 +34,6 @@ class Index(View):
             title = scraper_instance.get_listing_title()
             list_price = scraper_instance.get_listing_price()
 
-            sentiment_rating = sentiment_analysis(listing_description)
-
             list_price = re.sub("[\$,]", "", list_price)
             initial_price = int(re.sub("[\$,]", "", list_price))
 
@@ -77,7 +75,6 @@ class Index(View):
                 'shortened_url': shortened_url,
                 'mobile_url': mobile_url,
                 'market_id': market_id,
-                'sentiment_rating': round(sentiment_rating, 1),
                 'title': title,
                 'list_price': f"{float(list_price):,.2f}",
                 'initial_price': initial_price,