mirror of
https://github.com/Marketscrape/marketscrape-web.git
synced 2026-04-30 10:42:39 -04:00
128 lines
4.3 KiB
Python
128 lines
4.3 KiB
Python
from django.shortcuts import render
|
|
from django.views import View
|
|
from .forms import MarketForm
|
|
|
|
from .utils import create_soup, sentiment_analysis, find_viable_product, price_difference_rating
|
|
|
|
import re
|
|
|
|
import statistics
|
|
import datetime
|
|
|
|
class Index(View):
    """Form page that takes a Marketplace listing URL and rates the listing.

    ``get`` renders the empty form.  ``post`` scrapes the submitted listing,
    scores its description and its price, and renders the result page.
    """

    def get(self, request):
        """Render the blank listing-URL form."""
        form = MarketForm()
        return render(request, 'scraper/index.html', {'form': form})

    def post(self, request):
        """Scrape the submitted listing and render its ratings.

        When the form does not validate, or the URL contains no
        ``/item/<digits>`` segment, the form page is rendered again with the
        bound form so that a response is always returned.
        """
        form = MarketForm(request.POST)

        if not form.is_valid():
            # A view has to return a response; fall back to the form page.
            return render(request, 'scraper/index.html', {'form': form})

        url = form.cleaned_data['url']

        # Find the ID of the product.  Without it the URL is not a listing
        # URL, so report that on the form instead of crashing on ``None``.
        id_match = re.search(r"/item/([0-9]+)", url)
        if id_match is None:
            form.add_error(
                'url',
                'Enter the URL of a Marketplace listing (it must contain "/item/<id>").',
            )
            return render(request, 'scraper/index.html', {'form': form})
        market_id = id_match.group(1)

        # Cut the URL off after its last digit.  A match is guaranteed here
        # because the ID above already proved the URL contains digits.
        shortened_url = re.search(r".*[0-9]", url).group(0)
        # Use the shortened URL and convert it to mobile, to get the price of
        # the listing.  Only the first "www" (the host) is rewritten.
        mobile_url = shortened_url.replace("www", "m", 1)

        soup = create_soup(url, headers=None)
        instance = FacebookScraper(soup=soup)

        listing_image = instance.get_listing_image()
        listing_days, listing_hours = instance.get_listing_date()
        listing_description = instance.get_listing_description()

        sentiment_rating = sentiment_analysis(listing_description)

        title = instance.get_listing_title()

        price_amount = self._parse_price(instance.get_listing_price())
        initial_price = int(price_amount)

        lower_bound, upper_bound, median = find_viable_product(title, ramp_down=0.0)

        price_rating = price_difference_rating(initial_price, median)
        average_rating = statistics.mean([sentiment_rating, price_rating])

        context = {
            'shortened_url': shortened_url,
            'mobile_url': mobile_url,
            'market_id': market_id,
            'sentiment_rating': round(sentiment_rating, 1),
            'title': title,
            'list_price': "{0:,.2f}".format(price_amount),
            'initial_price': initial_price,
            'lower_bound': "{0:,.2f}".format(lower_bound),
            'upper_bound': "{0:,.2f}".format(upper_bound),
            'median': "{0:,.2f}".format(median),
            'price_rating': round(price_rating, 1),
            'average_rating': round(average_rating, 1),
            'days': listing_days,
            'hours': listing_hours,
            # None when the page exposed no listing image.
            'image': listing_image[0] if listing_image else None,
        }

        return render(request, 'scraper/result.html', context)

    @staticmethod
    def _parse_price(raw_price):
        """Return the numeric amount found in ``raw_price`` as a float.

        Anything that is not a string, or that contains no number, counts as
        ``0.0``.  Thousands separators and currency symbols or prefixes are
        ignored; a decimal part is kept.
        """
        if not isinstance(raw_price, str):
            # get_listing_price can hand back a list of "free" span texts.
            return 0.0

        amount = re.search(r"[0-9][0-9,]*(?:\.[0-9]+)?", raw_price)
        if amount is None:
            return 0.0

        return float(amount.group(0).replace(",", ""))
|
|
|
|
class FacebookScraper:
    """Pull listing details out of a parsed Marketplace listing page.

    ``soup`` must provide ``find`` and ``find_all`` lookups by tag name
    (presumably a BeautifulSoup document -- confirm against ``create_soup``).
    """

    # Whole-word match so that e.g. "Freezer" is not read as a free listing.
    _FREE_RE = re.compile(r"\bfree\b", re.IGNORECASE)
    # Posting dates up to this far ahead of "now" are treated as clock or
    # timezone skew rather than as a date that belongs to an earlier year.
    _FUTURE_TOLERANCE = datetime.timedelta(days=1)

    def __init__(self, soup):
        self.soup = soup

    def get_listing_price(self):
        """Return the listing price as text.

        Returns ``"$0"`` when a ``<span>`` contains the word "free", otherwise
        the text of the first ``<span>`` containing ``"$"``.

        Raises:
            IndexError: if no span carries a price.
        """
        spans = self.soup.find_all("span")

        if any(self._FREE_RE.search(span.text) for span in spans):
            # Always hand back a string so callers can parse every result the
            # same way.
            return "$0"

        # Find the span that contains the price of the listing and extract the price
        for span in spans:
            if "$" in span.text:
                return str(span.text)

        raise IndexError("no <span> containing a '$' price was found")

    def get_listing_image(self):
        """Return the ``src`` of every ``<img>`` whose source contains "https://scontent".

        Images without a ``src`` attribute are skipped.
        """
        images = self.soup.find_all("img")
        sources = (image.get("src") or "" for image in images)

        return [src for src in sources if "https://scontent" in src]

    def get_listing_title(self):
        """Return the ``content`` of the ``DC.title`` meta tag."""
        title = self.soup.find("meta", {"name": "DC.title"})
        return title["content"]

    def get_listing_date(self):
        """Return the listing's age as ``(days, hours)``.

        The first ``<abbr>`` text is read as "<full month name> <day> ...
        <h:mm> <AM|PM>".  The text carries no year, so the most recent year in
        which that date is not in the future is used.  A date less than a day
        ahead of the current time is treated as skew and reported as
        ``(0, 0)``.

        Raises:
            AttributeError: if there is no ``<abbr>`` or its text lacks a
                month, day or time.
            ValueError: if the month name or the date is not valid.
        """
        tag = self.soup.find('abbr')
        text = tag.text.strip()

        month_name = re.search(r"[a-zA-Z]+", text).group(0)
        month = datetime.datetime.strptime(month_name, '%B').month
        day = int(re.search(r"[0-9]+", text).group(0))

        clock = re.search(r"([0-9]+:[0-9]+)\s*([AP]M)", text)
        posted_time = datetime.datetime.strptime(
            f'{clock.group(1)} {clock.group(2)}', '%I:%M %p'
        ).time()

        now = datetime.datetime.now()
        latest_allowed = now + self._FUTURE_TOLERANCE

        # Walk back from the current year; five years always includes a leap
        # year for Feb 29 except across a skipped century leap year.
        for year in range(now.year, now.year - 5, -1):
            try:
                posted_date = datetime.date(year, month, day)
            except ValueError:
                continue  # e.g. Feb 29 in a non-leap year
            posted = datetime.datetime.combine(posted_date, posted_time)
            if posted <= latest_allowed:
                break
        else:
            raise ValueError(f"cannot resolve a listing date from {text!r}")

        # Never report a negative age.
        age = max(now - posted, datetime.timedelta(0))

        return age.days, age.seconds // 3600

    def get_listing_description(self):
        """Return the cleaned ``content`` of the ``DC.description`` meta tag."""
        description = self.soup.find("meta", {"name": "DC.description"})
        return self.clean_text(description["content"])

    @staticmethod
    def clean_text(text):
        """Collapse every run of whitespace to one space and trim the ends.

        NOTE(review): this method was called but never defined, so every
        description lookup failed.  This is a minimal normalisation; confirm
        whether the project intends a richer cleaner here.
        """
        return " ".join(text.split())