Files
marketscrape-web/scraper/scraper_class.py

71 lines
2.2 KiB
Python

import datetime
import re
from .utils import *
class FacebookScraper:
    """Pull individual listing fields out of two parsed Facebook pages.

    Only ``find`` and ``find_all`` are called on the soups, so they are
    presumably ``bs4.BeautifulSoup`` objects -- TODO confirm against the
    caller.
    """

    def __init__(self, mobile_soup, base_soup):
        """Store the parsed pages.

        Args:
            mobile_soup: Parsed mobile page; source of price, images and date.
            base_soup: Parsed base page; source of the ``DC.*`` meta tags.
        """
        self.mobile_soup = mobile_soup
        self.base_soup = base_soup

    def get_listing_price(self) -> str:
        """Return the listing's price text.

        A span whose text mentions "free" (case-insensitive) takes
        precedence; otherwise the first span containing "$" is used.

        Returns:
            The text of the first matching span.

        Raises:
            IndexError: If no span mentions "free" or contains "$".
        """
        texts = [span.text for span in self.mobile_soup.find_all("span")]

        free = [text for text in texts if "free" in text.lower()]
        if free:
            # Hand back a single string, as annotated, not the whole list of
            # matches.
            return str(free[0])

        priced = [str(text) for text in texts if "$" in text]
        if not priced:
            raise IndexError("no span containing '$' found on the mobile page")
        return priced[0]

    def get_listing_image(self) -> list[str]:
        """Return the ``src`` of every image served from ``https://scontent``.

        Images without a ``src`` attribute are skipped instead of aborting
        the whole scan.
        """
        sources = (
            img.get("src") or "" for img in self.mobile_soup.find_all("img")
        )
        return [src for src in sources if "https://scontent" in src]

    def get_listing_title(self) -> str:
        """Return the ``content`` of the ``DC.title`` meta tag.

        Raises:
            TypeError: If the lookup yields ``None`` (tag not found).
        """
        return self._meta_content("DC.title")

    def get_listing_date(self) -> tuple[int, int]:
        """Return how long ago the listing was posted as ``(days, hours)``.

        The text of the first ``<abbr>`` on the mobile page is parsed in one
        of two ways:

        * If its first word is a full month name, it is read as an absolute
          timestamp (month, day, optional four-digit year, ``H:MM`` time and
          an upper-case AM/PM marker) and compared against the current local
          time.
        * Otherwise the first number in the text is returned as hours, with
          zero days.

        A timestamp without a year is assumed to be in the current year; if
        that would place it more than a day in the future it is moved to the
        previous year. An age that is still negative is reported as
        ``(0, 0)``.

        Raises:
            AttributeError: If an expected component is missing from the text.
        """
        text = self.mobile_soup.find("abbr").text.strip()

        try:
            month_str = re.search(r"[a-zA-Z]+", text).group(0)
            month_num = datetime.datetime.strptime(month_str, "%B").month
        except ValueError:
            # NOTE(review): any first word that is not a month name lands
            # here and the number is taken as hours, whatever unit the text
            # actually states -- confirm which formats the page emits.
            hour_str = re.search(r"[0-9]+", text).group(0)
            return (0, int(hour_str))

        now = datetime.datetime.now()
        year_match = re.search(r"[0-9]{4}", text)
        year = int(year_match.group(0)) if year_match else now.year

        # The first number in the text is the day of the month.
        day_str = re.search(r"[0-9]+", text).group(0)
        time_str = re.search(r"[0-9]+:[0-9]+", text).group(0)
        am_pm_str = re.search(r"[A-Z]{2}", text).group(0)

        posted = datetime.datetime.strptime(
            f"{year}-{month_num}-{day_str} {time_str}:00 {am_pm_str}",
            "%Y-%m-%d %I:%M:%S %p",
        )

        # A year-less date that lands well in the future was really posted
        # last year (e.g. a December listing read in January). The one-day
        # slack keeps small clock differences from adding a whole year.
        if year_match is None and posted - now > datetime.timedelta(days=1):
            try:
                posted = posted.replace(year=posted.year - 1)
            except ValueError:
                # February 29 has no counterpart in the previous year.
                pass

        age = now - posted
        if age < datetime.timedelta(0):
            return (0, 0)
        return (age.days, age.seconds // 3600)

    def get_listing_description(self) -> str:
        """Return the ``DC.description`` meta content passed through ``clean_text``.

        Raises:
            TypeError: If the lookup yields ``None`` (tag not found).
        """
        return clean_text(self._meta_content("DC.description"))

    def _meta_content(self, name: str) -> str:
        """Return the ``content`` attribute of the base page's meta tag *name*."""
        tag = self.base_soup.find("meta", {"name": name})
        return tag["content"]