mirror of
https://github.com/Marketscrape/marketscrape-web.git
synced 2025-12-23 22:17:56 -05:00
Added item condition, and made Polynomial Regression hovertext more descriptive.
This commit is contained in:
@@ -49,8 +49,7 @@ class EbayScraper:
|
||||
soup: The HTML to extract the price from.
|
||||
|
||||
Returns:
|
||||
The price of each product. The price is represented as a
|
||||
NumPy array.
|
||||
The price of each product.
|
||||
"""
|
||||
|
||||
prices = self.soup.find_all('span', class_='s-item__price')
|
||||
@@ -65,6 +64,25 @@ class EbayScraper:
|
||||
|
||||
return cleansed
|
||||
|
||||
def get_product_condition(self) -> list[str]:
    """
    Extracts the condition of each product from the HTML.

    The HTML is read from ``self.soup``; the method takes no other
    arguments.

    Returns:
        The condition of each product. The conditions are represented
        as a list of strings, in the order returned by ``find_all``.
    """

    # Condition labels are looked up as <span class="SECONDARY_INFO">
    # elements (presumably one per listing -- confirm against the
    # page markup).
    conditions = self.soup.find_all('span', class_='SECONDARY_INFO')

    # Keep only the text of each matched element.
    return [condition.text for condition in conditions]
|
||||
|
||||
def get_product_shipping(self) -> list[float]:
|
||||
"""
|
||||
Extracts the shipping cost of each product from the HTML.
|
||||
@@ -73,8 +91,7 @@ class EbayScraper:
|
||||
soup: The HTML to extract the shipping cost from.
|
||||
|
||||
Returns:
|
||||
The shipping cost of each product. The shipping cost is represented as a
|
||||
NumPy array.
|
||||
The shipping cost of each product.
|
||||
"""
|
||||
|
||||
shipping = self.soup.find_all('span', class_='s-item__shipping s-item__logisticsCost')
|
||||
@@ -136,7 +153,7 @@ class EbayScraper:
|
||||
|
||||
return similarity
|
||||
|
||||
def remove_outliers(self, titles: list[str], prices: list[float], shipping: list[float], countries: list[str]) -> tuple[list[str], list[float], list[float], list[str]]:
|
||||
def remove_outliers(self, titles: list[str], prices: list[float], shipping: list[float], countries: list[str], conditions: list[str]) -> tuple[list[str], list[float], list[float], list[str]]:
|
||||
"""
|
||||
Removes outliers from a set of data consisting of titles, prices, shipping costs, countries, and conditions.
|
||||
|
||||
@@ -160,8 +177,9 @@ class EbayScraper:
|
||||
prices = [price for i, price in enumerate(prices) if i not in outlier_indices]
|
||||
shipping = [ship for i, ship in enumerate(shipping) if i not in outlier_indices]
|
||||
countries = [country for i, country in enumerate(countries) if i not in outlier_indices]
|
||||
conditions = [condition for i, condition in enumerate(conditions) if i not in outlier_indices]
|
||||
|
||||
return titles, prices, shipping, countries
|
||||
return titles, prices, shipping, countries, conditions
|
||||
|
||||
def get_product_info(self):
|
||||
"""
|
||||
@@ -184,16 +202,18 @@ class EbayScraper:
|
||||
prices = self.get_product_price()
|
||||
shipping = self.get_product_shipping()
|
||||
countries = self.get_product_country()
|
||||
conditions = self.get_product_condition()
|
||||
|
||||
titles, prices, shipping, countries = self.remove_outliers(titles, prices, shipping, countries)
|
||||
titles, prices, shipping, countries, conditions = self.remove_outliers(titles, prices, shipping, countries, conditions)
|
||||
|
||||
product_info = []
|
||||
for title, price, ship, country in zip(titles, prices, shipping, countries):
|
||||
for title, price, ship, country, condition in zip(titles, prices, shipping, countries, conditions):
|
||||
product_info.append({
|
||||
'title': clean_text(title.text.lower()),
|
||||
'price': price,
|
||||
'shipping': ship,
|
||||
'country': country
|
||||
'country': country,
|
||||
'condition': condition
|
||||
})
|
||||
|
||||
return product_info
|
||||
@@ -230,7 +250,7 @@ class EbayScraper:
|
||||
|
||||
return min_price_item
|
||||
|
||||
def construct_candidates(self, descriptions, prices, shipping, countries, similarities):
|
||||
def construct_candidates(self, descriptions, prices, shipping, countries, conditions, similarities):
|
||||
"""
|
||||
Constructs a list of candidates from the descriptions, prices, and
|
||||
countries.
|
||||
@@ -251,6 +271,7 @@ class EbayScraper:
|
||||
"price": prices[i],
|
||||
"shipping": shipping[i],
|
||||
"country": countries[i],
|
||||
"condition": conditions[i],
|
||||
"similarity": similarities[i]
|
||||
}
|
||||
|
||||
@@ -275,6 +296,7 @@ class EbayScraper:
|
||||
prices = []
|
||||
shipping = []
|
||||
countries = []
|
||||
conditions = []
|
||||
similarities = []
|
||||
|
||||
for page_number in range(5):
|
||||
@@ -304,9 +326,10 @@ class EbayScraper:
|
||||
prices += [f"{product['price']:,.2f}" for product in filtered_prices_descriptions.values()]
|
||||
shipping += [f"{product['shipping']:,.2f}" for product in filtered_prices_descriptions.values()]
|
||||
countries += [product['country'] for product in filtered_prices_descriptions.values()]
|
||||
conditions += [product['condition'] for product in filtered_prices_descriptions.values()]
|
||||
similarities += [product['similarity'] for product in filtered_prices_descriptions.values()]
|
||||
|
||||
return descriptions, prices, shipping, countries, similarities
|
||||
return descriptions, prices, shipping, countries, conditions, similarities
|
||||
|
||||
def filter_products_by_similarity(self, product_info: list, target_title: str, similarity_threshold: float):
|
||||
"""
|
||||
@@ -332,6 +355,7 @@ class EbayScraper:
|
||||
'price': product['price'],
|
||||
'shipping': product['shipping'],
|
||||
'country': product['country'],
|
||||
'condition': product['condition'],
|
||||
'similarity': similarity
|
||||
}
|
||||
except InvalidSimilarityThreshold:
|
||||
|
||||
@@ -156,7 +156,7 @@ def percentage_difference(list_price: float, best_price: float) -> dict:
|
||||
|
||||
return difference
|
||||
|
||||
def create_chart(similar_prices: list[float], similar_shipping: list[float], similar_descriptions: list[str], listing_currency: str, listing_title: str, best_title: str) -> object:
|
||||
def create_chart(similar_prices: list[float], similar_shipping: list[float], similar_descriptions: list[str], similar_conditions: list[str], listing_currency: str, listing_title: str, best_title: str) -> object:
|
||||
"""
|
||||
Creates a line chart visualization based on the categorized items, their prices, and their descriptions.
|
||||
|
||||
@@ -173,7 +173,8 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
|
||||
sorted_indices = np.argsort(similar_shipping)
|
||||
sorted_similar_prices = np.array([similar_prices[i] for i in sorted_indices]).reshape(-1, 1)
|
||||
sorted_similar_shipping = np.array([similar_shipping[i] for i in sorted_indices])
|
||||
sorted_similar_description = np.array([similar_descriptions[i] for i in sorted_indices])
|
||||
sorted_similar_descriptions = np.array([similar_descriptions[i] for i in sorted_indices])
|
||||
sorted_similar_conditions = np.array([similar_conditions[i] for i in sorted_indices])
|
||||
|
||||
fig = go.Figure()
|
||||
fig.add_trace(
|
||||
@@ -185,8 +186,8 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
|
||||
colorscale='RdYlGn_r',
|
||||
colorbar=dict(title="Price")),
|
||||
hovertemplate="%{text}",
|
||||
text=[f"Product: {desc.title()}<br>Price: ${price:.2f}<br>Shipping: ${ship:.2f}"
|
||||
for desc, price, ship in zip(sorted_similar_description, sorted_similar_prices[:, 0], sorted_similar_shipping)],
|
||||
text=[f"Product: {desc.title()}<br>Price: ${price:.2f}<br>Shipping: ${ship:.2f}<br>Condition: {cond}"
|
||||
for desc, price, ship, cond in zip(sorted_similar_descriptions, sorted_similar_prices[:, 0], sorted_similar_shipping, sorted_similar_conditions)],
|
||||
showlegend=False,
|
||||
name="Products"))
|
||||
fig.update_layout(
|
||||
@@ -226,17 +227,17 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
|
||||
poly_model = LinearRegression()
|
||||
poly_model.fit(X_poly, sorted_similar_shipping)
|
||||
|
||||
x_range = np.linspace(sorted_similar_prices.min(), sorted_similar_prices.max(), 100)
|
||||
X_range_poly = poly_features.fit_transform(x_range.reshape(-1, 1))
|
||||
X_range = np.linspace(sorted_similar_prices.min(), sorted_similar_prices.max(), 100)
|
||||
X_range_poly = poly_features.fit_transform(X_range.reshape(-1, 1))
|
||||
Y_range = poly_model.predict(X_range_poly)
|
||||
|
||||
fig.add_trace(
|
||||
go.Scatter(
|
||||
x=x_range,
|
||||
x=X_range,
|
||||
y=poly_model.predict(X_range_poly),
|
||||
mode='lines',
|
||||
hovertemplate="%{text}",
|
||||
text=[f"Predicted Price: ${price:.2f}" for price in Y_range],
|
||||
text=[f"Predicted Price: ${price:.2f}<br>Predicted Shipping: ${ship:.2f}" for price, ship in zip(X_range, Y_range)],
|
||||
showlegend=False,
|
||||
name="Polynomial Regression"))
|
||||
|
||||
|
||||
@@ -48,8 +48,8 @@ class Index(View):
|
||||
|
||||
# Find viable products based on the title
|
||||
cleaned_title = remove_illegal_characters(title)
|
||||
similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_scores = shopping_instance.find_viable_product(cleaned_title, ramp_down=0.0)
|
||||
candidates = shopping_instance.construct_candidates(similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_scores)
|
||||
similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_conditions, similar_scores = shopping_instance.find_viable_product(cleaned_title, ramp_down=0.0)
|
||||
candidates = shopping_instance.construct_candidates(similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_conditions, similar_scores)
|
||||
|
||||
# Convert prices to float and shorten the descriptions if necessary
|
||||
similar_prices = [float(price.replace(',', '')) for price in similar_prices]
|
||||
@@ -70,7 +70,7 @@ class Index(View):
|
||||
price_rating = price_difference_rating(float(price), best_total, days)
|
||||
|
||||
# Categorize the titles and create the chart and bargraph
|
||||
chart = create_chart(similar_prices, similar_shipping, similar_descriptions, currency, title, best_title)
|
||||
chart = create_chart(similar_prices, similar_shipping, similar_descriptions, similar_conditions, currency, title, best_title)
|
||||
bargraph = create_bargraph(similar_countries)
|
||||
|
||||
# Get the total number of items
|
||||
|
||||
Reference in New Issue
Block a user