Added item condition, and made Polynomial Regression hovertext more descriptive.

This commit is contained in:
Bhavanvir Rai
2023-05-06 12:18:26 -07:00
parent 85891475a4
commit 59f2a2ec5e
3 changed files with 47 additions and 22 deletions

View File

@@ -49,8 +49,7 @@ class EbayScraper:
soup: The HTML to extract the price from.
Returns:
The price of each product. The price is represented as a
NumPy array.
The price of each product.
"""
prices = self.soup.find_all('span', class_='s-item__price')
@@ -65,6 +64,25 @@ class EbayScraper:
return cleansed
def get_product_condition(self) -> list[str]:
"""
Extracts the conditions of each product from the HTML
Args:
soup: The HTML to extract the condition from.
Returns:
The condition of each product. The conditions is represented as a
"""
conditions = self.soup.find_all('span', class_='SECONDARY_INFO')
values = []
for condition in conditions:
values.append(condition.text)
return values
def get_product_shipping(self) -> list[float]:
"""
Extracts the shipping cost of each product from the HTML.
@@ -73,8 +91,7 @@ class EbayScraper:
soup: The HTML to extract the shipping cost from.
Returns:
The shipping cost of each product. The shipping cost is represented as a
NumPy array.
The shipping cost of each product.
"""
shipping = self.soup.find_all('span', class_='s-item__shipping s-item__logisticsCost')
@@ -136,7 +153,7 @@ class EbayScraper:
return similarity
def remove_outliers(self, titles: list[str], prices: list[float], shipping: list[float], countries: list[str]) -> tuple[list[str], list[float], list[float], list[str]]:
def remove_outliers(self, titles: list[str], prices: list[float], shipping: list[float], countries: list[str], conditions: list[str]) -> tuple[list[str], list[float], list[float], list[str]]:
"""
Removes outliers from a set of data consisting of titles, prices, and countries.
@@ -160,8 +177,9 @@ class EbayScraper:
prices = [price for i, price in enumerate(prices) if i not in outlier_indices]
shipping = [ship for i, ship in enumerate(shipping) if i not in outlier_indices]
countries = [country for i, country in enumerate(countries) if i not in outlier_indices]
conditions = [condition for i, condition in enumerate(conditions) if i not in outlier_indices]
return titles, prices, shipping, countries
return titles, prices, shipping, countries, conditions
def get_product_info(self):
"""
@@ -184,16 +202,18 @@ class EbayScraper:
prices = self.get_product_price()
shipping = self.get_product_shipping()
countries = self.get_product_country()
conditions = self.get_product_condition()
titles, prices, shipping, countries = self.remove_outliers(titles, prices, shipping, countries)
titles, prices, shipping, countries, conditions = self.remove_outliers(titles, prices, shipping, countries, conditions)
product_info = []
for title, price, ship, country in zip(titles, prices, shipping, countries):
for title, price, ship, country, condition in zip(titles, prices, shipping, countries, conditions):
product_info.append({
'title': clean_text(title.text.lower()),
'price': price,
'shipping': ship,
'country': country
'country': country,
'condition': condition
})
return product_info
@@ -230,7 +250,7 @@ class EbayScraper:
return min_price_item
def construct_candidates(self, descriptions, prices, shipping, countries, similarities):
def construct_candidates(self, descriptions, prices, shipping, countries, conditions, similarities):
"""
Constructs a list of candidates from the descriptions, prices, and
countries.
@@ -251,6 +271,7 @@ class EbayScraper:
"price": prices[i],
"shipping": shipping[i],
"country": countries[i],
"condition": conditions[i],
"similarity": similarities[i]
}
@@ -275,6 +296,7 @@ class EbayScraper:
prices = []
shipping = []
countries = []
conditions = []
similarities = []
for page_number in range(5):
@@ -304,9 +326,10 @@ class EbayScraper:
prices += [f"{product['price']:,.2f}" for product in filtered_prices_descriptions.values()]
shipping += [f"{product['shipping']:,.2f}" for product in filtered_prices_descriptions.values()]
countries += [product['country'] for product in filtered_prices_descriptions.values()]
conditions += [product['condition'] for product in filtered_prices_descriptions.values()]
similarities += [product['similarity'] for product in filtered_prices_descriptions.values()]
return descriptions, prices, shipping, countries, similarities
return descriptions, prices, shipping, countries, conditions, similarities
def filter_products_by_similarity(self, product_info: list, target_title: str, similarity_threshold: float):
"""
@@ -332,6 +355,7 @@ class EbayScraper:
'price': product['price'],
'shipping': product['shipping'],
'country': product['country'],
'condition': product['condition'],
'similarity': similarity
}
except InvalidSimilarityThreshold:

View File

@@ -156,7 +156,7 @@ def percentage_difference(list_price: float, best_price: float) -> dict:
return difference
def create_chart(similar_prices: list[float], similar_shipping: list[float], similar_descriptions: list[str], listing_currency: str, listing_title: str, best_title: str) -> object:
def create_chart(similar_prices: list[float], similar_shipping: list[float], similar_descriptions: list[str], similar_conditions: list[str], listing_currency: str, listing_title: str, best_title: str) -> object:
"""
Creates a line chart visualization based on the categorized items, their prices, and their descriptions.
@@ -173,7 +173,8 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
sorted_indices = np.argsort(similar_shipping)
sorted_similar_prices = np.array([similar_prices[i] for i in sorted_indices]).reshape(-1, 1)
sorted_similar_shipping = np.array([similar_shipping[i] for i in sorted_indices])
sorted_similar_description = np.array([similar_descriptions[i] for i in sorted_indices])
sorted_similar_descriptions = np.array([similar_descriptions[i] for i in sorted_indices])
sorted_similar_conditions = np.array([similar_conditions[i] for i in sorted_indices])
fig = go.Figure()
fig.add_trace(
@@ -185,8 +186,8 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
colorscale='RdYlGn_r',
colorbar=dict(title="Price")),
hovertemplate="%{text}",
text=[f"Product: {desc.title()}<br>Price: ${price:.2f}<br>Shipping: ${ship:.2f}"
for desc, price, ship in zip(sorted_similar_description, sorted_similar_prices[:, 0], sorted_similar_shipping)],
text=[f"Product: {desc.title()}<br>Price: ${price:.2f}<br>Shipping: ${ship:.2f}<br>Condition: {cond}"
for desc, price, ship, cond in zip(sorted_similar_descriptions, sorted_similar_prices[:, 0], sorted_similar_shipping, sorted_similar_conditions)],
showlegend=False,
name="Products"))
fig.update_layout(
@@ -226,17 +227,17 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
poly_model = LinearRegression()
poly_model.fit(X_poly, sorted_similar_shipping)
x_range = np.linspace(sorted_similar_prices.min(), sorted_similar_prices.max(), 100)
X_range_poly = poly_features.fit_transform(x_range.reshape(-1, 1))
X_range = np.linspace(sorted_similar_prices.min(), sorted_similar_prices.max(), 100)
X_range_poly = poly_features.fit_transform(X_range.reshape(-1, 1))
Y_range = poly_model.predict(X_range_poly)
fig.add_trace(
go.Scatter(
x=x_range,
x=X_range,
y=poly_model.predict(X_range_poly),
mode='lines',
hovertemplate="%{text}",
text=[f"Predicted Price: ${price:.2f}" for price in Y_range],
text=[f"Predicted Price: ${price:.2f}<br>Predicted Shipping: ${ship:.2f}" for price, ship in zip(X_range, Y_range)],
showlegend=False,
name="Polynomial Regression"))

View File

@@ -48,8 +48,8 @@ class Index(View):
# Find viable products based on the title
cleaned_title = remove_illegal_characters(title)
similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_scores = shopping_instance.find_viable_product(cleaned_title, ramp_down=0.0)
candidates = shopping_instance.construct_candidates(similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_scores)
similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_conditions, similar_scores = shopping_instance.find_viable_product(cleaned_title, ramp_down=0.0)
candidates = shopping_instance.construct_candidates(similar_descriptions, similar_prices, similar_shipping, similar_countries, similar_conditions, similar_scores)
# Convert prices to float and shorten the descriptions if necessary
similar_prices = [float(price.replace(',', '')) for price in similar_prices]
@@ -70,7 +70,7 @@ class Index(View):
price_rating = price_difference_rating(float(price), best_total, days)
# Categorize the titles and create the chart and bargraph
chart = create_chart(similar_prices, similar_shipping, similar_descriptions, currency, title, best_title)
chart = create_chart(similar_prices, similar_shipping, similar_descriptions, similar_conditions, currency, title, best_title)
bargraph = create_bargraph(similar_countries)
# Get the total number of items