mirror of
https://github.com/Marketscrape/marketscrape-web.git
synced 2025-12-23 22:17:56 -05:00
Algorithmic tweaks + minor changes to Results page.
This commit is contained in:
@@ -9,5 +9,4 @@ plotly==5.14.1
|
||||
plotly-express==0.4.1
|
||||
regex==2023.3.23
|
||||
requests==2.28.2
|
||||
wordcloud==1.8.2.2
|
||||
scikit-learn==1.2.2
|
||||
@@ -150,12 +150,16 @@ class EbayScraper:
|
||||
A tuple of three lists: (1) titles with outliers removed, (2) prices with outliers removed, and (3) countries with outliers removed.
|
||||
"""
|
||||
|
||||
outlier_indices = reject_outliers(np.array(prices), m=1.5)
|
||||
# Minimum number of items required to start removing outliers
|
||||
removal_threshold = 100
|
||||
|
||||
titles = [title for i, title in enumerate(titles) if i not in outlier_indices]
|
||||
prices = [price for i, price in enumerate(prices) if i not in outlier_indices]
|
||||
shipping = [ship for i, ship in enumerate(shipping) if i not in outlier_indices]
|
||||
countries = [country for i, country in enumerate(countries) if i not in outlier_indices]
|
||||
if len(titles) >= removal_threshold:
|
||||
outlier_indices = reject_outliers(np.array(prices), m=1.5)
|
||||
|
||||
titles = [title for i, title in enumerate(titles) if i not in outlier_indices]
|
||||
prices = [price for i, price in enumerate(prices) if i not in outlier_indices]
|
||||
shipping = [ship for i, ship in enumerate(shipping) if i not in outlier_indices]
|
||||
countries = [country for i, country in enumerate(countries) if i not in outlier_indices]
|
||||
|
||||
return titles, prices, shipping, countries
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
var chart = document.getElementById('render-wordcloud');
|
||||
var chart = document.getElementById('render-bargraph');
|
||||
var chartContent = chart.getAttribute('data-chart');
|
||||
var chartObject = JSON.parse(chartContent);
|
||||
Plotly.newPlot(chart, chartObject);
|
||||
@@ -110,15 +110,15 @@
|
||||
|
||||
<div class="card" style="margin-top: 2.5rem; margin-bottom: 2.5rem;">
|
||||
<div class="card-header">
|
||||
<h4><i class="fas fa-globe"></i> Country Frequency</h4>
|
||||
<h4><i class="fas fa-chart-bar"></i> Country Frequency</h4>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
|
||||
<div id="render-wordcloud" data-chart="{{ wordcloud }}"></div>
|
||||
<div id="render-bargraph" data-chart="{{ bargraph }}"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="{% static 'plotSimilarResults.js' %}"></script>
|
||||
<script src="{% static 'plotWordCloud.js' %}"></script>
|
||||
<script src="{% static 'plotCountryCitations.js' %}"></script>
|
||||
{% endblock content %}
|
||||
|
||||
@@ -3,11 +3,9 @@ from .exceptions import *
|
||||
import numpy as np
|
||||
import requests
|
||||
import re
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.preprocessing import PolynomialFeatures
|
||||
from wordcloud import WordCloud
|
||||
from collections import Counter
|
||||
|
||||
def remove_illegal_characters(title: str) -> str:
|
||||
@@ -244,27 +242,42 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
|
||||
|
||||
return fig.to_json()
|
||||
|
||||
def create_wordcloud(urls: list[str]) -> object:
|
||||
def create_bargraph(countries: list[str]) -> object:
|
||||
"""
|
||||
Creates a word cloud visualization based on a list of website URLs.
|
||||
Creates a bar graph visualization showing how often each country appears in a list of countries.
|
||||
|
||||
Args:
|
||||
urls (list[str]): A list of website URLs to be used to generate the word cloud.
|
||||
countries (list[str]): A list of countries whose occurrences are counted to generate the bar graph.
|
||||
|
||||
Returns:
|
||||
A tuple of the following:
|
||||
- A JSON string containing the Plotly Express figure of the word cloud.
|
||||
- A dictionary where the keys are the website names and the values are the frequency count of each website in the URLs list.
|
||||
A JSON string containing the Plotly figure of the bar graph.
|
||||
"""
|
||||
|
||||
website_counts = Counter(urls)
|
||||
wordcloud = WordCloud(
|
||||
background_color='white',
|
||||
scale=4,
|
||||
prefer_horizontal=0.9,
|
||||
colormap='RdYlGn_r').generate_from_frequencies(website_counts)
|
||||
|
||||
fig = px.imshow(wordcloud)
|
||||
# Count the occurrences of each country
|
||||
country_counts = Counter(countries)
|
||||
|
||||
# Get the names and counts of the countries
|
||||
country_names = list(country_counts.keys())
|
||||
country_values = list(country_counts.values())
|
||||
|
||||
# Create a bar graph with the country names on the x-axis and counts on the y-axis
|
||||
fig = go.Figure(
|
||||
go.Bar(
|
||||
x=country_names,
|
||||
y=country_values,
|
||||
hoverinfo='text',
|
||||
hovertext=[f"Country: {country}<br>Citations: {count}" for country, count in zip(country_names, country_values)],
|
||||
marker=dict(
|
||||
color=country_values,
|
||||
colorscale='RdYlGn_r',
|
||||
showscale=True,
|
||||
colorbar=dict(
|
||||
title='Citations'
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
fig.update_layout(
|
||||
xaxis_title="Country of Origin",
|
||||
yaxis_title="Citations",
|
||||
@@ -273,6 +286,8 @@ def create_wordcloud(urls: list[str]) -> object:
|
||||
'xanchor': 'center',
|
||||
'yanchor': 'top',
|
||||
'y': 0.9,
|
||||
'x': 0.5})
|
||||
'x': 0.5},
|
||||
plot_bgcolor='rgba(0,0,0,0)'
|
||||
)
|
||||
|
||||
return fig.to_json()
|
||||
@@ -69,9 +69,9 @@ class Index(View):
|
||||
best_context = percentage_difference(float(price), best_total,)
|
||||
price_rating = price_difference_rating(float(price), best_total, days)
|
||||
|
||||
# Categorize the titles and create the chart and wordcloud
|
||||
# Categorize the titles and create the chart and bargraph
|
||||
chart = create_chart(similar_prices, similar_shipping, similar_descriptions, currency, title, best_title)
|
||||
wordcloud = create_wordcloud(similar_countries)
|
||||
bargraph = create_bargraph(similar_countries)
|
||||
|
||||
# Get the total number of items
|
||||
total_items = len(similar_descriptions)
|
||||
@@ -83,7 +83,7 @@ class Index(View):
|
||||
'title': title,
|
||||
'price': f"{float(price):,.2f}",
|
||||
'chart': chart,
|
||||
'wordcloud': wordcloud,
|
||||
'bargraph': bargraph,
|
||||
'price_rating': round(price_rating, 1),
|
||||
'days': days,
|
||||
'hours': hours,
|
||||
|
||||
Reference in New Issue
Block a user