Algorithmic tweaks + minor changes to Results page.

This commit is contained in:
Bhavanvir Rai
2023-05-05 23:17:08 -07:00
parent e6b3fe96cd
commit 85891475a4
6 changed files with 48 additions and 30 deletions

View File

@@ -9,5 +9,4 @@ plotly==5.14.1
plotly-express==0.4.1
regex==2023.3.23
requests==2.28.2
wordcloud==1.8.2.2
scikit-learn==1.2.2

View File

@@ -150,12 +150,16 @@ class EbayScraper:
A tuple of four lists: (1) titles with outliers removed, (2) prices with outliers removed, (3) shipping costs with outliers removed, and (4) countries with outliers removed.
"""
outlier_indices = reject_outliers(np.array(prices), m=1.5)
# Minimum number of items required to start removing outliers
removal_threshold = 100
titles = [title for i, title in enumerate(titles) if i not in outlier_indices]
prices = [price for i, price in enumerate(prices) if i not in outlier_indices]
shipping = [ship for i, ship in enumerate(shipping) if i not in outlier_indices]
countries = [country for i, country in enumerate(countries) if i not in outlier_indices]
if len(titles) >= removal_threshold:
outlier_indices = reject_outliers(np.array(prices), m=1.5)
titles = [title for i, title in enumerate(titles) if i not in outlier_indices]
prices = [price for i, price in enumerate(prices) if i not in outlier_indices]
shipping = [ship for i, ship in enumerate(shipping) if i not in outlier_indices]
countries = [country for i, country in enumerate(countries) if i not in outlier_indices]
return titles, prices, shipping, countries

View File

@@ -1,5 +1,5 @@
document.addEventListener("DOMContentLoaded", function () {
var chart = document.getElementById('render-wordcloud');
var chart = document.getElementById('render-bargraph');
var chartContent = chart.getAttribute('data-chart');
var chartObject = JSON.parse(chartContent);
Plotly.newPlot(chart, chartObject);

View File

@@ -110,15 +110,15 @@
<div class="card" style="margin-top: 2.5rem; margin-bottom: 2.5rem;">
<div class="card-header">
<h4><i class="fas fa-globe"></i> Country Frequency</h4>
<h4><i class="fas fa-chart-bar"></i> Country Frequency</h4>
</div>
<div class="card-body">
<div id="render-wordcloud" data-chart="{{ wordcloud }}"></div>
<div id="render-bargraph" data-chart="{{ bargraph }}"></div>
</div>
</div>
</div>
<script src="{% static 'plotSimilarResults.js' %}"></script>
<script src="{% static 'plotWordCloud.js' %}"></script>
<script src="{% static 'plotCountryCitations.js' %}"></script>
{% endblock content %}

View File

@@ -3,11 +3,9 @@ from .exceptions import *
import numpy as np
import requests
import re
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from wordcloud import WordCloud
from collections import Counter
def remove_illegal_characters(title: str) -> str:
@@ -244,27 +242,42 @@ def create_chart(similar_prices: list[float], similar_shipping: list[float], sim
return fig.to_json()
def create_wordcloud(urls: list[str]) -> object:
def create_bargraph(countries: list[str]) -> object:
"""
Creates a word cloud visualization based on a list of website URLs.
Creates a bar graph visualization of how often each country appears in a list of countries.
Args:
urls (list[str]): A list of website URLs to be used to generate the word cloud.
countries (list[str]): A list of countries whose occurrences are counted to generate the bar graph.
Returns:
A tuple of the following:
- A JSON string containing the Plotly Express figure of the word cloud.
- A dictionary where the keys are the website names and the values are the frequency count of each website in the URLs list.
A JSON string containing the Plotly figure of the bar graph.
"""
website_counts = Counter(urls)
wordcloud = WordCloud(
background_color='white',
scale=4,
prefer_horizontal=0.9,
colormap='RdYlGn_r').generate_from_frequencies(website_counts)
fig = px.imshow(wordcloud)
# Count the occurrences of each country
country_counts = Counter(countries)
# Get the names and counts of the countries
country_names = list(country_counts.keys())
country_values = list(country_counts.values())
# Create a bar graph with the country names on the x-axis and counts on the y-axis
fig = go.Figure(
go.Bar(
x=country_names,
y=country_values,
hoverinfo='text',
hovertext=[f"Country: {country}<br>Citations: {count}" for country, count in zip(country_names, country_values)],
marker=dict(
color=country_values,
colorscale='RdYlGn_r',
showscale=True,
colorbar=dict(
title='Citations'
)
)
)
)
fig.update_layout(
xaxis_title="Country of Origin",
yaxis_title="Citations",
@@ -273,6 +286,8 @@ def create_wordcloud(urls: list[str]) -> object:
'xanchor': 'center',
'yanchor': 'top',
'y': 0.9,
'x': 0.5})
'x': 0.5},
plot_bgcolor='rgba(0,0,0,0)'
)
return fig.to_json()

View File

@@ -69,9 +69,9 @@ class Index(View):
best_context = percentage_difference(float(price), best_total,)
price_rating = price_difference_rating(float(price), best_total, days)
# Categorize the titles and create the chart and wordcloud
# Categorize the titles and create the chart and bargraph
chart = create_chart(similar_prices, similar_shipping, similar_descriptions, currency, title, best_title)
wordcloud = create_wordcloud(similar_countries)
bargraph = create_bargraph(similar_countries)
# Get the total number of items
total_items = len(similar_descriptions)
@@ -83,7 +83,7 @@ class Index(View):
'title': title,
'price': f"{float(price):,.2f}",
'chart': chart,
'wordcloud': wordcloud,
'bargraph': bargraph,
'price_rating': round(price_rating, 1),
'days': days,
'hours': hours,