Fix Wikipedia API with User-Agent (#822)

* refactor(serializers): remove unused gpxpy and geojson imports

* fix(generate_description): improve error handling and response validation for Wikipedia API calls

* Potential fix for code scanning alert no. 42: Information exposure through an exception

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* fix(generate_description): improve error logging for Wikipedia API data fetch failures

* chore(deps): bump devalue (#823)

Bumps the npm_and_yarn group with 1 update in the /frontend directory: [devalue](https://github.com/sveltejs/devalue).


Updates `devalue` from 5.1.1 to 5.3.2
- [Release notes](https://github.com/sveltejs/devalue/releases)
- [Changelog](https://github.com/sveltejs/devalue/blob/main/CHANGELOG.md)
- [Commits](https://github.com/sveltejs/devalue/compare/v5.1.1...v5.3.2)

---
updated-dependencies:
- dependency-name: devalue
  dependency-version: 5.3.2
  dependency-type: indirect
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Sean Morley <98704938+seanmorley15@users.noreply.github.com>

* Refactor help documentation link in settings page

- Updated the condition to display the help documentation link based on the `wandererEnabled` flag.
- Removed the conditional rendering for staff users and Strava integration status.
- Changed the documentation link to point to the Immich integration documentation.

* fix(locations): update include_collections parameter handling for default behavior

* Update backend/server/adventures/views/generate_description_view.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Sean Morley authored on 2025-09-01 10:06:44 -04:00; committed by GitHub
parent a3f0eda63f
commit cb431f7d26
7 changed files with 1175 additions and 1069 deletions


@@ -7,8 +7,6 @@ from worldtravel.serializers import CountrySerializer, RegionSerializer, CitySer
 from geopy.distance import geodesic
 from integrations.models import ImmichIntegration
 from adventures.utils.geojson import gpx_to_geojson
-import gpxpy
-import geojson
 import logging
 
 logger = logging.getLogger(__name__)


@@ -3,42 +3,137 @@ from rest_framework.decorators import action
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 import requests
+from django.conf import settings
+import urllib.parse
+import logging
+
+logger = logging.getLogger(__name__)
 
 class GenerateDescription(viewsets.ViewSet):
     permission_classes = [IsAuthenticated]
 
-    @action(detail=False, methods=['get'],)
+    # User-Agent header required by Wikipedia API
+    HEADERS = {
+        'User-Agent': f'AdventureLog/{getattr(settings, "ADVENTURELOG_RELEASE_VERSION", "unknown")}'
+    }
+
+    @action(detail=False, methods=['get'])
     def desc(self, request):
         name = self.request.query_params.get('name', '')
-        # un url encode the name
-        name = name.replace('%20', ' ')
-        name = self.get_search_term(name)
-        url = 'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=extracts&exintro&explaintext&format=json&titles=%s' % name
-        response = requests.get(url)
-        data = response.json()
-        page_id = next(iter(data["query"]["pages"]))
-        extract = data["query"]["pages"][page_id]
-        if extract.get('extract') is None:
-            return Response({"error": "No description found"}, status=400)
-        return Response(extract)
-
-    @action(detail=False, methods=['get'],)
+        if not name:
+            return Response({"error": "Name parameter is required"}, status=400)
+
+        # Properly URL decode the name
+        name = urllib.parse.unquote(name)
+        search_term = self.get_search_term(name)
+        if not search_term:
+            return Response({"error": "No matching Wikipedia article found"}, status=404)
+
+        # Properly URL encode the search term for the API
+        encoded_term = urllib.parse.quote(search_term)
+        url = f'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=extracts&exintro&explaintext&format=json&titles={encoded_term}'
+
+        try:
+            response = requests.get(url, headers=self.HEADERS, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+
+            pages = data.get("query", {}).get("pages", {})
+            if not pages:
+                return Response({"error": "No page data found"}, status=404)
+
+            page_id = next(iter(pages))
+            page_data = pages[page_id]
+
+            # Check if page exists (page_id of -1 means page doesn't exist)
+            if page_id == "-1":
+                return Response({"error": "Wikipedia page not found"}, status=404)
+
+            if not page_data.get('extract'):
+                return Response({"error": "No description found"}, status=404)
+
+            return Response(page_data)
+        except requests.exceptions.RequestException as e:
+            logger.exception("Failed to fetch data from Wikipedia")
+            return Response({"error": "Failed to fetch data from Wikipedia."}, status=500)
+        except ValueError as e:  # JSON decode error
+            return Response({"error": "Invalid response from Wikipedia API"}, status=500)
+
+    @action(detail=False, methods=['get'])
     def img(self, request):
         name = self.request.query_params.get('name', '')
-        # un url encode the name
-        name = name.replace('%20', ' ')
-        name = self.get_search_term(name)
-        url = 'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=pageimages&format=json&piprop=original&titles=%s' % name
-        response = requests.get(url)
-        data = response.json()
-        page_id = next(iter(data["query"]["pages"]))
-        extract = data["query"]["pages"][page_id]
-        if extract.get('original') is None:
-            return Response({"error": "No image found"}, status=400)
-        return Response(extract["original"])
+        if not name:
+            return Response({"error": "Name parameter is required"}, status=400)
+
+        # Properly URL decode the name
+        name = urllib.parse.unquote(name)
+        search_term = self.get_search_term(name)
+        if not search_term:
+            return Response({"error": "No matching Wikipedia article found"}, status=404)
+
+        # Properly URL encode the search term for the API
+        encoded_term = urllib.parse.quote(search_term)
+        url = f'https://en.wikipedia.org/w/api.php?origin=*&action=query&prop=pageimages&format=json&piprop=original&titles={encoded_term}'
+
+        try:
+            response = requests.get(url, headers=self.HEADERS, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+
+            pages = data.get("query", {}).get("pages", {})
+            if not pages:
+                return Response({"error": "No page data found"}, status=404)
+
+            page_id = next(iter(pages))
+            page_data = pages[page_id]
+
+            # Check if page exists
+            if page_id == "-1":
+                return Response({"error": "Wikipedia page not found"}, status=404)
+
+            original_image = page_data.get('original')
+            if not original_image:
+                return Response({"error": "No image found"}, status=404)
+
+            return Response(original_image)
+        except requests.exceptions.RequestException as e:
+            logger.exception("Failed to fetch data from Wikipedia")
+            return Response({"error": "Failed to fetch data from Wikipedia."}, status=500)
+        except ValueError as e:  # JSON decode error
+            return Response({"error": "Invalid response from Wikipedia API"}, status=500)
 
     def get_search_term(self, term):
-        response = requests.get(f'https://en.wikipedia.org/w/api.php?action=opensearch&search={term}&limit=10&namespace=0&format=json')
-        data = response.json()
-        if data[1] and len(data[1]) > 0:
-            return data[1][0]
+        if not term:
+            return None
+
+        # Properly URL encode the search term
+        encoded_term = urllib.parse.quote(term)
+        url = f'https://en.wikipedia.org/w/api.php?action=opensearch&search={encoded_term}&limit=10&namespace=0&format=json'
+
+        try:
+            response = requests.get(url, headers=self.HEADERS, timeout=10)
+            response.raise_for_status()
+
+            # Check if response is empty
+            if not response.text.strip():
+                return None
+
+            data = response.json()
+
+            # OpenSearch API returns an array with 4 elements:
+            # [search_term, [titles], [descriptions], [urls]]
+            if len(data) >= 2 and data[1] and len(data[1]) > 0:
+                return data[1][0]  # Return the first title match
+
+            return None
+        except requests.exceptions.RequestException:
+            # If search fails, return the original term as fallback
+            return term
+        except ValueError:  # JSON decode error
+            # If JSON parsing fails, return the original term as fallback
+            return term
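
The `page_id == "-1"` checks above match how the MediaWiki Action API reports missing pages: a nonexistent title still yields exactly one entry under `query.pages`, keyed `"-1"` and carrying a `missing` flag instead of an `extract`. A minimal sketch of that behavior, assuming standard Action API responses (the `fetch_extract` helper and the `AdventureLog/0.0.0 (demo)` agent string are illustrative, not code from this commit):

```python
import requests

# Hypothetical demo User-Agent; the real view derives its agent string from
# settings.ADVENTURELOG_RELEASE_VERSION, as shown in the diff above.
HEADERS = {'User-Agent': 'AdventureLog/0.0.0 (demo)'}

def fetch_extract(title: str):
    url = (
        'https://en.wikipedia.org/w/api.php'
        '?origin=*&action=query&prop=extracts&exintro&explaintext'
        f'&format=json&titles={requests.utils.quote(title)}'
    )
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.raise_for_status()
    pages = resp.json().get("query", {}).get("pages", {})
    # A missing title still returns one page entry, but keyed "-1", e.g.
    # {"query": {"pages": {"-1": {"ns": 0, "title": "...", "missing": ""}}}}
    page_id = next(iter(pages))
    return None if page_id == "-1" else pages[page_id].get("extract")

print(fetch_extract('Eiffel Tower') is not None)   # True: an extract comes back
print(fetch_extract('NoSuchArticleXYZ123'))        # None: page keyed "-1"
```

Wikimedia's API policy also asks clients to send an identifying `User-Agent`; unidentified default agents such as `python-requests/x.y` can be throttled or blocked, which is what the `HEADERS` constant in the view addresses.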


@@ -853,8 +853,8 @@ packages:
     resolution: {integrity: sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==}
     engines: {node: '>=8'}
 
-  devalue@5.1.1:
-    resolution: {integrity: sha512-maua5KUiapvEwiEAe+XnlZ3Rh0GD+qI1J/nb9vrJc3muPXvcF/8gXYTWF76+5DAqHyDUtOIImEuo0YKE9mshVw==}
+  devalue@5.3.2:
+    resolution: {integrity: sha512-UDsjUbpQn9kvm68slnrs+mfxwFkIflOhkanmyabZ8zOYk8SMEIbJ3TK+88g70hSIeytu4y18f0z/hYHMTrXIWw==}
 
   didyoumean@1.2.2:
     resolution: {integrity: sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==}
@@ -2235,7 +2235,7 @@ snapshots:
       '@sveltejs/vite-plugin-svelte': 3.1.2(svelte@4.2.19)(vite@5.4.19(@types/node@22.15.2))
       '@types/cookie': 0.6.0
       cookie: 0.6.0
-      devalue: 5.1.1
+      devalue: 5.3.2
       esm-env: 1.2.2
       import-meta-resolve: 4.1.0
       kleur: 4.1.5
@@ -2535,7 +2535,7 @@ snapshots:
   detect-libc@2.0.4: {}
 
-  devalue@5.1.1: {}
+  devalue@5.3.2: {}
 
   didyoumean@1.2.2: {}


File diff suppressed because it is too large.


@@ -22,7 +22,7 @@ export const load = (async (event) => {
 		typeString = 'all';
 	}
 
-	const include_collections = event.url.searchParams.get('include_collections') || 'false';
+	const include_collections = event.url.searchParams.get('include_collections') || 'true';
 	const order_by = event.url.searchParams.get('order_by') || 'updated_at';
 	const order_direction = event.url.searchParams.get('order_direction') || 'asc';
 	const page = event.url.searchParams.get('page') || '1';
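
The backend half of this default change is in the suppressed diff above, so the sketch below is only a hypothetical reading of the intended behavior (`parse_include_collections` is an illustrative name, not a helper from this commit): a missing `include_collections` parameter now behaves like `'true'`, and `'false'` is the explicit opt-out, mirroring the frontend fallback.

```python
def parse_include_collections(query_params: dict) -> bool:
    """Hypothetical sketch: absent -> True (new default), 'false' -> False."""
    raw = query_params.get('include_collections')
    if raw is None:
        # First visit / no parameter: include collection locations by default
        return True
    return raw.strip().lower() != 'false'

# The three cases the Svelte page below distinguishes:
assert parse_include_collections({}) is True
assert parse_include_collections({'include_collections': 'true'}) is True
assert parse_include_collections({'include_collections': 'false'}) is False
```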


@@ -114,10 +114,13 @@
 		} else {
 			currentSort.visited = false;
 		}
-		if (url.searchParams.get('include_collections') === 'on') {
+		if (url.searchParams.get('include_collections') === 'true') {
 			currentSort.includeCollections = true;
-		} else {
+		} else if (url.searchParams.get('include_collections') === 'false') {
 			currentSort.includeCollections = false;
+		} else {
+			// Default to true when no parameter is present (first visit)
+			currentSort.includeCollections = true;
 		}
 
 		if (!currentSort.visited && !currentSort.planned) {
@@ -469,6 +472,18 @@
 					id="include_collections"
 					class="checkbox checkbox-primary"
 					checked={currentSort.includeCollections}
+					on:change={(e) => {
+						const target = e.currentTarget;
+						currentSort.includeCollections = target.checked;
+						// Immediately update the URL to reflect the change
+						let url = new URL(window.location.href);
+						if (target.checked) {
+							url.searchParams.set('include_collections', 'true');
+						} else {
+							url.searchParams.set('include_collections', 'false');
+						}
+						goto(url.toString(), { invalidateAll: true, replaceState: true });
+					}}
 				/>
 				<span class="label-text">{$t('adventures.collection_locations')}</span>
 			</label>


@@ -1226,22 +1226,17 @@
 			{/if}
 
 			<!-- Help documentation link -->
-			{#if user.is_staff || !stravaGlobalEnabled}
+			{#if !wandererEnabled}
 				<div class="mt-4 p-4 bg-info/10 rounded-lg">
-					{#if user.is_staff}
-						<p class="text-sm">
-							📖 {$t('immich.need_help')}
-							<a
-								class="link link-primary"
-								href="https://adventurelog.app/docs/configuration/wanderer_integration.html"
-								target="_blank">{$t('navbar.documentation')}</a
-							>
-						</p>
-					{:else if !stravaGlobalEnabled}
-						<p class="text-sm">
-							{$t('google_maps.google_maps_integration_desc_no_staff')}
-						</p>
-					{/if}
+					<p class="text-sm">
+						📖 {$t('immich.need_help')}
+						<a
+							class="link link-primary"
+							href="https://adventurelog.app/docs/configuration/immich_integration.html"
+							target="_blank">{$t('navbar.documentation')}</a
+						>
+					</p>
 				</div>
 			{/if}
 		</div>