feat: update vision-camera-plugin-inatvision to a version that fully supports the geomodel (#2584)

* Update vision-plugin

* Add TS types to useOfflineSuggestions

* Use location for prediction from file

* Remove log

* Use plugin function to look up location and elevation based on h3 cells

* taxonomyRollupCutoff is now applied in the plugin, on the basis of the top score for a frame

* Fix wrong commit reference. Now references main branch

* Fix the has-location check to account for 0.0 coordinates (see the sketch after this list)

* Only pass lat and long in as props

* Add explanation

* Fix: Import vision-plugin function from wrapper file

* Add jest mock for new plugin function
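
The 0.0-coordinate fix referenced above is a switch from truthiness to null checks; a minimal sketch (the same check the FrameProcessorCamera hunk below introduces):

    // Truthiness silently drops valid coordinates on the equator or prime meridian:
    const broken = !!userLocation?.latitude && !!userLocation?.longitude; // false for 0.0
    // Comparing against null/undefined keeps 0.0 as a real coordinate:
    const hasUserLocation = userLocation?.latitude != null && userLocation?.longitude != null;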
Johannes Klein
2025-01-08 16:15:36 +01:00
committed by GitHub
parent f68d3cc252
commit feec5567a3
12 changed files with 61 additions and 50 deletions

View File

@@ -1,3 +1,8 @@
 export const getPredictionsForImage = jest.fn( () => Promise.resolve( { predictions: [] } ) );
+export const getPredictionsForLocation = jest.fn( () => Promise.resolve( { predictions: [] } ) );
 export const removeLogListener = jest.fn( );
 export const resetStoredResults = jest.fn( );
+export const lookUpLocation = jest.fn( location => ( {
+  ...location,
+  elevation: 12
+} ) );

View File

@@ -1204,7 +1204,7 @@ PODS:
   - VisionCamera/React (4.0.5):
     - React-Core
     - VisionCamera/FrameProcessors
-  - VisionCameraPluginInatVision (4.1.4):
+  - VisionCameraPluginInatVision (4.2.0):
     - React-Core
   - Yoga (1.14.0)
@@ -1610,7 +1610,7 @@ SPEC CHECKSUMS:
   RNVectorIcons: 102cd20472bf0d7cd15443d43cd87f9c97228ac3
   SocketRocket: f32cd54efbe0f095c4d7594881e52619cfe80b17
   VisionCamera: f02de0b1b6b1516b327bd8215237a97e7386db8a
-  VisionCameraPluginInatVision: e9deb91ffd64c01e97b70329ef112a816f897de3
+  VisionCameraPluginInatVision: fcf3a3da9272def9014735257e065726c2d66d4f
   Yoga: c716aea2ee01df6258550c7505fa61b248145ced

 PODFILE CHECKSUM: eff4b75123af5d6680139a78c055b44ad37c269b

package-lock.json generated
View File

@@ -103,7 +103,7 @@
         "realm": "^12.6.2",
         "sanitize-html": "^2.13.0",
         "ts-jest": "^29.1.2",
-        "vision-camera-plugin-inatvision": "github:inaturalist/vision-camera-plugin-inatvision#8788e6d6718a4501056bad1f9ee5dbcfd354be92",
+        "vision-camera-plugin-inatvision": "github:inaturalist/vision-camera-plugin-inatvision#b905ff2b9ce1cf64797d600c6cd22fc7617b2389",
         "zustand": "^4.5.2"
       },
       "devDependencies": {
@@ -11737,6 +11737,16 @@
       "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
       "dev": true
     },
+    "node_modules/h3-js": {
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/h3-js/-/h3-js-4.1.0.tgz",
+      "integrity": "sha512-LQhmMl1dRQQjMXPzJc7MpZ/CqPOWWuAvVEoVJM9n/s7vHypj+c3Pd5rLQCkAsOgAoAYKbNCsYFE++LF7MvSfCQ==",
+      "engines": {
+        "node": ">=4",
+        "npm": ">=3",
+        "yarn": ">=1.3.0"
+      }
+    },
     "node_modules/has-bigints": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz",
@@ -20675,10 +20685,13 @@
       }
     },
     "node_modules/vision-camera-plugin-inatvision": {
-      "version": "4.1.4",
-      "resolved": "git+ssh://git@github.com/inaturalist/vision-camera-plugin-inatvision.git#8788e6d6718a4501056bad1f9ee5dbcfd354be92",
-      "integrity": "sha512-jIIhv403pJnuNonE0gcr70JjjzXKumbWzpcqh3KykJAqQ3D3nLgZsFG2GYnudKQz/pJiJejc7jGG/wIzpTe3CA==",
+      "version": "4.2.0",
+      "resolved": "git+ssh://git@github.com/inaturalist/vision-camera-plugin-inatvision.git#b905ff2b9ce1cf64797d600c6cd22fc7617b2389",
+      "integrity": "sha512-8mw2Txnv5zxu2VcsQuEMVfbYP4aoi75XmeqM4Z2SbhFUOsQSnL4GGsTvTlxKPF2mNGJjeC4+NYZRlh6+tVPu9w==",
       "license": "MIT",
+      "dependencies": {
+        "h3-js": "^4.1.0"
+      },
       "engines": {
         "node": ">= 18.0.0"
       },
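
The new transitive dependency h3-js is what the h3-cell lookup in the commit message builds on; a minimal sketch of the coordinate-to-cell mapping the library provides (h3-js v4 API; resolution 4 is an arbitrary choice here):

    import { latLngToCell, cellToLatLng } from "h3-js";

    // Bucket a GPS fix into its hexagonal H3 cell at resolution 4.
    const cell = latLngToCell( 37.7749, -122.4194, 4 );
    // The cell centroid is the kind of gridded key an elevation table can be indexed by.
    const [lat, lng] = cellToLatLng( cell );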

View File

@@ -137,7 +137,7 @@
     "realm": "^12.6.2",
     "sanitize-html": "^2.13.0",
     "ts-jest": "^29.1.2",
-    "vision-camera-plugin-inatvision": "github:inaturalist/vision-camera-plugin-inatvision#8788e6d6718a4501056bad1f9ee5dbcfd354be92",
+    "vision-camera-plugin-inatvision": "github:inaturalist/vision-camera-plugin-inatvision#b905ff2b9ce1cf64797d600c6cd22fc7617b2389",
     "zustand": "^4.5.2"
   },
   "devDependencies": {

View File

@@ -89,7 +89,6 @@ const AICamera = ( {
   } = useRotation( );
   const {
     confidenceThreshold,
-    taxonomyRollupCutoff,
     fps,
     handleTaxaDetected,
     modelLoaded,
@@ -98,7 +97,6 @@ const AICamera = ( {
     setResult,
     cropRatio,
     setConfidenceThreshold,
-    setTaxonomyRollupCutoff,
     setFPS,
     setNumStoredResults,
     setCropRatio
@@ -189,7 +187,6 @@ const AICamera = ( {
       <FrameProcessorCamera
         cameraRef={camera}
         confidenceThreshold={confidenceThreshold}
-        taxonomyRollupCutoff={taxonomyRollupCutoff}
         device={device}
         fps={fps}
         numStoredResults={numStoredResults}
@@ -267,7 +264,6 @@ const AICamera = ( {
       <AICameraButtons
         handleZoomButtonPress={handleZoomButtonPress}
         confidenceThreshold={confidenceThreshold}
-        taxonomyRollupCutoff={taxonomyRollupCutoff}
         cropRatio={cropRatio}
         flipCamera={onFlipCamera}
         fps={fps}
@@ -277,7 +273,6 @@ const AICamera = ( {
         numStoredResults={numStoredResults}
         rotatableAnimatedStyle={rotatableAnimatedStyle}
         setConfidenceThreshold={setConfidenceThreshold}
-        setTaxonomyRollupCutoff={setTaxonomyRollupCutoff}
         setCropRatio={setCropRatio}
         setFPS={setFPS}
         setNumStoredResults={setNumStoredResults}

View File

@@ -18,7 +18,6 @@ const isTablet = DeviceInfo.isTablet();
 interface Props {
   handleZoomButtonPress: ( _event: GestureResponderEvent ) => void;
   confidenceThreshold?: number;
-  taxonomyRollupCutoff?: number;
   cropRatio?: string;
   flipCamera: ( _event: GestureResponderEvent ) => void;
   fps?: number;
@@ -29,7 +28,6 @@ interface Props {
   rotatableAnimatedStyle: ViewStyle;
   // Those four are debug only so I don't bother with types
   setConfidenceThreshold?: Function;
-  setTaxonomyRollupCutoff?: Function;
   setCropRatio?: Function,
   setFPS?: Function,
   setNumStoredResults?: Function,
@@ -45,7 +43,6 @@
 const AICameraButtons = ( {
   handleZoomButtonPress,
   confidenceThreshold,
-  taxonomyRollupCutoff,
   cropRatio,
   flipCamera,
   fps,
@@ -55,7 +52,6 @@ const AICameraButtons = ( {
   numStoredResults,
   rotatableAnimatedStyle,
   setConfidenceThreshold,
-  setTaxonomyRollupCutoff,
   setCropRatio,
   setFPS,
   setNumStoredResults,
@@ -100,8 +96,6 @@ const AICameraButtons = ( {
       <AIDebugButton
         confidenceThreshold={confidenceThreshold}
         setConfidenceThreshold={setConfidenceThreshold}
-        taxonomyRollupCutoff={taxonomyRollupCutoff}
-        setTaxonomyRollupCutoff={setTaxonomyRollupCutoff}
        fps={fps}
         setFPS={setFPS}
         numStoredResults={numStoredResults}

View File

@@ -19,8 +19,6 @@ import SliderControl from "./SliderControl";
 const AIDebugButton = ( {
   confidenceThreshold,
   setConfidenceThreshold,
-  taxonomyRollupCutoff,
-  setTaxonomyRollupCutoff,
   fps,
   setFPS,
   numStoredResults,
@@ -81,15 +79,6 @@ const AIDebugButton = ( {
         precision={2}
         step={0.05}
       />
-      <SliderControl
-        name="Taxonomy Rollup Cutoff"
-        min={0}
-        max={0.0001}
-        value={taxonomyRollupCutoff}
-        setValue={setTaxonomyRollupCutoff}
-        precision={5}
-        step={0.00001}
-      />
       <SliderControl
         name="Center Crop Ratio (Android only)"
         min={0.5}

View File

@@ -43,7 +43,6 @@ type Props = {
   onTaxaDetected: Function,
   pinchToZoom?: Function,
   takingPhoto: boolean,
-  taxonomyRollupCutoff?: number,
   inactive?: boolean,
   resetCameraOnFocus: Function,
   userLocation?: Object // UserLocation | null
@@ -51,7 +50,6 @@ type Props = {
 const DEFAULT_FPS = 1;
 const DEFAULT_CONFIDENCE_THRESHOLD = 0.5;
-const DEFAULT_TAXONOMY_CUTOFF_THRESHOLD = 0.0;
 const DEFAULT_NUM_STORED_RESULTS = 4;
 const DEFAULT_CROP_RATIO = 1.0;
@@ -73,7 +71,6 @@ const FrameProcessorCamera = ( {
   onTaxaDetected,
   pinchToZoom,
   takingPhoto,
-  taxonomyRollupCutoff = DEFAULT_TAXONOMY_CUTOFF_THRESHOLD,
   inactive,
   resetCameraOnFocus,
   userLocation
@@ -134,6 +131,14 @@ const FrameProcessorCamera = ( {
   const patchedOrientationAndroid = orientationPatchFrameProcessor( deviceOrientation );
   const patchedRunAsync = usePatchedRunAsync( );
+  const hasUserLocation = userLocation?.latitude != null && userLocation?.longitude != null;
+  // The vision-plugin has a function to look up the location of the user in a h3 gridded world
+  // unfortunately, I was not able to run this new function in the worklets directly,
+  // so we need to do this here before calling the useFrameProcessor hook.
+  // For predictions from file this function runs in the vision-plugin code directly.
+  const location = hasUserLocation
+    ? InatVision.lookUpLocation( userLocation )
+    : null;
   const frameProcessor = useFrameProcessor(
     frame => {
       "worklet";
@@ -162,16 +167,15 @@ const FrameProcessorCamera = ( {
         modelPath,
         taxonomyPath,
         confidenceThreshold,
-        taxonomyRollupCutoff,
         numStoredResults,
         cropRatio,
         patchedOrientationAndroid,
-        useGeomodel: !!userLocation,
+        useGeomodel: hasUserLocation,
         geomodelPath,
         location: {
-          latitude: userLocation?.latitude,
-          longitude: userLocation?.longitude,
-          elevation: userLocation?.altitude
+          latitude: location?.latitude,
+          longitude: location?.longitude,
+          elevation: location?.altitude
         }
       } );
       const timeAfter = Date.now();
@@ -188,13 +192,13 @@ const FrameProcessorCamera = ( {
       modelVersion,
       confidenceThreshold,
       takingPhoto,
-      taxonomyRollupCutoff,
       patchedOrientationAndroid,
       numStoredResults,
       cropRatio,
       lastTimestamp,
       fps,
-      userLocation
+      hasUserLocation,
+      location
     ]
   );
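
The comment in the hunk above explains why InatVision.lookUpLocation runs on the JS thread before the worklet is created. The diff does not spell out its return shape; judging from the jest mock at the top of this commit, it echoes the input coordinates and adds an elevation field (a sketch under that assumption, with placeholder coordinates):

    // Assumed return shape, inferred from the jest mock: input coordinates plus elevation.
    const location = InatVision.lookUpLocation( { latitude: 52.52, longitude: 13.405 } );
    // => { latitude: 52.52, longitude: 13.405, elevation: <value from the h3 grid> }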

View File

@@ -18,7 +18,6 @@ const usePredictions = ( ) => {
   const [resultTimestamp, setResultTimestamp] = useState<number | undefined>( undefined );
   const [modelLoaded, setModelLoaded] = useState( false );
   const [confidenceThreshold, setConfidenceThreshold] = useState( 0.5 );
-  const [taxonomyRollupCutoff, setTaxonomyRollupCutoff] = useState( 0.0 );
   const [fps, setFPS] = useState( 1 );
   const [numStoredResults, setNumStoredResults] = useState( 4 );
   const [cropRatio, setCropRatio] = useState( 1 );
@@ -69,7 +68,6 @@ const usePredictions = ( ) => {
   return {
     confidenceThreshold,
-    taxonomyRollupCutoff,
     fps,
     handleTaxaDetected,
     modelLoaded,
@@ -79,7 +77,6 @@ const usePredictions = ( ) => {
     resultTimestamp,
     setResult,
     setConfidenceThreshold,
-    setTaxonomyRollupCutoff,
     setFPS,
     setNumStoredResults,
     setCropRatio

View File

@@ -221,6 +221,8 @@ const SuggestionsContainer = ( ) => {
     offlineSuggestions
   } = useOfflineSuggestions( selectedPhotoUri, {
     dispatch,
+    latitude: flattenedUploadParams?.lat,
+    longitude: flattenedUploadParams?.lng,
     tryOfflineSuggestions
   } );

View File

@@ -1,5 +1,3 @@
-// @flow
-
 import { RealmContext } from "providers/contexts.ts";
 import {
   useEffect,
@@ -14,7 +12,12 @@ const { useRealm } = RealmContext;
 const useOfflineSuggestions = (
   selectedPhotoUri: string,
-  options: Object
+  options: {
+    dispatch: () => void,
+    latitude: number,
+    longitude: number,
+    tryOfflineSuggestions: boolean
+  }
 ): {
   offlineSuggestions: Array<Object>
 } => {
@@ -22,13 +25,16 @@ const useOfflineSuggestions = (
   const [offlineSuggestions, setOfflineSuggestions] = useState( [] );
   const [error, setError] = useState( null );
-  const { dispatch, tryOfflineSuggestions } = options;
+  const {
+    dispatch, latitude, longitude, tryOfflineSuggestions
+  } = options;
   useEffect( ( ) => {
     const predictOffline = async ( ) => {
       let rawPredictions = [];
       try {
-        const result = await predictImage( selectedPhotoUri );
+        const location = { latitude, longitude };
+        const result = await predictImage( selectedPhotoUri, location );
         rawPredictions = result.predictions;
       } catch ( predictImageError ) {
         dispatch( { type: "SET_FETCH_STATUS", fetchStatus: "offline-error" } );
@@ -71,7 +77,7 @@ const useOfflineSuggestions = (
         setError( predictOfflineError );
       } );
     }
-  }, [selectedPhotoUri, tryOfflineSuggestions, setError, dispatch, realm] );
+  }, [selectedPhotoUri, tryOfflineSuggestions, setError, dispatch, realm, latitude, longitude] );

 if ( error ) throw error;

View File

@@ -36,7 +36,7 @@ export const geomodelPath: string = Platform.select( {
 export const modelVersion = Config.CV_MODEL_VERSION;

-export const predictImage = ( uri: string ) => {
+export const predictImage = ( uri: string, location: Location ) => {
   // Ensure uri is actually well-formed and try to make it well-formed if it's
   // a path
   let url;
@@ -52,12 +52,18 @@ export const predictImage = ( uri: string ) => {
   if ( !url ) {
     throw new Error( `predictImage received invalid URI: ${uri}` );
   }
+  const hasLocation = location?.latitude != null && location?.longitude != null;
   return getPredictionsForImage( {
     uri: url.toString(),
     modelPath,
     taxonomyPath,
     version: modelVersion,
-    confidenceThreshold: 0.2
+    confidenceThreshold: 0.2,
+    useGeomodel: hasLocation,
+    geomodelPath,
+    location: hasLocation
+      ? location
+      : undefined
   } );
 };
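
Callers of the updated wrapper pass coordinates straight through; a hedged usage sketch (photoUri and the coordinates are placeholders):

    // With usable coordinates, useGeomodel is true and the geomodel weighs in.
    const withGeo = await predictImage( photoUri, { latitude: 52.52, longitude: 13.405 } );
    // With null coordinates, hasLocation is false and the geomodel is skipped.
    const visualOnly = await predictImage( photoUri, { latitude: null, longitude: null } );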