From dda75169809977cb28274b87b528efdb0ae0a825 Mon Sep 17 00:00:00 2001 From: "Ericson \"Fogo\" Soares" Date: Tue, 16 May 2023 11:15:51 -0300 Subject: [PATCH] [ENG-536] Improve indexer for huge locations (#781) * WIP materialized_path abstraction revamp * Optimizing indexer rules loading * Using a better serialize impl for indexer rules * New interruptable and faster Walker * WIP new indexer * WIP first success compiling after breaking the world * Fixing some warnings * Handling some lifetime issues in the walker * New job completed with errors feature * Introducing completed with errors to indexer Removing IOError variant from JobError and using FileIOError instead * Rust fmt * Adding missing job status * Better ergonomics to IsolatedFilePathData Conversions from db's file_path kinds Keeping original's relative path data to better conversion to OS's path First unit tests * Testing and fixing parent method * Some error handling * Rust fmt * Some small fixes * Fixing indexer rules decoding * Bunch of small fixes * Rust fmt * Fixing indexer rules * Updating frontend to new materialized_path format * Trying to fix windows CI --------- Co-authored-by: Brendan Allan --- apps/desktop/src-tauri/src/file.rs | 2 +- .../src/components/explorer/Explorer.tsx | 2 +- .../migration.sql | 43 + .../migration.sql | 2 + core/prisma/schema.prisma | 14 +- core/src/api/files.rs | 12 +- core/src/api/jobs.rs | 1 - core/src/api/search.rs | 58 +- core/src/custom_uri.rs | 72 +- core/src/job/job_manager.rs | 11 + core/src/job/mod.rs | 64 +- core/src/job/worker.rs | 38 +- core/src/lib.rs | 14 +- core/src/library/library.rs | 29 +- core/src/library/manager.rs | 117 ++- core/src/location/error.rs | 47 +- core/src/location/file_path_helper.rs | 680 -------------- .../isolated_file_path_data.rs | 636 +++++++++++++ core/src/location/file_path_helper/mod.rs | 458 +++++++++ core/src/location/indexer/indexer_job.rs | 369 ++++---- core/src/location/indexer/mod.rs | 235 +++-- core/src/location/indexer/rules.rs | 541 +++++++++-- .../location/indexer/shallow_indexer_job.rs | 237 ++--- core/src/location/indexer/walk.rs | 875 ++++++++++++------ core/src/location/manager/mod.rs | 19 +- core/src/location/manager/watcher/linux.rs | 9 +- core/src/location/manager/watcher/macos.rs | 30 +- core/src/location/manager/watcher/utils.rs | 210 ++--- core/src/location/manager/watcher/windows.rs | 10 +- core/src/location/mod.rs | 27 +- .../file_identifier/file_identifier_job.rs | 63 +- core/src/object/file_identifier/mod.rs | 18 +- .../shallow_file_identifier_job.rs | 95 +- core/src/object/fs/copy.rs | 86 +- core/src/object/fs/cut.rs | 39 +- core/src/object/fs/decrypt.rs | 15 +- core/src/object/fs/delete.rs | 10 +- core/src/object/fs/encrypt.rs | 31 +- core/src/object/fs/erase.rs | 77 +- core/src/object/fs/mod.rs | 11 +- core/src/object/preview/thumbnail/mod.rs | 94 +- .../thumbnail/shallow_thumbnailer_job.rs | 81 +- .../preview/thumbnail/thumbnailer_job.rs | 68 +- core/src/object/validation/validator_job.rs | 43 +- core/src/util/error.rs | 24 + core/src/util/mod.rs | 1 + core/src/util/seeder.rs | 204 ++-- crates/file-ext/Cargo.toml | 2 +- .../$libraryId/Explorer/Inspector/index.tsx | 312 +++---- interface/app/$libraryId/Explorer/View.tsx | 4 +- .../Layout/Sidebar/JobManager/Job.tsx | 1 + packages/client/src/core.ts | 84 +- 52 files changed, 3769 insertions(+), 2456 deletions(-) create mode 100644 core/prisma/migrations/20230426011629_remove_parent_id_from_file_path/migration.sql create mode 100644 core/prisma/migrations/20230504031258_errors_text_field_on_job_table/migration.sql delete mode 100644 core/src/location/file_path_helper.rs create mode 100644 core/src/location/file_path_helper/isolated_file_path_data.rs create mode 100644 core/src/location/file_path_helper/mod.rs create mode 100644 core/src/util/error.rs diff --git a/apps/desktop/src-tauri/src/file.rs b/apps/desktop/src-tauri/src/file.rs index 78f592f88..76fb25e32 100644 --- a/apps/desktop/src-tauri/src/file.rs +++ b/apps/desktop/src-tauri/src/file.rs @@ -67,7 +67,7 @@ pub async fn get_file_path_open_with_apps( sd_desktop_macos::get_open_with_applications(&path.to_str().unwrap().into()) } .as_slice() - .into_iter() + .iter() .map(|app| OpenWithApplication { name: app.name.to_string(), url: app.url.to_string(), diff --git a/apps/mobile/src/components/explorer/Explorer.tsx b/apps/mobile/src/components/explorer/Explorer.tsx index 903effafe..5f3102097 100644 --- a/apps/mobile/src/components/explorer/Explorer.tsx +++ b/apps/mobile/src/components/explorer/Explorer.tsx @@ -34,7 +34,7 @@ const Explorer = ({ items }: ExplorerProps) => { if (isPath(data) && data.item.is_dir) { navigation.push('Location', { id: data.item.location_id, - path: data.item.materialized_path + path: `${data.item.materialized_path}${data.item.name}/` }); } else { setData(data); diff --git a/core/prisma/migrations/20230426011629_remove_parent_id_from_file_path/migration.sql b/core/prisma/migrations/20230426011629_remove_parent_id_from_file_path/migration.sql new file mode 100644 index 000000000..ff57fef10 --- /dev/null +++ b/core/prisma/migrations/20230426011629_remove_parent_id_from_file_path/migration.sql @@ -0,0 +1,43 @@ +/* + Warnings: + + - The primary key for the `file_path` table will be changed. If it partially fails, the table could be left without primary key constraint. + - You are about to drop the column `parent_id` on the `file_path` table. All the data in the column will be lost. + - Added the required column `pub_id` to the `file_path` table without a default value. This is not possible if the table is not empty. + +*/ +-- RedefineTables +PRAGMA foreign_keys=OFF; +CREATE TABLE "new_file_path" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "pub_id" BLOB NOT NULL, + "is_dir" BOOLEAN NOT NULL DEFAULT false, + "cas_id" TEXT, + "integrity_checksum" TEXT, + "location_id" INTEGER NOT NULL, + "materialized_path" TEXT NOT NULL, + "name" TEXT NOT NULL, + "extension" TEXT NOT NULL, + "size_in_bytes" TEXT NOT NULL DEFAULT '0', + "inode" BLOB NOT NULL, + "device" BLOB NOT NULL, + "object_id" INTEGER, + "key_id" INTEGER, + "date_created" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "date_modified" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "date_indexed" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT "file_path_location_id_fkey" FOREIGN KEY ("location_id") REFERENCES "location" ("id") ON DELETE CASCADE ON UPDATE CASCADE, + CONSTRAINT "file_path_object_id_fkey" FOREIGN KEY ("object_id") REFERENCES "object" ("id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "file_path_key_id_fkey" FOREIGN KEY ("key_id") REFERENCES "key" ("id") ON DELETE SET NULL ON UPDATE CASCADE +); +INSERT INTO "new_file_path" ("cas_id", "date_created", "date_indexed", "date_modified", "device", "extension", "id", "inode", "integrity_checksum", "is_dir", "key_id", "location_id", "materialized_path", "name", "object_id", "size_in_bytes") SELECT "cas_id", "date_created", "date_indexed", "date_modified", "device", "extension", "id", "inode", "integrity_checksum", "is_dir", "key_id", "location_id", "materialized_path", "name", "object_id", "size_in_bytes" FROM "file_path"; +DROP TABLE "file_path"; +ALTER TABLE "new_file_path" RENAME TO "file_path"; +CREATE UNIQUE INDEX "file_path_pub_id_key" ON "file_path"("pub_id"); +CREATE UNIQUE INDEX "file_path_integrity_checksum_key" ON "file_path"("integrity_checksum"); +CREATE INDEX "file_path_location_id_idx" ON "file_path"("location_id"); +CREATE INDEX "file_path_location_id_materialized_path_idx" ON "file_path"("location_id", "materialized_path"); +CREATE UNIQUE INDEX "file_path_location_id_materialized_path_name_extension_key" ON "file_path"("location_id", "materialized_path", "name", "extension"); +CREATE UNIQUE INDEX "file_path_location_id_inode_device_key" ON "file_path"("location_id", "inode", "device"); +PRAGMA foreign_key_check; +PRAGMA foreign_keys=ON; diff --git a/core/prisma/migrations/20230504031258_errors_text_field_on_job_table/migration.sql b/core/prisma/migrations/20230504031258_errors_text_field_on_job_table/migration.sql new file mode 100644 index 000000000..1b6c628c2 --- /dev/null +++ b/core/prisma/migrations/20230504031258_errors_text_field_on_job_table/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "job" ADD COLUMN "errors_text" TEXT; diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma index 233612439..99bc7047f 100644 --- a/core/prisma/schema.prisma +++ b/core/prisma/schema.prisma @@ -135,10 +135,10 @@ model FilePath { // the path of the file relative to its location materialized_path String + // the name and extension - // Must have 'COLLATE NOCASE' in migration - name String - extension String + name String + extension String // Extension MUST have 'COLLATE NOCASE' in migration size_in_bytes String @default("0") @@ -149,9 +149,7 @@ model FilePath { object_id Int? object Object? @relation(fields: [object_id], references: [id], onDelete: Restrict) - // the parent in the file tree - parent_id Bytes? - key_id Int? // replacement for encryption + key_id Int? // replacement for encryption // permissions String? date_created DateTime @default(now()) @@ -167,6 +165,7 @@ model FilePath { @@unique([location_id, materialized_path, name, extension]) @@unique([location_id, inode, device]) @@index([location_id]) + @@index([location_id, materialized_path]) @@map("file_path") } @@ -378,6 +377,9 @@ model Job { // Enum: sd_core::job::job_manager:JobStatus status Int @default(0) // 0 = Queued + // List of errors, separated by "\n\n" in case of failed jobs or completed with errors + errors_text String? + data Bytes? // Serialized data to be used on pause/resume metadata Bytes? // Serialized metadata field with info about the job after completion diff --git a/core/src/api/files.rs b/core/src/api/files.rs index 1035d8016..860ee444b 100644 --- a/core/src/api/files.rs +++ b/core/src/api/files.rs @@ -1,7 +1,7 @@ use crate::{ api::utils::library, invalidate_query, - location::{file_path_helper::MaterializedPath, find_location, LocationError}, + location::{file_path_helper::IsolatedFilePathData, find_location, LocationError}, object::fs::{ copy::FileCopierJobInit, cut::FileCutterJobInit, decrypt::FileDecryptorJobInit, delete::FileDeleterJobInit, encrypt::FileEncryptorJobInit, erase::FileEraserJobInit, @@ -250,8 +250,14 @@ pub(crate) fn mount() -> AlphaRouter { let location_path = Path::new(&location.path); fs::rename( - location_path.join(&MaterializedPath::from((location_id, &file_name))), - location_path.join(&MaterializedPath::from((location_id, &new_file_name))), + location_path.join(IsolatedFilePathData::from_relative_str( + location_id, + &file_name, + )), + location_path.join(IsolatedFilePathData::from_relative_str( + location_id, + &new_file_name, + )), ) .await .map_err(|e| { diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index d4d7ac2f8..fa7e32f21 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -60,7 +60,6 @@ pub(crate) fn mount() -> AlphaRouter { .spawn_job(ThumbnailerJobInit { location, sub_path: Some(args.path), - background: false, }) .await .map_err(Into::into) diff --git a/core/src/api/search.rs b/core/src/api/search.rs index e388d12fa..90307a02f 100644 --- a/core/src/api/search.rs +++ b/core/src/api/search.rs @@ -1,4 +1,5 @@ -use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR}; +use crate::location::file_path_helper::{check_file_path_exists, IsolatedFilePathData}; +use std::collections::BTreeSet; use chrono::{DateTime, Utc}; use prisma_client_rust::{operator::or, Direction}; @@ -77,7 +78,7 @@ pub fn mount() -> AlphaRouter { #[specta(optional)] extension: Option, #[serde(default)] - kind: Vec, + kind: BTreeSet, #[serde(default)] tags: Vec, #[serde(default)] @@ -103,39 +104,30 @@ pub fn mount() -> AlphaRouter { None }; - let directory_id = if let Some(mut path) = args.path.clone() { - if !path.ends_with(MAIN_SEPARATOR) { - path += MAIN_SEPARATOR_STR; - } - - Some( - db.file_path() - .find_first(chain_optional_iter( - [ - file_path::materialized_path::equals(path), - file_path::is_dir::equals(true), - ], - [location.map(|l| file_path::location_id::equals(l.id))], - )) - .select(file_path::select!({ pub_id })) - .exec() + let directory_materialized_path_str = match (args.path, location) { + (Some(path), Some(location)) if !path.is_empty() && path != "/" => { + let parent_iso_file_path = + IsolatedFilePathData::from_relative_str(location.id, &path); + if !check_file_path_exists::(&parent_iso_file_path, db) .await? - .ok_or_else(|| { - rspc::Error::new( - ErrorCode::NotFound, - "Directory not found".into(), - ) - })? - .pub_id, - ) - } else { - None + { + return Err(rspc::Error::new( + ErrorCode::NotFound, + "Directory not found".into(), + )); + } + + parent_iso_file_path.materialized_path_for_children() + } + (Some(_empty), _) => Some("/".into()), + _ => None, }; let object_params = chain_optional_iter( [], [ - (!args.kind.is_empty()).then(|| object::kind::in_vec(args.kind)), + (!args.kind.is_empty()) + .then(|| object::kind::in_vec(args.kind.into_iter().collect())), (!args.tags.is_empty()).then(|| { let tags = args.tags.into_iter().map(tag::id::equals).collect(); let tags_on_object = tag_on_object::tag::is(vec![or(tags)]); @@ -149,7 +141,7 @@ pub fn mount() -> AlphaRouter { args.search .split(' ') .map(str::to_string) - .map(file_path::materialized_path::contains), + .map(file_path::name::contains), [ args.location_id.map(file_path::location_id::equals), args.extension.map(file_path::extension::equals), @@ -159,8 +151,8 @@ pub fn mount() -> AlphaRouter { args.created_at .to .map(|v| file_path::date_created::lte(v.into())), - args.path.map(file_path::materialized_path::starts_with), - directory_id.map(Some).map(file_path::parent_id::equals), + directory_materialized_path_str + .map(file_path::materialized_path::equals), (!object_params.is_empty()) .then(|| file_path::object::is(object_params)), ], @@ -203,7 +195,7 @@ pub fn mount() -> AlphaRouter { library .thumbnail_exists(cas_id) .await - .map_err(LocationError::IOError)? + .map_err(LocationError::from)? } else { false }; diff --git a/core/src/custom_uri.rs b/core/src/custom_uri.rs index fb29f03e1..3136e2a3a 100644 --- a/core/src/custom_uri.rs +++ b/core/src/custom_uri.rs @@ -1,4 +1,9 @@ -use crate::{location::file_path_helper::MaterializedPath, prisma::file_path, Node}; +use crate::{ + location::file_path_helper::{file_path_to_handle_custom_uri, IsolatedFilePathData}, + prisma::file_path, + util::error::FileIOError, + Node, +}; use std::{ io, @@ -106,15 +111,19 @@ async fn handle_thumbnail( .join(file_cas_id) .with_extension("webp"); - let file = File::open(filename).await.map_err(|err| { + let file = File::open(&filename).await.map_err(|err| { if err.kind() == io::ErrorKind::NotFound { HandleCustomUriError::NotFound("file") } else { - err.into() + FileIOError::from((&filename, err)).into() } })?; - let content_lenght = file.metadata().await?.len(); + let content_lenght = file + .metadata() + .await + .map_err(|e| FileIOError::from((&filename, e)))? + .len(); Ok(builder .header("Content-Type", "image/webp") @@ -123,7 +132,9 @@ async fn handle_thumbnail( .body(if method == Method::HEAD { vec![] } else { - read_file(file, content_lenght, None).await? + read_file(file, content_lenght, None) + .await + .map_err(|e| FileIOError::from((&filename, e)))? })?) } @@ -161,7 +172,7 @@ async fn handle_file( let lru_cache_key = (library_id, file_path_id); - let (file_path_materialized_path, extension) = + let (file_path_full_path, extension) = if let Some(entry) = FILE_METADATA_CACHE.get(&lru_cache_key) { entry } else { @@ -175,16 +186,14 @@ async fn handle_file( .db .file_path() .find_unique(file_path::id::equals(file_path_id)) - .include(file_path::include!({ location })) + .select(file_path_to_handle_custom_uri::select()) .exec() .await? .ok_or_else(|| HandleCustomUriError::NotFound("object"))?; let lru_entry = ( - Path::new(&file_path.location.path).join(&MaterializedPath::from(( - location_id, - &file_path.materialized_path, - ))), + Path::new(&file_path.location.path) + .join(IsolatedFilePathData::from((location_id, &file_path))), file_path.extension, ); FILE_METADATA_CACHE.insert(lru_cache_key, lru_entry.clone()); @@ -192,15 +201,13 @@ async fn handle_file( lru_entry }; - let file = File::open(file_path_materialized_path) - .await - .map_err(|err| { - if err.kind() == io::ErrorKind::NotFound { - HandleCustomUriError::NotFound("file") - } else { - err.into() - } - })?; + let file = File::open(&file_path_full_path).await.map_err(|err| { + if err.kind() == io::ErrorKind::NotFound { + HandleCustomUriError::NotFound("file") + } else { + FileIOError::from((&file_path_full_path, err)).into() + } + })?; // TODO: This should be determined from magic bytes when the file is indexed and stored it in the DB on the file path // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types @@ -266,7 +273,12 @@ async fn handle_file( } }; - let mut content_lenght = file.metadata().await?.len(); + let mut content_lenght = file + .metadata() + .await + .map_err(|e| FileIOError::from((&file_path_full_path, e)))? + .len(); + // GET is the only method for which range handling is defined, according to the spec // https://httpwg.org/specs/rfc9110.html#field.range let range = if method == Method::GET { @@ -330,10 +342,14 @@ async fn handle_file( // FIXME: Add ETag support (caching on the webview) - read_file(file, content_lenght, Some(range.start)).await? + read_file(file, content_lenght, Some(range.start)) + .await + .map_err(|e| FileIOError::from((&file_path_full_path, e)))? } _ if method == Method::HEAD => vec![], - _ => read_file(file, content_lenght, None).await?, + _ => read_file(file, content_lenght, None) + .await + .map_err(|e| FileIOError::from((&file_path_full_path, e)))?, }; Ok(builder @@ -360,7 +376,7 @@ pub enum HandleCustomUriError { #[error("error creating http request/response: {0}")] Http(#[from] httpz::http::Error), #[error("io error: {0}")] - Io(#[from] io::Error), + FileIO(#[from] FileIOError), #[error("query error: {0}")] QueryError(#[from] QueryError), #[error("{0}")] @@ -377,19 +393,19 @@ impl From for Response> { (match value { HandleCustomUriError::Http(err) => { - error!("Error creating http request/response: {}", err); + error!("Error creating http request/response: {:#?}", err); builder .status(StatusCode::INTERNAL_SERVER_ERROR) .body(b"Internal Server Error".to_vec()) } - HandleCustomUriError::Io(err) => { - error!("IO error: {}", err); + HandleCustomUriError::FileIO(err) => { + error!("IO error: {:#?}", err); builder .status(StatusCode::INTERNAL_SERVER_ERROR) .body(b"Internal Server Error".to_vec()) } HandleCustomUriError::QueryError(err) => { - error!("Query error: {}", err); + error!("Query error: {:#?}", err); builder .status(StatusCode::INTERNAL_SERVER_ERROR) .body(b"Internal Server Error".to_vec()) diff --git a/core/src/job/job_manager.rs b/core/src/job/job_manager.rs index 3b8bb8802..19219c55f 100644 --- a/core/src/job/job_manager.rs +++ b/core/src/job/job_manager.rs @@ -321,6 +321,7 @@ pub struct JobReport { pub data: Option>, pub metadata: Option, pub is_background: bool, + pub errors_text: Vec, pub created_at: Option>, pub started_at: Option>, @@ -361,6 +362,10 @@ impl From for JobReport { None }) }), + errors_text: data + .errors_text + .map(|errors_str| errors_str.split("\n\n").map(str::to_string).collect()) + .unwrap_or_default(), created_at: Some(data.date_created.into()), started_at: data.date_started.map(|d| d.into()), completed_at: data.date_completed.map(|d| d.into()), @@ -386,6 +391,7 @@ impl JobReport { started_at: None, completed_at: None, status: JobStatus::Queued, + errors_text: vec![], task_count: 0, data: None, metadata: None, @@ -449,6 +455,9 @@ impl JobReport { job::id::equals(self.id.as_bytes().to_vec()), vec![ job::status::set(self.status as i32), + job::errors_text::set( + (!self.errors_text.is_empty()).then(|| self.errors_text.join("\n\n")), + ), job::data::set(self.data.clone()), job::metadata::set(serde_json::to_vec(&self.metadata).ok()), job::task_count::set(self.task_count), @@ -472,6 +481,7 @@ pub enum JobStatus { Canceled = 3, Failed = 4, Paused = 5, + CompletedWithErrors = 6, } impl TryFrom for JobStatus { @@ -485,6 +495,7 @@ impl TryFrom for JobStatus { 3 => Self::Canceled, 4 => Self::Failed, 5 => Self::Paused, + 6 => Self::CompletedWithErrors, _ => return Err(JobError::InvalidJobStatusInt(value)), }; diff --git a/core/src/job/mod.rs b/core/src/job/mod.rs index f7e7e5609..84a83da35 100644 --- a/core/src/job/mod.rs +++ b/core/src/job/mod.rs @@ -2,6 +2,7 @@ use crate::{ library::Library, location::indexer::IndexerError, object::{file_identifier::FileIdentifierJobError, preview::ThumbnailerError}, + util::error::FileIOError, }; use std::{ @@ -17,7 +18,7 @@ use rmp_serde::{decode::Error as DecodeError, encode::Error as EncodeError}; use sd_crypto::Error as CryptoError; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use thiserror::Error; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; use uuid::Uuid; mod job_manager; @@ -29,10 +30,8 @@ pub use worker::*; #[derive(Error, Debug)] pub enum JobError { // General errors - #[error("Database error: {0}")] + #[error("database error")] DatabaseError(#[from] prisma_client_rust::QueryError), - #[error("I/O error: {0}")] - IOError(#[from] std::io::Error), #[error("Failed to join Tokio spawn blocking: {0}")] JoinTaskError(#[from] tokio::task::JoinError), #[error("Job state encode error: {0}")] @@ -57,6 +56,8 @@ pub enum JobError { Path, #[error("invalid job status integer")] InvalidJobStatusInt(i32), + #[error(transparent)] + FileIO(#[from] FileIOError), // Specific job errors #[error("Indexer error: {0}")] @@ -73,16 +74,19 @@ pub enum JobError { WouldOverwrite(PathBuf), // Not errors - #[error("Job had a early finish: ")] + #[error("step completed with errors")] + StepCompletedWithErrors(JobRunErrors), + #[error("job had a early finish: ")] EarlyFinish { name: String, reason: String }, - #[error("Data needed for job execution not found: job ")] + #[error("data needed for job execution not found: job ")] JobDataNotFound(String), - #[error("Job paused")] + #[error("job paused")] Paused(Vec), } pub type JobResult = Result; pub type JobMetadata = Option; +pub type JobRunErrors = Vec; /// `JobInitData` is a trait to represent the data being passed to initialize a `Job` pub trait JobInitData: Serialize + DeserializeOwned + Send + Sync + Hash { @@ -131,7 +135,11 @@ pub trait DynJob: Send + Sync { fn report(&self) -> &Option; fn report_mut(&mut self) -> &mut Option; fn name(&self) -> &'static str; - async fn run(&mut self, job_manager: Arc, ctx: WorkerContext) -> JobResult; + async fn run( + &mut self, + job_manager: Arc, + ctx: WorkerContext, + ) -> Result<(JobMetadata, JobRunErrors), JobError>; fn hash(&self) -> u64; fn set_next_jobs(&mut self, next_jobs: VecDeque>); fn serialize_state(&self) -> Result, JobError>; @@ -306,17 +314,25 @@ impl DynJob for Job { ::NAME } - async fn run(&mut self, job_manager: Arc, ctx: WorkerContext) -> JobResult { + async fn run( + &mut self, + job_manager: Arc, + ctx: WorkerContext, + ) -> Result<(JobMetadata, JobRunErrors), JobError> { let mut job_should_run = true; + let mut errors = vec![]; + // Checking if we have a brand new job, or if we are resuming an old one. if self.state.data.is_none() { if let Err(e) = self.stateful_job.init(ctx.clone(), &mut self.state).await { - if matches!(e, JobError::EarlyFinish { .. }) { - info!("{e}"); - job_should_run = false; - } else { - return Err(e); + match e { + JobError::EarlyFinish { .. } => { + info!("{e}"); + job_should_run = false; + } + JobError::StepCompletedWithErrors(errors_text) => errors.extend(errors_text), + other => return Err(other), } } } @@ -329,12 +345,18 @@ impl DynJob for Job { ctx.clone(), &mut self.state, ) => { - if matches!(step_result, Err(JobError::EarlyFinish { .. })) { - info!("{}", step_result.unwrap_err()); - break; - } else { - step_result?; - }; + match step_result { + Err(JobError::EarlyFinish { .. }) => { + info!("{}", step_result.unwrap_err()); + break; + }, + Err(JobError::StepCompletedWithErrors(errors_text)) => { + warn!("Job had a step with errors", self.id); + errors.extend(errors_text); + }, + maybe_err => maybe_err? + } + self.state.steps.pop_front(); } _ = shutdown_rx.recv() => { @@ -368,7 +390,7 @@ impl DynJob for Job { } } - Ok(metadata) + Ok((metadata, errors)) } fn hash(&self) -> u64 { diff --git a/core/src/job/worker.rs b/core/src/job/worker.rs index b3a130322..469e77b04 100644 --- a/core/src/job/worker.rs +++ b/core/src/job/worker.rs @@ -14,7 +14,7 @@ use tokio::{ }; use tracing::{error, info, warn}; -use super::{JobMetadata, JobReport}; +use super::{JobMetadata, JobReport, JobRunErrors}; const JOB_REPORT_UPDATE_INTERVAL: Duration = Duration::from_millis(1000 / 60); @@ -23,6 +23,7 @@ const JOB_REPORT_UPDATE_INTERVAL: Duration = Duration::from_millis(1000 / 60); pub enum WorkerEvent { Progressed(Vec), Completed(oneshot::Sender<()>, JobMetadata), + CompletedWithErrors(oneshot::Sender<()>, JobMetadata, JobRunErrors), Failed(oneshot::Sender<()>), Paused(Vec, oneshot::Sender<()>), } @@ -143,15 +144,21 @@ impl Worker { let (done_tx, done_rx) = oneshot::channel(); match job.run(job_manager.clone(), worker_ctx.clone()).await { - Ok(metadata) => { - // handle completion + Ok((metadata, errors)) if errors.is_empty() => { worker_ctx .events_tx .send(WorkerEvent::Completed(done_tx, metadata)) .expect("critical error: failed to send worker complete event"); } + Ok((metadata, errors)) => { + warn!("Job completed with errors"); + worker_ctx + .events_tx + .send(WorkerEvent::CompletedWithErrors(done_tx, metadata, errors)) + .expect("critical error: failed to send worker complete event"); + } Err(JobError::Paused(state)) => { - info!("Job paused, we will pause all children jobs"); + info!("Job paused, we will pause all children jobs"); if let Err(e) = job.pause_children(&library).await { error!("Failed to pause children jobs: {e:#?}"); } @@ -162,7 +169,7 @@ impl Worker { .expect("critical error: failed to send worker pause event"); } Err(e) => { - error!("Job failed with error: {e:#?}; We will cancel all children jobs"); + error!("Job failed with error: {e:#?}; We will cancel all children jobs"); if let Err(e) = job.cancel_children(&library).await { error!("Failed to cancel children jobs: {e:#?}"); } @@ -233,6 +240,27 @@ impl Worker { break; } + WorkerEvent::CompletedWithErrors(done_tx, metadata, errors) => { + worker.report.status = JobStatus::CompletedWithErrors; + worker.report.errors_text = errors; + worker.report.data = None; + worker.report.metadata = metadata; + worker.report.completed_at = Some(Utc::now()); + if let Err(e) = worker.report.update(&library).await { + error!("failed to update job report: {:#?}", e); + } + + invalidate_query!(library, "jobs.getRunning"); + invalidate_query!(library, "jobs.getHistory"); + + info!("{}", worker.report); + + done_tx + .send(()) + .expect("critical error: failed to send worker completion"); + + break; + } WorkerEvent::Failed(done_tx) => { worker.report.status = JobStatus::Failed; worker.report.data = None; diff --git a/core/src/lib.rs b/core/src/lib.rs index 61e3b3f1f..be3bb73ae 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -130,7 +130,9 @@ impl Node { .init(); let event_bus = broadcast::channel(1024); - let config = NodeConfigManager::new(data_dir.to_path_buf()).await?; + let config = NodeConfigManager::new(data_dir.to_path_buf()) + .await + .map_err(NodeError::FailedToInitializeConfig)?; let jobs = JobManager::new(); let location_manager = LocationManager::new(); @@ -216,13 +218,11 @@ impl Node { /// Error type for Node related errors. #[derive(Error, Debug)] pub enum NodeError { - #[error("Failed to create data directory: {0}")] - FailedToCreateDataDirectory(#[from] std::io::Error), - #[error("Failed to initialize config: {0}")] - FailedToInitializeConfig(#[from] util::migrator::MigratorError), - #[error("Failed to initialize library manager: {0}")] + #[error("failed to initialize config")] + FailedToInitializeConfig(util::migrator::MigratorError), + #[error("failed to initialize library manager")] FailedToInitializeLibraryManager(#[from] library::LibraryManagerError), - #[error("Location manager error: {0}")] + #[error(transparent)] LocationManager(#[from] LocationManagerError), #[error("invalid platform integer")] InvalidPlatformInt(i32), diff --git a/core/src/library/library.rs b/core/src/library/library.rs index c4eb5fc3d..01141ccf0 100644 --- a/core/src/library/library.rs +++ b/core/src/library/library.rs @@ -1,11 +1,15 @@ use crate::{ api::CoreEvent, job::{IntoJob, JobInitData, JobManagerError, StatefulJob}, - location::{file_path_helper::MaterializedPath, LocationManager}, + location::{ + file_path_helper::{file_path_to_full_path, IsolatedFilePathData}, + LocationManager, + }, node::NodeConfigManager, object::{orphan_remover::OrphanRemoverActor, preview::get_thumbnail_path}, prisma::{file_path, location, PrismaClient}, sync::SyncManager, + util::error::FileIOError, NodeContext, }; @@ -16,6 +20,7 @@ use std::{ }; use sd_crypto::keys::keymanager::KeyManager; +use tokio::{fs, io}; use tracing::warn; use uuid::Uuid; @@ -85,13 +90,13 @@ impl Library { &self.node_context.location_manager } - pub async fn thumbnail_exists(&self, cas_id: &str) -> tokio::io::Result { + pub async fn thumbnail_exists(&self, cas_id: &str) -> Result { let thumb_path = get_thumbnail_path(self, cas_id); - match tokio::fs::metadata(thumb_path).await { + match fs::metadata(&thumb_path).await { Ok(_) => Ok(true), - Err(e) if e.kind() == tokio::io::ErrorKind::NotFound => Ok(false), - Err(e) => Err(e), + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(FileIOError::from((thumb_path, e))), } } @@ -104,20 +109,12 @@ impl Library { file_path::location::is(vec![location::node_id::equals(self.node_local_id)]), file_path::id::equals(id), ]) - .select(file_path::select!({ - materialized_path - location: select { - id - path - } - })) + .select(file_path_to_full_path::select()) .exec() .await? .map(|record| { - Path::new(&record.location.path).join(&MaterializedPath::from(( - record.location.id, - &record.materialized_path, - ))) + Path::new(&record.location.path) + .join(IsolatedFilePathData::from((record.location.id, &record))) })) } } diff --git a/core/src/library/manager.rs b/core/src/library/manager.rs index 6e9c2b1f0..9b14356b6 100644 --- a/core/src/library/manager.rs +++ b/core/src/library/manager.rs @@ -6,6 +6,8 @@ use crate::{ sync::{SyncManager, SyncMessage}, util::{ db::load_and_migrate, + error::{FileIOError, NonUtf8PathError}, + migrator::MigratorError, seeder::{indexer_rules_seeder, SeederError}, }, NodeContext, @@ -15,15 +17,17 @@ use sd_crypto::{ keys::keymanager::{KeyManager, StoredKey}, types::{EncryptedKey, Nonce, Salt}, }; + use std::{ - env, fs, io, + env, path::{Path, PathBuf}, str::FromStr, sync::Arc, }; + use thiserror::Error; -use tokio::sync::RwLock; -use tracing::{debug, error}; +use tokio::{fs, io, sync::RwLock, try_join}; +use tracing::{debug, error, warn}; use uuid::Uuid; use super::{Library, LibraryConfig, LibraryConfigWrapped}; @@ -40,28 +44,28 @@ pub struct LibraryManager { #[derive(Error, Debug)] pub enum LibraryManagerError { - #[error("error saving or loading the config from the filesystem")] - IO(#[from] io::Error), + #[error(transparent)] + FileIO(#[from] FileIOError), #[error("error serializing or deserializing the JSON in the config file")] Json(#[from] serde_json::Error), - #[error("Database error: {0}")] + #[error("database error")] Database(#[from] prisma_client_rust::QueryError), - #[error("Library not found error")] + #[error("library not found error")] LibraryNotFound, #[error("error migrating the config file")] Migration(String), #[error("failed to parse uuid")] Uuid(#[from] uuid::Error), - #[error("error opening database as the path contains non-UTF-8 characters")] - InvalidDatabasePath(PathBuf), - #[error("Failed to run seeder: {0}")] + #[error("failed to run seeder")] Seeder(#[from] SeederError), #[error("failed to initialise the key manager")] KeyManager(#[from] sd_crypto::Error), - #[error("failed to run library migrations: {0}")] - MigratorError(#[from] crate::util::migrator::MigratorError), + #[error("failed to run library migrations")] + MigratorError(#[from] MigratorError), #[error("invalid library configuration: {0}")] InvalidConfig(String), + #[error(transparent)] + NonUtf8Path(#[from] NonUtf8PathError), } impl From for rspc::Error { @@ -132,42 +136,56 @@ impl LibraryManager { libraries_dir: PathBuf, node_context: NodeContext, ) -> Result, LibraryManagerError> { - fs::create_dir_all(&libraries_dir)?; + fs::create_dir_all(&libraries_dir) + .await + .map_err(|e| FileIOError::from((&libraries_dir, e)))?; let mut libraries = Vec::new(); - for entry in fs::read_dir(&libraries_dir)? - .filter_map(|entry| entry.ok()) - .filter(|entry| { - entry.path().is_file() - && entry - .path() - .extension() - .map(|v| v == "sdlibrary") - .unwrap_or(false) - }) { - let config_path = entry.path(); - let library_id = match Path::new(&config_path) - .file_stem() - .map(|v| v.to_str().map(Uuid::from_str)) + let mut read_dir = fs::read_dir(&libraries_dir) + .await + .map_err(|e| FileIOError::from((&libraries_dir, e)))?; + + while let Some(entry) = read_dir + .next_entry() + .await + .map_err(|e| FileIOError::from((&libraries_dir, e)))? + { + let entry_path = entry.path(); + let metadata = entry + .metadata() + .await + .map_err(|e| FileIOError::from((&entry_path, e)))?; + if metadata.is_file() + && entry_path + .extension() + .map(|ext| ext == "sdlibrary") + .unwrap_or(false) { - Some(Some(Ok(id))) => id, - _ => { - println!("Attempted to load library from path '{}' but it has an invalid filename. Skipping...", config_path.display()); + let Some(Ok(library_id)) = entry_path + .file_stem() + .and_then(|v| v.to_str().map(Uuid::from_str)) + else { + warn!("Attempted to load library from path '{}' but it has an invalid filename. Skipping...", entry_path.display()); continue; - } }; - let db_path = config_path.clone().with_extension("db"); - if !db_path.try_exists().unwrap() { - println!( + let db_path = entry_path.with_extension("db"); + match fs::metadata(&db_path).await { + Ok(_) => {} + Err(e) if e.kind() == io::ErrorKind::NotFound => { + warn!( "Found library '{}' but no matching database file was found. Skipping...", - config_path.display() - ); - continue; - } + entry_path.display() + ); + continue; + } + Err(e) => return Err(FileIOError::from((db_path, e)).into()), + } - let config = LibraryConfig::read(config_path)?; - libraries.push(Self::load(library_id, &db_path, config, node_context.clone()).await?); + let config = LibraryConfig::read(entry_path)?; + libraries + .push(Self::load(library_id, &db_path, config, node_context.clone()).await?); + } } let this = Arc::new(Self { @@ -302,8 +320,21 @@ impl LibraryManager { .find(|l| l.id == id) .ok_or(LibraryManagerError::LibraryNotFound)?; - fs::remove_file(Path::new(&self.libraries_dir).join(format!("{}.db", library.id)))?; - fs::remove_file(Path::new(&self.libraries_dir).join(format!("{}.sdlibrary", library.id)))?; + let db_path = self.libraries_dir.join(format!("{}.db", library.id)); + let sd_lib_path = self.libraries_dir.join(format!("{}.sdlibrary", library.id)); + + try_join!( + async { + fs::remove_file(&db_path) + .await + .map_err(|e| LibraryManagerError::FileIO(FileIOError::from((db_path, e)))) + }, + async { + fs::remove_file(&sd_lib_path) + .await + .map_err(|e| LibraryManagerError::FileIO(FileIOError::from((sd_lib_path, e)))) + }, + )?; invalidate_query!(library, "library.list"); @@ -334,7 +365,7 @@ impl LibraryManager { load_and_migrate(&format!( "file:{}", db_path.as_os_str().to_str().ok_or_else(|| { - LibraryManagerError::InvalidDatabasePath(db_path.to_path_buf()) + LibraryManagerError::NonUtf8Path(NonUtf8PathError(db_path.into())) })? )) .await diff --git a/core/src/location/error.rs b/core/src/location/error.rs index 0c701fd1d..7baa80024 100644 --- a/core/src/location/error.rs +++ b/core/src/location/error.rs @@ -1,8 +1,9 @@ +use crate::util::error::FileIOError; + use std::path::PathBuf; use rspc::{self, ErrorCode}; use thiserror::Error; -use tokio::io; use uuid::Uuid; use super::{ @@ -13,20 +14,20 @@ use super::{ #[derive(Error, Debug)] pub enum LocationError { // Not Found errors - #[error("Location not found (path: {})", .0.display())] + #[error("location not found ", .0.display())] PathNotFound(PathBuf), - #[error("Location not found (uuid: {0})")] + #[error("location not found ")] UuidNotFound(Uuid), - #[error("Location not found (id: {0})")] + #[error("location not found ")] IdNotFound(i32), // User errors - #[error("Location not a directory (path: {})", .0.display())] + #[error("location not a directory ", .0.display())] NotDirectory(PathBuf), - #[error("Could not find directory in Location (path: {})", .0.display())] + #[error("could not find directory in location ", .0.display())] DirectoryNotFound(PathBuf), #[error( - "Library exists in the location metadata file, must relink: (old_path: {}, new_path: {})", + "library exists in the location metadata file, must relink ", .old_path.display(), .new_path.display(), )] @@ -35,36 +36,36 @@ pub enum LocationError { new_path: PathBuf, }, #[error( - "This location belongs to another library, must update .spacedrive file: (path: {})", + "this location belongs to another library, must update .spacedrive file ", .0.display() )] AddLibraryToMetadata(PathBuf), - #[error("Location metadata file not found: (path: {})", .0.display())] + #[error("location metadata file not found ", .0.display())] MetadataNotFound(PathBuf), - #[error("Location already exists in database (path: {})", .0.display())] + #[error("location already exists in database ", .0.display())] LocationAlreadyExists(PathBuf), - #[error("Nested location currently not supported (path: {})", .0.display())] + #[error("nested location currently not supported ", .0.display())] NestedLocation(PathBuf), // Internal Errors - #[error("Location metadata error (error: {0:?})")] + #[error(transparent)] LocationMetadataError(#[from] LocationMetadataError), - #[error("Failed to read location path metadata info (path: {}); (error: {0:?})", .1.display())] - LocationPathFilesystemMetadataAccess(io::Error, PathBuf), - #[error("Missing metadata file for location (path: {})", .0.display())] + #[error("failed to read location path metadata info")] + LocationPathFilesystemMetadataAccess(FileIOError), + #[error("missing metadata file for location ", .0.display())] MissingMetadataFile(PathBuf), - #[error("Failed to open file from local os (error: {0:?})")] - FileReadError(io::Error), - #[error("Failed to read mounted volumes from local os (error: {0:?})")] + #[error("failed to open file from local OS")] + FileReadError(FileIOError), + #[error("failed to read mounted volumes from local OS")] VolumeReadError(String), - #[error("Failed to connect to database (error: {0:?})")] - IOError(io::Error), - #[error("Database error (error: {0:?})")] + #[error("database error")] DatabaseError(#[from] prisma_client_rust::QueryError), - #[error("Location manager error (error: {0:?})")] + #[error(transparent)] LocationManagerError(#[from] LocationManagerError), - #[error("File path related error (error: {0})")] + #[error(transparent)] FilePathError(#[from] FilePathError), + #[error(transparent)] + FileIO(#[from] FileIOError), } impl From for rspc::Error { diff --git a/core/src/location/file_path_helper.rs b/core/src/location/file_path_helper.rs deleted file mode 100644 index 830ab489e..000000000 --- a/core/src/location/file_path_helper.rs +++ /dev/null @@ -1,680 +0,0 @@ -use crate::{ - prisma::{file_path, location, PrismaClient}, - util::db::uuid_to_bytes, -}; - -use std::{ - borrow::Cow, - fmt::{Display, Formatter}, - fs::Metadata, - path::{Path, PathBuf, MAIN_SEPARATOR, MAIN_SEPARATOR_STR}, - time::SystemTime, -}; - -use chrono::{DateTime, Utc}; -use futures::future::try_join_all; -use prisma_client_rust::QueryError; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tokio::{fs, io}; -use tracing::error; -use uuid::Uuid; - -use super::LocationId; - -// File Path selectables! -file_path::select!(file_path_just_id_materialized_path { - pub_id - materialized_path -}); -file_path::select!(file_path_for_file_identifier { - id - pub_id - materialized_path - date_created -}); -file_path::select!(file_path_for_object_validator { - pub_id - materialized_path - integrity_checksum - location: select { - id - pub_id - } -}); -file_path::select!(file_path_just_materialized_path_cas_id { - materialized_path - cas_id -}); - -// File Path includes! -file_path::include!(file_path_with_object { object }); - -#[derive(Clone, Copy, Debug, Serialize, Deserialize)] -pub struct FilePathMetadata { - pub inode: u64, - pub device: u64, - pub size_in_bytes: u64, - pub created_at: DateTime, - pub modified_at: DateTime, -} - -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct MaterializedPath<'a> { - pub(super) materialized_path: Cow<'a, str>, - pub(super) is_dir: bool, - pub(super) location_id: LocationId, - pub(super) name: Cow<'a, str>, - pub(super) extension: Cow<'a, str>, -} - -impl MaterializedPath<'static> { - pub fn new( - location_id: LocationId, - location_path: impl AsRef, - full_path: impl AsRef, - is_dir: bool, - ) -> Result { - let full_path = full_path.as_ref(); - let mut materialized_path = format!( - "{MAIN_SEPARATOR_STR}{}", - extract_materialized_path(location_id, location_path, full_path)? - .to_str() - .expect("Found non-UTF-8 path") - ); - - if is_dir && !materialized_path.ends_with(MAIN_SEPARATOR) { - materialized_path += MAIN_SEPARATOR_STR; - } - - let extension = if !is_dir { - let extension = full_path - .extension() - .unwrap_or_default() - .to_str() - .unwrap_or_default(); - - #[cfg(debug_assertions)] - { - // In dev mode, we lowercase the extension as we don't use the SQL migration, - // and using prisma.schema directly we can't set `COLLATE NOCASE` in the - // `extension` column at `file_path` table - extension.to_lowercase() - } - #[cfg(not(debug_assertions))] - { - extension.to_string() - } - } else { - String::new() - }; - - Ok(Self { - materialized_path: Cow::Owned(materialized_path), - is_dir, - location_id, - name: Cow::Owned(Self::prepare_name(full_path).to_string()), - extension: Cow::Owned(extension), - }) - } -} - -impl<'a> MaterializedPath<'a> { - pub fn location_id(&self) -> LocationId { - self.location_id - } - - fn prepare_name(path: &Path) -> &str { - // Not using `impl AsRef` here because it's an private method - path.file_stem() - .unwrap_or_default() - .to_str() - .unwrap_or_default() - } - - pub fn parent(&self) -> Self { - let parent_path = Path::new(self.materialized_path.as_ref()) - .parent() - .unwrap_or_else(|| Path::new(MAIN_SEPARATOR_STR)); - - let mut parent_path_str = parent_path - .to_str() - .unwrap() // SAFETY: This unwrap is ok because this path was a valid UTF-8 String before - .to_string(); - - if !parent_path_str.ends_with(MAIN_SEPARATOR) { - parent_path_str += MAIN_SEPARATOR_STR; - } - - Self { - materialized_path: Cow::Owned(parent_path_str), - is_dir: true, - location_id: self.location_id, - // NOTE: This way we don't use the same name for "/" `file_path`, that uses the location - // name in the database, check later if this is a problem - name: Cow::Owned(Self::prepare_name(parent_path).to_string()), - extension: Cow::Owned(String::new()), - } - } -} - -impl<'a, S: AsRef + 'a> From<(LocationId, &'a S)> for MaterializedPath<'a> { - fn from((location_id, materialized_path): (LocationId, &'a S)) -> Self { - let materialized_path = materialized_path.as_ref(); - let is_dir = materialized_path.ends_with(MAIN_SEPARATOR); - let length = materialized_path.len(); - - let (name, extension) = if length == 1 { - // The case for the root path - (materialized_path, "") - } else if is_dir { - let first_name_char = materialized_path[..(length - 1)] - .rfind(MAIN_SEPARATOR) - .unwrap_or(0) + 1; - (&materialized_path[first_name_char..(length - 1)], "") - } else { - let first_name_char = materialized_path.rfind(MAIN_SEPARATOR).unwrap_or(0) + 1; - if let Some(last_dot_relative_idx) = materialized_path[first_name_char..].rfind('.') { - let last_dot_idx = first_name_char + last_dot_relative_idx; - ( - &materialized_path[first_name_char..last_dot_idx], - &materialized_path[last_dot_idx + 1..], - ) - } else { - (&materialized_path[first_name_char..], "") - } - }; - - Self { - materialized_path: Cow::Borrowed(materialized_path), - location_id, - is_dir, - name: Cow::Borrowed(name), - extension: Cow::Borrowed(extension), - } - } -} - -impl From> for String { - fn from(path: MaterializedPath) -> Self { - path.materialized_path.into_owned() - } -} - -impl From<&MaterializedPath<'_>> for String { - fn from(path: &MaterializedPath) -> Self { - path.materialized_path.to_string() - } -} - -impl AsRef for MaterializedPath<'_> { - fn as_ref(&self) -> &str { - self.materialized_path.as_ref() - } -} - -impl AsRef for &MaterializedPath<'_> { - fn as_ref(&self) -> &Path { - // Skipping / because it's not a valid path to be joined - Path::new(&self.materialized_path[1..]) - } -} - -impl Display for MaterializedPath<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.materialized_path) - } -} - -#[derive(Error, Debug)] -pub enum FilePathError { - #[error("File Path not found: ")] - NotFound(PathBuf), - #[error("Received an invalid sub path: ")] - InvalidSubPath { - location_path: PathBuf, - sub_path: PathBuf, - }, - #[error("Sub path is not a directory: {0}")] - SubPathNotDirectory(PathBuf), - #[error("The parent directory of the received sub path isn't indexed in the location: ")] - SubPathParentNotInLocation { - location_id: LocationId, - sub_path: PathBuf, - }, - #[error("Unable to extract materialized path from location: ")] - UnableToExtractMaterializedPath(LocationId, PathBuf), - #[error("Database error (error: {0:?})")] - DatabaseError(#[from] QueryError), - #[error("Database error (error: {0:?})")] - IOError(#[from] io::Error), -} - -#[cfg(feature = "location-watcher")] -pub async fn create_file_path( - crate::location::Library { db, sync, .. }: &crate::location::Library, - MaterializedPath { - materialized_path, - is_dir, - location_id, - name, - extension, - }: MaterializedPath<'_>, - parent_id: Option, - cas_id: Option, - metadata: FilePathMetadata, -) -> Result { - // Keeping a reference in that map for the entire duration of the function, so we keep it locked - - use crate::{sync, util}; - use serde_json::json; - - let location = db - .location() - .find_unique(location::id::equals(location_id)) - .select(location::select!({ id pub_id })) - .exec() - .await? - .unwrap(); - - let params = { - use file_path::*; - - util::db::chain_optional_iter( - [ - ( - location::NAME, - json!(sync::location::SyncId { - pub_id: location.pub_id - }), - ), - (cas_id::NAME, json!(cas_id)), - (materialized_path::NAME, json!(materialized_path)), - (name::NAME, json!(name)), - (extension::NAME, json!(extension)), - ( - size_in_bytes::NAME, - json!(metadata.size_in_bytes.to_string()), - ), - (inode::NAME, json!(metadata.inode.to_le_bytes())), - (device::NAME, json!(metadata.device.to_le_bytes())), - (is_dir::NAME, json!(is_dir)), - (date_created::NAME, json!(metadata.created_at)), - (date_modified::NAME, json!(metadata.modified_at)), - ], - [parent_id.map(|parent_id| { - ( - parent_id::NAME, - json!(sync::file_path::SyncId { - pub_id: uuid_to_bytes(parent_id) - }), - ) - })], - ) - }; - - let pub_id = uuid_to_bytes(Uuid::new_v4()); - - let created_path = sync - .write_op( - db, - sync.unique_shared_create( - sync::file_path::SyncId { - pub_id: pub_id.clone(), - }, - params, - ), - db.file_path().create( - pub_id, - location::id::equals(location.id), - materialized_path.into_owned(), - name.into_owned(), - extension.into_owned(), - metadata.inode.to_le_bytes().into(), - metadata.device.to_le_bytes().into(), - { - use file_path::*; - vec![ - cas_id::set(cas_id), - parent_id::set(parent_id.map(uuid_to_bytes)), - is_dir::set(is_dir), - size_in_bytes::set(metadata.size_in_bytes.to_string()), - date_created::set(metadata.created_at.into()), - date_modified::set(metadata.modified_at.into()), - ] - }, - ), - ) - .await?; - - Ok(created_path) -} - -pub fn subtract_location_path( - location_path: impl AsRef, - current_path: impl AsRef, -) -> Option { - let location_path = location_path.as_ref(); - let current_path = current_path.as_ref(); - - if let Ok(stripped) = current_path.strip_prefix(location_path) { - Some(stripped.to_path_buf()) - } else { - error!( - "Failed to strip location root path ({}) from current path ({})", - location_path.display(), - current_path.display() - ); - None - } -} - -pub fn extract_materialized_path( - location_id: LocationId, - location_path: impl AsRef, - path: impl AsRef, -) -> Result { - subtract_location_path(location_path, &path).ok_or_else(|| { - FilePathError::UnableToExtractMaterializedPath(location_id, path.as_ref().to_path_buf()) - }) -} - -pub async fn filter_file_paths_by_many_full_path_params( - location: &location::Data, - full_paths: &[impl AsRef], -) -> Result, FilePathError> { - let is_dirs = try_join_all( - full_paths - .iter() - .map(|path| async move { fs::metadata(path).await.map(|metadata| metadata.is_dir()) }), - ) - .await?; - - let materialized_paths = full_paths - .iter() - .zip(is_dirs.into_iter()) - .map(|(path, is_dir)| { - MaterializedPath::new(location.id, &location.path, path, is_dir).map(Into::into) - }) - // Collecting in a Result, so we stop on the first error - .collect::, _>>()?; - - Ok(vec![ - file_path::location_id::equals(location.id), - file_path::materialized_path::in_vec(materialized_paths), - ]) -} - -#[cfg(feature = "location-watcher")] -pub async fn check_existing_file_path( - materialized_path: &MaterializedPath<'_>, - db: &PrismaClient, -) -> Result { - db.file_path() - .count(filter_existing_file_path_params(materialized_path)) - .exec() - .await - .map_or_else(|e| Err(e.into()), |count| Ok(count > 0)) -} - -pub fn filter_existing_file_path_params( - MaterializedPath { - materialized_path, - is_dir, - location_id, - name, - extension, - }: &MaterializedPath, -) -> Vec { - let mut params = vec![ - file_path::location_id::equals(*location_id), - file_path::materialized_path::equals(materialized_path.to_string()), - file_path::is_dir::equals(*is_dir), - file_path::extension::equals(extension.to_string()), - ]; - - // This is due to a limitation of MaterializedPath, where we don't know the location name to use - // as the file_path name at the root of the location "/" or "\" on Windows - if materialized_path != MAIN_SEPARATOR_STR { - params.push(file_path::name::equals(name.to_string())); - } - - params -} - -/// With this function we try to do a loose filtering of file paths, to avoid having to do check -/// twice for directories and for files. This is because directories have a trailing `/` or `\` in -/// the materialized path -#[allow(unused)] -pub fn loose_find_existing_file_path_params( - MaterializedPath { - materialized_path, - is_dir, - location_id, - name, - .. - }: &MaterializedPath, -) -> Vec { - let mut materialized_path_str = materialized_path.to_string(); - if *is_dir { - materialized_path_str.pop(); - } - - let mut params = vec![ - file_path::location_id::equals(*location_id), - file_path::materialized_path::starts_with(materialized_path_str), - ]; - - // This is due to a limitation of MaterializedPath, where we don't know the location name to use - // as the file_path name at the root of the location "/" or "\" on Windows - if materialized_path != MAIN_SEPARATOR_STR { - params.push(file_path::name::equals(name.to_string())); - } - - params -} - -pub async fn get_existing_file_path_id( - materialized_path: &MaterializedPath<'_>, - db: &PrismaClient, -) -> Result, FilePathError> { - db.file_path() - .find_first(filter_existing_file_path_params(materialized_path)) - .select(file_path::select!({ pub_id })) - .exec() - .await - .map_or_else( - |e| Err(e.into()), - |r| Ok(r.map(|r| Uuid::from_slice(&r.pub_id).unwrap())), - ) -} - -#[cfg(feature = "location-watcher")] -pub async fn get_parent_dir( - materialized_path: &MaterializedPath<'_>, - db: &PrismaClient, -) -> Result, FilePathError> { - db.file_path() - .find_first(filter_existing_file_path_params( - &materialized_path.parent(), - )) - .exec() - .await - .map_err(Into::into) -} - -#[cfg(feature = "location-watcher")] -pub async fn get_parent_dir_id( - materialized_path: &MaterializedPath<'_>, - db: &PrismaClient, -) -> Result, FilePathError> { - get_existing_file_path_id(&materialized_path.parent(), db).await -} - -pub async fn ensure_sub_path_is_in_location( - location_path: impl AsRef, - sub_path: impl AsRef, -) -> Result { - let mut sub_path = sub_path.as_ref(); - if sub_path.starts_with(MAIN_SEPARATOR_STR) { - // SAFETY: we just checked that it starts with the separator - sub_path = sub_path.strip_prefix(MAIN_SEPARATOR_STR).unwrap(); - } - let location_path = location_path.as_ref(); - - if !sub_path.starts_with(location_path) { - // If the sub_path doesn't start with the location_path, we have to check if it's a - // materialized path received from the frontend, then we check if the full path exists - let full_path = location_path.join(sub_path); - - match fs::metadata(&full_path).await { - Ok(_) => Ok(full_path), - Err(e) if e.kind() == io::ErrorKind::NotFound => Err(FilePathError::InvalidSubPath { - sub_path: sub_path.to_path_buf(), - location_path: location_path.to_path_buf(), - }), - Err(e) => Err(e.into()), - } - } else { - Ok(sub_path.to_path_buf()) - } -} - -pub async fn ensure_sub_path_is_directory( - location_path: impl AsRef, - sub_path: impl AsRef, -) -> Result<(), FilePathError> { - let mut sub_path = sub_path.as_ref(); - - match fs::metadata(sub_path).await { - Ok(meta) => { - if meta.is_file() { - Err(FilePathError::SubPathNotDirectory(sub_path.to_path_buf())) - } else { - Ok(()) - } - } - Err(e) if e.kind() == io::ErrorKind::NotFound => { - if sub_path.starts_with(MAIN_SEPARATOR_STR) { - // SAFETY: we just checked that it starts with the separator - sub_path = sub_path.strip_prefix(MAIN_SEPARATOR_STR).unwrap(); - } - - let location_path = location_path.as_ref(); - - match fs::metadata(location_path.join(sub_path)).await { - Ok(meta) => { - if meta.is_file() { - Err(FilePathError::SubPathNotDirectory(sub_path.to_path_buf())) - } else { - Ok(()) - } - } - Err(e) if e.kind() == io::ErrorKind::NotFound => { - Err(FilePathError::InvalidSubPath { - sub_path: sub_path.to_path_buf(), - location_path: location_path.to_path_buf(), - }) - } - Err(e) => Err(e.into()), - } - } - Err(e) => Err(e.into()), - } -} - -pub async fn retain_file_paths_in_location( - location_id: LocationId, - to_retain: Vec, - maybe_parent_file_path: Option, - db: &PrismaClient, -) -> Result { - let mut to_delete_params = vec![ - file_path::location_id::equals(location_id), - file_path::pub_id::not_in_vec(to_retain.into_iter().map(uuid_to_bytes).collect()), - ]; - - if let Some(parent_file_path) = maybe_parent_file_path { - // If the parent_materialized_path is not the root path, we only delete file paths that start with the parent path - let param = if parent_file_path.materialized_path != MAIN_SEPARATOR_STR { - file_path::materialized_path::starts_with(parent_file_path.materialized_path) - } else { - // If the parent_materialized_path is the root path, we fetch children using the parent id - file_path::parent_id::equals(Some(parent_file_path.pub_id)) - }; - - to_delete_params.push(param); - } - - db.file_path() - .delete_many(to_delete_params) - .exec() - .await - .map_err(Into::into) -} - -#[allow(unused)] // TODO remove this annotation when we can use it on windows -pub fn get_inode_and_device(metadata: &Metadata) -> Result<(u64, u64), FilePathError> { - #[cfg(target_family = "unix")] - { - use std::os::unix::fs::MetadataExt; - - Ok((metadata.ino(), metadata.dev())) - } - - #[cfg(target_family = "windows")] - { - // TODO use this when it's stable and remove winapi-utils dependency - - // use std::os::windows::fs::MetadataExt; - - // Ok(( - // metadata - // .file_index() - // .expect("This function must not be called from a `DirEntry`'s `Metadata"), - // metadata - // .volume_serial_number() - // .expect("This function must not be called from a `DirEntry`'s `Metadata") as u64, - // )) - - todo!("Use metadata: {:#?}", metadata) - } -} - -#[allow(unused)] -pub async fn get_inode_and_device_from_path( - path: impl AsRef, -) -> Result<(u64, u64), FilePathError> { - #[cfg(target_family = "unix")] - { - // TODO use this when it's stable and remove winapi-utils dependency - let metadata = fs::metadata(path.as_ref()).await?; - - get_inode_and_device(&metadata) - } - - #[cfg(target_family = "windows")] - { - use winapi_util::{file::information, Handle}; - - let info = information(&Handle::from_path_any(path.as_ref())?)?; - - Ok((info.file_index(), info.volume_serial_number())) - } -} - -pub trait MetadataExt { - fn created_or_now(&self) -> SystemTime; - - fn modified_or_now(&self) -> SystemTime; -} - -impl MetadataExt for Metadata { - fn created_or_now(&self) -> SystemTime { - self.created().unwrap_or_else(|_| SystemTime::now()) - } - - fn modified_or_now(&self) -> SystemTime { - self.modified().unwrap_or_else(|_| SystemTime::now()) - } -} diff --git a/core/src/location/file_path_helper/isolated_file_path_data.rs b/core/src/location/file_path_helper/isolated_file_path_data.rs new file mode 100644 index 000000000..23e1c5ac9 --- /dev/null +++ b/core/src/location/file_path_helper/isolated_file_path_data.rs @@ -0,0 +1,636 @@ +use crate::{location::LocationId, prisma::file_path, util::error::NonUtf8PathError}; + +use std::{borrow::Cow, fmt, path::Path}; + +use serde::{Deserialize, Serialize}; + +use super::{ + file_path_for_file_identifier, file_path_for_object_validator, file_path_for_thumbnailer, + file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_isolate, + file_path_with_object, FilePathError, +}; + +#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq)] +#[non_exhaustive] +pub struct IsolatedFilePathData<'a> { + pub(in crate::location) location_id: LocationId, + pub(in crate::location) materialized_path: Cow<'a, str>, + pub(in crate::location) is_dir: bool, + pub(in crate::location) name: Cow<'a, str>, + pub(in crate::location) extension: Cow<'a, str>, + pub(in crate::location) relative_path: Cow<'a, str>, +} + +impl IsolatedFilePathData<'static> { + pub fn new( + location_id: LocationId, + location_path: impl AsRef, + full_path: impl AsRef, + is_dir: bool, + ) -> Result { + let full_path = full_path.as_ref(); + let location_path = location_path.as_ref(); + + let extension = (!is_dir) + .then(|| { + let extension = full_path + .extension() + .unwrap_or_default() + .to_str() + .unwrap_or_default(); + + #[cfg(debug_assertions)] + { + // In dev mode, we lowercase the extension as we don't use the SQL migration, + // and using prisma.schema directly we can't set `COLLATE NOCASE` in the + // `extension` column at `file_path` table + extension.to_lowercase() + } + #[cfg(not(debug_assertions))] + { + extension.to_string() + } + }) + .unwrap_or_default(); + + Ok(Self { + is_dir, + location_id, + materialized_path: Cow::Owned(extract_normalized_materialized_path_str( + location_id, + location_path, + full_path, + )?), + name: Cow::Owned( + (location_path != full_path) + .then(|| Self::prepare_name(full_path).to_string()) + .unwrap_or_default(), + ), + extension: Cow::Owned(extension), + relative_path: Cow::Owned(extract_relative_path( + location_id, + location_path, + full_path, + )?), + }) + } +} + +impl<'a> IsolatedFilePathData<'a> { + pub fn location_id(&self) -> LocationId { + self.location_id + } + + pub fn parent(&'a self) -> Self { + let (parent_path_str, name, relative_path) = if self.materialized_path == "/" { + ("/", "", "") + } else { + let trailing_slash_idx = self.materialized_path.len() - 1; + let last_slash_idx = self.materialized_path[..trailing_slash_idx] + .rfind('/') + .expect("malformed materialized path at `parent` method"); + + ( + &self.materialized_path[..last_slash_idx + 1], + &self.materialized_path[last_slash_idx + 1..trailing_slash_idx], + &self.materialized_path[1..trailing_slash_idx], + ) + }; + + Self { + is_dir: true, + location_id: self.location_id, + relative_path: Cow::Borrowed(relative_path), + materialized_path: Cow::Borrowed(parent_path_str), + name: Cow::Borrowed(name), + extension: Cow::Borrowed(""), + } + } + + pub fn from_relative_str(location_id: LocationId, relative_file_path_str: &'a str) -> Self { + let is_dir = relative_file_path_str.ends_with('/'); + + let (materialized_path, maybe_name, maybe_extension) = + Self::separate_path_name_and_extension_from_str(relative_file_path_str, is_dir); + + Self { + location_id, + materialized_path: Cow::Borrowed(materialized_path), + is_dir, + name: maybe_name.map(Cow::Borrowed).unwrap_or_default(), + extension: maybe_extension.map(Cow::Borrowed).unwrap_or_default(), + relative_path: Cow::Borrowed(relative_file_path_str), + } + } + + pub fn materialized_path_for_children(&self) -> Option { + if self.materialized_path == "/" && self.name.is_empty() && self.is_dir { + // We're at the root file_path + Some("/".to_string()) + } else { + self.is_dir + .then(|| format!("{}{}/", self.materialized_path, self.name)) + } + } + + pub fn separate_path_name_and_extension_from_str( + source: &'a str, + is_dir: bool, + ) -> ( + &'a str, // Materialized path + Option<&'a str>, // Maybe a name + Option<&'a str>, // Maybe an extension + ) { + let length = source.len(); + + if length == 1 { + // The case for the root path + (source, None, None) + } else if is_dir { + let last_char_idx = if source.ends_with('/') { + length - 1 + } else { + length + }; + + let first_name_char_idx = source[..last_char_idx].rfind('/').unwrap_or(0) + 1; + ( + &source[..first_name_char_idx], + Some(&source[first_name_char_idx..last_char_idx]), + None, + ) + } else { + let first_name_char_idx = source.rfind('/').unwrap_or(0) + 1; + let end_idx = first_name_char_idx - 1; + if let Some(last_dot_relative_idx) = source[first_name_char_idx..].rfind('.') { + let last_dot_idx = first_name_char_idx + last_dot_relative_idx; + ( + &source[..end_idx], + Some(&source[first_name_char_idx..last_dot_idx]), + Some(&source[last_dot_idx + 1..]), + ) + } else { + ( + &source[..end_idx], + Some(&source[first_name_char_idx..]), + None, + ) + } + } + } + + fn prepare_name(path: &Path) -> &str { + // Not using `impl AsRef` here because it's an private method + path.file_stem() + .unwrap_or_default() + .to_str() + .unwrap_or_default() + } + + fn from_db_data( + location_id: LocationId, + db_materialized_path: &'a str, + db_is_dir: bool, + db_name: &'a str, + db_extension: &'a str, + ) -> Self { + Self { + location_id, + materialized_path: Cow::Borrowed(db_materialized_path), + is_dir: db_is_dir, + name: Cow::Borrowed(db_name), + extension: Cow::Borrowed(db_extension), + relative_path: Cow::Owned(assemble_relative_path( + db_materialized_path, + db_name, + db_extension, + db_is_dir, + )), + } + } +} + +impl AsRef for IsolatedFilePathData<'_> { + fn as_ref(&self) -> &Path { + Path::new(self.relative_path.as_ref()) + } +} + +impl From> for file_path::UniqueWhereParam { + fn from(path: IsolatedFilePathData<'static>) -> Self { + Self::LocationIdMaterializedPathNameExtensionEquals( + path.location_id, + path.materialized_path.into_owned(), + path.name.into_owned(), + path.extension.into_owned(), + ) + } +} + +impl From> for file_path::WhereParam { + fn from(path: IsolatedFilePathData<'static>) -> Self { + Self::And(vec![ + file_path::location_id::equals(path.location_id), + file_path::materialized_path::equals(path.materialized_path.into_owned()), + file_path::name::equals(path.name.into_owned()), + file_path::extension::equals(path.extension.into_owned()), + ]) + } +} + +impl From<&IsolatedFilePathData<'_>> for file_path::UniqueWhereParam { + fn from(path: &IsolatedFilePathData<'_>) -> Self { + Self::LocationIdMaterializedPathNameExtensionEquals( + path.location_id, + path.materialized_path.to_string(), + path.name.to_string(), + path.extension.to_string(), + ) + } +} + +impl From<&IsolatedFilePathData<'_>> for file_path::WhereParam { + fn from(path: &IsolatedFilePathData<'_>) -> Self { + Self::And(vec![ + file_path::location_id::equals(path.location_id), + file_path::materialized_path::equals(path.materialized_path.to_string()), + file_path::name::equals(path.name.to_string()), + file_path::extension::equals(path.extension.to_string()), + ]) + } +} + +impl fmt::Display for IsolatedFilePathData<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.relative_path) + } +} + +#[macro_use] +mod macros { + macro_rules! impl_from_db { + ($($file_path_kind:ident),+ $(,)?) => { + $( + impl ::std::convert::From<$file_path_kind::Data> for $crate:: + location:: + file_path_helper:: + isolated_file_path_data:: + IsolatedFilePathData<'static> + { + fn from(path: $file_path_kind::Data) -> Self { + Self { + location_id: path.location_id, + relative_path: ::std::borrow::Cow::Owned( + $crate:: + location:: + file_path_helper:: + isolated_file_path_data:: + assemble_relative_path( + &path.materialized_path, + &path.name, + &path.extension, + path.is_dir, + ) + ), + materialized_path: ::std::borrow::Cow::Owned(path.materialized_path), + is_dir: path.is_dir, + name: ::std::borrow::Cow::Owned(path.name), + extension: ::std::borrow::Cow::Owned(path.extension), + } + } + } + + impl<'a> ::std::convert::From<&'a $file_path_kind::Data> for $crate:: + location:: + file_path_helper:: + isolated_file_path_data:: + IsolatedFilePathData<'a> + { + fn from(path: &'a $file_path_kind::Data) -> Self { + Self::from_db_data( + path.location_id, + &path.materialized_path, + path.is_dir, + &path.name, + &path.extension + ) + } + } + )+ + }; + } + + macro_rules! impl_from_db_without_location_id { + ($($file_path_kind:ident),+ $(,)?) => { + $( + impl ::std::convert::From<($crate::location::LocationId, $file_path_kind::Data)> for $crate:: + location:: + file_path_helper:: + isolated_file_path_data:: + IsolatedFilePathData<'static> + { + fn from((location_id, path): ($crate::location::LocationId, $file_path_kind::Data)) -> Self { + Self { + location_id, + relative_path: Cow::Owned( + $crate:: + location:: + file_path_helper:: + isolated_file_path_data:: + assemble_relative_path( + &path.materialized_path, + &path.name, + &path.extension, + path.is_dir, + ) + ), + materialized_path: Cow::Owned(path.materialized_path), + is_dir: path.is_dir, + name: Cow::Owned(path.name), + extension: Cow::Owned(path.extension), + } + } + } + + impl<'a> ::std::convert::From<($crate::location::LocationId, &'a $file_path_kind::Data)> for $crate:: + location:: + file_path_helper:: + isolated_file_path_data:: + IsolatedFilePathData<'a> + { + fn from((location_id, path): ($crate::location::LocationId, &'a $file_path_kind::Data)) -> Self { + Self::from_db_data( + location_id, + &path.materialized_path, + path.is_dir, + &path.name, + &path.extension + ) + } + } + )+ + }; + } +} + +impl_from_db!(file_path, file_path_to_isolate, file_path_with_object); + +impl_from_db_without_location_id!( + file_path_for_file_identifier, + file_path_to_full_path, + file_path_for_thumbnailer, + file_path_for_object_validator, + file_path_to_handle_custom_uri +); + +fn extract_relative_path( + location_id: LocationId, + location_path: impl AsRef, + path: impl AsRef, +) -> Result { + let path = path.as_ref(); + + path.strip_prefix(location_path) + .map_err(|_| FilePathError::UnableToExtractMaterializedPath { + location_id, + path: path.into(), + }) + .and_then(|relative| { + relative + .to_str() + .map(|relative_str| relative_str.replace('\\', "/")) + .ok_or_else(|| NonUtf8PathError(path.into()).into()) + }) +} + +/// This function separates a file path from a location path, and normalizes replacing '\' with '/' +/// to be consistent between Windows and Unix like systems +pub fn extract_normalized_materialized_path_str( + location_id: LocationId, + location_path: impl AsRef, + path: impl AsRef, +) -> Result { + let path = path.as_ref(); + + path.strip_prefix(location_path) + .map_err(|_| FilePathError::UnableToExtractMaterializedPath { + location_id, + path: path.into(), + })? + .parent() + .map(|materialized_path| { + materialized_path + .to_str() + .map(|materialized_path_str| { + if !materialized_path_str.is_empty() { + format!("/{}/", materialized_path_str.replace('\\', "/")) + } else { + "/".to_string() + } + }) + .ok_or_else(|| NonUtf8PathError(path.into())) + }) + .unwrap_or_else(|| Ok("/".to_string())) + .map_err(Into::into) +} + +fn assemble_relative_path( + materialized_path: &str, + name: &str, + extension: &str, + is_dir: bool, +) -> String { + match (is_dir, extension) { + (false, extension) if !extension.is_empty() => { + format!("{}{}.{}", &materialized_path[1..], name, extension) + } + (_, _) => format!("{}{}", &materialized_path[1..], name), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn expected( + materialized_path: &'static str, + is_dir: bool, + name: &'static str, + extension: &'static str, + relative_path: &'static str, + ) -> IsolatedFilePathData<'static> { + IsolatedFilePathData { + location_id: 1, + materialized_path: materialized_path.into(), + is_dir, + name: name.into(), + extension: extension.into(), + relative_path: relative_path.into(), + } + } + + #[test] + fn new_method() { + let tester = |full_path, is_dir, expected, msg| { + let actual = + IsolatedFilePathData::new(1, "/spacedrive/location", full_path, is_dir).unwrap(); + assert_eq!(actual, expected, "{msg}"); + }; + + tester( + "/spacedrive/location", + true, + expected("/", true, "", "", ""), + "the location root directory", + ); + + tester( + "/spacedrive/location/file.txt", + false, + expected("/", false, "file", "txt", "file.txt"), + "a file in the root directory", + ); + + tester( + "/spacedrive/location/dir", + true, + expected("/", true, "dir", "", "dir"), + "a directory in the root directory", + ); + + tester( + "/spacedrive/location/dir/file.txt", + false, + expected("/dir/", false, "file", "txt", "dir/file.txt"), + "a directory with a file inside", + ); + + tester( + "/spacedrive/location/dir/dir2", + true, + expected("/dir/", true, "dir2", "", "dir/dir2"), + "a directory in a directory", + ); + + tester( + "/spacedrive/location/dir/dir2/dir3", + true, + expected("/dir/dir2/", true, "dir3", "", "dir/dir2/dir3"), + "3 level of directories", + ); + + tester( + "/spacedrive/location/dir/dir2/dir3/file.txt", + false, + expected( + "/dir/dir2/dir3/", + false, + "file", + "txt", + "dir/dir2/dir3/file.txt", + ), + "a file inside a third level directory", + ); + } + + #[test] + fn parent_method() { + let tester = |full_path, is_dir, expected, msg| { + let child = + IsolatedFilePathData::new(1, "/spacedrive/location", full_path, is_dir).unwrap(); + + let actual = child.parent(); + assert_eq!(actual, expected, "{msg}"); + }; + + tester( + "/spacedrive/location", + true, + expected("/", true, "", "", ""), + "the location root directory", + ); + + tester( + "/spacedrive/location/file.txt", + false, + expected("/", true, "", "", ""), + "a file in the root directory", + ); + + tester( + "/spacedrive/location/dir", + true, + expected("/", true, "", "", ""), + "a directory in the root directory", + ); + + tester( + "/spacedrive/location/dir/file.txt", + false, + expected("/", true, "dir", "", "dir"), + "a directory with a file inside", + ); + + tester( + "/spacedrive/location/dir/dir2", + true, + expected("/", true, "dir", "", "dir"), + "a directory in a directory", + ); + + tester( + "/spacedrive/location/dir/dir2/dir3", + true, + expected("/dir/", true, "dir2", "", "dir/dir2"), + "3 level of directories", + ); + + tester( + "/spacedrive/location/dir/dir2/dir3/file.txt", + false, + expected("/dir/dir2/", true, "dir3", "", "dir/dir2/dir3"), + "a file inside a third level directory", + ); + } + + #[test] + fn extract_normalized_materialized_path() { + let tester = |path, expected, msg| { + let actual = + extract_normalized_materialized_path_str(1, "/spacedrive/location", path).unwrap(); + assert_eq!(actual, expected, "{msg}"); + }; + + tester("/spacedrive/location", "/", "the location root directory"); + tester( + "/spacedrive/location/file.txt", + "/", + "a file in the root directory", + ); + tester( + "/spacedrive/location/dir", + "/", + "a directory in the root directory", + ); + tester( + "/spacedrive/location/dir/file.txt", + "/dir/", + "a directory with a file inside", + ); + tester( + "/spacedrive/location/dir/dir2", + "/dir/", + "a directory in a directory", + ); + tester( + "/spacedrive/location/dir/dir2/dir3", + "/dir/dir2/", + "3 level of directories", + ); + tester( + "/spacedrive/location/dir/dir2/dir3/file.txt", + "/dir/dir2/dir3/", + "a file inside a third level directory", + ); + } +} diff --git a/core/src/location/file_path_helper/mod.rs b/core/src/location/file_path_helper/mod.rs new file mode 100644 index 000000000..8aa416879 --- /dev/null +++ b/core/src/location/file_path_helper/mod.rs @@ -0,0 +1,458 @@ +use crate::{ + prisma::{file_path, PrismaClient}, + util::error::{FileIOError, NonUtf8PathError}, +}; + +use std::{ + fs::Metadata, + path::{Path, PathBuf}, + time::SystemTime, +}; + +use chrono::{DateTime, Utc}; +use prisma_client_rust::QueryError; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use tokio::{fs, io}; +use tracing::error; + +pub mod isolated_file_path_data; + +pub use isolated_file_path_data::IsolatedFilePathData; + +use super::LocationId; + +// File Path selectables! +file_path::select!(file_path_just_pub_id { pub_id }); +file_path::select!(file_path_just_pub_id_materialized_path { + pub_id + materialized_path +}); +file_path::select!(file_path_for_file_identifier { + id + pub_id + materialized_path + date_created + is_dir + name + extension +}); +file_path::select!(file_path_for_object_validator { + pub_id + materialized_path + is_dir + name + extension + integrity_checksum + location: select { + id + pub_id + } +}); +file_path::select!(file_path_for_thumbnailer { + materialized_path + is_dir + name + extension + cas_id +}); +file_path::select!(file_path_to_isolate { + location_id + materialized_path + is_dir + name + extension +}); +file_path::select!(file_path_to_handle_custom_uri { + materialized_path + is_dir + name + extension + location: select { + path + } +}); +file_path::select!(file_path_to_full_path { + materialized_path + is_dir + name + extension + location: select { + id + path + } +}); + +// File Path includes! +file_path::include!(file_path_with_object { object }); + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct FilePathMetadata { + pub inode: u64, + pub device: u64, + pub size_in_bytes: u64, + pub created_at: DateTime, + pub modified_at: DateTime, +} + +#[derive(Error, Debug)] +pub enum FilePathError { + #[error("file Path not found: ", .0.display())] + NotFound(Box), + #[error("received an invalid sub path: ", .location_path.display(), .sub_path.display())] + InvalidSubPath { + location_path: Box, + sub_path: Box, + }, + #[error("sub path is not a directory: ", .0.display())] + SubPathNotDirectory(Box), + #[error( + "the parent directory of the received sub path isn't indexed in the location: ", + .location_id, + .sub_path.display() + )] + SubPathParentNotInLocation { + location_id: LocationId, + sub_path: Box, + }, + #[error("unable to extract materialized path from location: ", .location_id, .path.display())] + UnableToExtractMaterializedPath { + location_id: LocationId, + path: Box, + }, + #[error("database error")] + Database(#[from] QueryError), + + #[error(transparent)] + FileIO(#[from] FileIOError), + #[error(transparent)] + NonUtf8Path(#[from] NonUtf8PathError), +} + +#[cfg(feature = "location-watcher")] +pub async fn create_file_path( + crate::location::Library { db, sync, .. }: &crate::location::Library, + IsolatedFilePathData { + materialized_path, + is_dir, + location_id, + name, + extension, + .. + }: IsolatedFilePathData<'_>, + cas_id: Option, + metadata: FilePathMetadata, +) -> Result { + use crate::{prisma::location, sync, util::db::uuid_to_bytes}; + + use serde_json::json; + use uuid::Uuid; + + let location = db + .location() + .find_unique(location::id::equals(location_id)) + .select(location::select!({ id pub_id })) + .exec() + .await? + .unwrap(); + + let params = { + use file_path::*; + + vec![ + ( + location::NAME, + json!(sync::location::SyncId { + pub_id: location.pub_id + }), + ), + (cas_id::NAME, json!(cas_id)), + (materialized_path::NAME, json!(materialized_path)), + (name::NAME, json!(name)), + (extension::NAME, json!(extension)), + ( + size_in_bytes::NAME, + json!(metadata.size_in_bytes.to_string()), + ), + (inode::NAME, json!(metadata.inode.to_le_bytes())), + (device::NAME, json!(metadata.device.to_le_bytes())), + (is_dir::NAME, json!(is_dir)), + (date_created::NAME, json!(metadata.created_at)), + (date_modified::NAME, json!(metadata.modified_at)), + ] + }; + + let pub_id = uuid_to_bytes(Uuid::new_v4()); + + let created_path = sync + .write_op( + db, + sync.unique_shared_create( + sync::file_path::SyncId { + pub_id: pub_id.clone(), + }, + params, + ), + db.file_path().create( + pub_id, + location::id::equals(location.id), + materialized_path.into_owned(), + name.into_owned(), + extension.into_owned(), + metadata.inode.to_le_bytes().into(), + metadata.device.to_le_bytes().into(), + { + use file_path::*; + vec![ + cas_id::set(cas_id), + is_dir::set(is_dir), + size_in_bytes::set(metadata.size_in_bytes.to_string()), + date_created::set(metadata.created_at.into()), + date_modified::set(metadata.modified_at.into()), + ] + }, + ), + ) + .await?; + + Ok(created_path) +} + +#[cfg(feature = "location-watcher")] +pub async fn check_existing_file_path( + materialized_path: &IsolatedFilePathData<'_>, + db: &PrismaClient, +) -> Result { + Ok(db + .file_path() + .count(filter_existing_file_path_params(materialized_path)) + .exec() + .await? > 0) +} + +pub fn filter_existing_file_path_params( + IsolatedFilePathData { + materialized_path, + is_dir, + location_id, + name, + extension, + .. + }: &IsolatedFilePathData, +) -> Vec { + vec![ + file_path::location_id::equals(*location_id), + file_path::materialized_path::equals(materialized_path.to_string()), + file_path::is_dir::equals(*is_dir), + file_path::name::equals(name.to_string()), + file_path::extension::equals(extension.to_string()), + ] +} + +/// With this function we try to do a loose filtering of file paths, to avoid having to do check +/// twice for directories and for files. This is because directories have a trailing `/` or `\` in +/// the materialized path +#[allow(unused)] +pub fn loose_find_existing_file_path_params( + IsolatedFilePathData { + materialized_path, + location_id, + name, + extension, + .. + }: &IsolatedFilePathData, +) -> Vec { + vec![ + file_path::location_id::equals(*location_id), + file_path::materialized_path::equals(materialized_path.to_string()), + file_path::name::equals(name.to_string()), + file_path::extension::equals(extension.to_string()), + ] +} + +#[cfg(feature = "location-watcher")] +pub async fn get_parent_dir( + materialized_path: &IsolatedFilePathData<'_>, + db: &PrismaClient, +) -> Result, FilePathError> { + db.file_path() + .find_first(filter_existing_file_path_params( + &materialized_path.parent(), + )) + .exec() + .await + .map_err(Into::into) +} + +pub async fn ensure_sub_path_is_in_location( + location_path: impl AsRef, + sub_path: impl AsRef, +) -> Result { + let mut sub_path = sub_path.as_ref(); + if sub_path.starts_with("/") { + // SAFETY: we just checked that it starts with the separator + sub_path = sub_path.strip_prefix("/").unwrap(); + } + let location_path = location_path.as_ref(); + + if !sub_path.starts_with(location_path) { + // If the sub_path doesn't start with the location_path, we have to check if it's a + // materialized path received from the frontend, then we check if the full path exists + let full_path = location_path.join(sub_path); + + match fs::metadata(&full_path).await { + Ok(_) => Ok(full_path), + Err(e) if e.kind() == io::ErrorKind::NotFound => Err(FilePathError::InvalidSubPath { + sub_path: sub_path.into(), + location_path: location_path.into(), + }), + Err(e) => Err(FileIOError::from((full_path, e)).into()), + } + } else { + Ok(sub_path.to_path_buf()) + } +} + +pub async fn ensure_file_path_exists( + sub_path: impl AsRef, + iso_file_path: &IsolatedFilePathData<'_>, + db: &PrismaClient, + error_fn: impl FnOnce(Box) -> E, +) -> Result<(), E> +where + E: From, +{ + if !check_file_path_exists(iso_file_path, db).await? { + Err(error_fn(sub_path.as_ref().into())) + } else { + Ok(()) + } +} + +pub async fn check_file_path_exists( + iso_file_path: &IsolatedFilePathData<'_>, + db: &PrismaClient, +) -> Result +where + E: From, +{ + db.file_path() + .count(filter_existing_file_path_params(iso_file_path)) + .exec() + .await + .map(|count| count > 0) + .map_err(Into::into) +} + +pub async fn ensure_sub_path_is_directory( + location_path: impl AsRef, + sub_path: impl AsRef, +) -> Result<(), FilePathError> { + let mut sub_path = sub_path.as_ref(); + + match fs::metadata(sub_path).await { + Ok(meta) => { + if meta.is_file() { + Err(FilePathError::SubPathNotDirectory(sub_path.into())) + } else { + Ok(()) + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + if sub_path.starts_with("/") { + // SAFETY: we just checked that it starts with the separator + sub_path = sub_path.strip_prefix("/").unwrap(); + } + + let location_path = location_path.as_ref(); + let full_path = location_path.join(sub_path); + match fs::metadata(&full_path).await { + Ok(meta) => { + if meta.is_file() { + Err(FilePathError::SubPathNotDirectory(sub_path.into())) + } else { + Ok(()) + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Err(FilePathError::InvalidSubPath { + sub_path: sub_path.into(), + location_path: location_path.into(), + }) + } + Err(e) => Err(FileIOError::from((full_path, e)).into()), + } + } + Err(e) => Err(FileIOError::from((sub_path, e)).into()), + } +} + +#[allow(unused)] // TODO remove this annotation when we can use it on windows +pub fn get_inode_and_device(metadata: &Metadata) -> Result<(u64, u64), FilePathError> { + #[cfg(target_family = "unix")] + { + use std::os::unix::fs::MetadataExt; + + Ok((metadata.ino(), metadata.dev())) + } + + #[cfg(target_family = "windows")] + { + // TODO use this when it's stable and remove winapi-utils dependency + + // use std::os::windows::fs::MetadataExt; + + // Ok(( + // metadata + // .file_index() + // .expect("This function must not be called from a `DirEntry`'s `Metadata"), + // metadata + // .volume_serial_number() + // .expect("This function must not be called from a `DirEntry`'s `Metadata") as u64, + // )) + + todo!("Use metadata: {:#?}", metadata) + } +} + +#[allow(unused)] +pub async fn get_inode_and_device_from_path( + path: impl AsRef, +) -> Result<(u64, u64), FilePathError> { + #[cfg(target_family = "unix")] + { + // TODO use this when it's stable and remove winapi-utils dependency + let metadata = fs::metadata(path.as_ref()) + .await + .map_err(|e| FileIOError::from((path, e)))?; + + get_inode_and_device(&metadata) + } + + #[cfg(target_family = "windows")] + { + use winapi_util::{file::information, Handle}; + + let info = Handle::from_path_any(path.as_ref()) + .and_then(|ref handle| information(handle)) + .map_err(|e| FileIOError::from((path, e)))?; + + Ok((info.file_index(), info.volume_serial_number())) + } +} + +pub trait MetadataExt { + fn created_or_now(&self) -> SystemTime; + + fn modified_or_now(&self) -> SystemTime; +} + +impl MetadataExt for Metadata { + fn created_or_now(&self) -> SystemTime { + self.created().unwrap_or_else(|_| SystemTime::now()) + } + + fn modified_or_now(&self) -> SystemTime { + self.modified().unwrap_or_else(|_| SystemTime::now()) + } +} diff --git a/core/src/location/indexer/indexer_job.rs b/core/src/location/indexer/indexer_job.rs index dee89d17b..9242dae4d 100644 --- a/core/src/location/indexer/indexer_job.rs +++ b/core/src/location/indexer/indexer_job.rs @@ -1,31 +1,26 @@ use crate::{ + file_paths_db_fetcher_fn, job::{JobError, JobInitData, JobResult, JobState, StatefulJob, WorkerContext}, location::file_path_helper::{ - ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_just_id_materialized_path, filter_existing_file_path_params, - filter_file_paths_by_many_full_path_params, retain_file_paths_in_location, - MaterializedPath, + ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + IsolatedFilePathData, }, - prisma::location, + to_remove_db_fetcher_fn, }; -use std::{ - collections::{HashMap, VecDeque}, - path::Path, -}; +use std::{path::Path, sync::Arc}; -use chrono::Utc; use itertools::Itertools; +use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::error; -use uuid::Uuid; use super::{ - execute_indexer_step, finalize_indexer, - rules::{IndexerRule, RuleKind}, - walk::walk, - IndexerError, IndexerJobData, IndexerJobInit, IndexerJobStep, IndexerJobStepEntry, - ScanProgress, + execute_indexer_save_step, finalize_indexer, iso_file_path_factory, + remove_non_existing_file_paths, + rules::aggregate_rules_by_kind, + update_notifier_fn, + walk::{keep_walking, walk, ToWalkEntry, WalkResult}, + IndexerError, IndexerJobData, IndexerJobInit, IndexerJobSaveStep, ScanProgress, }; /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. @@ -40,11 +35,19 @@ impl JobInitData for IndexerJobInit { type Job = IndexerJob; } +/// `IndexerJobStepInput` defines the action that should be executed in the current step +#[derive(Serialize, Deserialize, Debug)] +pub enum IndexerJobStepInput { + /// `IndexerJobStepEntry`. The size of this vector is given by the [`BATCH_SIZE`] constant. + Save(IndexerJobSaveStep), + Walk(ToWalkEntry), +} + #[async_trait::async_trait] impl StatefulJob for IndexerJob { type Init = IndexerJobInit; type Data = IndexerJobData; - type Step = IndexerJobStep; + type Step = IndexerJobStepInput; const NAME: &'static str = "indexer"; @@ -61,21 +64,12 @@ impl StatefulJob for IndexerJob { let location_id = state.init.location.id; let location_path = Path::new(&state.init.location.path); - let mut indexer_rules_by_kind: HashMap> = - HashMap::with_capacity(state.init.location.indexer_rules.len()); - for location_rule in &state.init.location.indexer_rules { - let indexer_rule = IndexerRule::try_from(&location_rule.indexer_rule)?; + let db = Arc::clone(&ctx.library.db); - indexer_rules_by_kind - .entry(indexer_rule.kind) - .or_default() - .push(indexer_rule); - } + let rules_by_kind = aggregate_rules_by_kind(state.init.location.indexer_rules.iter()) + .map_err(IndexerError::from)?; - let mut dirs_ids = HashMap::new(); - - let (to_walk_path, maybe_parent_file_path) = if let Some(ref sub_path) = state.init.sub_path - { + let to_walk_path = if let Some(ref sub_path) = state.init.sub_path { let full_path = ensure_sub_path_is_in_location(location_path, sub_path) .await .map_err(IndexerError::from)?; @@ -83,196 +77,183 @@ impl StatefulJob for IndexerJob { .await .map_err(IndexerError::from)?; - let sub_path_file_path = ctx - .library - .db - .file_path() - .find_first(filter_existing_file_path_params( - &MaterializedPath::new(location_id, location_path, &full_path, true) - .map_err(IndexerError::from)?, - )) - .select(file_path_just_id_materialized_path::select()) - .exec() - .await - .map_err(IndexerError::from)? - .expect("Sub path should already exist in the database"); + ensure_file_path_exists( + sub_path, + &IsolatedFilePathData::new(location_id, location_path, &full_path, true) + .map_err(IndexerError::from)?, + &db, + IndexerError::SubPathNotFound, + ) + .await?; - // If we're operating with a sub_path, then we have to put its id on `dirs_ids` map - dirs_ids.insert( - full_path.clone(), - Uuid::from_slice(&sub_path_file_path.pub_id).unwrap(), - ); - - (full_path, Some(sub_path_file_path)) + full_path } else { - (location_path.to_path_buf(), None) + location_path.to_path_buf() }; let scan_start = Instant::now(); - let found_paths = { - let ctx = &mut ctx; // Borrow outside of closure so it's not moved + let WalkResult { + walked, + to_walk, + to_remove, + errors, + } = { walk( &to_walk_path, - &indexer_rules_by_kind, - |path, total_entries| { - IndexerJobData::on_scan_progress( - ctx, - vec![ - ScanProgress::Message(format!("Scanning {}", path.display())), - ScanProgress::ChunkCount(total_entries / BATCH_SIZE), - ], - ); - }, - // if we're not using a sub_path, then its a full indexing and we must include root dir - state.init.sub_path.is_none(), + &rules_by_kind, + update_notifier_fn(BATCH_SIZE, &mut ctx), + file_paths_db_fetcher_fn!(&db), + to_remove_db_fetcher_fn!(location_id, location_path, &db), + iso_file_path_factory(location_id, location_path), + 50_000, ) .await? }; + let scan_read_time = scan_start.elapsed(); - // NOTE: - // As we're passing the list of currently existing file paths to the `find_many_file_paths_by_full_path` query, - // it means that `dirs_ids` contains just paths that still exists on the filesystem. - dirs_ids.extend( - ctx.library - .db - .file_path() - .find_many( - filter_file_paths_by_many_full_path_params( - &location::Data::from(&state.init.location), - &found_paths - .iter() - .map(|entry| &entry.path) - .collect::>(), - ) - .await - .map_err(IndexerError::from)?, - ) - .select(file_path_just_id_materialized_path::select()) - .exec() - .await? + let db_delete_start = Instant::now(); + // TODO pass these uuids to sync system + let removed_count = remove_non_existing_file_paths(to_remove, &db).await?; + let db_delete_time = db_delete_start.elapsed(); + + let total_paths = &mut 0; + let to_walk_count = to_walk.len(); + + state.steps.extend( + walked + .chunks(BATCH_SIZE) .into_iter() - .map(|file_path| { - ( - location_path.join(&MaterializedPath::from(( - location_id, - &file_path.materialized_path, - ))), - Uuid::from_slice(&file_path.pub_id).unwrap(), - ) - }), + .enumerate() + .map(|(i, chunk)| { + let chunk_steps = chunk.collect::>(); + + *total_paths += chunk_steps.len() as u64; + + IndexerJobStepInput::Save(IndexerJobSaveStep { + chunk_idx: i, + walked: chunk_steps, + }) + }) + .chain(to_walk.into_iter().map(IndexerJobStepInput::Walk)), ); - // Removing all other file paths that are not in the filesystem anymore - let removed_paths = retain_file_paths_in_location( - location_id, - dirs_ids.values().copied().collect(), - maybe_parent_file_path, - &ctx.library.db, - ) - .await - .map_err(IndexerError::from)?; - - let mut new_paths = found_paths - .into_iter() - .filter_map(|entry| { - MaterializedPath::new( - location_id, - &state.init.location.path, - &entry.path, - entry.is_dir, - ) - .map_or_else( - |e| { - error!("Failed to create materialized path: {e}"); - None - }, - |materialized_path| { - (!dirs_ids.contains_key(&entry.path)).then(|| { - IndexerJobStepEntry { - materialized_path, - file_id: Uuid::new_v4(), // To be set later - parent_id: entry.path.parent().and_then(|parent_dir| { - /*************************************************************** - * If we're dealing with a new path which its parent already * - * exist, we fetch its parent id from our `dirs_ids` map * - **************************************************************/ - dirs_ids.get(parent_dir).copied() - }), - full_path: entry.path, - metadata: entry.metadata, - } - }) - }, - ) - }) - .collect::>(); - - new_paths.iter_mut().for_each(|entry| { - // If the `parent_id` is still none here, is because the parent of this entry is also - // a new one in the DB - if entry.parent_id.is_none() { - entry.parent_id = entry - .full_path - .parent() - .and_then(|parent_dir| dirs_ids.get(parent_dir).copied()); - } - - dirs_ids.insert(entry.full_path.clone(), entry.file_id); - }); - - let total_paths = new_paths.len(); + IndexerJobData::on_scan_progress( + &mut ctx, + vec![ScanProgress::Message(format!( + "Starting saving {total_paths} files or directories, \ + there still {to_walk_count} directories to index", + ))], + ); state.data = Some(IndexerJobData { indexed_path: to_walk_path, - db_write_start: Utc::now(), - scan_read_time: scan_start.elapsed(), - total_paths, - indexed_paths: 0, - removed_paths, + rules_by_kind, + db_write_time: db_delete_time, + scan_read_time, + total_paths: *total_paths, + indexed_count: 0, + removed_count, + total_save_steps: state.steps.len() as u64 - to_walk_count as u64, }); - state.steps = VecDeque::with_capacity(new_paths.len() / BATCH_SIZE); - - for (i, chunk) in new_paths - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - { - let chunk_steps = chunk.collect::>(); - IndexerJobData::on_scan_progress( - &mut ctx, - vec![ - ScanProgress::SavedChunks(i), - ScanProgress::Message(format!( - "Writing {} of {} to db", - i * chunk_steps.len(), - total_paths, - )), - ], - ); - - state.steps.push_back(chunk_steps); + if !errors.is_empty() { + Err(JobError::StepCompletedWithErrors( + errors.into_iter().map(|e| format!("{e}")).collect(), + )) + } else { + Ok(()) } - - Ok(()) } /// Process each chunk of entries in the indexer job, writing to the `file_path` table async fn execute_step( &self, - ctx: WorkerContext, + mut ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - execute_indexer_step(&state.init.location, &state.steps[0], ctx) - .await - .map(|indexed_paths| { - state - .data - .as_mut() - .expect("critical error: missing data on job state") - .indexed_paths = indexed_paths; - }) + let data = state + .data + .as_mut() + .expect("critical error: missing data on job state"); + + match &state.steps[0] { + IndexerJobStepInput::Save(step) => { + execute_indexer_save_step(&state.init.location, step, data, &mut ctx) + .await + .map(|(indexed_count, elapsed_time)| { + data.indexed_count += indexed_count; + data.db_write_time += elapsed_time; + })? + } + IndexerJobStepInput::Walk(to_walk_entry) => { + let location_id = state.init.location.id; + let location_path = Path::new(&state.init.location.path); + let db = Arc::clone(&ctx.library.db); + + let scan_start = Instant::now(); + + let WalkResult { + walked, + to_walk, + to_remove, + errors, + } = { + keep_walking( + to_walk_entry, + &data.rules_by_kind, + update_notifier_fn(BATCH_SIZE, &mut ctx), + file_paths_db_fetcher_fn!(&db), + to_remove_db_fetcher_fn!(location_id, location_path, &db), + iso_file_path_factory(location_id, location_path), + ) + .await? + }; + + data.scan_read_time += scan_start.elapsed(); + + let db_delete_time = Instant::now(); + // TODO pass these uuids to sync system + data.removed_count += remove_non_existing_file_paths(to_remove, &db).await?; + data.db_write_time += db_delete_time.elapsed(); + + let old_total = data.total_paths; + let old_steps_count = state.steps.len() as u64; + + state.steps.extend( + walked + .chunks(BATCH_SIZE) + .into_iter() + .enumerate() + .map(|(i, chunk)| { + let chunk_steps = chunk.collect::>(); + data.total_paths += chunk_steps.len() as u64; + + IndexerJobStepInput::Save(IndexerJobSaveStep { + chunk_idx: i, + walked: chunk_steps, + }) + }) + .chain(to_walk.into_iter().map(IndexerJobStepInput::Walk)), + ); + + IndexerJobData::on_scan_progress( + &mut ctx, + vec![ScanProgress::Message(format!( + "Scanned more {} files or directories; {} more directories to scan", + data.total_paths - old_total, + state.steps.len() as u64 - old_steps_count - data.total_paths + ))], + ); + + if !errors.is_empty() { + return Err(JobError::StepCompletedWithErrors( + errors.into_iter().map(|e| format!("{e}")).collect(), + )); + } + } + } + + Ok(()) } async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { diff --git a/core/src/location/indexer/mod.rs b/core/src/location/indexer/mod.rs index 2bf313100..0aa94db3b 100644 --- a/core/src/location/indexer/mod.rs +++ b/core/src/location/indexer/mod.rs @@ -1,31 +1,29 @@ use crate::{ invalidate_query, - job::{JobError, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, + job::{JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext}, library::Library, - prisma::file_path, + prisma::{file_path, PrismaClient}, sync, - util::db::uuid_to_bytes, + util::{db::uuid_to_bytes, error::FileIOError}, }; use std::{ + collections::HashMap, hash::{Hash, Hasher}, path::{Path, PathBuf}, time::Duration, }; -use chrono::{DateTime, Utc}; -use rmp_serde::{decode, encode}; use rspc::ErrorCode; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use serde_json::json; use thiserror::Error; -use tokio::io; +use tokio::time::Instant; use tracing::info; -use uuid::Uuid; use super::{ - file_path_helper::{FilePathError, FilePathMetadata, MaterializedPath}, - location_with_indexer_rules, + file_path_helper::{file_path_just_pub_id, FilePathError, IsolatedFilePathData}, + location_with_indexer_rules, LocationId, }; pub mod indexer_job; @@ -33,6 +31,9 @@ pub mod rules; pub mod shallow_indexer_job; mod walk; +use rules::IndexerRuleError; +use walk::WalkedEntry; + /// `IndexerJobInit` receives a `location::Data` object to be indexed /// and possibly a `sub_path` to be indexed. The `sub_path` is used when /// we want do index just a part of a location. @@ -56,26 +57,13 @@ impl Hash for IndexerJobInit { #[derive(Serialize, Deserialize)] pub struct IndexerJobData { indexed_path: PathBuf, - db_write_start: DateTime, + rules_by_kind: HashMap>, + db_write_time: Duration, scan_read_time: Duration, - total_paths: usize, - indexed_paths: i64, - removed_paths: i64, -} - -/// `IndexerJobStep` is a type alias, specifying that each step of the [`IndexerJob`] is a vector of -/// `IndexerJobStepEntry`. The size of this vector is given by the [`BATCH_SIZE`] constant. -pub type IndexerJobStep = Vec; - -/// `IndexerJobStepEntry` represents a single file to be indexed, given its metadata to be written -/// on the `file_path` table in the database -#[derive(Serialize, Deserialize)] -pub struct IndexerJobStepEntry { - full_path: PathBuf, - materialized_path: MaterializedPath<'static>, - file_id: Uuid, - parent_id: Option, - metadata: FilePathMetadata, + total_paths: u64, + total_save_steps: u64, + indexed_count: u64, + removed_count: u64, } impl IndexerJobData { @@ -93,6 +81,12 @@ impl IndexerJobData { } } +#[derive(Serialize, Deserialize, Debug)] +pub struct IndexerJobSaveStep { + chunk_idx: usize, + walked: Vec, +} + #[derive(Clone)] pub enum ScanProgress { ChunkCount(usize), @@ -104,99 +98,103 @@ pub enum ScanProgress { #[derive(Error, Debug)] pub enum IndexerError { // Not Found errors - #[error("Indexer rule not found: ")] + #[error("indexer rule not found: ")] IndexerRuleNotFound(i32), - - // User errors - #[error("Invalid indexer rule kind integer: {0}")] - InvalidRuleKindInt(i32), - #[error("Glob builder error: {0}")] - GlobBuilderError(#[from] globset::Error), + #[error("received sub path not in database: ", .0.display())] + SubPathNotFound(Box), // Internal Errors - #[error("Database error: {0}")] - DatabaseError(#[from] prisma_client_rust::QueryError), - #[error("I/O error: {0}")] - IOError(#[from] io::Error), - #[error("Indexer rule parameters json serialization error: {0}")] - RuleParametersSerdeJson(#[from] serde_json::Error), - #[error("Indexer rule parameters encode error: {0}")] - RuleParametersRMPEncode(#[from] encode::Error), - #[error("Indexer rule parameters decode error: {0}")] - RuleParametersRMPDecode(#[from] decode::Error), - #[error("File path related error (error: {0})")] - FilePathError(#[from] FilePathError), + #[error("database error")] + Database(#[from] prisma_client_rust::QueryError), + #[error(transparent)] + FileIO(#[from] FileIOError), + #[error(transparent)] + FilePath(#[from] FilePathError), + + // Mixed errors + #[error(transparent)] + IndexerRules(#[from] IndexerRuleError), } impl From for rspc::Error { fn from(err: IndexerError) -> Self { match err { - IndexerError::IndexerRuleNotFound(_) => { + IndexerError::IndexerRuleNotFound(_) | IndexerError::SubPathNotFound(_) => { rspc::Error::with_cause(ErrorCode::NotFound, err.to_string(), err) } - IndexerError::InvalidRuleKindInt(_) | IndexerError::GlobBuilderError(_) => { - rspc::Error::with_cause(ErrorCode::BadRequest, err.to_string(), err) - } + IndexerError::IndexerRules(rule_err) => rule_err.into(), _ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err), } } } -async fn execute_indexer_step( +async fn execute_indexer_save_step( location: &location_with_indexer_rules::Data, - step: &[IndexerJobStepEntry], - ctx: WorkerContext, -) -> Result { + save_step: &IndexerJobSaveStep, + data: &IndexerJobData, + ctx: &mut WorkerContext, +) -> Result<(u64, Duration), IndexerError> { + let start_time = Instant::now(); + + IndexerJobData::on_scan_progress( + ctx, + vec![ + ScanProgress::SavedChunks(save_step.chunk_idx), + ScanProgress::Message(format!( + "Writing {}/{} to db", + save_step.chunk_idx, data.total_save_steps + )), + ], + ); let Library { sync, db, .. } = &ctx.library; - let (sync_stuff, paths): (Vec<_>, Vec<_>) = step + let (sync_stuff, paths): (Vec<_>, Vec<_>) = save_step + .walked .iter() .map(|entry| { - let MaterializedPath { + let IsolatedFilePathData { materialized_path, is_dir, name, extension, .. - } = entry.materialized_path.clone(); + } = &entry.iso_file_path; use file_path::*; ( sync.unique_shared_create( sync::file_path::SyncId { - pub_id: uuid_to_bytes(entry.file_id), + pub_id: uuid_to_bytes(entry.pub_id), }, [ - (materialized_path::NAME, json!(materialized_path.clone())), - (name::NAME, json!(name.clone())), - (is_dir::NAME, json!(is_dir)), - (extension::NAME, json!(extension.clone())), + (materialized_path::NAME, json!(materialized_path)), + (name::NAME, json!(name)), + (is_dir::NAME, json!(*is_dir)), + (extension::NAME, json!(extension)), ( size_in_bytes::NAME, json!(entry.metadata.size_in_bytes.to_string()), ), (inode::NAME, json!(entry.metadata.inode.to_le_bytes())), (device::NAME, json!(entry.metadata.device.to_le_bytes())), - (parent_id::NAME, json!(entry.parent_id)), (date_created::NAME, json!(entry.metadata.created_at)), (date_modified::NAME, json!(entry.metadata.modified_at)), ], ), file_path::create_unchecked( - uuid_to_bytes(entry.file_id), + uuid_to_bytes(entry.pub_id), location.id, - materialized_path.into_owned(), - name.into_owned(), - extension.into_owned(), + materialized_path.to_string(), + name.to_string(), + extension.to_string(), entry.metadata.inode.to_le_bytes().into(), entry.metadata.device.to_le_bytes().into(), vec![ - is_dir::set(is_dir), + is_dir::set(*is_dir), size_in_bytes::set(entry.metadata.size_in_bytes.to_string()), - parent_id::set(entry.parent_id.map(uuid_to_bytes)), date_created::set(entry.metadata.created_at.into()), date_modified::set(entry.metadata.modified_at.into()), ], @@ -217,17 +215,18 @@ async fn execute_indexer_step( info!("Inserted {count} records"); - Ok(count) + Ok((count as u64, start_time.elapsed())) } -fn finalize_indexer( +fn finalize_indexer( location_path: impl AsRef, state: &JobState, ctx: WorkerContext, ) -> JobResult where - SJob: StatefulJob, + SJob: StatefulJob, Init: Serialize + DeserializeOwned + Send + Sync + Hash, + Step: Serialize + DeserializeOwned + Send + Sync, { let data = state .data @@ -240,15 +239,95 @@ where location_path.as_ref().display(), data.scan_read_time, data.total_paths, - data.indexed_paths, - (Utc::now() - data.db_write_start) - .to_std() - .expect("critical error: non-negative duration"), + data.indexed_count, + data.db_write_time, ); - if data.indexed_paths > 0 || data.removed_paths > 0 { + if data.indexed_count > 0 || data.removed_count > 0 { invalidate_query!(ctx.library, "search.paths"); } Ok(Some(serde_json::to_value(state)?)) } + +fn update_notifier_fn(batch_size: usize, ctx: &mut WorkerContext) -> impl FnMut(&Path, usize) + '_ { + move |path, total_entries| { + IndexerJobData::on_scan_progress( + ctx, + vec![ + ScanProgress::Message(format!("Scanning {}", path.display())), + ScanProgress::ChunkCount(total_entries / batch_size), + ], + ); + } +} + +fn iso_file_path_factory( + location_id: LocationId, + location_path: &Path, +) -> impl Fn(&Path, bool) -> Result, IndexerError> + '_ { + move |path, is_dir| { + IsolatedFilePathData::new(location_id, location_path, path, is_dir).map_err(Into::into) + } +} + +async fn remove_non_existing_file_paths( + to_remove: impl IntoIterator, + db: &PrismaClient, +) -> Result { + db.file_path() + .delete_many(vec![file_path::pub_id::in_vec( + to_remove.into_iter().map(|data| data.pub_id).collect(), + )]) + .exec() + .await + .map(|count| count as u64) + .map_err(Into::into) +} + +// TODO: Change this macro to a fn when we're able to return +// `impl Fn(Vec) -> impl Future, IndexerError>>` +// Maybe when TAITs arrive +#[macro_export] +macro_rules! file_paths_db_fetcher_fn { + ($db:expr) => {{ + |found_paths| async { + $db.file_path() + .find_many(found_paths) + .select($crate::location::file_path_helper::file_path_to_isolate::select()) + .exec() + .await + .map_err(Into::into) + } + }}; +} + +// TODO: Change this macro to a fn when we're able to return +// `impl Fn(&Path, Vec) -> impl Future, IndexerError>>` +// Maybe when TAITs arrive +// FIXME: (fogodev) I was receiving this error here https://github.com/rust-lang/rust/issues/74497 +#[macro_export] +macro_rules! to_remove_db_fetcher_fn { + ($location_id:expr, $location_path:expr, $db:expr) => {{ + |iso_file_path, unique_location_id_materialized_path_name_extension_params| async { + let iso_file_path: $crate::location::file_path_helper::IsolatedFilePathData<'static> = + iso_file_path; + $db.file_path() + .find_many(vec![ + $crate::prisma::file_path::location_id::equals($location_id), + $crate::prisma::file_path::materialized_path::equals( + iso_file_path + .materialized_path_for_children() + .expect("the received isolated file path must be from a directory"), + ), + ::prisma_client_rust::operator::not( + unique_location_id_materialized_path_name_extension_params, + ), + ]) + .select($crate::location::file_path_helper::file_path_just_pub_id::select()) + .exec() + .await + .map_err(Into::into) + } + }}; +} diff --git a/core/src/location/indexer/rules.rs b/core/src/location/indexer/rules.rs index 52ad74e55..b71f1bfaa 100644 --- a/core/src/location/indexer/rules.rs +++ b/core/src/location/indexer/rules.rs @@ -1,18 +1,62 @@ use crate::{ library::Library, - location::indexer::IndexerError, + location::location_with_indexer_rules, prisma::{indexer_rule, PrismaClient}, + util::error::{FileIOError, NonUtf8PathError}, }; use chrono::{DateTime, Utc}; use globset::{Glob, GlobSet, GlobSetBuilder}; -use rmp_serde; -use serde::{Deserialize, Serialize}; +use rmp_serde::{self, decode, encode}; +use rspc::ErrorCode; +use serde::{de, ser, Deserialize, Serialize}; use specta::Type; -use std::{collections::HashSet, path::Path}; +use std::{ + collections::{HashMap, HashSet}, + marker::PhantomData, + path::Path, +}; +use thiserror::Error; use tokio::fs; use tracing::debug; +#[derive(Error, Debug)] +pub enum IndexerRuleError { + // User errors + #[error("invalid indexer rule kind integer: {0}")] + InvalidRuleKindInt(i32), + #[error("glob builder error")] + Glob(#[from] globset::Error), + #[error(transparent)] + NonUtf8Path(#[from] NonUtf8PathError), + + // Internal Errors + #[error("indexer rule parameters encode error")] + RuleParametersRMPEncode(#[from] encode::Error), + #[error("indexer rule parameters decode error")] + RuleParametersRMPDecode(#[from] decode::Error), + #[error("accept by its children file I/O error")] + AcceptByItsChildrenFileIO(FileIOError), + #[error("reject by its children file I/O error")] + RejectByItsChildrenFileIO(FileIOError), + #[error("database error")] + Database(#[from] prisma_client_rust::QueryError), +} + +impl From for rspc::Error { + fn from(err: IndexerRuleError) -> Self { + match err { + IndexerRuleError::InvalidRuleKindInt(_) + | IndexerRuleError::Glob(_) + | IndexerRuleError::NonUtf8Path(_) => { + rspc::Error::with_cause(ErrorCode::BadRequest, err.to_string(), err) + } + + _ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err), + } + } +} + /// `IndexerRuleCreateArgs` is the argument received from the client using rspc to create a new indexer rule. /// Note that `parameters` field **MUST** be a JSON object serialized to bytes. /// @@ -33,7 +77,7 @@ impl IndexerRuleCreateArgs { pub async fn create( self, library: &Library, - ) -> Result, IndexerError> { + ) -> Result, IndexerRuleError> { debug!( "{} a new indexer rule (name = {}, params = {:?})", if self.dry_run { @@ -83,8 +127,15 @@ pub enum RuleKind { RejectIfChildrenDirectoriesArePresent = 3, } +impl RuleKind { + pub const fn variant_count() -> usize { + // TODO: Use https://doc.rust-lang.org/std/mem/fn.variant_count.html if it ever gets stabilized + 4 + } +} + impl TryFrom for RuleKind { - type Error = IndexerError; + type Error = IndexerRuleError; fn try_from(value: i32) -> Result { let s = match value { @@ -92,7 +143,7 @@ impl TryFrom for RuleKind { 1 => Self::RejectFilesByGlob, 2 => Self::AcceptIfChildrenDirectoriesArePresent, 3 => Self::RejectIfChildrenDirectoriesArePresent, - _ => return Err(IndexerError::InvalidRuleKindInt(value)), + _ => return Err(Self::Error::InvalidRuleKindInt(value)), }; Ok(s) @@ -111,14 +162,257 @@ pub enum ParametersPerKind { // TODO: Add an indexer rule that filter files based on their extended attributes // https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants // https://en.wikipedia.org/wiki/Extended_file_attributes - AcceptFilesByGlob(Vec), - RejectFilesByGlob(Vec), + AcceptFilesByGlob(Vec, GlobSet), + RejectFilesByGlob(Vec, GlobSet), AcceptIfChildrenDirectoriesArePresent(HashSet), RejectIfChildrenDirectoriesArePresent(HashSet), } impl ParametersPerKind { - async fn apply(&self, source: impl AsRef) -> Result { + fn new_files_by_globs_str_and_kind( + globs_str: impl IntoIterator>, + kind_fn: impl Fn(Vec, GlobSet) -> Self, + ) -> Result { + globs_str + .into_iter() + .map(|s| s.as_ref().parse::()) + .collect::, _>>() + .and_then(|globs| { + globs + .iter() + .cloned() + .fold(&mut GlobSetBuilder::new(), |builder, glob| { + builder.add(glob) + }) + .build() + .map(move |glob_set| kind_fn(globs, glob_set)) + .map_err(Into::into) + }) + .map_err(Into::into) + } + + pub fn new_accept_files_by_globs_str( + globs_str: impl IntoIterator>, + ) -> Result { + Self::new_files_by_globs_str_and_kind(globs_str, Self::AcceptFilesByGlob) + } + + pub fn new_reject_files_by_glob( + globs_str: impl IntoIterator>, + ) -> Result { + Self::new_files_by_globs_str_and_kind(globs_str, Self::RejectFilesByGlob) + } +} + +/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on +/// serialization +impl Serialize for ParametersPerKind { + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + match *self { + ParametersPerKind::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer + .serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs), + ParametersPerKind::RejectFilesByGlob(ref globs, ref _glob_set) => serializer + .serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs), + ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer + .serialize_newtype_variant( + "ParametersPerKind", + 2, + "AcceptIfChildrenDirectoriesArePresent", + children, + ), + ParametersPerKind::RejectIfChildrenDirectoriesArePresent(ref children) => serializer + .serialize_newtype_variant( + "ParametersPerKind", + 3, + "RejectIfChildrenDirectoriesArePresent", + children, + ), + } + } +} + +impl<'de> Deserialize<'de> for ParametersPerKind { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + const VARIANTS: &[&str] = &[ + "AcceptFilesByGlob", + "RejectFilesByGlob", + "AcceptIfChildrenDirectoriesArePresent", + "RejectIfChildrenDirectoriesArePresent", + ]; + + enum Fields { + AcceptFilesByGlob, + RejectFilesByGlob, + AcceptIfChildrenDirectoriesArePresent, + RejectIfChildrenDirectoriesArePresent, + } + + struct FieldsVisitor; + + impl<'de> de::Visitor<'de> for FieldsVisitor { + type Value = Fields; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str( + "`AcceptFilesByGlob` \ + or `RejectFilesByGlob` \ + or `AcceptIfChildrenDirectoriesArePresent` \ + or `RejectIfChildrenDirectoriesArePresent`", + ) + } + + fn visit_u64(self, value: u64) -> Result + where + E: de::Error, + { + match value { + 0 => Ok(Fields::AcceptFilesByGlob), + 1 => Ok(Fields::RejectFilesByGlob), + 2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent), + 3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent), + _ => Err(de::Error::invalid_value( + de::Unexpected::Unsigned(value), + &"variant index 0 <= i < 3", + )), + } + } + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + match value { + "AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob), + "RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob), + "AcceptIfChildrenDirectoriesArePresent" => { + Ok(Fields::AcceptIfChildrenDirectoriesArePresent) + } + "RejectIfChildrenDirectoriesArePresent" => { + Ok(Fields::RejectIfChildrenDirectoriesArePresent) + } + _ => Err(de::Error::unknown_variant(value, VARIANTS)), + } + } + fn visit_bytes(self, bytes: &[u8]) -> Result + where + E: de::Error, + { + match bytes { + b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob), + b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob), + b"AcceptIfChildrenDirectoriesArePresent" => { + Ok(Fields::AcceptIfChildrenDirectoriesArePresent) + } + b"RejectIfChildrenDirectoriesArePresent" => { + Ok(Fields::RejectIfChildrenDirectoriesArePresent) + } + _ => Err(de::Error::unknown_variant( + &String::from_utf8_lossy(bytes), + VARIANTS, + )), + } + } + } + + impl<'de> Deserialize<'de> for Fields { + #[inline] + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + deserializer.deserialize_identifier(FieldsVisitor) + } + } + + struct ParametersPerKindVisitor<'de> { + marker: PhantomData, + lifetime: PhantomData<&'de ()>, + } + + impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> { + type Value = ParametersPerKind; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("enum ParametersPerKind") + } + + fn visit_enum(self, data: PPK) -> Result + where + PPK: de::EnumAccess<'de>, + { + use de::Error; + + de::EnumAccess::variant(data).and_then(|value| match value { + (Fields::AcceptFilesByGlob, accept_files_by_glob) => { + de::VariantAccess::newtype_variant::>(accept_files_by_glob) + .and_then(|globs| { + globs + .iter() + .fold(&mut GlobSetBuilder::new(), |builder, glob| { + builder.add(glob.to_owned()) + }) + .build() + .map_or_else( + |e| Err(PPK::Error::custom(e)), + |glob_set| { + Ok(Self::Value::AcceptFilesByGlob(globs, glob_set)) + }, + ) + }) + } + (Fields::RejectFilesByGlob, reject_files_by_glob) => { + de::VariantAccess::newtype_variant::>(reject_files_by_glob) + .and_then(|globs| { + globs + .iter() + .fold(&mut GlobSetBuilder::new(), |builder, glob| { + builder.add(glob.to_owned()) + }) + .build() + .map_or_else( + |e| Err(PPK::Error::custom(e)), + |glob_set| { + Ok(Self::Value::RejectFilesByGlob(globs, glob_set)) + }, + ) + }) + } + ( + Fields::AcceptIfChildrenDirectoriesArePresent, + accept_if_children_directories_are_present, + ) => de::VariantAccess::newtype_variant::>( + accept_if_children_directories_are_present, + ) + .map(Self::Value::AcceptIfChildrenDirectoriesArePresent), + ( + Fields::RejectIfChildrenDirectoriesArePresent, + reject_if_children_directories_are_present, + ) => de::VariantAccess::newtype_variant::>( + reject_if_children_directories_are_present, + ) + .map(Self::Value::RejectIfChildrenDirectoriesArePresent), + }) + } + } + + deserializer.deserialize_enum( + "ParametersPerKind", + VARIANTS, + ParametersPerKindVisitor { + marker: PhantomData::, + lifetime: PhantomData, + }, + ) + } +} + +impl ParametersPerKind { + async fn apply(&self, source: impl AsRef) -> Result { match self { ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(children) => { accept_dir_for_its_children(source, children).await @@ -127,25 +421,17 @@ impl ParametersPerKind { reject_dir_for_its_children(source, children).await } - ParametersPerKind::AcceptFilesByGlob(glob) => accept_by_glob(source, glob), - ParametersPerKind::RejectFilesByGlob(glob) => reject_by_glob(source, glob), - } - } - - fn serialize(self) -> Result, IndexerError> { - match self { - Self::AcceptFilesByGlob(glob) | Self::RejectFilesByGlob(glob) => { - rmp_serde::to_vec_named(&glob).map_err(Into::into) + ParametersPerKind::AcceptFilesByGlob(_globs, accept_glob_set) => { + Ok(accept_by_glob(source, accept_glob_set)) } - Self::AcceptIfChildrenDirectoriesArePresent(children) - | Self::RejectIfChildrenDirectoriesArePresent(children) => { - rmp_serde::to_vec(&children.into_iter().collect::>()).map_err(Into::into) + ParametersPerKind::RejectFilesByGlob(_globs, reject_glob_set) => { + Ok(reject_by_glob(source, reject_glob_set)) } } } } -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct IndexerRule { pub id: Option, pub kind: RuleKind, @@ -169,11 +455,11 @@ impl IndexerRule { } } - pub async fn apply(&self, source: impl AsRef) -> Result { + pub async fn apply(&self, source: impl AsRef) -> Result { self.parameters.apply(source).await } - pub async fn save(self, client: &PrismaClient) -> Result<(), IndexerError> { + pub async fn save(self, client: &PrismaClient) -> Result<(), IndexerRuleError> { if let Some(id) = self.id { client .indexer_rule() @@ -182,7 +468,7 @@ impl IndexerRule { indexer_rule::create( self.kind as i32, self.name, - self.parameters.serialize()?, + rmp_serde::to_vec_named(&self.parameters)?, vec![indexer_rule::default::set(self.default)], ), vec![indexer_rule::date_modified::set(Utc::now().into())], @@ -195,7 +481,7 @@ impl IndexerRule { .create( self.kind as i32, self.name, - self.parameters.serialize()?, + rmp_serde::to_vec_named(&self.parameters)?, vec![indexer_rule::default::set(self.default)], ) .exec() @@ -207,7 +493,7 @@ impl IndexerRule { } impl TryFrom<&indexer_rule::Data> for IndexerRule { - type Error = IndexerError; + type Error = IndexerRuleError; fn try_from(data: &indexer_rule::Data) -> Result { let kind = RuleKind::try_from(data.kind)?; @@ -217,27 +503,7 @@ impl TryFrom<&indexer_rule::Data> for IndexerRule { kind, name: data.name.clone(), default: data.default, - parameters: match kind { - RuleKind::AcceptFilesByGlob | RuleKind::RejectFilesByGlob => { - let glob_str = rmp_serde::from_slice(&data.parameters)?; - if matches!(kind, RuleKind::AcceptFilesByGlob) { - ParametersPerKind::AcceptFilesByGlob(glob_str) - } else { - ParametersPerKind::RejectFilesByGlob(glob_str) - } - } - RuleKind::AcceptIfChildrenDirectoriesArePresent - | RuleKind::RejectIfChildrenDirectoriesArePresent => { - let childrens = rmp_serde::from_slice::>(&data.parameters)? - .into_iter() - .collect(); - if matches!(kind, RuleKind::AcceptIfChildrenDirectoriesArePresent) { - ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(childrens) - } else { - ParametersPerKind::RejectIfChildrenDirectoriesArePresent(childrens) - } - } - }, + parameters: rmp_serde::from_slice(&data.parameters)?, date_created: data.date_created.into(), date_modified: data.date_modified.into(), }) @@ -245,43 +511,46 @@ impl TryFrom<&indexer_rule::Data> for IndexerRule { } impl TryFrom for IndexerRule { - type Error = IndexerError; + type Error = IndexerRuleError; fn try_from(data: indexer_rule::Data) -> Result { Self::try_from(&data) } } -// TODO: memoize this -fn globset_from_globs(globs: &[Glob]) -> Result { - globs - .iter() - .fold(&mut GlobSetBuilder::new(), |builder, glob| { - builder.add(glob.to_owned()) - }) - .build() +fn accept_by_glob(source: impl AsRef, accept_glob_set: &GlobSet) -> bool { + accept_glob_set.is_match(source.as_ref()) } -fn accept_by_glob(source: impl AsRef, globs: &[Glob]) -> Result { - globset_from_globs(globs) - .map(|glob_set| glob_set.is_match(source.as_ref())) - .map_err(IndexerError::GlobBuilderError) -} - -fn reject_by_glob(source: impl AsRef, reject_globs: &[Glob]) -> Result { - accept_by_glob(source.as_ref(), reject_globs).map(|accept| !accept) +fn reject_by_glob(source: impl AsRef, reject_glob_set: &GlobSet) -> bool { + !accept_by_glob(source.as_ref(), reject_glob_set) } async fn accept_dir_for_its_children( source: impl AsRef, children: &HashSet, -) -> Result { +) -> Result { let source = source.as_ref(); - let mut read_dir = fs::read_dir(source).await?; - while let Some(entry) = read_dir.next_entry().await? { - if entry.metadata().await?.is_dir() - && children.contains(entry.file_name().to_str().expect("Found non-UTF-8 path")) - { + let mut read_dir = fs::read_dir(source) + .await + .map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?; + while let Some(entry) = read_dir + .next_entry() + .await + .map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))? + { + if entry + .metadata() + .await + .map_err(|e| { + IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))) + })? + .is_dir() && children.contains( + entry + .file_name() + .to_str() + .ok_or_else(|| NonUtf8PathError(entry.path().into()))?, + ) { return Ok(true); } } @@ -292,13 +561,28 @@ async fn accept_dir_for_its_children( async fn reject_dir_for_its_children( source: impl AsRef, children: &HashSet, -) -> Result { +) -> Result { let source = source.as_ref(); - let mut read_dir = fs::read_dir(source).await?; - while let Some(entry) = read_dir.next_entry().await? { - if entry.metadata().await?.is_dir() - && children.contains(entry.file_name().to_str().expect("Found non-UTF-8 path")) - { + let mut read_dir = fs::read_dir(source) + .await + .map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?; + while let Some(entry) = read_dir + .next_entry() + .await + .map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))? + { + if entry + .metadata() + .await + .map_err(|e| { + IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))) + })? + .is_dir() && children.contains( + entry + .file_name() + .to_str() + .ok_or_else(|| NonUtf8PathError(entry.path().into()))?, + ) { return Ok(false); } } @@ -306,6 +590,20 @@ async fn reject_dir_for_its_children( Ok(true) } +pub fn aggregate_rules_by_kind<'r>( + mut rules: impl Iterator, +) -> Result>, IndexerRuleError> { + rules.try_fold( + HashMap::<_, Vec<_>>::with_capacity(RuleKind::variant_count()), + |mut rules_by_kind, location_rule| { + IndexerRule::try_from(&location_rule.indexer_rule).map(|rule| { + rules_by_kind.entry(rule.kind).or_default().push(rule); + rules_by_kind + }) + }, + ) +} + #[cfg(test)] mod tests { use super::*; @@ -324,7 +622,13 @@ mod tests { RuleKind::RejectFilesByGlob, "ignore hidden files".to_string(), false, - ParametersPerKind::RejectFilesByGlob(vec![Glob::new("**/.*").unwrap()]), + ParametersPerKind::RejectFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("**/.*").unwrap()) + .build() + .unwrap(), + ), ); assert!(!rule.apply(hidden).await.unwrap()); assert!(rule.apply(normal).await.unwrap()); @@ -344,9 +648,13 @@ mod tests { RuleKind::RejectFilesByGlob, "ignore build directory".to_string(), false, - ParametersPerKind::RejectFilesByGlob(vec![ - Glob::new("{**/target/*,**/target}").unwrap() - ]), + ParametersPerKind::RejectFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{**/target/*,**/target}").unwrap()) + .build() + .unwrap(), + ), ); assert!(rule.apply(project_file).await.unwrap()); @@ -370,7 +678,13 @@ mod tests { RuleKind::AcceptFilesByGlob, "only photos".to_string(), false, - ParametersPerKind::AcceptFilesByGlob(vec![Glob::new("*.{jpg,png,jpeg}").unwrap()]), + ParametersPerKind::AcceptFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("*.{jpg,png,jpeg}").unwrap()) + .build() + .unwrap(), + ), ); assert!(!rule.apply(text).await.unwrap()); assert!(rule.apply(png).await.unwrap()); @@ -443,4 +757,65 @@ mod tests { assert!(!rule.apply(project2).await.unwrap()); assert!(rule.apply(not_project).await.unwrap()); } + + impl PartialEq for ParametersPerKind { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + ( + ParametersPerKind::AcceptFilesByGlob(self_globs, _), + ParametersPerKind::AcceptFilesByGlob(other_globs, _), + ) => self_globs == other_globs, + ( + ParametersPerKind::RejectFilesByGlob(self_globs, _), + ParametersPerKind::RejectFilesByGlob(other_globs, _), + ) => self_globs == other_globs, + ( + ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(self_childrens), + ParametersPerKind::AcceptIfChildrenDirectoriesArePresent(other_childrens), + ) => self_childrens == other_childrens, + ( + ParametersPerKind::RejectIfChildrenDirectoriesArePresent(self_childrens), + ParametersPerKind::RejectIfChildrenDirectoriesArePresent(other_childrens), + ) => self_childrens == other_childrens, + _ => false, + } + } + } + + impl Eq for ParametersPerKind {} + + impl PartialEq for IndexerRule { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + && self.kind == other.kind + && self.name == other.name + && self.default == other.default + && self.parameters == other.parameters + && self.date_created == other.date_created + && self.date_modified == other.date_modified + } + } + + impl Eq for IndexerRule {} + + #[test] + fn serde_smoke_test() { + let actual = IndexerRule::new( + RuleKind::RejectFilesByGlob, + "No Hidden".to_string(), + true, + ParametersPerKind::RejectFilesByGlob( + vec![Glob::new("**/.*").unwrap()], + Glob::new("**/.*") + .and_then(|glob| GlobSetBuilder::new().add(glob).build()) + .unwrap(), + ), + ); + + let expected = + rmp_serde::from_slice::(&rmp_serde::to_vec_named(&actual).unwrap()) + .unwrap(); + + assert_eq!(actual, expected); + } } diff --git a/core/src/location/indexer/shallow_indexer_job.rs b/core/src/location/indexer/shallow_indexer_job.rs index cbcddd3f9..5b0b61c1f 100644 --- a/core/src/location/indexer/shallow_indexer_job.rs +++ b/core/src/location/indexer/shallow_indexer_job.rs @@ -1,32 +1,28 @@ use crate::{ + file_paths_db_fetcher_fn, job::{JobError, JobInitData, JobResult, JobState, StatefulJob, WorkerContext}, location::file_path_helper::{ - ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_just_id_materialized_path, filter_existing_file_path_params, - filter_file_paths_by_many_full_path_params, retain_file_paths_in_location, - MaterializedPath, + check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + IsolatedFilePathData, }, - prisma::location, + to_remove_db_fetcher_fn, }; use std::{ - collections::{HashMap, HashSet}, hash::{Hash, Hasher}, path::{Path, PathBuf}, + sync::Arc, }; -use chrono::Utc; use itertools::Itertools; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::error; -use uuid::Uuid; use super::{ - execute_indexer_step, finalize_indexer, location_with_indexer_rules, - rules::{IndexerRule, RuleKind}, - walk::walk_single_dir, - IndexerError, IndexerJobData, IndexerJobStep, IndexerJobStepEntry, ScanProgress, + execute_indexer_save_step, finalize_indexer, iso_file_path_factory, + location_with_indexer_rules, remove_non_existing_file_paths, rules::aggregate_rules_by_kind, + update_notifier_fn, walk::walk_single_dir, IndexerError, IndexerJobData, IndexerJobSaveStep, + ScanProgress, }; /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. @@ -61,7 +57,7 @@ impl JobInitData for ShallowIndexerJobInit { impl StatefulJob for ShallowIndexerJob { type Init = ShallowIndexerJobInit; type Data = IndexerJobData; - type Step = IndexerJobStep; + type Step = IndexerJobSaveStep; const NAME: &'static str = "shallow_indexer"; const IS_BACKGROUND: bool = true; @@ -79,21 +75,12 @@ impl StatefulJob for ShallowIndexerJob { let location_id = state.init.location.id; let location_path = Path::new(&state.init.location.path); - let db = ctx.library.db.clone(); + let db = Arc::clone(&ctx.library.db); - let mut indexer_rules_by_kind: HashMap> = - HashMap::with_capacity(state.init.location.indexer_rules.len()); + let rules_by_kind = aggregate_rules_by_kind(state.init.location.indexer_rules.iter()) + .map_err(IndexerError::from)?; - for location_rule in &state.init.location.indexer_rules { - let indexer_rule = IndexerRule::try_from(&location_rule.indexer_rule)?; - - indexer_rules_by_kind - .entry(indexer_rule.kind) - .or_default() - .push(indexer_rule); - } - - let (to_walk_path, parent_file_path) = if state.init.sub_path != Path::new("") { + let (add_root, to_walk_path) = if state.init.sub_path != Path::new("") { let full_path = ensure_sub_path_is_in_location(location_path, &state.init.sub_path) .await .map_err(IndexerError::from)?; @@ -101,171 +88,105 @@ impl StatefulJob for ShallowIndexerJob { .await .map_err(IndexerError::from)?; - let materialized_path = - MaterializedPath::new(location_id, location_path, &full_path, true) - .map_err(IndexerError::from)?; - ( + !check_file_path_exists::( + &IsolatedFilePathData::new(location_id, location_path, &full_path, true) + .map_err(IndexerError::from)?, + &db, + ) + .await?, full_path, - db.file_path() - .find_first(filter_existing_file_path_params(&materialized_path)) - .select(file_path_just_id_materialized_path::select()) - .exec() - .await - .map_err(IndexerError::from)? - .expect("Sub path should already exist in the database"), ) } else { - ( - location_path.to_path_buf(), - db.file_path() - .find_first(filter_existing_file_path_params( - &MaterializedPath::new(location_id, location_path, location_path, true) - .map_err(IndexerError::from)?, - )) - .select(file_path_just_id_materialized_path::select()) - .exec() - .await - .map_err(IndexerError::from)? - .expect("Location root path should already exist in the database"), - ) + (false, location_path.to_path_buf()) }; let scan_start = Instant::now(); - let found_paths = { - let ctx = &mut ctx; // Borrow outside of closure so it's not moved + let (walked, to_remove, errors) = { + let ctx = &mut ctx; walk_single_dir( &to_walk_path, - &indexer_rules_by_kind, - |path, total_entries| { - IndexerJobData::on_scan_progress( - ctx, - vec![ - ScanProgress::Message(format!("Scanning {}", path.display())), - ScanProgress::ChunkCount(total_entries / BATCH_SIZE), - ], - ); - }, + &rules_by_kind, + update_notifier_fn(BATCH_SIZE, ctx), + file_paths_db_fetcher_fn!(&db), + to_remove_db_fetcher_fn!(location_id, location_path, &db), + iso_file_path_factory(location_id, location_path), + add_root, ) .await? }; - let (already_existing_file_paths, mut to_retain): (HashSet<_>, Vec<_>) = db - .file_path() - .find_many( - filter_file_paths_by_many_full_path_params( - &location::Data::from(&state.init.location), - &found_paths - .iter() - .map(|entry| &entry.path) - .collect::>(), - ) - .await - .map_err(IndexerError::from)?, - ) - .select(file_path_just_id_materialized_path::select()) - .exec() - .await? - .into_iter() - .map(|file_path| { - ( - file_path.materialized_path, - Uuid::from_slice(&file_path.pub_id).unwrap(), - ) - }) - .unzip(); + let db_delete_start = Instant::now(); + // TODO pass these uuids to sync system + let removed_count = remove_non_existing_file_paths(to_remove, &db).await?; + let db_delete_time = db_delete_start.elapsed(); - let parent_pub_id = Uuid::from_slice(&parent_file_path.pub_id).unwrap(); + let total_paths = &mut 0; - // Adding our parent path id - to_retain.push(parent_pub_id); + state.steps.extend( + walked + .chunks(BATCH_SIZE) + .into_iter() + .enumerate() + .map(|(i, chunk)| { + let chunk_steps = chunk.collect::>(); - // Removing all other file paths that are not in the filesystem anymore - let removed_paths = - retain_file_paths_in_location(location_id, to_retain, Some(parent_file_path), &db) - .await - .map_err(IndexerError::from)?; + *total_paths += chunk_steps.len() as u64; + + IndexerJobSaveStep { + chunk_idx: i, + walked: chunk_steps, + } + }), + ); ctx.library.orphan_remover.invoke().await; - // Filter out paths that are already in the databases - let new_paths = found_paths - .into_iter() - .filter_map(|entry| { - MaterializedPath::new(location_id, location_path, &entry.path, entry.is_dir) - .map_or_else( - |e| { - error!("Failed to create materialized path: {e}"); - None - }, - |materialized_path| { - (!already_existing_file_paths - .contains::(materialized_path.as_ref())) - .then_some(IndexerJobStepEntry { - full_path: entry.path, - materialized_path, - file_id: Uuid::new_v4(), - parent_id: Some(parent_pub_id), - metadata: entry.metadata, - }) - }, - ) - }) - // Sadly we have to collect here to be able to check the length so we can set - // the max file path id later - .collect::>(); - - let total_paths = new_paths.len(); + IndexerJobData::on_scan_progress( + &mut ctx, + vec![ScanProgress::Message(format!( + "Saving {total_paths} files or directories" + ))], + ); state.data = Some(IndexerJobData { indexed_path: to_walk_path, - db_write_start: Utc::now(), + rules_by_kind, + db_write_time: db_delete_time, scan_read_time: scan_start.elapsed(), - total_paths, - indexed_paths: 0, - removed_paths, + total_paths: *total_paths, + indexed_count: 0, + removed_count, + total_save_steps: state.steps.len() as u64, }); - state.steps = new_paths - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - .map(|(i, chunk)| { - let chunk_steps = chunk.collect::>(); - IndexerJobData::on_scan_progress( - &mut ctx, - vec![ - ScanProgress::SavedChunks(i), - ScanProgress::Message(format!( - "Writing {} of {} to db", - i * chunk_steps.len(), - total_paths, - )), - ], - ); - chunk_steps - }) - .collect(); - - Ok(()) + if !errors.is_empty() { + Err(JobError::StepCompletedWithErrors( + errors.into_iter().map(|e| format!("{e}")).collect(), + )) + } else { + Ok(()) + } } /// Process each chunk of entries in the indexer job, writing to the `file_path` table async fn execute_step( &self, - ctx: WorkerContext, + mut ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - execute_indexer_step(&state.init.location, &state.steps[0], ctx) + let data = state + .data + .as_mut() + .expect("critical error: missing data on job state"); + + execute_indexer_save_step(&state.init.location, &state.steps[0], data, &mut ctx) .await - .map(|indexed_paths| { - state - .data - .as_mut() - .expect("critical error: missing data on job state") - .indexed_paths = indexed_paths; + .map(|(indexed_paths, elapsed_time)| { + data.indexed_count += indexed_paths; + data.db_write_time += elapsed_time; }) + .map_err(Into::into) } /// Logs some metadata about the indexer job diff --git a/core/src/location/indexer/walk.rs b/core/src/location/indexer/walk.rs index eeb45c05d..b1c6af6e6 100644 --- a/core/src/location/indexer/walk.rs +++ b/core/src/location/indexer/walk.rs @@ -1,4 +1,11 @@ -use crate::location::file_path_helper::{FilePathMetadata, MetadataExt}; +use crate::{ + location::file_path_helper::{ + file_path_just_pub_id, file_path_to_isolate, FilePathMetadata, IsolatedFilePathData, + MetadataExt, + }, + prisma::file_path, + util::error::FileIOError, +}; #[cfg(target_family = "unix")] use crate::location::file_path_helper::get_inode_and_device; @@ -7,122 +14,367 @@ use crate::location::file_path_helper::get_inode_and_device; use crate::location::file_path_helper::get_inode_and_device_from_path; use std::{ - cmp::Ordering, - collections::{HashMap, VecDeque}, + collections::{HashMap, HashSet, VecDeque}, + future::Future, hash::{Hash, Hasher}, path::{Path, PathBuf}, }; +use prisma_client_rust::operator; +use serde::{Deserialize, Serialize}; use tokio::fs; -use tracing::{error, trace}; +use tracing::trace; +use uuid::Uuid; use super::{ rules::{IndexerRule, RuleKind}, IndexerError, }; +const TO_WALK_QUEUE_INITIAL_CAPACITY: usize = 32; +const WALKER_PATHS_BUFFER_INITIAL_CAPACITY: usize = 256; +const WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY: usize = 32; + /// `WalkEntry` represents a single path in the filesystem, for any comparison purposes, we only /// consider the path itself, not the metadata. -#[derive(Clone, Debug)] -pub(super) struct WalkEntry { - pub(super) path: PathBuf, - pub(super) is_dir: bool, - pub(super) metadata: FilePathMetadata, +#[derive(Debug, Serialize, Deserialize)] +pub struct WalkedEntry { + pub pub_id: Uuid, + pub iso_file_path: IsolatedFilePathData<'static>, + pub metadata: FilePathMetadata, } -impl PartialEq for WalkEntry { +#[derive(Debug, Serialize, Deserialize)] +pub struct ToWalkEntry { + path: PathBuf, + parent_dir_accepted_by_its_children: Option, +} + +struct WalkingEntry { + iso_file_path: IsolatedFilePathData<'static>, + maybe_metadata: Option, +} + +impl PartialEq for WalkingEntry { fn eq(&self, other: &Self) -> bool { - self.path == other.path + self.iso_file_path == other.iso_file_path } } -impl Eq for WalkEntry {} +impl Eq for WalkingEntry {} -impl Hash for WalkEntry { +impl Hash for WalkingEntry { fn hash(&self, state: &mut H) { - self.path.hash(state); + self.iso_file_path.hash(state); } } -impl PartialOrd for WalkEntry { - fn partial_cmp(&self, other: &Self) -> Option { - self.path.partial_cmp(&other.path) - } +pub struct WalkResult +where + Walked: Iterator, + ToRemove: Iterator, +{ + pub walked: Walked, + pub to_walk: VecDeque, + pub to_remove: ToRemove, + pub errors: Vec, } -impl Ord for WalkEntry { - fn cmp(&self, other: &Self) -> Ordering { - self.path.cmp(&other.path) - } -} - -type ToWalkEntry = (PathBuf, Option); - /// This function walks through the filesystem, applying the rules to each entry and then returning /// a list of accepted entries. There are some useful comments in the implementation of this function /// in case of doubts. -pub(super) async fn walk( +pub(super) async fn walk( + root: impl AsRef, + rules_per_kind: &HashMap>, + mut update_notifier: impl FnMut(&Path, usize), + file_paths_db_fetcher: impl Fn(Vec) -> FilePathDBFetcherFut, + to_remove_db_fetcher: impl Fn( + IsolatedFilePathData<'static>, + Vec, + ) -> ToRemoveDbFetcherFut, + iso_file_path_factory: impl Fn(&Path, bool) -> Result, IndexerError>, + limit: u64, +) -> Result< + WalkResult< + impl Iterator, + impl Iterator, + >, + IndexerError, +> +where + FilePathDBFetcherFut: Future, IndexerError>>, + ToRemoveDbFetcherFut: Future, IndexerError>>, +{ + let root = root.as_ref(); + + let mut to_walk = VecDeque::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY); + to_walk.push_back(ToWalkEntry { + path: root.to_path_buf(), + parent_dir_accepted_by_its_children: None, + }); + let mut indexed_paths = HashSet::with_capacity(WALKER_PATHS_BUFFER_INITIAL_CAPACITY); + let mut errors = vec![]; + let mut paths_buffer = Vec::with_capacity(WALKER_PATHS_BUFFER_INITIAL_CAPACITY); + let mut to_remove = vec![]; + + while let Some(ref entry) = to_walk.pop_front() { + let current_to_remove = inner_walk_single_dir( + root, + entry, + rules_per_kind, + &mut update_notifier, + &to_remove_db_fetcher, + &iso_file_path_factory, + WorkingTable { + indexed_paths: &mut indexed_paths, + paths_buffer: &mut paths_buffer, + maybe_to_walk: Some(&mut to_walk), + errors: &mut errors, + }, + ) + .await; + to_remove.push(current_to_remove); + + if indexed_paths.len() >= limit as usize { + break; + } + } + + Ok(WalkResult { + walked: filter_existing_paths(indexed_paths, file_paths_db_fetcher).await?, + to_walk, + to_remove: to_remove.into_iter().flatten(), + errors, + }) +} + +pub(super) async fn keep_walking( + to_walk_entry: &ToWalkEntry, + rules_per_kind: &HashMap>, + mut update_notifier: impl FnMut(&Path, usize), + file_paths_db_fetcher: impl Fn(Vec) -> FilePathDBFetcherFut, + to_remove_db_fetcher: impl Fn( + IsolatedFilePathData<'static>, + Vec, + ) -> ToRemoveDbFetcherFut, + iso_file_path_factory: impl Fn(&Path, bool) -> Result, IndexerError>, +) -> Result< + WalkResult< + impl Iterator, + impl Iterator, + >, + IndexerError, +> +where + FilePathDBFetcherFut: Future, IndexerError>>, + ToRemoveDbFetcherFut: Future, IndexerError>>, +{ + let mut to_keep_walking = VecDeque::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY); + let mut indexed_paths = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); + let mut paths_buffer = Vec::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); + let mut errors = vec![]; + + let to_remove = inner_walk_single_dir( + to_walk_entry.path.clone(), + to_walk_entry, + rules_per_kind, + &mut update_notifier, + &to_remove_db_fetcher, + &iso_file_path_factory, + WorkingTable { + indexed_paths: &mut indexed_paths, + paths_buffer: &mut paths_buffer, + maybe_to_walk: Some(&mut to_keep_walking), + errors: &mut errors, + }, + ) + .await; + + Ok(WalkResult { + walked: filter_existing_paths(indexed_paths, file_paths_db_fetcher).await?, + to_walk: to_keep_walking, + to_remove: to_remove.into_iter(), + errors, + }) +} + +pub(super) async fn walk_single_dir( root: impl AsRef, rules_per_kind: &HashMap>, mut update_notifier: impl FnMut(&Path, usize) + '_, - include_root: bool, -) -> Result, IndexerError> { - let root = root.as_ref().to_path_buf(); + file_paths_db_fetcher: impl Fn(Vec) -> FilePathDBFetcherFut, + to_remove_db_fetcher: impl Fn( + IsolatedFilePathData<'static>, + Vec, + ) -> ToRemoveDbFetcherFut, + iso_file_path_factory: impl Fn(&Path, bool) -> Result, IndexerError>, + add_root: bool, +) -> Result< + ( + impl Iterator, + Vec, + Vec, + ), + IndexerError, +> +where + FilePathDBFetcherFut: Future, IndexerError>>, + ToRemoveDbFetcherFut: Future, IndexerError>>, +{ + let root = root.as_ref(); - let mut to_walk = VecDeque::with_capacity(1); - to_walk.push_back((root.clone(), None)); - let mut indexed_paths = HashMap::new(); + let mut indexed_paths = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); - while let Some((current_path, parent_dir_accepted_by_its_children)) = to_walk.pop_front() { - let mut read_dir = match fs::read_dir(¤t_path).await { - Ok(read_dir) => read_dir, - Err(e) => { - error!( - "Error reading directory {}: {:#?}", - current_path.display(), - e - ); - continue; + if add_root { + let metadata = fs::metadata(root) + .await + .map_err(|e| FileIOError::from((root, e)))?; + + let (inode, device) = { + #[cfg(target_family = "unix")] + { + get_inode_and_device(&metadata) } - }; - inner_walk_single_dir( - &root, - (current_path, parent_dir_accepted_by_its_children), - &mut read_dir, - rules_per_kind, - &mut update_notifier, - &mut indexed_paths, - Some(&mut to_walk), - ) - .await?; + #[cfg(target_family = "windows")] + { + get_inode_and_device_from_path(&root).await + } + }?; + + indexed_paths.insert(WalkingEntry { + iso_file_path: iso_file_path_factory(root, true)?, + maybe_metadata: Some(FilePathMetadata { + inode, + device, + size_in_bytes: metadata.len(), + created_at: metadata.created_or_now().into(), + modified_at: metadata.modified_or_now().into(), + }), + }); } - prepared_indexed_paths(root, indexed_paths, include_root).await + let mut paths_buffer = Vec::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); + let mut errors = vec![]; + + let to_remove = inner_walk_single_dir( + root, + &ToWalkEntry { + path: root.to_path_buf(), + parent_dir_accepted_by_its_children: None, + }, + rules_per_kind, + &mut update_notifier, + &to_remove_db_fetcher, + &iso_file_path_factory, + WorkingTable { + indexed_paths: &mut indexed_paths, + paths_buffer: &mut paths_buffer, + maybe_to_walk: None, + errors: &mut errors, + }, + ) + .await; + + Ok(( + filter_existing_paths(indexed_paths, file_paths_db_fetcher).await?, + to_remove, + errors, + )) } -async fn inner_walk_single_dir( +async fn filter_existing_paths( + indexed_paths: HashSet, + file_paths_db_fetcher: impl Fn(Vec) -> F, +) -> Result, IndexerError> +where + F: Future, IndexerError>>, +{ + if !indexed_paths.is_empty() { + file_paths_db_fetcher( + indexed_paths + .iter() + .map(|entry| &entry.iso_file_path) + .map(Into::into) + .collect(), + ) + .await + } else { + Ok(vec![]) + } + .map(move |file_paths| { + let isolated_paths_already_in_db = file_paths + .into_iter() + .map(IsolatedFilePathData::from) + .collect::>(); + + indexed_paths.into_iter().filter_map(move |entry| { + (!isolated_paths_already_in_db.contains(&entry.iso_file_path)).then(|| WalkedEntry { + pub_id: Uuid::new_v4(), + iso_file_path: entry.iso_file_path, + metadata: entry + .maybe_metadata + .expect("we always use Some in `the inner_walk_single_dir` function"), + }) + }) + }) +} + +struct WorkingTable<'a> { + indexed_paths: &'a mut HashSet, + paths_buffer: &'a mut Vec, + maybe_to_walk: Option<&'a mut VecDeque>, + errors: &'a mut Vec, +} + +async fn inner_walk_single_dir( root: impl AsRef, - (current_path, parent_dir_accepted_by_its_children): ToWalkEntry, - read_dir: &mut fs::ReadDir, + ToWalkEntry { + path, + parent_dir_accepted_by_its_children, + }: &ToWalkEntry, rules_per_kind: &HashMap>, update_notifier: &mut impl FnMut(&Path, usize), - indexed_paths: &mut HashMap, - mut maybe_to_walk: Option<&mut VecDeque<(PathBuf, Option)>>, -) -> Result<(), IndexerError> { + to_remove_db_fetcher: &impl Fn( + IsolatedFilePathData<'static>, + Vec, + ) -> ToRemoveDbFetcherFut, + iso_file_path_factory: &impl Fn(&Path, bool) -> Result, IndexerError>, + WorkingTable { + indexed_paths, + paths_buffer, + mut maybe_to_walk, + errors, + }: WorkingTable<'_>, +) -> Vec +where + ToRemoveDbFetcherFut: Future, IndexerError>>, +{ + let Ok(iso_file_path_to_walk) = iso_file_path_factory(path, true).map_err(|e| errors.push(e)) + else { + return vec![]; + }; + + let Ok(mut read_dir) = fs::read_dir(path).await + .map_err(|e| errors.push(FileIOError::from((path.clone(), e)).into())) + else { + return vec![]; + }; + let root = root.as_ref(); + // Just to make sure... + paths_buffer.clear(); + + let mut found_paths_counts = 0; + // Marking with a loop label here in case of rejection or erros, to continue with next entry 'entries: loop { let entry = match read_dir.next_entry().await { Ok(Some(entry)) => entry, Ok(None) => break, Err(e) => { - error!( - "Error reading entry in {}: {:#?}", - current_path.display(), - e - ); + errors.push(FileIOError::from((path.clone(), e)).into()); continue; } }; @@ -132,11 +384,19 @@ async fn inner_walk_single_dir( // Some(true) if this check applies and it passes // Some(false) if this check applies and it was rejected // and we pass the current parent state to its children - let mut accept_by_children_dir = parent_dir_accepted_by_its_children; + let mut accept_by_children_dir = *parent_dir_accepted_by_its_children; let current_path = entry.path(); - update_notifier(¤t_path, indexed_paths.len()); + // Just sending updates if we found more paths since the last loop + let current_found_paths_count = paths_buffer.len(); + if found_paths_counts != current_found_paths_count { + update_notifier( + ¤t_path, + indexed_paths.len() + current_found_paths_count, + ); + found_paths_counts = current_found_paths_count; + } trace!( "Current filesystem path: {}, accept_by_children_dir: {:#?}", @@ -145,8 +405,11 @@ async fn inner_walk_single_dir( ); if let Some(reject_rules) = rules_per_kind.get(&RuleKind::RejectFilesByGlob) { for reject_rule in reject_rules { - // SAFETY: It's ok to unwrap here, reject rules of this kind are infallible - if !reject_rule.apply(¤t_path).await.unwrap() { + if !reject_rule + .apply(¤t_path) + .await + .expect("reject rules of this kind must be infallible") + { trace!( "Path {} rejected by rule {}", current_path.display(), @@ -157,7 +420,13 @@ async fn inner_walk_single_dir( } } - let metadata = entry.metadata().await?; + let Ok(metadata) = entry + .metadata() + .await + .map_err(|e| errors.push(FileIOError::from((entry.path(), e)).into())) + else { + continue 'entries; + }; // TODO: Hard ignoring symlinks for now, but this should be configurable if metadata.is_symlink() { @@ -166,7 +435,7 @@ async fn inner_walk_single_dir( let is_dir = metadata.is_dir(); - let (inode, device) = match { + let Ok((inode, device)) = { #[cfg(target_family = "unix")] { get_inode_and_device(&metadata) @@ -176,15 +445,9 @@ async fn inner_walk_single_dir( { get_inode_and_device_from_path(¤t_path).await } - } { - Ok(inode_and_device) => inode_and_device, - Err(e) => { - error!( - "Error getting inode and device for {}: {e}", - current_path.display(), - ); - continue 'entries; - } + }.map_err(|e| errors.push(e.into())) + else { + continue 'entries; }; if is_dir { @@ -204,12 +467,7 @@ async fn inner_walk_single_dir( } Ok(true) => {} Err(e) => { - trace!( - "Error applying rule {} to path {}: {:#?}", - reject_by_children_rule.name, - current_path.display(), - e - ); + errors.push(e.into()); continue 'entries; } } @@ -228,13 +486,7 @@ async fn inner_walk_single_dir( } Ok(false) => {} Err(e) => { - error!( - "Error applying rule {} to path {}: {:#?}", - accept_by_children_rule.name, - current_path.display(), - e - ); - continue 'entries; + errors.push(e.into()); } } } @@ -251,15 +503,21 @@ async fn inner_walk_single_dir( // Then we mark this directory the be walked in too if let Some(ref mut to_walk) = maybe_to_walk { - to_walk.push_back((entry.path(), accept_by_children_dir)); + to_walk.push_back(ToWalkEntry { + path: entry.path(), + parent_dir_accepted_by_its_children: accept_by_children_dir, + }); } } let mut accept_by_glob = false; if let Some(accept_rules) = rules_per_kind.get(&RuleKind::AcceptFilesByGlob) { for accept_rule in accept_rules { - // It's ok to unwrap here, accept rules are infallible - if accept_rule.apply(¤t_path).await.unwrap() { + if accept_rule + .apply(¤t_path) + .await + .expect("accept rules by glob must be infallible") + { trace!( "Path {} accepted by rule {}", current_path.display(), @@ -281,21 +539,24 @@ async fn inner_walk_single_dir( accept_by_glob = true; } - if accept_by_glob && (accept_by_children_dir.is_none() || accept_by_children_dir.unwrap()) { - indexed_paths.insert( - current_path.clone(), - WalkEntry { - path: current_path.clone(), - is_dir, - metadata: FilePathMetadata { - inode, - device, - size_in_bytes: metadata.len(), - created_at: metadata.created_or_now().into(), - modified_at: metadata.modified_or_now().into(), - }, - }, - ); + if accept_by_glob + && (accept_by_children_dir.is_none() || accept_by_children_dir.expect("<-- checked")) + { + let Ok(iso_file_path) = iso_file_path_factory(¤t_path, is_dir) + .map_err(|e| errors.push(e)) + else { + continue 'entries; + }; + paths_buffer.push(WalkingEntry { + iso_file_path, + maybe_metadata: Some(FilePathMetadata { + inode, + device, + size_in_bytes: metadata.len(), + created_at: metadata.created_or_now().into(), + modified_at: metadata.modified_or_now().into(), + }), + }); // If the ancestors directories wasn't indexed before, now we do for ancestor in current_path @@ -303,35 +564,50 @@ async fn inner_walk_single_dir( .skip(1) // Skip the current directory as it was already indexed .take_while(|&ancestor| ancestor != root) { + let Ok(iso_file_path) = iso_file_path_factory(ancestor, true) + .map_err(|e| errors.push(e)) + else { + // Checking the next ancestor, as this one we got an error + continue; + }; + + let mut ancestor_iso_walking_entry = WalkingEntry { + iso_file_path, + maybe_metadata: None, + }; trace!("Indexing ancestor {}", ancestor.display()); - if !indexed_paths.contains_key(ancestor) { - let metadata = fs::metadata(ancestor).await?; - let (inode, device) = { + if !indexed_paths.contains(&ancestor_iso_walking_entry) { + let Ok(metadata) = fs::metadata(ancestor) + .await + .map_err(|e| errors.push(FileIOError::from((&ancestor, e)).into())) + else { + // Checking the next ancestor, as this one we got an error + continue; + }; + let Ok((inode, device)) = { #[cfg(target_family = "unix")] { - get_inode_and_device(&metadata)? + get_inode_and_device(&metadata) } #[cfg(target_family = "windows")] { - get_inode_and_device_from_path(ancestor).await? + get_inode_and_device_from_path(ancestor).await } + }.map_err(|e| errors.push(e.into())) else { + // Checking the next ancestor, as this one we got an error + continue; }; - indexed_paths.insert( - ancestor.to_path_buf(), - WalkEntry { - path: ancestor.to_path_buf(), - is_dir: true, - metadata: FilePathMetadata { - inode, - device, - size_in_bytes: metadata.len(), - created_at: metadata.created_or_now().into(), - modified_at: metadata.modified_or_now().into(), - }, - }, - ); + ancestor_iso_walking_entry.maybe_metadata = Some(FilePathMetadata { + inode, + device, + size_in_bytes: metadata.len(), + created_at: metadata.created_or_now().into(), + modified_at: metadata.modified_or_now().into(), + }); + + paths_buffer.push(ancestor_iso_walking_entry); } else { // If indexed_paths contains the current ancestors, then it will contain // also all if its ancestors too, so we can stop here @@ -341,71 +617,30 @@ async fn inner_walk_single_dir( } } - Ok(()) -} - -async fn prepared_indexed_paths( - root: PathBuf, - indexed_paths: HashMap, - include_root: bool, -) -> Result, IndexerError> { - let mut indexed_paths = indexed_paths.into_values().collect::>(); - - if include_root { - // Also adding the root location path - let metadata = fs::metadata(&root).await?; - let (inode, device) = { - #[cfg(target_family = "unix")] - { - get_inode_and_device(&metadata)? - } - - #[cfg(target_family = "windows")] - { - get_inode_and_device_from_path(&root).await? - } - }; - indexed_paths.push(WalkEntry { - path: root, - is_dir: true, - metadata: FilePathMetadata { - inode, - device, - size_in_bytes: metadata.len(), - created_at: metadata.created_or_now().into(), - modified_at: metadata.modified_or_now().into(), - }, - }); - } - - // Sorting so we can give each path a crescent id given the filesystem hierarchy - indexed_paths.sort(); - - Ok(indexed_paths) -} - -pub(super) async fn walk_single_dir( - root: impl AsRef, - rules_per_kind: &HashMap>, - mut update_notifier: impl FnMut(&Path, usize) + '_, -) -> Result, IndexerError> { - let root = root.as_ref().to_path_buf(); - - let mut read_dir = fs::read_dir(&root).await?; - let mut indexed_paths = HashMap::new(); - - inner_walk_single_dir( - &root, - (root.clone(), None), - &mut read_dir, - rules_per_kind, - &mut update_notifier, - &mut indexed_paths, - None, + // We continue the function even if we fail to fetch `file_path`s to remove, + // the DB will have old `file_path`s but at least this is better than + // don't adding the newly indexed paths + let to_remove = to_remove_db_fetcher( + iso_file_path_to_walk, + vec![operator::or( + paths_buffer + .iter() + .map(|entry| &entry.iso_file_path) + .map(Into::into) + .collect(), + )], ) - .await?; + .await + .unwrap_or_else(|e| { + errors.push(e); + vec![] + }); - prepared_indexed_paths(root, indexed_paths, false).await + // Just merging the `found_paths` with `indexed_paths` here in the end to avoid possibly + // multiple rehashes during function execution + indexed_paths.extend(paths_buffer.drain(..)); + + to_remove } #[cfg(test)] @@ -413,12 +648,25 @@ mod tests { use super::super::rules::ParametersPerKind; use super::*; use chrono::Utc; - use globset::Glob; - use std::collections::BTreeSet; + use globset::{Glob, GlobSetBuilder}; use tempfile::{tempdir, TempDir}; use tokio::fs; use tracing_test::traced_test; + impl PartialEq for WalkedEntry { + fn eq(&self, other: &Self) -> bool { + self.iso_file_path == other.iso_file_path + } + } + + impl Eq for WalkedEntry {} + + impl Hash for WalkedEntry { + fn hash(&self, state: &mut H) { + self.iso_file_path.hash(state); + } + } + async fn prepare_location() -> TempDir { let root = tempdir().unwrap(); let root_path = root.path(); @@ -489,40 +737,52 @@ mod tests { modified_at: Utc::now(), }; + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + #[rustfmt::skip] let expected = [ - WalkEntry { path: root_path.to_path_buf(), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/.git"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/Cargo.toml"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("rust_project/src"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/src/main.rs"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("rust_project/target"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/target/debug"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/target/debug/main"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/.git"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/package.json"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner/node_project/src"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/src/App.tsx"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner/node_project/node_modules"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/node_modules/react"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/node_modules/react/package.json"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("photos"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("photos/photo1.png"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("photos/photo2.jpg"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("photos/photo3.jpeg"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("photos/text.txt"), is_dir: false, metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/text.txt"), false), metadata }, ] .into_iter() - .collect::>(); + .collect::>(); - let actual = walk(root_path.to_path_buf(), &HashMap::new(), |_, _| {}, true) - .await - .unwrap() - .into_iter() - .collect::>(); + let actual = walk( + root_path.to_path_buf(), + &HashMap::new(), + |_, _| {}, + |_| async { Ok(vec![]) }, + |_, _| async { Ok(vec![]) }, + |path, is_dir| { + IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) + }, + 420, + ) + .await + .unwrap() + .walked + .collect::>(); assert_eq!(actual, expected); } @@ -541,16 +801,18 @@ mod tests { modified_at: Utc::now(), }; + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + #[rustfmt::skip] let expected = [ - WalkEntry { path: root_path.to_path_buf(), is_dir: true, metadata }, - WalkEntry { path: root_path.join("photos"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("photos/photo1.png"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("photos/photo2.jpg"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("photos/photo3.jpeg"), is_dir: false, metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, ] .into_iter() - .collect::>(); + .collect::>(); let only_photos_rule = [( RuleKind::AcceptFilesByGlob, @@ -558,19 +820,33 @@ mod tests { RuleKind::AcceptFilesByGlob, "only photos".to_string(), false, - ParametersPerKind::AcceptFilesByGlob(vec![ - Glob::new("{*.png,*.jpg,*.jpeg}").unwrap() - ]), + ParametersPerKind::AcceptFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{*.png,*.jpg,*.jpeg}").unwrap()) + .build() + .unwrap(), + ), )], )] .into_iter() .collect::>(); - let actual = walk(root_path.to_path_buf(), &only_photos_rule, |_, _| {}, true) - .await - .unwrap() - .into_iter() - .collect::>(); + let actual = walk( + root_path.to_path_buf(), + &only_photos_rule, + |_, _| {}, + |_| async { Ok(vec![]) }, + |_, _| async { Ok(vec![]) }, + |path, is_dir| { + IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) + }, + 420, + ) + .await + .unwrap() + .walked + .collect::>(); assert_eq!(actual, expected); } @@ -589,29 +865,31 @@ mod tests { modified_at: Utc::now(), }; + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + #[rustfmt::skip] let expected = [ - WalkEntry { path: root_path.to_path_buf(), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/.git"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/Cargo.toml"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("rust_project/src"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/src/main.rs"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("rust_project/target"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/target/debug"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/target/debug/main"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/.git"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/package.json"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner/node_project/src"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/src/App.tsx"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner/node_project/node_modules"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/node_modules/react"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/node_modules/react/package.json"), is_dir: false, metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, ] .into_iter() - .collect::>(); + .collect::>(); let git_repos = [( RuleKind::AcceptIfChildrenDirectoriesArePresent, @@ -627,11 +905,21 @@ mod tests { .into_iter() .collect::>(); - let actual = walk(root_path.to_path_buf(), &git_repos, |_, _| {}, true) - .await - .unwrap() - .into_iter() - .collect::>(); + let actual = walk( + root_path.to_path_buf(), + &git_repos, + |_, _| {}, + |_| async { Ok(vec![]) }, + |_, _| async { Ok(vec![]) }, + |path, is_dir| { + IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) + }, + 420, + ) + .await + .unwrap() + .walked + .collect::>(); assert_eq!(actual, expected); } @@ -650,23 +938,25 @@ mod tests { modified_at: Utc::now(), }; + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + #[rustfmt::skip] let expected = [ - WalkEntry { path: root_path.to_path_buf(), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/.git"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/Cargo.toml"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("rust_project/src"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("rust_project/src/main.rs"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/.git"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/package.json"), is_dir: false, metadata }, - WalkEntry { path: root_path.join("inner/node_project/src"), is_dir: true, metadata }, - WalkEntry { path: root_path.join("inner/node_project/src/App.tsx"), is_dir: false, metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, ] .into_iter() - .collect::>(); + .collect::>(); let git_repos_no_deps_no_build_dirs = [ ( @@ -687,19 +977,25 @@ mod tests { RuleKind::RejectFilesByGlob, "reject node_modules".to_string(), false, - ParametersPerKind::RejectFilesByGlob(vec![Glob::new( - "{**/node_modules/*,**/node_modules}", - ) - .unwrap()]), + ParametersPerKind::RejectFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{**/node_modules/*,**/node_modules}").unwrap()) + .build() + .unwrap(), + ), ), IndexerRule::new( RuleKind::RejectFilesByGlob, "reject rust build dir".to_string(), false, - ParametersPerKind::RejectFilesByGlob(vec![Glob::new( - "{**/target/*,**/target}", - ) - .unwrap()]), + ParametersPerKind::RejectFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{**/target/*,**/target}").unwrap()) + .build() + .unwrap(), + ), ), ], ), @@ -711,12 +1007,17 @@ mod tests { root_path.to_path_buf(), &git_repos_no_deps_no_build_dirs, |_, _| {}, - true, + |_| async { Ok(vec![]) }, + |_, _| async { Ok(vec![]) }, + |path, is_dir| { + IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) + }, + 420, ) .await .unwrap() - .into_iter() - .collect::>(); + .walked + .collect::>(); assert_eq!(actual, expected); } diff --git a/core/src/location/manager/mod.rs b/core/src/location/manager/mod.rs index ae1e9c306..565efc663 100644 --- a/core/src/location/manager/mod.rs +++ b/core/src/location/manager/mod.rs @@ -1,4 +1,4 @@ -use crate::{job::JobManagerError, library::Library}; +use crate::{job::JobManagerError, library::Library, util::error::FileIOError}; use std::{ collections::BTreeSet, @@ -8,12 +8,9 @@ use std::{ use futures::executor::block_on; use thiserror::Error; -use tokio::{ - io, - sync::{ - broadcast::{self, Receiver}, - oneshot, RwLock, - }, +use tokio::sync::{ + broadcast::{self, Receiver}, + oneshot, RwLock, }; use tracing::{debug, error}; @@ -92,18 +89,22 @@ pub enum LocationManagerError { #[error("Non local location: ")] NonLocalLocation(LocationId), + #[error("failed to move file '{}' for reason: {reason}", .path.display())] + MoveError { path: Box, reason: String }, + #[error("Tried to update a non-existing file: ")] UpdateNonExistingFile(PathBuf), #[error("Database error: {0}")] DatabaseError(#[from] prisma_client_rust::QueryError), - #[error("I/O error: {0}")] - IOError(#[from] io::Error), #[error("File path related error (error: {0})")] FilePathError(#[from] FilePathError), #[error("Corrupted location pub_id on database: (error: {0})")] CorruptedLocationPubId(#[from] uuid::Error), #[error("Job Manager error: (error: {0})")] JobManager(#[from] JobManagerError), + + #[error(transparent)] + FileIO(#[from] FileIOError), } type OnlineLocations = BTreeSet>; diff --git a/core/src/location/manager/watcher/linux.rs b/core/src/location/manager/watcher/linux.rs index c1eb2dcc8..2f4b4d43f 100644 --- a/core/src/location/manager/watcher/linux.rs +++ b/core/src/location/manager/watcher/linux.rs @@ -6,7 +6,10 @@ //! Aside from that, when a directory is moved to our watched location from the outside, we receive //! a Create Dir event, this one is actually ok at least. -use crate::{invalidate_query, library::Library, location::manager::LocationManagerError}; +use crate::{ + invalidate_query, library::Library, location::manager::LocationManagerError, + util::error::FileIOError, +}; use std::{ collections::{BTreeMap, HashMap}, @@ -67,7 +70,9 @@ impl<'lib> EventHandler<'lib> for LinuxEventHandler<'lib> { create_dir( self.location_id, path, - &fs::metadata(path).await?, + &fs::metadata(path) + .await + .map_err(|e| FileIOError::from((path, e)))?, self.library, ) .await?; diff --git a/core/src/location/manager/watcher/macos.rs b/core/src/location/manager/watcher/macos.rs index 409e46d8b..40c3c8c9a 100644 --- a/core/src/location/manager/watcher/macos.rs +++ b/core/src/location/manager/watcher/macos.rs @@ -13,10 +13,11 @@ use crate::{ invalidate_query, library::Library, location::{ - file_path_helper::{check_existing_file_path, get_inode_and_device, MaterializedPath}, + file_path_helper::{check_existing_file_path, get_inode_and_device, IsolatedFilePathData}, manager::LocationManagerError, LocationId, }, + util::error::FileIOError, }; use std::{ @@ -81,8 +82,9 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> { match kind { EventKind::Create(CreateKind::Folder) => { - if let Some(latest_created_dir) = self.latest_created_dir.take() { - if paths[0] == latest_created_dir { + let path = &paths[0]; + if let Some(ref latest_created_dir) = self.latest_created_dir.take() { + if path == latest_created_dir { // NOTE: This is a MacOS specific event that happens when a folder is created // trough Finder. It creates a folder but 2 events are triggered in // FSEvents. So we store and check the latest created folder to avoid @@ -93,18 +95,23 @@ impl<'lib> EventHandler<'lib> for MacOsEventHandler<'lib> { create_dir( self.location_id, - &paths[0], - &fs::metadata(&paths[0]).await?, + path, + &fs::metadata(path) + .await + .map_err(|e| FileIOError::from((path, e)))?, self.library, ) .await?; self.latest_created_dir = Some(paths.remove(0)); } EventKind::Create(CreateKind::File) => { + let path = &paths[0]; create_file( self.location_id, - &paths[0], - &fs::metadata(&paths[0]).await?, + path, + &fs::metadata(path) + .await + .map_err(|e| FileIOError::from((path, e)))?, self.library, ) .await?; @@ -209,7 +216,12 @@ impl MacOsEventHandler<'_> { let location_path = extract_location_path(self.location_id, self.library).await?; if !check_existing_file_path( - &MaterializedPath::new(self.location_id, &location_path, &path, meta.is_dir())?, + &IsolatedFilePathData::new( + self.location_id, + &location_path, + &path, + meta.is_dir(), + )?, &self.library.db, ) .await? @@ -261,7 +273,7 @@ impl MacOsEventHandler<'_> { .insert(inode_and_device, (Instant::now(), path)); } } - Err(e) => return Err(e.into()), + Err(e) => return Err(FileIOError::from((path, e)).into()), } Ok(()) diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 048a4cbd2..064120c47 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -4,10 +4,11 @@ use crate::{ location::{ delete_directory, file_path_helper::{ - create_file_path, extract_materialized_path, file_path_with_object, - filter_existing_file_path_params, get_parent_dir, get_parent_dir_id, + check_existing_file_path, create_file_path, file_path_with_object, + filter_existing_file_path_params, get_parent_dir, + isolated_file_path_data::extract_normalized_materialized_path_str, loose_find_existing_file_path_params, FilePathError, FilePathMetadata, - MaterializedPath, MetadataExt, + IsolatedFilePathData, MetadataExt, }, find_location, location_with_indexer_rules, manager::LocationManagerError, @@ -21,7 +22,7 @@ use crate::{ }, prisma::{file_path, location, object}, sync, - util::db::uuid_to_bytes, + util::error::FileIOError, }; #[cfg(target_family = "unix")] @@ -33,7 +34,7 @@ use crate::location::file_path_helper::get_inode_and_device_from_path; use std::{ collections::HashSet, fs::Metadata, - path::{Path, PathBuf, MAIN_SEPARATOR, MAIN_SEPARATOR_STR}, + path::{Path, PathBuf}, str::FromStr, }; @@ -80,7 +81,7 @@ pub(super) async fn create_dir( path.display() ); - let materialized_path = MaterializedPath::new(location.id, &location.path, path, true)?; + let materialized_path = IsolatedFilePathData::new(location.id, &location.path, path, true)?; let (inode, device) = { #[cfg(target_family = "unix")] @@ -100,15 +101,14 @@ pub(super) async fn create_dir( trace!("parent_directory: {:?}", parent_directory); - let Some(parent_directory) = parent_directory else { + if parent_directory.is_none() { warn!("Watcher found a directory without parent"); - return Ok(()) + return Ok(()); }; let created_path = create_file_path( library, materialized_path, - Some(Uuid::from_slice(&parent_directory.pub_id).unwrap()), None, FilePathMetadata { inode, @@ -147,7 +147,7 @@ pub(super) async fn create_file( let db = &library.db; - let materialized_path = MaterializedPath::new(location_id, &location_path, path, false)?; + let iso_file_path = IsolatedFilePathData::new(location_id, &location_path, path, false)?; let (inode, device) = { #[cfg(target_family = "unix")] @@ -163,24 +163,23 @@ pub(super) async fn create_file( } }; - let Some(parent_directory) = - get_parent_dir(&materialized_path, db).await? - else { + if get_parent_dir(&iso_file_path, db).await?.is_none() { warn!("Watcher found a file without parent"); - return Ok(()) - }; + return Ok(()); + }; // generate provisional object let FileMetadata { cas_id, kind, fs_metadata, - } = FileMetadata::new(&location_path, &materialized_path).await?; + } = FileMetadata::new(&location_path, &iso_file_path) + .await + .map_err(|e| FileIOError::from((location_path.join(&iso_file_path), e)))?; let created_file = create_file_path( library, - materialized_path, - Some(Uuid::from_slice(&parent_directory.pub_id).unwrap()), + iso_file_path, Some(cas_id.clone()), FilePathMetadata { inode, @@ -249,7 +248,11 @@ pub(super) async fn create_dir_or_file( path: impl AsRef, library: &Library, ) -> Result { - let metadata = fs::metadata(path.as_ref()).await?; + let path = path.as_ref(); + let metadata = fs::metadata(path) + .await + .map_err(|e| FileIOError::from((path, e)))?; + if metadata.is_dir() { create_dir(location_id, path, &metadata, library).await } else { @@ -269,12 +272,9 @@ pub(super) async fn file_creation_or_update( if let Some(ref file_path) = library .db .file_path() - .find_first(filter_existing_file_path_params(&MaterializedPath::new( - location_id, - &location_path, - full_path, - false, - )?)) + .find_first(filter_existing_file_path_params( + &IsolatedFilePathData::new(location_id, &location_path, full_path, false)?, + )) // include object for orphan check .include(file_path_with_object::include()) .exec() @@ -285,7 +285,9 @@ pub(super) async fn file_creation_or_update( create_file( location_id, full_path, - &fs::metadata(full_path).await?, + &fs::metadata(full_path) + .await + .map_err(|e| FileIOError::from((full_path, e)))?, library, ) .await @@ -303,12 +305,9 @@ pub(super) async fn update_file( if let Some(ref file_path) = library .db .file_path() - .find_first(filter_existing_file_path_params(&MaterializedPath::new( - location_id, - &location_path, - full_path, - false, - )?)) + .find_first(filter_existing_file_path_params( + &IsolatedFilePathData::new(location_id, &location_path, full_path, false)?, + )) // include object for orphan check .include(file_path_with_object::include()) .exec() @@ -318,6 +317,7 @@ pub(super) async fn update_file( invalidate_query!(library, "search.paths"); ret } else { + // FIXME(fogodev): Have to handle files excluded by indexer rules Err(LocationManagerError::UpdateNonExistingFile( full_path.to_path_buf(), )) @@ -346,15 +346,15 @@ async fn inner_update_file( full_path.display() ); + let iso_file_path = IsolatedFilePathData::from(file_path); + let FileMetadata { cas_id, fs_metadata, kind, - } = FileMetadata::new( - &location_path, - &MaterializedPath::from((location_id, &file_path.materialized_path)), - ) - .await?; + } = FileMetadata::new(&location_path, &iso_file_path) + .await + .map_err(|e| FileIOError::from((location_path.join(&iso_file_path), e)))?; if let Some(old_cas_id) = &file_path.cas_id { if old_cas_id != &cas_id { @@ -379,7 +379,11 @@ async fn inner_update_file( // TODO: Should this be a skip rather than a null-set? let checksum = if file_path.integrity_checksum.is_some() { // If a checksum was already computed, we need to recompute it - Some(file_checksum(full_path).await?) + Some( + file_checksum(full_path) + .await + .map_err(|e| FileIOError::from((full_path, e)))?, + ) } else { None }; @@ -424,7 +428,10 @@ async fn inner_update_file( generate_thumbnail(&file_path.extension, &cas_id, full_path, library).await; // remove the old thumbnail as we're generating a new one - fs::remove_file(get_thumbnail_path(library, old_cas_id)).await?; + let thumb_path = get_thumbnail_path(library, old_cas_id); + fs::remove_file(&thumb_path) + .await + .map_err(|e| FileIOError::from((thumb_path, e)))?; } let int_kind = kind as i32; @@ -462,68 +469,46 @@ pub(super) async fn rename( library: &Library, ) -> Result<(), LocationManagerError> { let location_path = extract_location_path(location_id, library).await?; + let old_path = old_path.as_ref(); + let new_path = new_path.as_ref(); + let Library { db, .. } = library; - let old_path_materialized = - extract_materialized_path(location_id, &location_path, old_path.as_ref())?; - let mut old_path_materialized_str = format!( - "{MAIN_SEPARATOR_STR}{}", - old_path_materialized - .to_str() - .expect("Found non-UTF-8 path") - ); + let old_path_materialized_str = + extract_normalized_materialized_path_str(location_id, &location_path, old_path)?; - let new_path_materialized = - extract_materialized_path(location_id, &location_path, new_path.as_ref())?; - let mut new_path_materialized_str = format!( - "{MAIN_SEPARATOR_STR}{}", - new_path_materialized - .to_str() - .expect("Found non-UTF-8 path") - ); - - let old_materialized_path_parent = old_path_materialized - .parent() - .unwrap_or_else(|| Path::new(MAIN_SEPARATOR_STR)); - let new_materialized_path_parent = new_path_materialized - .parent() - .unwrap_or_else(|| Path::new(MAIN_SEPARATOR_STR)); + let new_path_materialized_str = + extract_normalized_materialized_path_str(location_id, &location_path, new_path)?; // Renaming a file could potentially be a move to another directory, so we check if our parent changed - let changed_parent_id = if old_materialized_path_parent != new_materialized_path_parent { - Some( - get_parent_dir_id( - &MaterializedPath::new( - location_id, - &location_path, - new_path, - true, - )?, - &library.db, - ) - .await? - .expect("CRITICAL ERROR: If we're puting a file in a directory inside our location, then this directory must exist"), + if old_path_materialized_str != new_path_materialized_str + && !check_existing_file_path( + &IsolatedFilePathData::new(location_id, &location_path, new_path, true)?.parent(), + db, ) - } else { - None - }; + .await? + { + return Err(LocationManagerError::MoveError { + path: new_path.into(), + reason: "parent directory does not exist".into(), + }); + } - if let Some(file_path) = library - .db + if let Some(file_path) = db .file_path() .find_first(loose_find_existing_file_path_params( - &MaterializedPath::new(location_id, &location_path, old_path, true)?, + &IsolatedFilePathData::new(location_id, &location_path, old_path, true)?, )) .exec() .await? { + let new = + IsolatedFilePathData::new(location_id, &location_path, new_path, file_path.is_dir)?; + // If the renamed path is a directory, we have to update every successor if file_path.is_dir { - if !old_path_materialized_str.ends_with(MAIN_SEPARATOR) { - old_path_materialized_str += MAIN_SEPARATOR_STR; - } - if !new_path_materialized_str.ends_with(MAIN_SEPARATOR) { - new_path_materialized_str += MAIN_SEPARATOR_STR; - } + let old = + IsolatedFilePathData::new(location_id, &location_path, old_path, file_path.is_dir)?; + // TODO: Fetch all file_paths that will be updated and dispatch sync events let updated = library .db @@ -531,8 +516,8 @@ pub(super) async fn rename( "UPDATE file_path \ SET materialized_path = REPLACE(materialized_path, {}, {}) \ WHERE location_id = {}", - PrismaValue::String(old_path_materialized_str.clone()), - PrismaValue::String(new_path_materialized_str.clone()), + PrismaValue::String(format!("{}/{}/", old.materialized_path, old.name)), + PrismaValue::String(format!("{}/{}/", new.materialized_path, new.name)), PrismaValue::Int(location_id as i64) )) .exec() @@ -540,38 +525,17 @@ pub(super) async fn rename( trace!("Updated {updated} file_paths"); } - let mut update_params = vec![ - file_path::materialized_path::set(new_path_materialized_str), - file_path::name::set( - new_path_materialized - .file_stem() - .unwrap() - .to_str() - .expect("Found non-UTF-8 path") - .to_string(), - ), - file_path::extension::set( - new_path_materialized - .extension() - .map(|s| { - s.to_str() - .expect("Found non-UTF-8 extension in path") - .to_string() - }) - .unwrap_or_default(), - ), - ]; - - if changed_parent_id.is_some() { - update_params.push(file_path::parent_id::set( - changed_parent_id.map(uuid_to_bytes), - )); - } - library .db .file_path() - .update(file_path::pub_id::equals(file_path.pub_id), update_params) + .update( + file_path::pub_id::equals(file_path.pub_id), + vec![ + file_path::materialized_path::set(new_path_materialized_str), + file_path::name::set(new.name.to_string()), + file_path::extension::set(new.extension.to_string()), + ], + ) .exec() .await?; @@ -593,7 +557,7 @@ pub(super) async fn remove( let Some(file_path) = library.db .file_path() .find_first(loose_find_existing_file_path_params( - &MaterializedPath::new(location_id, &location_path, full_path, true)?, + &IsolatedFilePathData::new(location_id, &location_path, full_path, true)?, )) .exec() .await? else { @@ -610,7 +574,7 @@ pub(super) async fn remove_by_file_path( library: &Library, ) -> Result<(), LocationManagerError> { // check file still exists on disk - match fs::metadata(path).await { + match fs::metadata(path.as_ref()).await { Ok(_) => { todo!("file has changed in some way, re-identify it") } @@ -645,7 +609,7 @@ pub(super) async fn remove_by_file_path( library.orphan_remover.invoke().await; } - Err(e) => return Err(e.into()), + Err(e) => return Err(FileIOError::from((path, e)).into()), } invalidate_query!(library, "search.paths"); @@ -716,7 +680,7 @@ pub(super) async fn extract_inode_and_device_from_path( .db .file_path() .find_first(loose_find_existing_file_path_params( - &MaterializedPath::new(location_id, &location.path, path, true)?, + &IsolatedFilePathData::new(location_id, &location.path, path, true)?, )) .select(file_path::select!( {inode device} )) .exec() @@ -727,7 +691,7 @@ pub(super) async fn extract_inode_and_device_from_path( u64::from_le_bytes(file_path.device[0..8].try_into().unwrap()), ) }) - .ok_or_else(|| FilePathError::NotFound(path.to_path_buf()).into()) + .ok_or_else(|| FilePathError::NotFound(path.into()).into()) } pub(super) async fn extract_location_path( diff --git a/core/src/location/manager/watcher/windows.rs b/core/src/location/manager/watcher/windows.rs index 705de4b92..e040de308 100644 --- a/core/src/location/manager/watcher/windows.rs +++ b/core/src/location/manager/watcher/windows.rs @@ -13,6 +13,7 @@ use crate::{ location::{ file_path_helper::get_inode_and_device_from_path, manager::LocationManagerError, LocationId, }, + util::error::FileIOError, }; use std::{ @@ -100,11 +101,14 @@ impl<'lib> EventHandler<'lib> for WindowsEventHandler<'lib> { } } EventKind::Modify(ModifyKind::Any) => { + let path = &paths[0]; // Windows emite events of update right after create events - if !self.recently_created_files.contains_key(&paths[0]) { - let metadata = fs::metadata(&paths[0]).await?; + if !self.recently_created_files.contains_key(path) { + let metadata = fs::metadata(path) + .await + .map_err(|e| FileIOError::from((path, e)))?; if metadata.is_file() { - update_file(self.location_id, &paths[0], self.library).await?; + update_file(self.location_id, path, self.library).await?; } } } diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index a34a7b58a..2ce20c91a 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -13,7 +13,7 @@ use crate::{ }, prisma::{file_path, indexer_rules_in_location, location, node, object, PrismaClient}, sync, - util::db::uuid_to_bytes, + util::{db::uuid_to_bytes, error::FileIOError}, }; use std::{ @@ -71,7 +71,7 @@ impl LocationCreateArgs { } Err(e) => { return Err(LocationError::LocationPathFilesystemMetadataAccess( - e, self.path, + FileIOError::from((self.path, e)), )); } }; @@ -378,7 +378,6 @@ pub async fn scan_location( .queue_next(ThumbnailerJobInit { location: location_base_data, sub_path: None, - background: true, }), ) .await @@ -413,7 +412,6 @@ pub async fn scan_location_sub_path( .queue_next(ThumbnailerJobInit { location: location_base_data, sub_path: Some(sub_path), - background: true, }), ) .await @@ -775,24 +773,3 @@ async fn check_nested_location( Ok(parents_count > 0 || children_count > 0) } - -// check if a path exists in our database at that location -// pub async fn check_virtual_path_exists( -// library: &Library, -// location_id: i32, -// subpath: impl AsRef, -// ) -> Result { -// let path = subpath.as_ref().to_str().unwrap().to_string(); - -// let file_path = library -// .db -// .file_path() -// .find_first(vec![ -// file_path::location_id::equals(location_id), -// file_path::materialized_path::equals(path), -// ]) -// .exec() -// .await?; - -// Ok(file_path.is_some()) -// } diff --git a/core/src/object/file_identifier/file_identifier_job.rs b/core/src/object/file_identifier/file_identifier_job.rs index 629ee0f55..da95edc1e 100644 --- a/core/src/object/file_identifier/file_identifier_job.rs +++ b/core/src/object/file_identifier/file_identifier_job.rs @@ -4,8 +4,8 @@ use crate::{ }, library::Library, location::file_path_helper::{ - ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_file_identifier, MaterializedPath, + ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_for_file_identifier, IsolatedFilePathData, }, prisma::{file_path, location, PrismaClient}, util::db::chain_optional_iter, @@ -51,7 +51,7 @@ impl Hash for FileIdentifierJobInit { pub struct FileIdentifierJobState { cursor: i32, report: FileIdentifierReport, - maybe_sub_materialized_path: Option>, + maybe_sub_iso_file_path: Option>, } impl JobInitData for FileIdentifierJobInit { @@ -78,7 +78,7 @@ impl StatefulJob for FileIdentifierJob { let location_id = state.init.location.id; let location_path = Path::new(&state.init.location.path); - let maybe_sub_materialized_path = if let Some(ref sub_path) = state.init.sub_path { + let maybe_sub_iso_file_path = if let Some(ref sub_path) = state.init.sub_path { let full_path = ensure_sub_path_is_in_location(location_path, sub_path) .await .map_err(FileIdentifierJobError::from)?; @@ -86,16 +86,25 @@ impl StatefulJob for FileIdentifierJob { .await .map_err(FileIdentifierJobError::from)?; - Some( - MaterializedPath::new(location_id, location_path, &full_path, true) - .map_err(FileIdentifierJobError::from)?, + let sub_iso_file_path = + IsolatedFilePathData::new(location_id, location_path, &full_path, true) + .map_err(FileIdentifierJobError::from)?; + + ensure_file_path_exists( + sub_path, + &sub_iso_file_path, + db, + FileIdentifierJobError::SubPathNotFound, ) + .await?; + + Some(sub_iso_file_path) } else { None }; let orphan_count = - count_orphan_file_paths(db, location_id, &maybe_sub_materialized_path).await?; + count_orphan_file_paths(db, location_id, &maybe_sub_iso_file_path).await?; // Initializing `state.data` here because we need a complete state in case of early finish state.data = Some(FileIdentifierJobState { @@ -105,10 +114,13 @@ impl StatefulJob for FileIdentifierJob { ..Default::default() }, cursor: 0, - maybe_sub_materialized_path, + maybe_sub_iso_file_path, }); - let data = state.data.as_mut().unwrap(); // SAFETY: We just initialized it + let data = state + .data + .as_mut() + .expect("critical error: missing data on job state"); if orphan_count == 0 { return Err(JobError::EarlyFinish { @@ -133,7 +145,7 @@ impl StatefulJob for FileIdentifierJob { .find_first(orphan_path_filters( location_id, None, - &data.maybe_sub_materialized_path, + &data.maybe_sub_iso_file_path, )) .select(file_path::select!({ id })) .exec() @@ -142,7 +154,7 @@ impl StatefulJob for FileIdentifierJob { data.cursor = first_path.id; - state.steps = (0..task_count).map(|_| ()).collect(); + state.steps.extend((0..task_count).map(|_| ())); Ok(()) } @@ -155,20 +167,21 @@ impl StatefulJob for FileIdentifierJob { let FileIdentifierJobState { ref mut cursor, ref mut report, - ref maybe_sub_materialized_path, + ref maybe_sub_iso_file_path, } = state .data .as_mut() - .expect("Critical error: missing data on job state"); + .expect("critical error: missing data on job state"); + let step_number = state.step_number; let location = &state.init.location; // get chunk of orphans to process let file_paths = get_orphan_file_paths( &ctx.library.db, - state.init.location.id, + location.id, *cursor, - maybe_sub_materialized_path, + maybe_sub_iso_file_path, ) .await?; @@ -176,7 +189,7 @@ impl StatefulJob for FileIdentifierJob { ::NAME, location, &file_paths, - state.step_number, + step_number, cursor, report, ctx, @@ -199,7 +212,7 @@ impl StatefulJob for FileIdentifierJob { fn orphan_path_filters( location_id: i32, file_path_id: Option, - maybe_sub_materialized_path: &Option>, + maybe_sub_iso_file_path: &Option>, ) -> Vec { chain_optional_iter( [ @@ -210,9 +223,13 @@ fn orphan_path_filters( [ // this is a workaround for the cursor not working properly file_path_id.map(file_path::id::gte), - maybe_sub_materialized_path - .as_ref() - .map(|p| file_path::materialized_path::starts_with(p.into())), + maybe_sub_iso_file_path.as_ref().map(|sub_iso_file_path| { + file_path::materialized_path::starts_with( + sub_iso_file_path + .materialized_path_for_children() + .expect("sub path iso_file_path must be a directory"), + ) + }), ], ) } @@ -220,7 +237,7 @@ fn orphan_path_filters( async fn count_orphan_file_paths( db: &PrismaClient, location_id: i32, - maybe_sub_materialized_path: &Option>, + maybe_sub_materialized_path: &Option>, ) -> Result { db.file_path() .count(orphan_path_filters( @@ -237,7 +254,7 @@ async fn get_orphan_file_paths( db: &PrismaClient, location_id: i32, file_path_id: i32, - maybe_sub_materialized_path: &Option>, + maybe_sub_materialized_path: &Option>, ) -> Result, prisma_client_rust::QueryError> { info!( "Querying {} orphan Paths at cursor: {:?}", diff --git a/core/src/object/file_identifier/mod.rs b/core/src/object/file_identifier/mod.rs index 2e8dd37d4..3b4afb108 100644 --- a/core/src/object/file_identifier/mod.rs +++ b/core/src/object/file_identifier/mod.rs @@ -2,7 +2,9 @@ use crate::{ invalidate_query, job::{JobError, JobReportUpdate, JobResult, WorkerContext}, library::Library, - location::file_path_helper::{file_path_for_file_identifier, FilePathError, MaterializedPath}, + location::file_path_helper::{ + file_path_for_file_identifier, FilePathError, IsolatedFilePathData, + }, object::{cas::generate_cas_id, object_for_file_identifier}, prisma::{file_path, location, object, PrismaClient}, sync, @@ -33,8 +35,14 @@ const CHUNK_SIZE: usize = 100; #[derive(Error, Debug)] pub enum FileIdentifierJobError { - #[error("File path related error (error: {0})")] + #[error("received sub path not in database: ", .0.display())] + SubPathNotFound(Box), + + // Internal Errors + #[error(transparent)] FilePathError(#[from] FilePathError), + #[error("database error")] + Database(#[from] prisma_client_rust::QueryError), } #[derive(Debug, Clone)] @@ -48,9 +56,9 @@ impl FileMetadata { /// Assembles `create_unchecked` params for a given file path pub async fn new( location_path: impl AsRef, - materialized_path: &MaterializedPath<'_>, // TODO: use dedicated CreateUnchecked type + iso_file_path: &IsolatedFilePathData<'_>, // TODO: use dedicated CreateUnchecked type ) -> Result { - let path = location_path.as_ref().join(materialized_path); + let path = location_path.as_ref().join(iso_file_path); let fs_metadata = fs::metadata(&path).await?; @@ -95,7 +103,7 @@ async fn identifier_job_step( // NOTE: `file_path`'s `materialized_path` begins with a `/` character so we remove it to join it with `location.path` FileMetadata::new( &location.path, - &MaterializedPath::from((location.id, &file_path.materialized_path)), + &IsolatedFilePathData::from((location.id, file_path)), ) .await .map(|params| { diff --git a/core/src/object/file_identifier/shallow_file_identifier_job.rs b/core/src/object/file_identifier/shallow_file_identifier_job.rs index a41ea40c4..cc8f5fab8 100644 --- a/core/src/object/file_identifier/shallow_file_identifier_job.rs +++ b/core/src/object/file_identifier/shallow_file_identifier_job.rs @@ -4,11 +4,11 @@ use crate::{ }, library::Library, location::file_path_helper::{ - ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_file_identifier, get_existing_file_path_id, MaterializedPath, + ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_for_file_identifier, IsolatedFilePathData, }, prisma::{file_path, location, PrismaClient}, - util::db::{chain_optional_iter, uuid_to_bytes}, + util::db::chain_optional_iter, }; use std::{ @@ -19,7 +19,6 @@ use std::{ use prisma_client_rust::Direction; use serde::{Deserialize, Serialize}; use tracing::info; -use uuid::Uuid; use super::{ finalize_file_identifier, process_identifier_file_paths, FileIdentifierJobError, @@ -49,7 +48,7 @@ impl Hash for ShallowFileIdentifierJobInit { pub struct ShallowFileIdentifierJobState { cursor: i32, report: FileIdentifierReport, - sub_path_id: Uuid, + sub_iso_file_path: IsolatedFilePathData<'static>, } impl JobInitData for ShallowFileIdentifierJobInit { @@ -77,7 +76,7 @@ impl StatefulJob for ShallowFileIdentifierJob { let location_id = state.init.location.id; let location_path = Path::new(&state.init.location.path); - let sub_path_id = if state.init.sub_path != Path::new("") { + let sub_iso_file_path = if state.init.sub_path != Path::new("") { let full_path = ensure_sub_path_is_in_location(location_path, &state.init.sub_path) .await .map_err(FileIdentifierJobError::from)?; @@ -85,26 +84,25 @@ impl StatefulJob for ShallowFileIdentifierJob { .await .map_err(FileIdentifierJobError::from)?; - get_existing_file_path_id( - &MaterializedPath::new(location_id, location_path, &full_path, true) - .map_err(FileIdentifierJobError::from)?, + let sub_iso_file_path = + IsolatedFilePathData::new(location_id, location_path, &full_path, true) + .map_err(FileIdentifierJobError::from)?; + + ensure_file_path_exists( + &state.init.sub_path, + &sub_iso_file_path, db, + FileIdentifierJobError::SubPathNotFound, ) - .await - .map_err(FileIdentifierJobError::from)? - .expect("Sub path should already exist in the database") + .await?; + + sub_iso_file_path } else { - get_existing_file_path_id( - &MaterializedPath::new(location_id, location_path, location_path, true) - .map_err(FileIdentifierJobError::from)?, - db, - ) - .await - .map_err(FileIdentifierJobError::from)? - .expect("Location root path should already exist in the database") + IsolatedFilePathData::new(location_id, location_path, location_path, true) + .map_err(FileIdentifierJobError::from)? }; - let orphan_count = count_orphan_file_paths(db, location_id, sub_path_id).await?; + let orphan_count = count_orphan_file_paths(db, location_id, &sub_iso_file_path).await?; // Initializing `state.data` here because we need a complete state in case of early finish state.data = Some(ShallowFileIdentifierJobState { @@ -114,7 +112,7 @@ impl StatefulJob for ShallowFileIdentifierJob { ..Default::default() }, cursor: 0, - sub_path_id, + sub_iso_file_path, }); if orphan_count == 0 { @@ -135,19 +133,27 @@ impl StatefulJob for ShallowFileIdentifierJob { // update job with total task count based on orphan file_paths count ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]); + let mut data = state + .data + .as_mut() + .expect("critical error: missing data on job state"); + let first_path = db .file_path() - .find_first(orphan_path_filters(location_id, None, sub_path_id)) + .find_first(orphan_path_filters( + location_id, + None, + &data.sub_iso_file_path, + )) // .order_by(file_path::id::order(Direction::Asc)) .select(file_path::select!({ id })) .exec() .await? .unwrap(); // SAFETY: We already validated before that there are orphans `file_path`s - // SAFETY: We just initialized `state.data` above - state.data.as_mut().unwrap().cursor = first_path.id; + data.cursor = first_path.id; - state.steps = (0..task_count).map(|_| ()).collect(); + state.steps.extend((0..task_count).map(|_| ())); Ok(()) } @@ -160,22 +166,17 @@ impl StatefulJob for ShallowFileIdentifierJob { let ShallowFileIdentifierJobState { ref mut cursor, ref mut report, - ref sub_path_id, + ref sub_iso_file_path, } = state .data .as_mut() - .expect("Critical error: missing data on job state"); + .expect("critical error: missing data on job state"); let location = &state.init.location; // get chunk of orphans to process - let file_paths = get_orphan_file_paths( - &ctx.library.db, - state.init.location.id, - *cursor, - *sub_path_id, - ) - .await?; + let file_paths = + get_orphan_file_paths(&ctx.library.db, location.id, *cursor, sub_iso_file_path).await?; process_identifier_file_paths( ::NAME, @@ -204,14 +205,18 @@ impl StatefulJob for ShallowFileIdentifierJob { fn orphan_path_filters( location_id: i32, file_path_id: Option, - sub_path_id: Uuid, + sub_iso_file_path: &IsolatedFilePathData<'_>, ) -> Vec { chain_optional_iter( [ file_path::object_id::equals(None), file_path::is_dir::equals(false), file_path::location_id::equals(location_id), - file_path::parent_id::equals(Some(uuid_to_bytes(sub_path_id))), + file_path::materialized_path::equals( + sub_iso_file_path + .materialized_path_for_children() + .expect("sub path for shallow identifier must be a directory"), + ), ], [file_path_id.map(file_path::id::gte)], ) @@ -220,10 +225,10 @@ fn orphan_path_filters( async fn count_orphan_file_paths( db: &PrismaClient, location_id: i32, - sub_path_id: Uuid, + sub_iso_file_path: &IsolatedFilePathData<'_>, ) -> Result { db.file_path() - .count(orphan_path_filters(location_id, None, sub_path_id)) + .count(orphan_path_filters(location_id, None, sub_iso_file_path)) .exec() .await .map(|c| c as usize) @@ -232,15 +237,19 @@ async fn count_orphan_file_paths( async fn get_orphan_file_paths( db: &PrismaClient, location_id: i32, - cursor: i32, - sub_path_id: Uuid, + file_path_id_cursor: i32, + sub_iso_file_path: &IsolatedFilePathData<'_>, ) -> Result, prisma_client_rust::QueryError> { info!( "Querying {} orphan Paths at cursor: {:?}", - CHUNK_SIZE, cursor + CHUNK_SIZE, file_path_id_cursor ); db.file_path() - .find_many(orphan_path_filters(location_id, Some(cursor), sub_path_id)) + .find_many(orphan_path_filters( + location_id, + Some(file_path_id_cursor), + sub_iso_file_path, + )) .order_by(file_path::id::order(Direction::Asc)) // .cursor(cursor.into()) .take(CHUNK_SIZE as i64) diff --git a/core/src/object/fs/copy.rs b/core/src/object/fs/copy.rs index 2b1262984..53e954203 100644 --- a/core/src/object/fs/copy.rs +++ b/core/src/object/fs/copy.rs @@ -3,6 +3,7 @@ use crate::{ job::{ JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, }, + util::error::FileIOError, }; use std::{hash::Hash, path::PathBuf}; @@ -12,7 +13,7 @@ use specta::Type; use tokio::fs; use tracing::{trace, warn}; -use super::{context_menu_fs_info, get_path_from_location_id, osstr_to_string, FsInfo}; +use super::{context_menu_fs_info, get_location_path_from_location_id, osstr_to_string, FsInfo}; pub struct FileCopierJob {} @@ -76,7 +77,8 @@ impl StatefulJob for FileCopierJob { .await?; let mut full_target_path = - get_path_from_location_id(&ctx.library.db, state.init.target_location_id).await?; + get_location_path_from_location_id(&ctx.library.db, state.init.target_location_id) + .await?; // add the currently viewed subdirectory to the location root full_target_path.push(&state.init.target_path); @@ -109,7 +111,7 @@ impl StatefulJob for FileCopierJob { source_fs_info: source_fs_info.clone(), }); - state.steps = [source_fs_info.into()].into_iter().collect(); + state.steps.push_back(source_fs_info.into()); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); @@ -121,26 +123,32 @@ impl StatefulJob for FileCopierJob { ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - let step = &state.steps[0]; + let data = state + .data + .as_ref() + .expect("critical error: missing data on job state"); - let job_state = state.data.as_ref().ok_or(JobError::MissingData { - value: String::from("job state"), - })?; - - match step { + match &state.steps[0] { FileCopierJobStep::File { path } => { - let mut target_path = job_state.target_path.clone(); + let mut target_path = data.target_path.clone(); - if job_state.source_fs_info.path_data.is_dir { + if data.source_fs_info.path_data.is_dir { // if root type is a dir, we need to preserve structure by making paths relative target_path.push( - path.strip_prefix(&job_state.source_fs_info.fs_path) + path.strip_prefix(&data.source_fs_info.fs_path) .map_err(|_| JobError::Path)?, ); } - if fs::canonicalize(path.parent().ok_or(JobError::Path)?).await? - == fs::canonicalize(target_path.parent().ok_or(JobError::Path)?).await? + let parent_path = path.parent().ok_or(JobError::Path)?; + let parent_target_path = target_path.parent().ok_or(JobError::Path)?; + + if fs::canonicalize(parent_path) + .await + .map_err(|e| FileIOError::from((parent_path, e)))? + == fs::canonicalize(parent_target_path) + .await + .map_err(|e| FileIOError::from((parent_target_path, e)))? { return Err(JobError::MatchingSrcDest(path.clone())); } @@ -159,35 +167,51 @@ impl StatefulJob for FileCopierJob { target_path.display() ); - fs::copy(&path, &target_path).await?; + fs::copy(&path, &target_path) + .await + .map_err(|e| FileIOError::from((&target_path, e)))?; } } FileCopierJobStep::Directory { path } => { // if this is the very first path, create the target dir // fixes copying dirs with no child directories - if job_state.source_fs_info.path_data.is_dir - && &job_state.source_fs_info.fs_path == path - { - fs::create_dir_all(&job_state.target_path).await?; + if data.source_fs_info.path_data.is_dir && &data.source_fs_info.fs_path == path { + fs::create_dir_all(&data.target_path) + .await + .map_err(|e| FileIOError::from((&data.target_path, e)))?; } - let mut dir = fs::read_dir(&path).await?; + let path = path.clone(); // To appease the borrowck - while let Some(entry) = dir.next_entry().await? { - if entry.metadata().await?.is_dir() { + let mut dir = fs::read_dir(&path) + .await + .map_err(|e| FileIOError::from((&path, e)))?; + + while let Some(entry) = dir + .next_entry() + .await + .map_err(|e| FileIOError::from((&path, e)))? + { + let entry_path = entry.path(); + if entry + .metadata() + .await + .map_err(|e| FileIOError::from((&entry_path, e)))? + .is_dir() + { state .steps .push_back(FileCopierJobStep::Directory { path: entry.path() }); - fs::create_dir_all( - job_state.target_path.join( - entry - .path() - .strip_prefix(&job_state.source_fs_info.fs_path) - .map_err(|_| JobError::Path)?, - ), - ) - .await?; + let full_path = data.target_path.join( + entry_path + .strip_prefix(&data.source_fs_info.fs_path) + .map_err(|_| JobError::Path)?, + ); + + fs::create_dir_all(&full_path) + .await + .map_err(|e| FileIOError::from((full_path, e)))?; } else { state .steps diff --git a/core/src/object/fs/cut.rs b/core/src/object/fs/cut.rs index 9c43890a5..2c409c454 100644 --- a/core/src/object/fs/cut.rs +++ b/core/src/object/fs/cut.rs @@ -3,6 +3,7 @@ use crate::{ job::{ JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, }, + util::error::FileIOError, }; use std::{hash::Hash, path::PathBuf}; @@ -12,13 +13,10 @@ use specta::Type; use tokio::fs; use tracing::{trace, warn}; -use super::{context_menu_fs_info, get_path_from_location_id, FsInfo}; +use super::{context_menu_fs_info, get_location_path_from_location_id, FsInfo}; pub struct FileCutterJob {} -#[derive(Serialize, Deserialize, Debug)] -pub struct FileCutterJobState {} - #[derive(Serialize, Deserialize, Hash, Type)] pub struct FileCutterJobInit { pub source_location_id: i32, @@ -40,7 +38,7 @@ impl JobInitData for FileCutterJobInit { #[async_trait::async_trait] impl StatefulJob for FileCutterJob { type Init = FileCutterJobInit; - type Data = FileCutterJobState; + type Data = (); type Step = FileCutterJobStep; const NAME: &'static str = "file_cutter"; @@ -58,15 +56,14 @@ impl StatefulJob for FileCutterJob { .await?; let mut full_target_path = - get_path_from_location_id(&ctx.library.db, state.init.target_location_id).await?; + get_location_path_from_location_id(&ctx.library.db, state.init.target_location_id) + .await?; full_target_path.push(&state.init.target_path); - state.steps = [FileCutterJobStep { + state.steps.push_back(FileCutterJobStep { source_fs_info, target_directory: full_target_path, - }] - .into_iter() - .collect(); + }); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); @@ -85,14 +82,16 @@ impl StatefulJob for FileCutterJob { .target_directory .join(source_info.fs_path.file_name().ok_or(JobError::OsStr)?); - if fs::canonicalize( - source_info - .fs_path - .parent() - .map_or(Err(JobError::Path), Ok)?, - ) - .await? == fs::canonicalize(full_output.parent().map_or(Err(JobError::Path), Ok)?) - .await? + let parent_source = source_info.fs_path.parent().ok_or(JobError::Path)?; + + let parent_output = full_output.parent().ok_or(JobError::Path)?; + + if fs::canonicalize(parent_source) + .await + .map_err(|e| FileIOError::from((parent_source, e)))? + == fs::canonicalize(parent_output) + .await + .map_err(|e| FileIOError::from((parent_output, e)))? { return Err(JobError::MatchingSrcDest(source_info.fs_path.clone())); } @@ -112,7 +111,9 @@ impl StatefulJob for FileCutterJob { full_output.display() ); - fs::rename(&source_info.fs_path, &full_output).await?; + fs::rename(&source_info.fs_path, &full_output) + .await + .map_err(|e| FileIOError::from((&source_info.fs_path, e)))?; ctx.progress(vec![JobReportUpdate::CompletedTaskCount( state.step_number + 1, diff --git a/core/src/object/fs/decrypt.rs b/core/src/object/fs/decrypt.rs index cbe806171..b5a172bcf 100644 --- a/core/src/object/fs/decrypt.rs +++ b/core/src/object/fs/decrypt.rs @@ -1,7 +1,7 @@ use sd_crypto::{crypto::Decryptor, header::file::FileHeader, Protected}; use serde::{Deserialize, Serialize}; use specta::Type; -use std::{collections::VecDeque, path::PathBuf}; +use std::path::PathBuf; use tokio::fs::File; use crate::{ @@ -9,6 +9,7 @@ use crate::{ job::{ JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, }, + util::error::FileIOError, }; use super::{context_menu_fs_info, FsInfo, BYTES_EXT}; @@ -55,7 +56,6 @@ impl StatefulJob for FileDecryptorJob { context_menu_fs_info(&ctx.library.db, state.init.location_id, state.init.path_id) .await?; - state.steps = VecDeque::new(); state.steps.push_back(FileDecryptorJobStep { fs_info }); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); @@ -68,8 +68,7 @@ impl StatefulJob for FileDecryptorJob { ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - let step = &state.steps[0]; - let info = &step.fs_info; + let info = &&state.steps[0].fs_info; let key_manager = &ctx.library.key_manager; // handle overwriting checks, and making sure there's enough available space @@ -89,8 +88,12 @@ impl StatefulJob for FileDecryptorJob { |p| p, ); - let mut reader = File::open(info.fs_path.clone()).await?; - let mut writer = File::create(output_path).await?; + let mut reader = File::open(info.fs_path.clone()) + .await + .map_err(|e| FileIOError::from((&info.fs_path, e)))?; + let mut writer = File::create(&output_path) + .await + .map_err(|e| FileIOError::from((output_path, e)))?; let (header, aad) = FileHeader::from_reader(&mut reader).await?; diff --git a/core/src/object/fs/delete.rs b/core/src/object/fs/delete.rs index e7aec74b3..d71d6c938 100644 --- a/core/src/object/fs/delete.rs +++ b/core/src/object/fs/delete.rs @@ -3,6 +3,7 @@ use crate::{ job::{ JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, }, + util::error::FileIOError, }; use std::hash::Hash; @@ -44,7 +45,7 @@ impl StatefulJob for FileDeleterJob { context_menu_fs_info(&ctx.library.db, state.init.location_id, state.init.path_id) .await?; - state.steps = [fs_info].into_iter().collect(); + state.steps.push_back(fs_info); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); @@ -62,10 +63,11 @@ impl StatefulJob for FileDeleterJob { // maybe a files.countOccurances/and or files.getPath(location_id, path_id) to show how many of these files would be deleted (and where?) if info.path_data.is_dir { - tokio::fs::remove_dir_all(info.fs_path.clone()).await + tokio::fs::remove_dir_all(&info.fs_path).await } else { - tokio::fs::remove_file(info.fs_path.clone()).await - }?; + tokio::fs::remove_file(&info.fs_path).await + } + .map_err(|e| FileIOError::from((&info.fs_path, e)))?; ctx.progress(vec![JobReportUpdate::CompletedTaskCount( state.step_number + 1, diff --git a/core/src/object/fs/encrypt.rs b/core/src/object/fs/encrypt.rs index 576dd83c6..ae72d6fb1 100644 --- a/core/src/object/fs/encrypt.rs +++ b/core/src/object/fs/encrypt.rs @@ -1,4 +1,4 @@ -use crate::{invalidate_query, job::*, library::Library}; +use crate::{invalidate_query, job::*, library::Library, util::error::FileIOError}; use std::path::PathBuf; @@ -19,9 +19,6 @@ use super::{context_menu_fs_info, FsInfo, BYTES_EXT}; pub struct FileEncryptorJob; -#[derive(Serialize, Deserialize, Debug)] -pub struct FileEncryptorJobState {} - #[derive(Serialize, Deserialize, Type, Hash)] pub struct FileEncryptorJobInit { pub location_id: i32, @@ -51,7 +48,7 @@ impl JobInitData for FileEncryptorJobInit { #[async_trait::async_trait] impl StatefulJob for FileEncryptorJob { type Init = FileEncryptorJobInit; - type Data = FileEncryptorJobState; + type Data = (); type Step = FsInfo; const NAME: &'static str = "file_encryptor"; @@ -61,14 +58,13 @@ impl StatefulJob for FileEncryptorJob { } async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { - let step = + state.steps.push_back( context_menu_fs_info(&ctx.library.db, state.init.location_id, state.init.path_id) .await .map_err(|_| JobError::MissingData { value: String::from("file_path that matches both location id and path id"), - })?; - - state.steps = [step].into_iter().collect(); + })?, + ); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); @@ -139,8 +135,12 @@ impl StatefulJob for FileEncryptorJob { Some, ); - let mut reader = File::open(&info.fs_path).await?; - let mut writer = File::create(output_path).await?; + let mut reader = File::open(&info.fs_path) + .await + .map_err(|e| FileIOError::from((&info.fs_path, e)))?; + let mut writer = File::create(&output_path) + .await + .map_err(|e| FileIOError::from((output_path, e)))?; let master_key = Key::generate(); @@ -199,8 +199,13 @@ impl StatefulJob for FileEncryptorJob { if tokio::fs::metadata(&pvm_path).await.is_ok() { let mut pvm_bytes = Vec::new(); - let mut pvm_file = File::open(pvm_path).await?; - pvm_file.read_to_end(&mut pvm_bytes).await?; + let mut pvm_file = File::open(&pvm_path) + .await + .map_err(|e| FileIOError::from((&pvm_path, e)))?; + pvm_file + .read_to_end(&mut pvm_bytes) + .await + .map_err(|e| FileIOError::from((pvm_path, e)))?; header .add_preview_media( diff --git a/core/src/object/fs/erase.rs b/core/src/object/fs/erase.rs index 956b50af2..fd84ea327 100644 --- a/core/src/object/fs/erase.rs +++ b/core/src/object/fs/erase.rs @@ -3,6 +3,7 @@ use crate::{ job::{ JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, }, + util::error::FileIOError, }; use std::{hash::Hash, path::PathBuf}; @@ -11,7 +12,7 @@ use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; use specta::Type; use tokio::{fs::OpenOptions, io::AsyncWriteExt}; -use tracing::{trace, warn}; +use tracing::trace; use super::{context_menu_fs_info, FsInfo}; @@ -70,7 +71,7 @@ impl StatefulJob for FileEraserJob { state.data = Some(fs_info.clone()); - state.steps = [fs_info.into()].into_iter().collect(); + state.steps.push_back(fs_info.into()); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); @@ -82,39 +83,65 @@ impl StatefulJob for FileEraserJob { ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - let step = &state.steps[0]; - // need to handle stuff such as querying prisma for all paths of a file, and deleting all of those if requested (with a checkbox in the ui) // maybe a files.countOccurances/and or files.getPath(location_id, path_id) to show how many of these files would be erased (and where?) - match step { + match &state.steps[0] { FileEraserJobStep::File { path } => { let mut file = OpenOptions::new() .read(true) .write(true) - .open(&path) - .await?; - let file_len = file.metadata().await?.len(); + .open(path) + .await + .map_err(|e| FileIOError::from((path, e)))?; + let file_len = file + .metadata() + .await + .map_err(|e| FileIOError::from((path, e)))? + .len(); sd_crypto::fs::erase::erase(&mut file, file_len as usize, state.init.passes) .await?; - file.set_len(0).await?; - file.flush().await?; + + file.set_len(0) + .await + .map_err(|e| FileIOError::from((path, e)))?; + file.flush() + .await + .map_err(|e| FileIOError::from((path, e)))?; drop(file); trace!("Erasing file: {:?}", path); - tokio::fs::remove_file(&path).await?; + tokio::fs::remove_file(path) + .await + .map_err(|e| FileIOError::from((path, e)))?; } FileEraserJobStep::Directory { path } => { - let mut dir = tokio::fs::read_dir(&path).await?; + let path = path.clone(); // To appease the borrowck - while let Some(entry) = dir.next_entry().await? { - state.steps.push_back(if entry.metadata().await?.is_dir() { - FileEraserJobStep::Directory { path: entry.path() } - } else { - FileEraserJobStep::File { path: entry.path() } - }); + let mut dir = tokio::fs::read_dir(&path) + .await + .map_err(|e| FileIOError::from((&path, e)))?; + + while let Some(entry) = dir + .next_entry() + .await + .map_err(|e| FileIOError::from((&path, e)))? + { + let entry_path = entry.path(); + state.steps.push_back( + if entry + .metadata() + .await + .map_err(|e| FileIOError::from((&entry_path, e)))? + .is_dir() + { + FileEraserJobStep::Directory { path: entry_path } + } else { + FileEraserJobStep::File { path: entry_path } + }, + ); ctx.progress(vec![JobReportUpdate::TaskCount(state.steps.len())]); } @@ -128,12 +155,14 @@ impl StatefulJob for FileEraserJob { } async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { - if let Some(ref info) = state.data { - if info.path_data.is_dir { - tokio::fs::remove_dir_all(&info.fs_path).await?; - } - } else { - warn!("missing job state, unable to fully finalise erase job"); + let data = state + .data + .as_ref() + .expect("critical error: missing data on job state"); + if data.path_data.is_dir { + tokio::fs::remove_dir_all(&data.fs_path) + .await + .map_err(|e| FileIOError::from((&data.fs_path, e)))?; } invalidate_query!(ctx.library, "search.paths"); diff --git a/core/src/object/fs/mod.rs b/core/src/object/fs/mod.rs index 2c7e15511..a3752eb4a 100644 --- a/core/src/object/fs/mod.rs +++ b/core/src/object/fs/mod.rs @@ -1,6 +1,6 @@ use crate::{ job::JobError, - location::file_path_helper::{file_path_with_object, MaterializedPath}, + location::file_path_helper::{file_path_with_object, IsolatedFilePathData}, prisma::{file_path, location, PrismaClient}, }; @@ -42,7 +42,7 @@ pub fn osstr_to_string(os_str: Option<&OsStr>) -> Result { .ok_or(JobError::OsStr) } -pub async fn get_path_from_location_id( +pub async fn get_location_path_from_location_id( db: &PrismaClient, location_id: i32, ) -> Result { @@ -74,12 +74,9 @@ pub async fn context_menu_fs_info( })?; Ok(FsInfo { - fs_path: get_path_from_location_id(db, location_id) + fs_path: get_location_path_from_location_id(db, location_id) .await? - .join(&MaterializedPath::from(( - location_id, - &path_data.materialized_path, - ))), + .join(IsolatedFilePathData::from(&path_data)), path_data, }) } diff --git a/core/src/object/preview/thumbnail/mod.rs b/core/src/object/preview/thumbnail/mod.rs index ff3b11041..0ca653150 100644 --- a/core/src/object/preview/thumbnail/mod.rs +++ b/core/src/object/preview/thumbnail/mod.rs @@ -1,14 +1,15 @@ use crate::{ api::CoreEvent, invalidate_query, - job::{JobError, JobReportUpdate, JobResult, WorkerContext}, + job::{ + JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, + }, library::Library, location::{ - file_path_helper::{ - file_path_just_materialized_path_cas_id, FilePathError, MaterializedPath, - }, + file_path_helper::{file_path_for_thumbnailer, FilePathError, IsolatedFilePathData}, LocationId, }, + util::error::FileIOError, }; use std::{ @@ -75,16 +76,22 @@ pub struct ThumbnailerJobState { #[derive(Error, Debug)] pub enum ThumbnailerError { - #[error("File path related error (error: {0})")] - FilePathError(#[from] FilePathError), - #[error("IO error (error: {0})")] - IOError(#[from] io::Error), + #[error("sub path not found: ", .0.display())] + SubPathNotFound(Box), + + // Internal errors + #[error("database error")] + Database(#[from] prisma_client_rust::QueryError), + #[error(transparent)] + FilePath(#[from] FilePathError), + #[error(transparent)] + FileIO(#[from] FileIOError), } #[derive(Debug, Serialize, Deserialize)] pub struct ThumbnailerJobReport { location_id: LocationId, - materialized_path: String, + path: PathBuf, thumbnails_created: u32, } @@ -97,7 +104,7 @@ enum ThumbnailerJobStepKind { #[derive(Debug, Serialize, Deserialize)] pub struct ThumbnailerJobStep { - file_path: file_path_just_materialized_path_cas_id::Data, + file_path: file_path_for_thumbnailer::Data, kind: ThumbnailerJobStepKind, } @@ -160,12 +167,7 @@ fn finalize_thumbnailer(data: &ThumbnailerJobState, ctx: WorkerContext) -> JobRe info!( "Finished thumbnail generation for location {} at {}", data.report.location_id, - data.location_path - .join(&MaterializedPath::from(( - data.report.location_id, - &data.report.materialized_path - ))) - .display() + data.report.path.display() ); if data.report.thumbnails_created > 0 { @@ -175,43 +177,56 @@ fn finalize_thumbnailer(data: &ThumbnailerJobState, ctx: WorkerContext) -> JobRe Ok(Some(serde_json::to_value(&data.report)?)) } -async fn process_step( - is_background: bool, - step_number: usize, - step: &ThumbnailerJobStep, - data: &mut ThumbnailerJobState, +async fn process_step( + state: &mut JobState, ctx: WorkerContext, -) -> Result<(), JobError> { +) -> Result<(), JobError> +where + SJob: StatefulJob, + Init: JobInitData, +{ + let step = &state.steps[0]; + ctx.progress(vec![JobReportUpdate::Message(format!( "Processing {}", step.file_path.materialized_path ))]); - let step_result = inner_process_step(is_background, step, data, &ctx).await; + let step_result = inner_process_step(state, &ctx).await; - ctx.progress(vec![JobReportUpdate::CompletedTaskCount(step_number + 1)]); + ctx.progress(vec![JobReportUpdate::CompletedTaskCount( + state.step_number + 1, + )]); step_result } -async fn inner_process_step( - is_background: bool, - step: &ThumbnailerJobStep, - data: &mut ThumbnailerJobState, +async fn inner_process_step( + state: &mut JobState, ctx: &WorkerContext, -) -> Result<(), JobError> { +) -> Result<(), JobError> +where + SJob: StatefulJob, + Init: JobInitData, +{ + let ThumbnailerJobStep { file_path, kind } = &state.steps[0]; + let data = state + .data + .as_ref() + .expect("critical error: missing data on job state"); + // assemble the file path - let path = data.location_path.join(&MaterializedPath::from(( + let path = data.location_path.join(IsolatedFilePathData::from(( data.report.location_id, - &step.file_path.materialized_path, + file_path, ))); - trace!("image_file {:?}", step); + trace!("image_file {:?}", file_path); // get cas_id, if none found skip - let Some(cas_id) = &step.file_path.cas_id else { + let Some(cas_id) = &file_path.cas_id else { warn!( "skipping thumbnail generation for {}", - step.file_path.materialized_path + file_path.materialized_path ); return Ok(()); @@ -227,7 +242,7 @@ async fn inner_process_step( Err(e) if e.kind() == io::ErrorKind::NotFound => { info!("Writing {:?} to {:?}", path, output_path); - match step.kind { + match kind { ThumbnailerJobStepKind::Image => { if let Err(e) = generate_image_thumbnail(&path, &output_path).await { error!("Error generating thumb for image {:#?}", e); @@ -246,9 +261,14 @@ async fn inner_process_step( cas_id: cas_id.clone(), }); - data.report.thumbnails_created += 1; + state + .data + .as_mut() + .expect("critical error: missing data on job state") + .report + .thumbnails_created += 1; } - Err(e) => return Err(ThumbnailerError::from(e).into()), + Err(e) => return Err(ThumbnailerError::from(FileIOError::from((output_path, e))).into()), } Ok(()) diff --git a/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs b/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs index 1e5052c33..5669cc251 100644 --- a/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs +++ b/core/src/object/preview/thumbnail/shallow_thumbnailer_job.rs @@ -5,19 +5,19 @@ use crate::{ library::Library, location::{ file_path_helper::{ - ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_just_materialized_path_cas_id, get_existing_file_path_id, MaterializedPath, + ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_for_thumbnailer, IsolatedFilePathData, }, LocationId, }, prisma::{file_path, location, PrismaClient}, - util::db::uuid_to_bytes, + util::error::FileIOError, }; use std::{ collections::VecDeque, hash::Hash, - path::{Path, PathBuf, MAIN_SEPARATOR_STR}, + path::{Path, PathBuf}, }; use sd_file_ext::extensions::Extension; @@ -25,7 +25,6 @@ use sd_file_ext::extensions::Extension; use serde::{Deserialize, Serialize}; use tokio::fs; use tracing::info; -use uuid::Uuid; use super::{ finalize_thumbnailer, process_step, ThumbnailerError, ThumbnailerJobReport, @@ -80,7 +79,7 @@ impl StatefulJob for ShallowThumbnailerJob { let location_id = state.init.location.id; let location_path = PathBuf::from(&state.init.location.path); - let sub_path_id = if state.init.sub_path != Path::new("") { + let (path, iso_file_path) = if state.init.sub_path != Path::new("") { let full_path = ensure_sub_path_is_in_location(&location_path, &state.init.sub_path) .await .map_err(ThumbnailerError::from)?; @@ -88,35 +87,42 @@ impl StatefulJob for ShallowThumbnailerJob { .await .map_err(ThumbnailerError::from)?; - get_existing_file_path_id( - &MaterializedPath::new(location_id, &location_path, &full_path, true) - .map_err(ThumbnailerError::from)?, + let sub_iso_file_path = + IsolatedFilePathData::new(location_id, &location_path, &full_path, true) + .map_err(ThumbnailerError::from)?; + + ensure_file_path_exists( + &state.init.sub_path, + &sub_iso_file_path, db, + ThumbnailerError::SubPathNotFound, ) - .await - .map_err(ThumbnailerError::from)? - .expect("Sub path should already exist in the database") + .await?; + + (full_path, sub_iso_file_path) } else { - get_existing_file_path_id( - &MaterializedPath::new(location_id, &location_path, &location_path, true) + ( + location_path.to_path_buf(), + IsolatedFilePathData::new(location_id, &location_path, &location_path, true) .map_err(ThumbnailerError::from)?, - db, ) - .await - .map_err(ThumbnailerError::from)? - .expect("Location root path should already exist in the database") }; - info!("Searching for images in location {location_id} at parent directory with id {sub_path_id}"); + info!( + "Searching for images in location {location_id} at path {}", + path.display() + ); // create all necessary directories if they don't exist - fs::create_dir_all(&thumbnail_dir).await?; + fs::create_dir_all(&thumbnail_dir) + .await + .map_err(|e| FileIOError::from((&thumbnail_dir, e)))?; // query database for all image files in this location that need thumbnails let image_files = get_files_by_extensions( db, location_id, - sub_path_id, + &iso_file_path, &FILTERED_IMAGE_EXTENSIONS, ThumbnailerJobStepKind::Image, ) @@ -129,7 +135,7 @@ impl StatefulJob for ShallowThumbnailerJob { let video_files = get_files_by_extensions( db, location_id, - sub_path_id, + &iso_file_path, &FILTERED_VIDEO_EXTENSIONS, ThumbnailerJobStepKind::Video, ) @@ -154,16 +160,11 @@ impl StatefulJob for ShallowThumbnailerJob { location_path, report: ThumbnailerJobReport { location_id, - materialized_path: if state.init.sub_path != Path::new("") { - // SAFETY: We know that the sub_path is a valid UTF-8 string because we validated it before - state.init.sub_path.to_str().unwrap().to_string() - } else { - MAIN_SEPARATOR_STR.to_string() - }, + path, thumbnails_created: 0, }, }); - state.steps = all_files; + state.steps.extend(all_files); Ok(()) } @@ -173,17 +174,7 @@ impl StatefulJob for ShallowThumbnailerJob { ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - process_step( - false, // On shallow thumbnailer, we want to show thumbnails ASAP - state.step_number, - &state.steps[0], - state - .data - .as_mut() - .expect("critical error: missing data on job state"), - ctx, - ) - .await + process_step(state, ctx).await } async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { @@ -200,7 +191,7 @@ impl StatefulJob for ShallowThumbnailerJob { async fn get_files_by_extensions( db: &PrismaClient, location_id: LocationId, - parent_id: Uuid, + parent_isolated_file_path_data: &IsolatedFilePathData<'_>, extensions: &[Extension], kind: ThumbnailerJobStepKind, ) -> Result, JobError> { @@ -209,9 +200,13 @@ async fn get_files_by_extensions( .find_many(vec![ file_path::location_id::equals(location_id), file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()), - file_path::parent_id::equals(Some(uuid_to_bytes(parent_id))), + file_path::materialized_path::equals( + parent_isolated_file_path_data + .materialized_path_for_children() + .expect("sub path iso_file_path must be a directory"), + ), ]) - .select(file_path_just_materialized_path_cas_id::select()) + .select(file_path_for_thumbnailer::select()) .exec() .await? .into_iter() diff --git a/core/src/object/preview/thumbnail/thumbnailer_job.rs b/core/src/object/preview/thumbnail/thumbnailer_job.rs index 87648ebb8..2edb9191f 100644 --- a/core/src/object/preview/thumbnail/thumbnailer_job.rs +++ b/core/src/object/preview/thumbnail/thumbnailer_job.rs @@ -4,10 +4,11 @@ use crate::{ }, library::Library, location::file_path_helper::{ - ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_just_materialized_path_cas_id, MaterializedPath, + ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + file_path_for_thumbnailer, IsolatedFilePathData, }, prisma::{file_path, location, PrismaClient}, + util::error::FileIOError, }; use std::{collections::VecDeque, hash::Hash, path::PathBuf}; @@ -33,7 +34,6 @@ pub struct ThumbnailerJob {} pub struct ThumbnailerJobInit { pub location: location::Data, pub sub_path: Option, - pub background: bool, } impl Hash for ThumbnailerJobInit { @@ -73,7 +73,7 @@ impl StatefulJob for ThumbnailerJob { let location_id = state.init.location.id; let location_path = PathBuf::from(&state.init.location.path); - let materialized_path = if let Some(ref sub_path) = state.init.sub_path { + let (path, iso_file_path) = if let Some(ref sub_path) = state.init.sub_path { let full_path = ensure_sub_path_is_in_location(&location_path, sub_path) .await .map_err(ThumbnailerError::from)?; @@ -81,22 +81,38 @@ impl StatefulJob for ThumbnailerJob { .await .map_err(ThumbnailerError::from)?; - MaterializedPath::new(location_id, &location_path, &full_path, true) - .map_err(ThumbnailerError::from)? + let sub_iso_file_path = + IsolatedFilePathData::new(location_id, &location_path, &full_path, true) + .map_err(ThumbnailerError::from)?; + + ensure_file_path_exists( + sub_path, + &sub_iso_file_path, + db, + ThumbnailerError::SubPathNotFound, + ) + .await?; + + (full_path, sub_iso_file_path) } else { - MaterializedPath::new(location_id, &location_path, &location_path, true) - .map_err(ThumbnailerError::from)? + ( + location_path.to_path_buf(), + IsolatedFilePathData::new(location_id, &location_path, &location_path, true) + .map_err(ThumbnailerError::from)?, + ) }; - info!("Searching for images in location {location_id} at directory {materialized_path}"); + info!("Searching for images in location {location_id} at directory {iso_file_path}"); // create all necessary directories if they don't exist - fs::create_dir_all(&thumbnail_dir).await?; + fs::create_dir_all(&thumbnail_dir) + .await + .map_err(|e| FileIOError::from((&thumbnail_dir, e)))?; // query database for all image files in this location that need thumbnails let image_files = get_files_by_extensions( db, - &materialized_path, + &iso_file_path, &FILTERED_IMAGE_EXTENSIONS, ThumbnailerJobStepKind::Image, ) @@ -108,7 +124,7 @@ impl StatefulJob for ThumbnailerJob { // query database for all video files in this location that need thumbnails let video_files = get_files_by_extensions( db, - &materialized_path, + &iso_file_path, &FILTERED_VIDEO_EXTENSIONS, ThumbnailerJobStepKind::Video, ) @@ -133,11 +149,11 @@ impl StatefulJob for ThumbnailerJob { location_path, report: ThumbnailerJobReport { location_id, - materialized_path: materialized_path.into(), + path, thumbnails_created: 0, }, }); - state.steps = all_files; + state.steps.extend(all_files); Ok(()) } @@ -147,17 +163,7 @@ impl StatefulJob for ThumbnailerJob { ctx: WorkerContext, state: &mut JobState, ) -> Result<(), JobError> { - process_step( - state.init.background, - state.step_number, - &state.steps[0], - state - .data - .as_mut() - .expect("critical error: missing data on job state"), - ctx, - ) - .await + process_step(state, ctx).await } async fn finalize(&mut self, ctx: WorkerContext, state: &mut JobState) -> JobResult { @@ -173,18 +179,22 @@ impl StatefulJob for ThumbnailerJob { async fn get_files_by_extensions( db: &PrismaClient, - materialized_path: &MaterializedPath<'_>, + iso_file_path: &IsolatedFilePathData<'_>, extensions: &[Extension], kind: ThumbnailerJobStepKind, ) -> Result, JobError> { Ok(db .file_path() .find_many(vec![ - file_path::location_id::equals(materialized_path.location_id()), + file_path::location_id::equals(iso_file_path.location_id()), file_path::extension::in_vec(extensions.iter().map(ToString::to_string).collect()), - file_path::materialized_path::starts_with(materialized_path.into()), + file_path::materialized_path::starts_with( + iso_file_path + .materialized_path_for_children() + .expect("sub path iso_file_path must be a directory"), + ), ]) - .select(file_path_just_materialized_path_cas_id::select()) + .select(file_path_for_thumbnailer::select()) .exec() .await? .into_iter() diff --git a/core/src/object/validation/validator_job.rs b/core/src/object/validation/validator_job.rs index c499b33f5..af1880e46 100644 --- a/core/src/object/validation/validator_job.rs +++ b/core/src/object/validation/validator_job.rs @@ -3,12 +3,13 @@ use crate::{ JobError, JobInitData, JobReportUpdate, JobResult, JobState, StatefulJob, WorkerContext, }, library::Library, - location::file_path_helper::{file_path_for_object_validator, MaterializedPath}, + location::file_path_helper::{file_path_for_object_validator, IsolatedFilePathData}, prisma::{file_path, location}, sync, + util::error::FileIOError, }; -use std::{collections::VecDeque, path::PathBuf}; +use std::path::PathBuf; use serde::{Deserialize, Serialize}; use serde_json::json; @@ -55,18 +56,17 @@ impl StatefulJob for ObjectValidatorJob { async fn init(&self, ctx: WorkerContext, state: &mut JobState) -> Result<(), JobError> { let Library { db, .. } = &ctx.library; - state.steps = db - .file_path() - .find_many(vec![ - file_path::location_id::equals(state.init.location_id), - file_path::is_dir::equals(false), - file_path::integrity_checksum::equals(None), - ]) - .select(file_path_for_object_validator::select()) - .exec() - .await? - .into_iter() - .collect::>(); + state.steps.extend( + db.file_path() + .find_many(vec![ + file_path::location_id::equals(state.init.location_id), + file_path::is_dir::equals(false), + file_path::integrity_checksum::equals(None), + ]) + .select(file_path_for_object_validator::select()) + .exec() + .await?, + ); let location = db .location() @@ -93,18 +93,23 @@ impl StatefulJob for ObjectValidatorJob { let Library { db, sync, .. } = &ctx.library; let file_path = &state.steps[0]; - let data = state.data.as_ref().expect("fatal: missing job state"); + let data = state + .data + .as_ref() + .expect("critical error: missing data on job state"); // this is to skip files that already have checksums // i'm unsure what the desired behaviour is in this case // we can also compare old and new checksums here // This if is just to make sure, we already queried objects where integrity_checksum is null if file_path.integrity_checksum.is_none() { - let checksum = file_checksum(data.root_path.join(&MaterializedPath::from(( + let path = data.root_path.join(IsolatedFilePathData::from(( file_path.location.id, - &file_path.materialized_path, - )))) - .await?; + file_path, + ))); + let checksum = file_checksum(&path) + .await + .map_err(|e| FileIOError::from((path, e)))?; sync.write_op( db, diff --git a/core/src/util/error.rs b/core/src/util/error.rs new file mode 100644 index 000000000..1116a4f2f --- /dev/null +++ b/core/src/util/error.rs @@ -0,0 +1,24 @@ +use std::{io, path::Path}; + +use thiserror::Error; + +#[derive(Debug, Error)] +#[error("error accessing path: '{}'", .path.display())] +pub struct FileIOError { + path: Box, + #[source] + source: io::Error, +} + +impl> From<(P, io::Error)> for FileIOError { + fn from((path, source): (P, io::Error)) -> Self { + Self { + path: path.as_ref().into(), + source, + } + } +} + +#[derive(Debug, Error)] +#[error("received a non UTF-8 path: ", .0.to_string_lossy())] +pub struct NonUtf8PathError(pub Box); diff --git a/core/src/util/mod.rs b/core/src/util/mod.rs index c5fe7f4f7..e535dd3b0 100644 --- a/core/src/util/mod.rs +++ b/core/src/util/mod.rs @@ -1,5 +1,6 @@ pub mod db; #[cfg(debug_assertions)] pub mod debug_initializer; +pub mod error; pub mod migrator; pub mod seeder; diff --git a/core/src/util/seeder.rs b/core/src/util/seeder.rs index 2f4823141..ebb57ca26 100644 --- a/core/src/util/seeder.rs +++ b/core/src/util/seeder.rs @@ -1,17 +1,13 @@ use crate::{ - location::indexer::{ - rules::{IndexerRule, ParametersPerKind, RuleKind}, - IndexerError, - }, + location::indexer::rules::{IndexerRule, IndexerRuleError, ParametersPerKind, RuleKind}, prisma::PrismaClient, }; -use globset::Glob; use thiserror::Error; #[derive(Error, Debug)] pub enum SeederError { #[error("Failed to run indexer rules seeder: {0}")] - IndexerRules(#[from] IndexerError), + IndexerRules(#[from] IndexerRuleError), #[error("An error occurred with the database while applying migrations: {0}")] DatabaseError(#[from] prisma_client_rust::QueryError), } @@ -25,106 +21,105 @@ pub async fn indexer_rules_seeder(client: &PrismaClient) -> Result<(), SeederErr // https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants#FILE_ATTRIBUTE_SYSTEM "No OS protected".to_string(), true, - ParametersPerKind::RejectFilesByGlob([ - vec![ - "**/.spacedrive", - ], - // Globset, even on Windows, requires the use of / as a separator - // https://github.com/github/gitignore/blob/main/Global/Windows.gitignore - // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file - #[cfg(target_os = "windows")] - vec![ - // Windows thumbnail cache files - "**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}", - // Dump file - "**/*.stackdump", - // Folder config file - "**/[Dd]esktop.ini", - // Recycle Bin used on file shares - "**/$RECYCLE.BIN", - // Chkdsk recovery directory - "**/FOUND.[0-9][0-9][0-9]", - // Reserved names - "**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}", - "**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}.*", - // User special files - "C:/Users/*/NTUSER.DAT*", - "C:/Users/*/ntuser.dat*", - "C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}", - // User special folders (most of these the user dont even have permission to access) - "C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings}", - // System special folders - "C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}", - // NTFS internal dir, can exists on any drive - "[A-Z]:/System Volume Information", - // System special files - "C:/{config,pagefile,hiberfil}.sys", - // Windows can create a swapfile on any drive - "[A-Z]:/swapfile.sys", - "C:/DumpStack.log.tmp", - ], - // https://github.com/github/gitignore/blob/main/Global/macOS.gitignore - // https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14 - #[cfg(any(target_os = "ios", target_os = "macos"))] - vec![ - "**/.{DS_Store,AppleDouble,LSOverride}", - // Icon must end with two \r - "**/Icon\r\r", - // Thumbnails - "**/._*", - ], - #[cfg(target_os = "macos")] - vec![ - "/{System,Network,Library,Applications}", - "/Users/*/{Library,Applications}", - // Files that might appear in the root of a volume - "**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}", - // Directories potentially created on remote AFP share - "**/.{AppleDB,AppleDesktop,apdisk}", - "**/{Network Trash Folder,Temporary Items}", - ], - // https://github.com/github/gitignore/blob/main/Global/Linux.gitignore - #[cfg(target_os = "linux")] - vec![ - "**/*~", - // temporary files which can be created if a process still has a handle open of a deleted file - "**/.fuse_hidden*", - // KDE directory preferences - "**/.directory", - // Linux trash folder which might appear on any partition or disk - "**/.Trash-*", - // .nfs files are created when an open file is removed but is still being accessed - "**/.nfs*", - ], - #[cfg(target_os = "android")] - vec![ - "**/.nomedia", - "**/.thumbnails", - ], - // https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout - // https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard - #[cfg(target_family = "unix")] - vec![ - // Directories containing unix memory/device mapped files/dirs - "/{dev,sys,proc}", - // Directories containing special files for current running programs - "/{run,var,boot}", - // ext2-4 recovery directory - "**/lost+found", - ], - ] - .into_iter() - .flatten() - .map(Glob::new) - .collect::, _>>().map_err(IndexerError::GlobBuilderError)?), + ParametersPerKind::new_reject_files_by_glob([ + vec![ + "**/.spacedrive", + ], + // Globset, even on Windows, requires the use of / as a separator + // https://github.com/github/gitignore/blob/main/Global/Windows.gitignore + // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file + #[cfg(target_os = "windows")] + vec![ + // Windows thumbnail cache files + "**/{Thumbs.db,Thumbs.db:encryptable,ehthumbs.db,ehthumbs_vista.db}", + // Dump file + "**/*.stackdump", + // Folder config file + "**/[Dd]esktop.ini", + // Recycle Bin used on file shares + "**/$RECYCLE.BIN", + // Chkdsk recovery directory + "**/FOUND.[0-9][0-9][0-9]", + // Reserved names + "**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}", + "**/{CON,PRN,AUX,NUL,COM0,COM1,COM2,COM3,COM4,COM5,COM6,COM7,COM8,COM9,LPT0,LPT1,LPT2,LPT3,LPT4,LPT5,LPT6,LPT7,LPT8,LPT9}.*", + // User special files + "C:/Users/*/NTUSER.DAT*", + "C:/Users/*/ntuser.dat*", + "C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}", + // User special folders (most of these the user dont even have permission to access) + "C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings}", + // System special folders + "C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}", + // NTFS internal dir, can exists on any drive + "[A-Z]:/System Volume Information", + // System special files + "C:/{config,pagefile,hiberfil}.sys", + // Windows can create a swapfile on any drive + "[A-Z]:/swapfile.sys", + "C:/DumpStack.log.tmp", + ], + // https://github.com/github/gitignore/blob/main/Global/macOS.gitignore + // https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW14 + #[cfg(any(target_os = "ios", target_os = "macos"))] + vec![ + "**/.{DS_Store,AppleDouble,LSOverride}", + // Icon must end with two \r + "**/Icon\r\r", + // Thumbnails + "**/._*", + ], + #[cfg(target_os = "macos")] + vec![ + "/{System,Network,Library,Applications}", + "/Users/*/{Library,Applications}", + // Files that might appear in the root of a volume + "**/.{DocumentRevisions-V100,fseventsd,Spotlight-V100,TemporaryItems,Trashes,VolumeIcon.icns,com.apple.timemachine.donotpresent}", + // Directories potentially created on remote AFP share + "**/.{AppleDB,AppleDesktop,apdisk}", + "**/{Network Trash Folder,Temporary Items}", + ], + // https://github.com/github/gitignore/blob/main/Global/Linux.gitignore + #[cfg(target_os = "linux")] + vec![ + "**/*~", + // temporary files which can be created if a process still has a handle open of a deleted file + "**/.fuse_hidden*", + // KDE directory preferences + "**/.directory", + // Linux trash folder which might appear on any partition or disk + "**/.Trash-*", + // .nfs files are created when an open file is removed but is still being accessed + "**/.nfs*", + ], + #[cfg(target_os = "android")] + vec![ + "**/.nomedia", + "**/.thumbnails", + ], + // https://en.wikipedia.org/wiki/Unix_filesystem#Conventional_directory_layout + // https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard + #[cfg(target_family = "unix")] + vec![ + // Directories containing unix memory/device mapped files/dirs + "/{dev,sys,proc}", + // Directories containing special files for current running programs + "/{run,var,boot}", + // ext2-4 recovery directory + "**/lost+found", + ], + ] + .into_iter() + .flatten() + )? ), IndexerRule::new( RuleKind::RejectFilesByGlob, "No Hidden".to_string(), true, - ParametersPerKind::RejectFilesByGlob(vec![ - Glob::new("**/.*").map_err(IndexerError::GlobBuilderError)? - ]), + ParametersPerKind::new_reject_files_by_glob( + ["**/.*"], + )? ), IndexerRule::new( RuleKind::AcceptIfChildrenDirectoriesArePresent, @@ -138,10 +133,9 @@ pub async fn indexer_rules_seeder(client: &PrismaClient) -> Result<(), SeederErr RuleKind::AcceptFilesByGlob, "Only Images".to_string(), false, - ParametersPerKind::AcceptFilesByGlob(vec![Glob::new( - "*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}", - ) - .map_err(IndexerError::GlobBuilderError)?]), + ParametersPerKind::new_accept_files_by_globs_str( + ["*.{avif,bmp,gif,ico,jpeg,jpg,png,svg,tif,tiff,webp}"], + )?, ), ] { rule.save(client).await?; diff --git a/crates/file-ext/Cargo.toml b/crates/file-ext/Cargo.toml index 6a40d934e..3d24f347f 100644 --- a/crates/file-ext/Cargo.toml +++ b/crates/file-ext/Cargo.toml @@ -14,7 +14,7 @@ serde_json = "1.0.85" strum = { version = "0.24", features = ["derive"] } strum_macros = "0.24" tokio = { workspace = true, features = ["fs", "rt", "io-util"] } -specta.workspace = true +specta = { workspace = true } [dev-dependencies] tokio = { workspace = true, features = ["fs", "rt", "macros"] } diff --git a/interface/app/$libraryId/Explorer/Inspector/index.tsx b/interface/app/$libraryId/Explorer/Inspector/index.tsx index e7046484a..42c60e59e 100644 --- a/interface/app/$libraryId/Explorer/Inspector/index.tsx +++ b/interface/app/$libraryId/Explorer/Inspector/index.tsx @@ -3,7 +3,15 @@ import clsx from 'clsx'; import dayjs from 'dayjs'; import { Barcode, CircleWavyCheck, Clock, Cube, Hash, Link, Lock, Snowflake } from 'phosphor-react'; import { ComponentProps, useEffect, useState } from 'react'; -import { ExplorerItem, ObjectKind, Tag, formatBytes, isPath, useLibraryQuery } from '@sd/client'; +import { + ExplorerItem, + Location, + ObjectKind, + Tag, + formatBytes, + isPath, + useLibraryQuery +} from '@sd/client'; import { Button, Divider, DropdownMenu, Tooltip, tw } from '@sd/ui'; import { useExplorerStore } from '~/hooks/useExplorerStore'; import { TOP_BAR_HEIGHT } from '../../TopBar'; @@ -57,7 +65,7 @@ export const Inspector = ({ data, context, ...elementProps }: Props) => { enabled: readyToFetch && objectData?.id !== undefined }); - const item = data?.item; + const { item } = data; // map array of numbers into string const pub_id = fullObjectData?.data?.pub_id.map((n: number) => n.toString(16)).join(''); @@ -68,174 +76,150 @@ export const Inspector = ({ data, context, ...elementProps }: Props) => { className="custom-scroll inspector-scroll h-screen w-full overflow-x-hidden pb-4 pl-1.5 pr-1" style={{ paddingTop: TOP_BAR_HEIGHT + 12 }} > - {data && ( - <> - {explorerStore.layoutMode !== 'media' && ( -
- -
+ {explorerStore.layoutMode !== 'media' && ( +
-

- {filePathData?.name} - {filePathData?.extension && `.${filePathData.extension}`} -

- {objectData && ( -
- - - + > + +
+ )} +
+

+ {filePathData?.name} + {filePathData?.extension && `.${filePathData.extension}`} +

+ {objectData && ( +
+ + + - - - - - - -
- )} - {isPath(data) && } - - -
- - {isDir ? 'Folder' : ObjectKind[objectData?.kind || 0]} + + + + + + +
+ )} + {isPath(data) && context && 'path' in context && ( + + URI + + {`${context.path}/${data.item.materialized_path}${data.item.name}${ + data.item.is_dir ? `.${data.item.extension}` : '/' + }`} + + + )} + + +
+ {isDir ? 'Folder' : ObjectKind[objectData?.kind || 0]} + {filePathData?.extension && {filePathData.extension}} + {tags.data?.map((tag) => ( + + + {tag.name} - {filePathData?.extension && ( - {filePathData.extension} - )} - {tags?.data?.map((tag) => ( - - - {tag.name} - - - ))} - {objectData?.id && ( - Add Tag} - side="left" - sideOffset={5} - alignOffset={-10} - > - - - )} -
-
- - - - - Size - - {formatBytes(Number(filePathData?.size_in_bytes || 0))} - - - {fullObjectData.data?.media_data?.duration_seconds && ( - - - Duration - - {fullObjectData.data.media_data.duration_seconds} - - - )} - - - - - - - Created - - {dayjs(item?.date_created).format('MMM Do YYYY')} - - - - - - Indexed - - {dayjs(filePathData?.date_indexed).format('MMM Do YYYY')} - - - - - - {!isDir && objectData && ( - <> - - - - - - - Content ID - {filePathData?.cas_id || ''} - - - {filePathData?.integrity_checksum && ( - - - - - Checksum - - - {filePathData?.integrity_checksum} - - - - )} - {pub_id && ( - - - - - Object ID - - {pub_id} - - - )} - - + ))} + {objectData?.id && ( + Add Tag} + side="left" + sideOffset={5} + alignOffset={-10} + > + + )}
- - )} + + + + + + Size + + {formatBytes(Number(filePathData?.size_in_bytes || 0))} + + + {fullObjectData.data?.media_data?.duration_seconds && ( + + + Duration + {fullObjectData.data.media_data.duration_seconds} + + )} + + + + + + + Created + {dayjs(item.date_created).format('MMM Do YYYY')} + + + + + + Indexed + + {dayjs(filePathData?.date_indexed).format('MMM Do YYYY')} + + + + + + {!isDir && objectData && ( + <> + + + + + + + Content ID + {filePathData?.cas_id || ''} + + + {filePathData?.integrity_checksum && ( + + + + Checksum + {filePathData?.integrity_checksum} + + + )} + {pub_id && ( + + + + Object ID + {pub_id} + + + )} + + + )} +
); }; - -const PathDisplay = ({ data }: { data: Extract }) => { - const location = useLibraryQuery(['locations.get', data.item.location_id]); - - return ( - <> - {location.data && ( - - URI - {`${location.data.path}/${data.item.materialized_path}`} - - )} - - ); -}; diff --git a/interface/app/$libraryId/Explorer/View.tsx b/interface/app/$libraryId/Explorer/View.tsx index 25f29749e..5a7016a19 100644 --- a/interface/app/$libraryId/Explorer/View.tsx +++ b/interface/app/$libraryId/Explorer/View.tsx @@ -1,7 +1,7 @@ +import { ExplorerItem, isPath, useLibraryContext } from '@sd/client'; import clsx from 'clsx'; import { HTMLAttributes, PropsWithChildren, memo, useRef } from 'react'; import { createSearchParams, useMatch, useNavigate } from 'react-router-dom'; -import { ExplorerItem, isPath, useLibraryContext } from '@sd/client'; import { getExplorerStore, useExplorerStore } from '~/hooks/useExplorerStore'; import { TOP_BAR_HEIGHT } from '../TopBar'; import DismissibleNotice from './DismissibleNotice'; @@ -32,7 +32,7 @@ export const ViewItem = ({ if (isPath(data) && data.item.is_dir) { navigate({ pathname: `/${library.uuid}/location/${getItemFilePath(data)?.location_id}`, - search: createSearchParams({ path: data.item.materialized_path }).toString() + search: createSearchParams({ path: `${data.item.materialized_path}${data.item.name}/` }).toString() }); getExplorerStore().selectedRowIndex = null; diff --git a/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx b/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx index 89ab29cca..3dad0f4ac 100644 --- a/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx +++ b/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx @@ -103,6 +103,7 @@ const StatusColors: Record = { Running: 'text-blue-500', Failed: 'text-red-500', Completed: 'text-green-500', + CompletedWithErrors: 'text-orange-500', Queued: 'text-yellow-500', Canceled: 'text-gray-500', Paused: 'text-gray-500' diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 62b67c463..c08738f1b 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -126,8 +126,6 @@ export type LibraryConfigWrapped = { uuid: string; config: LibraryConfig } */ export type Params = "Standard" | "Hardened" | "Paranoid" -export type Tag = { id: number; pub_id: number[]; name: string | null; color: string | null; total_objects: number | null; redundancy_goal: number | null; date_created: string; date_modified: string } - /** * `LocationUpdateArgs` is the argument received from the client using `rspc` to update a location. * It contains the id of the location to be updated, possible a name to change the current location's name @@ -140,12 +138,16 @@ export type LocationUpdateArgs = { id: number; name: string | null; generate_pre export type SetFavoriteArgs = { id: number; favorite: boolean } +export type FilePath = { id: number; pub_id: number[]; is_dir: boolean; cas_id: string | null; integrity_checksum: string | null; location_id: number; materialized_path: string; name: string; extension: string; size_in_bytes: string; inode: number[]; device: number[]; object_id: number | null; key_id: number | null; date_created: string; date_modified: string; date_indexed: string } + /** * Represents the operating system which the remote peer is running. * This is not used internally and predominantly is designed to be used for display purposes by the embedding application. */ export type OperatingSystem = "Windows" | "Linux" | "MacOS" | "Ios" | "Android" | { Other: string } +export type RuleKind = "AcceptFilesByGlob" | "RejectFilesByGlob" | "AcceptIfChildrenDirectoriesArePresent" | "RejectIfChildrenDirectoriesArePresent" + /** * This is a stored key, and can be freely written to the database. * @@ -155,28 +157,20 @@ export type StoredKey = { uuid: string; version: StoredKeyVersion; key_type: Sto export type OnboardingConfig = { password: Protected; algorithm: Algorithm; hashing_algorithm: HashingAlgorithm } -export type Object = { id: number; pub_id: number[]; kind: number; key_id: number | null; hidden: boolean; favorite: boolean; important: boolean; has_thumbnail: boolean; has_thumbstrip: boolean; has_video_preview: boolean; ipfs_id: string | null; note: string | null; date_created: string; date_accessed: string | null } +export type FileDecryptorJobInit = { location_id: number; path_id: number; mount_associated_key: boolean; output_path: string | null; password: string | null; save_to_library: boolean | null } export type Volume = { name: string; mount_point: string; total_capacity: string; available_capacity: string; is_removable: boolean; disk_type: string | null; file_system: string | null; is_root_filesystem: boolean } export type TagCreateArgs = { name: string; color: string } -/** - * `IndexerRuleCreateArgs` is the argument received from the client using rspc to create a new indexer rule. - * Note that `parameters` field **MUST** be a JSON object serialized to bytes. - * - * In case of `RuleKind::AcceptFilesByGlob` or `RuleKind::RejectFilesByGlob`, it will be a - * single string containing a glob pattern. - * - * In case of `RuleKind::AcceptIfChildrenDirectoriesArePresent` or `RuleKind::RejectIfChildrenDirectoriesArePresent` the - * `parameters` field must be a vector of strings containing the names of the directories. - */ -export type IndexerRuleCreateArgs = { kind: RuleKind; name: string; dry_run: boolean; parameters: string[] } - export type EditLibraryArgs = { id: string; name: string | null; description: string | null } export type LightScanArgs = { location_id: number; sub_path: string } +export type JobStatus = "Queued" | "Running" | "Completed" | "Canceled" | "Failed" | "Paused" | "CompletedWithErrors" + +export type FileEraserJobInit = { location_id: number; path_id: number; passes: string } + /** * This should be used for providing a nonce to encrypt/decrypt functions. * @@ -186,10 +180,6 @@ export type Nonce = { XChaCha20Poly1305: number[] } | { Aes256Gcm: number[] } export type UnlockKeyManagerArgs = { password: Protected; secret_key: Protected } -export type OptionalRange = { from: T | null; to: T | null } - -export type FileEncryptorJobInit = { location_id: number; path_id: number; key_uuid: string; algorithm: Algorithm; metadata: boolean; preview_media: boolean; output_path: string | null } - export type InvalidateOperationEvent = { key: string; arg: any; result: any | null } export type Location = { id: number; pub_id: number[]; node_id: number; name: string; path: string; total_capacity: number | null; available_capacity: number | null; is_archived: boolean; generate_preview_media: boolean; sync_preview_media: boolean; hidden: boolean; date_created: string } @@ -205,26 +195,22 @@ export type CRDTOperation = { node: string; timestamp: number; id: string; typ: */ export type Salt = number[] -export type FileCutterJobInit = { source_location_id: number; source_path_id: number; target_location_id: number; target_path: string } +export type Ordering = { name: boolean } -export type JobStatus = "Queued" | "Running" | "Completed" | "Canceled" | "Failed" | "Paused" +export type FileCopierJobInit = { source_location_id: number; source_path_id: number; target_location_id: number; target_path: string; target_file_name_suffix: string | null } -export type ObjectValidatorArgs = { id: number; path: string } +export type IndexerRule = { id: number; kind: number; name: string; default: boolean; parameters: number[]; date_created: string; date_modified: string } -export type FileEraserJobInit = { location_id: number; path_id: number; passes: string } - -export type MediaData = { id: number; pixel_width: number | null; pixel_height: number | null; longitude: number | null; latitude: number | null; fps: number | null; capture_device_make: string | null; capture_device_model: string | null; capture_device_software: string | null; duration_seconds: number | null; codecs: string | null; streams: number | null } - -export type FileDeleterJobInit = { location_id: number; path_id: number } - -export type FilePath = { id: number; pub_id: number[]; is_dir: boolean; cas_id: string | null; integrity_checksum: string | null; location_id: number; materialized_path: string; name: string; extension: string; size_in_bytes: string; inode: number[]; device: number[]; object_id: number | null; parent_id: number[] | null; key_id: number | null; date_created: string; date_modified: string; date_indexed: string } +export type IdentifyUniqueFilesArgs = { id: number; path: string } /** * These are all possible algorithms that can be used for encryption and decryption */ export type Algorithm = "XChaCha20Poly1305" | "Aes256Gcm" -export type JobReport = { id: string; name: string; action: string | null; data: number[] | null; metadata: any | null; is_background: boolean; created_at: string | null; started_at: string | null; completed_at: string | null; parent_id: string | null; status: JobStatus; task_count: number; completed_task_count: number; message: string } +export type JobReport = { id: string; name: string; action: string | null; data: number[] | null; metadata: any | null; is_background: boolean; errors_text: string[]; created_at: string | null; started_at: string | null; completed_at: string | null; parent_id: string | null; status: JobStatus; task_count: number; completed_task_count: number; message: string } + +export type Object = { id: number; pub_id: number[]; kind: number; key_id: number | null; hidden: boolean; favorite: boolean; important: boolean; has_thumbnail: boolean; has_thumbstrip: boolean; has_video_preview: boolean; ipfs_id: string | null; note: string | null; date_created: string; date_accessed: string | null } export type OwnedOperationItem = { id: any; data: OwnedOperationData } @@ -247,17 +233,33 @@ export type NodeState = ({ id: string; name: string; p2p_port: number | null; p2 export type RelationOperationData = "Create" | { Update: { field: string; value: any } } | "Delete" +export type FileDeleterJobInit = { location_id: number; path_id: number } + export type Node = { id: number; pub_id: number[]; name: string; platform: number; version: string | null; last_seen: string; timezone: string | null; date_created: string } +/** + * `IndexerRuleCreateArgs` is the argument received from the client using rspc to create a new indexer rule. + * Note that `parameters` field **MUST** be a JSON object serialized to bytes. + * + * In case of `RuleKind::AcceptFilesByGlob` or `RuleKind::RejectFilesByGlob`, it will be a + * single string containing a glob pattern. + * + * In case of `RuleKind::AcceptIfChildrenDirectoriesArePresent` or `RuleKind::RejectIfChildrenDirectoriesArePresent` the + * `parameters` field must be a vector of strings containing the names of the directories. + */ +export type IndexerRuleCreateArgs = { kind: RuleKind; name: string; dry_run: boolean; parameters: string[] } + export type SharedOperationCreateData = { u: { [key: string]: any } } | "a" export type KeyAddArgs = { algorithm: Algorithm; hashing_algorithm: HashingAlgorithm; key: Protected; library_sync: boolean; automount: boolean } export type BuildInfo = { version: string; commit: string } +export type MediaData = { id: number; pixel_width: number | null; pixel_height: number | null; longitude: number | null; latitude: number | null; fps: number | null; capture_device_make: string | null; capture_device_model: string | null; capture_device_software: string | null; duration_seconds: number | null; codecs: string | null; streams: number | null } + export type SetNoteArgs = { id: number; note: string | null } -export type RuleKind = "AcceptFilesByGlob" | "RejectFilesByGlob" | "AcceptIfChildrenDirectoriesArePresent" | "RejectIfChildrenDirectoriesArePresent" +export type FileEncryptorJobInit = { location_id: number; path_id: number; key_uuid: string; algorithm: Algorithm; metadata: boolean; preview_media: boolean; output_path: string | null } /** * `LocationCreateArgs` is the argument received from the client using `rspc` to create a new location. @@ -273,15 +275,19 @@ export type ExplorerItem = { type: "Path"; has_thumbnail: boolean; item: FilePat */ export type LibraryArgs = { library_id: string; arg: T } -export type IdentifyUniqueFilesArgs = { id: number; path: string } +export type FileCutterJobInit = { source_location_id: number; source_path_id: number; target_location_id: number; target_path: string } export type OwnedOperationData = { Create: { [key: string]: any } } | { CreateMany: { values: ([any, { [key: string]: any }])[]; skip_duplicates: boolean } } | { Update: { [key: string]: any } } | "Delete" export type SharedOperationData = SharedOperationCreateData | { field: string; value: any } | null +export type SearchData = { cursor: number[] | null; items: T[] } + +export type OptionalRange = { from: T | null; to: T | null } + export type TagUpdateArgs = { id: number; name: string | null; color: string | null } -export type FileCopierJobInit = { source_location_id: number; source_path_id: number; target_location_id: number; target_path: string; target_file_name_suffix: string | null } +export type ObjectValidatorArgs = { id: number; path: string } export type TagAssignArgs = { object_id: number; tag_id: number; unassign: boolean } @@ -294,7 +300,9 @@ export type HashingAlgorithm = { name: "Argon2id"; params: Params } | { name: "B export type RenameFileArgs = { location_id: number; file_name: string; new_file_name: string } -export type FilePathWithObject = { id: number; pub_id: number[]; is_dir: boolean; cas_id: string | null; integrity_checksum: string | null; location_id: number; materialized_path: string; name: string; extension: string; size_in_bytes: string; inode: number[]; device: number[]; object_id: number | null; parent_id: number[] | null; key_id: number | null; date_created: string; date_modified: string; date_indexed: string; object: Object | null } +export type Tag = { id: number; pub_id: number[]; name: string | null; color: string | null; total_objects: number | null; redundancy_goal: number | null; date_created: string; date_modified: string } + +export type FilePathWithObject = { id: number; pub_id: number[]; is_dir: boolean; cas_id: string | null; integrity_checksum: string | null; location_id: number; materialized_path: string; name: string; extension: string; size_in_bytes: string; inode: number[]; device: number[]; object_id: number | null; key_id: number | null; date_created: string; date_modified: string; date_indexed: string; object: Object | null } export type LocationWithIndexerRules = { id: number; pub_id: number[]; node_id: number; name: string; path: string; total_capacity: number | null; available_capacity: number | null; is_archived: boolean; generate_preview_media: boolean; sync_preview_media: boolean; hidden: boolean; date_created: string; indexer_rules: ({ indexer_rule: IndexerRule })[] } @@ -303,24 +311,16 @@ export type LocationWithIndexerRules = { id: number; pub_id: number[]; node_id: */ export type LibraryConfig = { name: string; description: string } -export type SearchData = { cursor: number[] | null; items: T[] } - export type CreateLibraryArgs = { name: string } -export type FileDecryptorJobInit = { location_id: number; path_id: number; mount_associated_key: boolean; output_path: string | null; password: string | null; save_to_library: boolean | null } - export type AutomountUpdateArgs = { uuid: string; status: boolean } export type Protected = T -export type Ordering = { name: boolean } - export type Statistics = { id: number; date_captured: string; total_object_count: number; library_db_size: string; total_bytes_used: string; total_bytes_capacity: string; total_unique_bytes: string; total_bytes_free: string; preview_media_bytes: string } export type RestoreBackupArgs = { password: Protected; secret_key: Protected; path: string } -export type IndexerRule = { id: number; kind: number; name: string; default: boolean; parameters: number[]; date_created: string; date_modified: string } - export type RelationOperation = { relation_item: string; relation_group: string; relation: string; data: RelationOperationData } /**