From b2bee3d2232f09b14f3b023e37d55f3d2b5d2de6 Mon Sep 17 00:00:00 2001 From: "Ericson \"Fogo\" Soares" Date: Mon, 8 Apr 2024 11:32:44 -0300 Subject: [PATCH] [ENG-1628] Write new indexer with the task system (#2161) * Moving file-path-helper to a sub-crate on core * Parallel walker tested and working * Change inner core crate names to sd-core-* naming scheme * Moving stuff around * Save and Update tasks for the new indexer job * Some initial drafts on the new job system * More drafts on job system * Changing walker task to a more explicit state machine Also drafting more of job system * More drafting on job resume * Completed the draft on job system inner workings * New job context abstraction to decouple library stuff from job system * Properly use composition on task dispatcher * First draft on indexer job * Job serialization * Handling ancestors in the distributed walker for indexer * Saving computed directories sizes on a location to db * Enable a way to check if jobs are running in a location * Progress reporting on indexer job * Reorganizing modules * Shallow indexer * Rust fmt * Attempting windows CI fix * Attempting windows CI fix again * Attempting windows CI fix again --- Cargo.lock | Bin 278949 -> 280450 bytes Cargo.toml | 6 + apps/cli/Cargo.toml | 1 + apps/desktop/src-tauri/Cargo.toml | 6 +- .../modules/sd-core/android/crate/Cargo.toml | 1 + apps/mobile/modules/sd-core/core/Cargo.toml | 1 + .../modules/sd-core/ios/crate/Cargo.toml | 1 + apps/server/Cargo.toml | 1 + core/Cargo.toml | 38 +- .../crates}/file-path-helper/Cargo.toml | 10 +- .../crates}/file-path-helper/README.md | 0 .../src/isolated_file_path_data.rs | 125 +- .../crates}/file-path-helper/src/lib.rs | 193 +-- core/crates/heavy-lifting/Cargo.toml | 50 + core/crates/heavy-lifting/src/indexer/job.rs | 738 ++++++++ core/crates/heavy-lifting/src/indexer/mod.rs | 542 ++++++ .../heavy-lifting/src/indexer/shallow.rs | 261 +++ .../heavy-lifting/src/indexer/tasks/mod.rs | 3 + 
.../heavy-lifting/src/indexer/tasks/saver.rs | 218 +++ .../src/indexer/tasks/updater.rs | 236 +++ .../heavy-lifting/src/indexer/tasks/walker.rs | 1516 +++++++++++++++++ .../heavy-lifting/src/job_system/error.rs | 61 + .../heavy-lifting/src/job_system/job.rs | 784 +++++++++ .../heavy-lifting/src/job_system/mod.rs | 313 ++++ .../heavy-lifting/src/job_system/report.rs | 359 ++++ .../heavy-lifting/src/job_system/runner.rs | 535 ++++++ .../heavy-lifting/src/job_system/store.rs | 219 +++ .../heavy-lifting/src/job_system/utils.rs | 16 + core/crates/heavy-lifting/src/lib.rs | 71 + core/crates/indexer-rules/Cargo.toml | 30 + .../indexer-rules/src/lib.rs} | 527 +++--- .../indexer-rules/src}/seed.rs | 29 +- core/crates/indexer-rules/src/serde_impl.rs | 214 +++ core/crates/prisma-helpers/Cargo.toml | 16 + core/crates/prisma-helpers/src/lib.rs | 226 +++ core/crates/sync/Cargo.toml | 1 + core/crates/sync/src/manager.rs | 7 + core/crates/sync/tests/lib.rs | 1 + core/prisma/schema.prisma | 9 +- core/src/api/ephemeral_files.rs | 12 +- core/src/api/files.rs | 21 +- core/src/api/jobs.rs | 4 +- core/src/api/labels.rs | 11 +- core/src/api/locations.rs | 25 +- core/src/api/search/file_path.rs | 3 +- core/src/api/search/mod.rs | 7 +- core/src/cloud/sync/ingest.rs | 6 +- core/src/cloud/sync/receive.rs | 18 +- core/src/cloud/sync/send.rs | 11 +- core/src/custom_uri/mod.rs | 10 +- core/src/library/config.rs | 4 +- core/src/library/library.rs | 4 +- core/src/library/manager/error.rs | 9 +- core/src/library/manager/mod.rs | 12 +- core/src/location/error.rs | 3 +- core/src/location/indexer/mod.rs | 26 +- core/src/location/indexer/old_indexer_job.rs | 9 +- core/src/location/indexer/old_shallow.rs | 7 +- core/src/location/indexer/old_walk.rs | 45 +- core/src/location/manager/mod.rs | 7 +- core/src/location/manager/watcher/ios.rs | 5 +- core/src/location/manager/watcher/macos.rs | 5 +- core/src/location/manager/watcher/utils.rs | 16 +- core/src/location/manager/watcher/windows.rs | 3 +- 
core/src/location/mod.rs | 65 +- core/src/location/non_indexed.rs | 19 +- core/src/object/fs/error.rs | 3 +- core/src/object/fs/mod.rs | 4 +- core/src/object/fs/old_copy.rs | 3 +- core/src/object/fs/old_cut.rs | 3 +- core/src/object/fs/old_erase.rs | 3 +- core/src/object/media/media_data_extractor.rs | 4 +- .../object/media/old_media_processor/job.rs | 8 +- .../object/media/old_media_processor/mod.rs | 4 +- .../media/old_media_processor/shallow.rs | 8 +- core/src/object/mod.rs | 6 - core/src/object/old_file_identifier/mod.rs | 10 +- .../old_file_identifier_job.rs | 6 +- .../src/object/old_file_identifier/shallow.rs | 6 +- core/src/object/validation/mod.rs | 2 +- .../object/validation/old_validator_job.rs | 6 +- core/src/old_job/report.rs | 18 +- crates/ai/Cargo.toml | 8 +- crates/ai/src/old_image_labeler/old_actor.rs | 3 +- crates/ai/src/old_image_labeler/process.rs | 4 +- crates/cloud-api/Cargo.toml | 22 +- crates/p2p-block/Cargo.toml | 10 +- crates/p2p-proto/Cargo.toml | 7 +- crates/p2p-tunnel/Cargo.toml | 2 + crates/prisma-cli/Cargo.toml | 1 + crates/prisma/Cargo.toml | 1 + crates/sync/example/Cargo.toml | 11 +- crates/task-system/Cargo.toml | 6 +- crates/task-system/src/lib.rs | 11 +- crates/task-system/src/message.rs | 4 +- crates/task-system/src/system.rs | 101 +- crates/task-system/src/task.rs | 180 +- crates/task-system/src/worker/mod.rs | 16 +- crates/task-system/src/worker/run.rs | 2 +- crates/task-system/src/worker/runner.rs | 14 +- crates/task-system/tests/common/actors.rs | 11 +- crates/task-system/tests/common/jobs.rs | 14 +- crates/task-system/tests/integration_test.rs | 8 +- crates/utils/Cargo.toml | 1 + crates/utils/src/db.rs | 17 + packages/client/src/core.ts | 2 +- 106 files changed, 7450 insertions(+), 832 deletions(-) rename {crates => core/crates}/file-path-helper/Cargo.toml (71%) rename {crates => core/crates}/file-path-helper/README.md (100%) rename {crates => core/crates}/file-path-helper/src/isolated_file_path_data.rs (89%) rename {crates 
=> core/crates}/file-path-helper/src/lib.rs (78%) create mode 100644 core/crates/heavy-lifting/Cargo.toml create mode 100644 core/crates/heavy-lifting/src/indexer/job.rs create mode 100644 core/crates/heavy-lifting/src/indexer/mod.rs create mode 100644 core/crates/heavy-lifting/src/indexer/shallow.rs create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/mod.rs create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/saver.rs create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/updater.rs create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/walker.rs create mode 100644 core/crates/heavy-lifting/src/job_system/error.rs create mode 100644 core/crates/heavy-lifting/src/job_system/job.rs create mode 100644 core/crates/heavy-lifting/src/job_system/mod.rs create mode 100644 core/crates/heavy-lifting/src/job_system/report.rs create mode 100644 core/crates/heavy-lifting/src/job_system/runner.rs create mode 100644 core/crates/heavy-lifting/src/job_system/store.rs create mode 100644 core/crates/heavy-lifting/src/job_system/utils.rs create mode 100644 core/crates/heavy-lifting/src/lib.rs create mode 100644 core/crates/indexer-rules/Cargo.toml rename core/{src/location/indexer/rules/mod.rs => crates/indexer-rules/src/lib.rs} (67%) rename core/{src/location/indexer/rules => crates/indexer-rules/src}/seed.rs (94%) create mode 100644 core/crates/indexer-rules/src/serde_impl.rs create mode 100644 core/crates/prisma-helpers/Cargo.toml create mode 100644 core/crates/prisma-helpers/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index eb65a08d492e2d45b5ffa2e507444184333f1c66..8c376e82493db6232dcc5d92e741f31ec0b9653f 100644 GIT binary patch delta 653 zcmbV~PiWI{6vxT?P5UQBmen<_sEi_duyINMWEmcYCj~E}Q<&_~WWQh9Vz!l}&W#Px z&C@OpM*0--sNS}VEH@7#h=TvFir`>)@v`a&9S&v|Ptu))cl*BgK3~4?Jy{Uu?+bVC zanE6G1Rq_8IIfJsQEXo!3H(@wD5grB=zSf5YY`k8CYOgwnn}%qq7^AEsj{i4GNl8B z!F=9edY+cXsLC``R*QyNG?ap($htu_#gGf?;DAwN*wVNZJ{|=rWOxC?TEbNy(#f$& 
zn@M%M=8Wr7m05LWJJ{cXj8|@fcp&UKANEw4J~bm*HM3EhsAO~TzN|x~P^nE&HqC6w zzG*S%A7!7txZp$1d+oz-&V4}4{rIj=rg}P=$i>MiT(Y@?xVQ-kyv>ttY;Hn-tYS?X z4r^p{clP@%Rv-J_=fJ>kenH(!NyVEhSTmP z(u-@ma1t|Z5b)MRNaA7}y7AowlJ4n@2JfzONvzL8%KboiTx~)GH@4syKHY)zp3_74 zDiPR?aYRHOf=}>vL0QmyY}b8JEES0zQ|Gm;w!#Q2GH2mJ$OTw_o!Dr39C7_yRbWkNg4{ lx2X66`vJFP{{q$rm!tXuW|t)U0&cfD2Lm`%w}D#&0ql&@GP?i( diff --git a/Cargo.toml b/Cargo.toml index c335d2aa9..437a2dbf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,9 +59,12 @@ chrono = "0.4.31" clap = "4.4.7" futures = "0.3.30" futures-concurrency = "7.4.3" +globset = "^0.4.13" hex = "0.4.3" http = "0.2.9" image = "0.24.7" +itertools = "0.12.0" +lending-stream = "1.0.0" normpath = "1.1.1" once_cell = "1.18.0" pin-project-lite = "0.2.13" @@ -84,6 +87,9 @@ uhlc = "=0.5.2" uuid = "1.5.0" webp = "0.2.6" +[workspace.dev-dependencies] +tracing-test = { version = "^0.2.4" } + [patch.crates-io] # Proper IOS Support if-watch = { git = "https://github.com/oscartbeaumont/if-watch.git", rev = "a92c17d3f85c1c6fb0afeeaf6c2b24d0b147e8c3" } diff --git a/apps/cli/Cargo.toml b/apps/cli/Cargo.toml index 58596b805..1b436601f 100644 --- a/apps/cli/Cargo.toml +++ b/apps/cli/Cargo.toml @@ -6,6 +6,7 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] +# Spacedrive Sub-crates sd-crypto = { path = "../../crates/crypto" } anyhow = { workspace = true } diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml index 872838386..73b053dc0 100644 --- a/apps/desktop/src-tauri/Cargo.toml +++ b/apps/desktop/src-tauri/Cargo.toml @@ -9,7 +9,11 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] -sd-core = { path = "../../../core", features = ["ffmpeg", "heif"] } +# Spacedrive Sub-crates +sd-core = { path = "../../../core", features = [ + "ffmpeg", + "heif", +] } sd-fda = { path = "../../../crates/fda" } sd-prisma = { path = "../../../crates/prisma" } diff --git a/apps/mobile/modules/sd-core/android/crate/Cargo.toml 
b/apps/mobile/modules/sd-core/android/crate/Cargo.toml index 128d98e26..31a6cfb93 100644 --- a/apps/mobile/modules/sd-core/android/crate/Cargo.toml +++ b/apps/mobile/modules/sd-core/android/crate/Cargo.toml @@ -11,6 +11,7 @@ edition = { workspace = true } crate-type = ["cdylib"] [dependencies] +# Spacedrive Sub-crates sd-mobile-core = { path = "../../core" } # FFI diff --git a/apps/mobile/modules/sd-core/core/Cargo.toml b/apps/mobile/modules/sd-core/core/Cargo.toml index a32031d2f..25685d0f0 100644 --- a/apps/mobile/modules/sd-core/core/Cargo.toml +++ b/apps/mobile/modules/sd-core/core/Cargo.toml @@ -7,6 +7,7 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] +# Spacedrive Sub-crates sd-core = { path = "../../../../../core", features = [ "mobile", ], default-features = false } diff --git a/apps/mobile/modules/sd-core/ios/crate/Cargo.toml b/apps/mobile/modules/sd-core/ios/crate/Cargo.toml index e62a5ecaa..11ec5734c 100644 --- a/apps/mobile/modules/sd-core/ios/crate/Cargo.toml +++ b/apps/mobile/modules/sd-core/ios/crate/Cargo.toml @@ -14,4 +14,5 @@ edition = { workspace = true } crate-type = ["staticlib"] [dependencies] +# Spacedrive Sub-crates sd-mobile-core = { path = "../../core" } diff --git a/apps/server/Cargo.toml b/apps/server/Cargo.toml index 47fb96445..1ccf74706 100644 --- a/apps/server/Cargo.toml +++ b/apps/server/Cargo.toml @@ -11,6 +11,7 @@ assets = [] ai-models = ["sd-core/ai"] [dependencies] +# Spacedrive Sub-crates sd-core = { path = "../../core", features = [ "ffmpeg", "heif", diff --git a/core/Cargo.toml b/core/Cargo.toml index 3b5c0c1b1..4aa43b6ad 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,11 +19,18 @@ ai = ["dep:sd-ai"] crypto = ["dep:sd-crypto"] [dependencies] -# Sub-crates -sd-cache = { path = "../crates/cache" } +# Inner Core Sub-crates +sd-core-file-path-helper = { path = "./crates/file-path-helper" } +sd-core-heavy-lifting = { path = "./crates/heavy-lifting" } +sd-core-indexer-rules = { path = 
"./crates/indexer-rules" } +sd-core-prisma-helpers = { path = "./crates/prisma-helpers" } sd-core-sync = { path = "./crates/sync" } -# sd-cloud-api = { path = "../crates/cloud-api" } -sd-file-path-helper = { path = "../crates/file-path-helper" } + +# Spacedrive Sub-crates +sd-actors = { version = "0.1.0", path = "../crates/actors" } +sd-ai = { path = "../crates/ai", optional = true } +sd-cache = { path = "../crates/cache" } +sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" } sd-crypto = { path = "../crates/crypto", features = [ "sys", "tokio", @@ -41,10 +48,8 @@ sd-p2p-block = { path = "../crates/p2p-block" } sd-p2p-proto = { path = "../crates/p2p-proto" } sd-p2p-tunnel = { path = "../crates/p2p-tunnel" } sd-prisma = { path = "../crates/prisma" } -sd-ai = { path = "../crates/ai", optional = true } sd-sync = { path = "../crates/sync" } sd-utils = { path = "../crates/utils" } -sd-cloud-api = { version = "0.1.0", path = "../crates/cloud-api" } # Workspace dependencies async-channel = { workspace = true } @@ -56,6 +61,7 @@ chrono = { workspace = true, features = ["serde"] } futures = { workspace = true } futures-concurrency = { workspace = true } image = { workspace = true } +itertools = { workspace = true } normpath = { workspace = true, features = ["localization"] } once_cell = { workspace = true } pin-project-lite = { workspace = true } @@ -63,6 +69,7 @@ prisma-client-rust = { workspace = true, features = ["rspc"] } regex = { workspace = true } reqwest = { workspace = true, features = ["json", "native-tls-vendored"] } rmp-serde = { workspace = true } +rmpv = { workspace = true } rspc = { workspace = true, features = [ "axum", "uuid", @@ -98,22 +105,25 @@ webp = { workspace = true } # Specific Core dependencies async-recursion = "1.0.5" async-stream = "0.3.5" +aws-sdk-s3 = { version = "1.5.0", features = ["behavior-version-latest"] } +aws-config = "1.0.3" +aws-credential-types = "1.0.3" +base91 = "0.1.0" bytes = "1.5.0" ctor = "0.2.5" directories = 
"5.0.1" flate2 = "1.0.28" -globset = { version = "^0.4.13", features = ["serde1"] } hostname = "0.3.1" http-body = "0.4.5" http-range = "0.1.5" +hyper = { version = "=0.14.28", features = ["http1", "server", "client"] } int-enum = "0.5.0" -itertools = "0.12.0" libc = "0.2.153" mini-moka = "0.10.2" notify = { git = "https://github.com/notify-rs/notify.git", rev = "c3929ed114fbb0bc7457a9a498260461596b00ca", default-features = false, features = [ "macos_fsevent", ] } -rmpv = { workspace = true } +rmp = "0.8.12" serde-hashkey = "0.4.5" serde_repr = "0.1" serde_with = "3.4.0" @@ -121,14 +131,7 @@ slotmap = "1.0.6" static_assertions = "1.1.0" sysinfo = "0.29.10" tar = "0.4.40" -aws-sdk-s3 = { version = "1.5.0", features = ["behavior-version-latest"] } -aws-config = "1.0.3" -aws-credential-types = "1.0.3" -base91 = "0.1.0" -sd-actors = { version = "0.1.0", path = "../crates/actors" } tower-service = "0.3.2" -hyper = { version = "=0.14.28", features = ["http1", "server", "client"] } -rmp = "0.8.12" # Override features of transitive dependencies [dependencies.openssl] @@ -151,5 +154,6 @@ icrate = { version = "0.1.0", features = [ ] } [dev-dependencies] -tracing-test = "^0.2.4" +tracing-test = { workspace.dev-dependencies = true } aovec = "1.1.0" +globset = { workspace = true } diff --git a/crates/file-path-helper/Cargo.toml b/core/crates/file-path-helper/Cargo.toml similarity index 71% rename from crates/file-path-helper/Cargo.toml rename to core/crates/file-path-helper/Cargo.toml index 3529f40cb..aceebb563 100644 --- a/crates/file-path-helper/Cargo.toml +++ b/core/crates/file-path-helper/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "sd-file-path-helper" +name = "sd-core-file-path-helper" version = "0.1.0" authors = ["Ericson Soares "] readme = "README.md" @@ -9,8 +9,12 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] -sd-prisma = { path = "../prisma" } -sd-utils = { path = "../utils" } +# Inner Core Sub-crates +sd-core-prisma-helpers = { 
path = "../prisma-helpers" } + +# Spacedrive Sub-crates +sd-prisma = { path = "../../../crates/prisma" } +sd-utils = { path = "../../../crates/utils" } chrono = { workspace = true, features = ["serde"] } prisma-client-rust = { workspace = true } diff --git a/crates/file-path-helper/README.md b/core/crates/file-path-helper/README.md similarity index 100% rename from crates/file-path-helper/README.md rename to core/crates/file-path-helper/README.md diff --git a/crates/file-path-helper/src/isolated_file_path_data.rs b/core/crates/file-path-helper/src/isolated_file_path_data.rs similarity index 89% rename from crates/file-path-helper/src/isolated_file_path_data.rs rename to core/crates/file-path-helper/src/isolated_file_path_data.rs index b82022124..21852fe18 100644 --- a/crates/file-path-helper/src/isolated_file_path_data.rs +++ b/core/crates/file-path-helper/src/isolated_file_path_data.rs @@ -1,3 +1,10 @@ +use sd_core_prisma_helpers::{ + file_path_for_file_identifier, file_path_for_media_processor, file_path_for_object_validator, + file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_handle_p2p_serve_file, + file_path_to_isolate, file_path_to_isolate_with_id, file_path_to_isolate_with_pub_id, + file_path_walker, file_path_with_object, +}; + use sd_prisma::prisma::{file_path, location}; use sd_utils::error::NonUtf8PathError; @@ -11,12 +18,7 @@ use std::{ use regex::RegexSet; use serde::{Deserialize, Serialize}; -use super::{ - file_path_for_file_identifier, file_path_for_media_processor, file_path_for_object_validator, - file_path_to_full_path, file_path_to_handle_custom_uri, file_path_to_handle_p2p_serve_file, - file_path_to_isolate, file_path_to_isolate_with_id, file_path_walker, file_path_with_object, - FilePathError, -}; +use super::FilePathError; static FORBIDDEN_FILE_NAMES: OnceLock = OnceLock::new(); @@ -30,7 +32,7 @@ pub struct IsolatedFilePathDataParts<'a> { relative_path: &'a str, } -#[derive(Serialize, Deserialize, Debug, Hash, Eq, 
PartialEq)] +#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq, Clone, Default)] #[non_exhaustive] pub struct IsolatedFilePathData<'a> { // WARN! These fields MUST NOT be changed outside the location module, that's why they have this visibility @@ -88,14 +90,22 @@ impl IsolatedFilePathData<'static> { } impl<'a> IsolatedFilePathData<'a> { - pub fn location_id(&self) -> location::id::Type { + #[must_use] + pub const fn location_id(&self) -> location::id::Type { self.location_id } + #[must_use] pub fn extension(&self) -> &str { self.extension.as_ref() } + #[must_use] + pub const fn is_dir(&self) -> bool { + self.is_dir + } + + #[must_use] pub fn is_root(&self) -> bool { self.is_dir && self.materialized_path == "/" @@ -103,6 +113,7 @@ impl<'a> IsolatedFilePathData<'a> { && self.relative_path.is_empty() } + #[must_use] pub fn to_parts(&self) -> IsolatedFilePathDataParts<'_> { IsolatedFilePathDataParts { location_id: self.location_id, @@ -114,6 +125,12 @@ impl<'a> IsolatedFilePathData<'a> { } } + /// Return the `IsolatedFilePath` for the parent of the current file or directory. + /// + /// # Panics + /// May panic if the materialized path was malformed, without a slash for the parent directory. + /// Considering that the parent can be just `/` for the root directory. 
+ #[must_use] pub fn parent(&'a self) -> Self { let (parent_path_str, name, relative_path) = if self.materialized_path == "/" { ("/", "", "") @@ -124,7 +141,7 @@ impl<'a> IsolatedFilePathData<'a> { .expect("malformed materialized path at `parent` method"); ( - &self.materialized_path[..last_slash_idx + 1], + &self.materialized_path[..=last_slash_idx], &self.materialized_path[last_slash_idx + 1..trailing_slash_idx], &self.materialized_path[1..trailing_slash_idx], ) @@ -159,6 +176,7 @@ impl<'a> IsolatedFilePathData<'a> { } } + #[must_use] pub fn full_name(&self) -> String { if self.extension.is_empty() { self.name.to_string() @@ -167,6 +185,7 @@ impl<'a> IsolatedFilePathData<'a> { } } + #[must_use] pub fn materialized_path_for_children(&self) -> Option { if self.materialized_path == "/" && self.name.is_empty() && self.is_dir { // We're at the root file_path @@ -186,19 +205,21 @@ impl<'a> IsolatedFilePathData<'a> { )); } - if let Some(last_dot_idx) = source.rfind('.') { - if last_dot_idx == 0 { - // The dot is the first character, so it's a hidden file - Ok((source, "")) - } else { - Ok((&source[..last_dot_idx], &source[last_dot_idx + 1..])) - } - } else { - // It's a file without extension - Ok((source, "")) - } + source.rfind('.').map_or_else( + || Ok((source, "")), // It's a file without extension + |last_dot_idx| { + if last_dot_idx == 0 { + // The dot is the first character, so it's a hidden file + Ok((source, "")) + } else { + Ok((&source[..last_dot_idx], &source[last_dot_idx + 1..])) + } + }, + ) } + #[allow(clippy::missing_panics_doc)] // Don't actually panic as the regexes are hardcoded + #[must_use] pub fn accept_file_name(name: &str) -> bool { let reg = { // Maybe we should enforce windows more restrictive rules on all platforms? 
@@ -224,6 +245,7 @@ impl<'a> IsolatedFilePathData<'a> { !reg.is_match(name) } + #[must_use] pub fn separate_path_name_and_extension_from_str( source: &'a str, is_dir: bool, @@ -253,20 +275,23 @@ impl<'a> IsolatedFilePathData<'a> { } else { let first_name_char_idx = source.rfind('/').unwrap_or(0) + 1; let end_idx = first_name_char_idx - 1; - if let Some(last_dot_relative_idx) = source[first_name_char_idx..].rfind('.') { - let last_dot_idx = first_name_char_idx + last_dot_relative_idx; - ( - &source[..end_idx], - Some(&source[first_name_char_idx..last_dot_idx]), - Some(&source[last_dot_idx + 1..]), - ) - } else { - ( - &source[..end_idx], - Some(&source[first_name_char_idx..]), - None, - ) - } + source[first_name_char_idx..].rfind('.').map_or_else( + || { + ( + &source[..end_idx], + Some(&source[first_name_char_idx..]), + None, + ) + }, + |last_dot_relative_idx| { + let last_dot_idx = first_name_char_idx + last_dot_relative_idx; + ( + &source[..end_idx], + Some(&source[first_name_char_idx..last_dot_idx]), + Some(&source[last_dot_idx + 1..]), + ) + }, + ) } } @@ -282,6 +307,7 @@ impl<'a> IsolatedFilePathData<'a> { .unwrap_or_default() } + #[must_use] pub fn from_db_data( location_id: location::id::Type, is_dir: bool, @@ -465,6 +491,7 @@ mod macros { impl_from_db!( file_path, file_path_to_isolate, + file_path_to_isolate_with_pub_id, file_path_walker, file_path_to_isolate_with_id, file_path_with_object @@ -514,19 +541,21 @@ pub fn extract_normalized_materialized_path_str( path: path.into(), })? 
.parent() - .map(|materialized_path| { - materialized_path - .to_str() - .map(|materialized_path_str| { - if !materialized_path_str.is_empty() { - format!("/{}/", materialized_path_str.replace('\\', "/")) - } else { - "/".to_string() - } - }) - .ok_or_else(|| NonUtf8PathError(path.into())) - }) - .unwrap_or_else(|| Ok("/".to_string())) + .map_or_else( + || Ok("/".to_string()), + |materialized_path| { + materialized_path + .to_str() + .map(|materialized_path_str| { + if materialized_path_str.is_empty() { + "/".to_string() + } else { + format!("/{}/", materialized_path_str.replace('\\', "/")) + } + }) + .ok_or_else(|| NonUtf8PathError(path.into())) + }, + ) .map_err(Into::into) } @@ -544,6 +573,7 @@ fn assemble_relative_path( } } +#[allow(clippy::missing_panics_doc)] // Don't actually panic as we check before `expect` pub fn join_location_relative_path( location_path: impl AsRef, relative_path: impl AsRef, @@ -561,6 +591,7 @@ pub fn join_location_relative_path( }) } +#[allow(clippy::missing_panics_doc)] // Don't actually panic as we check before `expect` pub fn push_location_relative_path( mut location_path: PathBuf, relative_path: impl AsRef, diff --git a/crates/file-path-helper/src/lib.rs b/core/crates/file-path-helper/src/lib.rs similarity index 78% rename from crates/file-path-helper/src/lib.rs rename to core/crates/file-path-helper/src/lib.rs index c38b1a2bd..ae6a3bbde 100644 --- a/crates/file-path-helper/src/lib.rs +++ b/core/crates/file-path-helper/src/lib.rs @@ -1,3 +1,32 @@ +#![warn( + clippy::all, + clippy::pedantic, + clippy::correctness, + clippy::perf, + clippy::style, + clippy::suspicious, + clippy::complexity, + clippy::nursery, + clippy::unwrap_used, + unused_qualifications, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unused_allocation, + clippy::unnecessary_cast, + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::dbg_macro, + 
clippy::deprecated_cfg_attr, + clippy::separated_literal_suffix, + deprecated +)] +#![forbid(deprecated_in_future)] +#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] + use sd_prisma::prisma::{file_path, location, PrismaClient}; use sd_utils::error::{FileIOError, NonUtf8PathError}; @@ -21,107 +50,6 @@ pub use isolated_file_path_data::{ IsolatedFilePathDataParts, }; -// File Path selectables! -file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id }); -file_path::select!(file_path_just_pub_id_materialized_path { - pub_id - materialized_path -}); -file_path::select!(file_path_for_file_identifier { - id - pub_id - materialized_path - date_created - is_dir - name - extension - object_id -}); -file_path::select!(file_path_for_object_validator { - pub_id - materialized_path - is_dir - name - extension - integrity_checksum -}); -file_path::select!(file_path_for_media_processor { - id - materialized_path - is_dir - name - extension - cas_id - object_id -}); -file_path::select!(file_path_to_isolate { - location_id - materialized_path - is_dir - name - extension -}); -file_path::select!(file_path_to_isolate_with_id { - id - location_id - materialized_path - is_dir - name - extension -}); -file_path::select!(file_path_walker { - pub_id - location_id - object_id - materialized_path - is_dir - name - extension - date_modified - inode - size_in_bytes_bytes - hidden -}); -file_path::select!(file_path_to_handle_custom_uri { - pub_id - materialized_path - is_dir - name - extension - location: select { - id - path - instance: select { - identity - remote_identity - } - } -}); -file_path::select!(file_path_to_handle_p2p_serve_file { - materialized_path - name - extension - is_dir // For isolated file path - location: select { - id - path - } -}); -file_path::select!(file_path_to_full_path { - id - materialized_path - is_dir - name - extension - location: select { - id - path - } -}); - -// File Path includes! 
-file_path::include!(file_path_with_object { object }); - #[derive(Clone, Copy, Debug, Serialize, Deserialize)] pub struct FilePathMetadata { pub inode: u64, @@ -140,8 +68,7 @@ pub fn path_is_hidden(path: impl AsRef, metadata: &Metadata) -> bool { .as_ref() .file_name() .and_then(OsStr::to_str) - .map(|s| s.starts_with('.')) - .unwrap_or_default() + .is_some_and(|s| s.starts_with('.')) { return true; } @@ -176,10 +103,8 @@ pub fn path_is_hidden(path: impl AsRef, metadata: &Metadata) -> bool { } impl FilePathMetadata { - pub async fn from_path( - path: impl AsRef, - metadata: &Metadata, - ) -> Result { + pub fn from_path(path: impl AsRef, metadata: &Metadata) -> Result { + let path = path.as_ref(); let inode = { #[cfg(target_family = "unix")] { @@ -188,13 +113,21 @@ impl FilePathMetadata { #[cfg(target_family = "windows")] { - get_inode_from_path(path.as_ref()).await? + use winapi_util::{file::information, Handle}; + + let info = tokio::task::block_in_place(|| { + Handle::from_path_any(path) + .and_then(|ref handle| information(handle)) + .map_err(|e| FileIOError::from((path, e))) + })?; + + info.file_index() } }; Ok(Self { inode, - hidden: path_is_hidden(path.as_ref(), metadata), + hidden: path_is_hidden(path, metadata), size_in_bytes: metadata.len(), created_at: metadata.created_or_now().into(), modified_at: metadata.modified_or_now().into(), @@ -242,6 +175,7 @@ pub enum FilePathError { InvalidFilenameAndExtension(String), } +#[must_use] pub fn filter_existing_file_path_params( IsolatedFilePathData { materialized_path, @@ -250,7 +184,7 @@ pub fn filter_existing_file_path_params( name, extension, .. 
- }: &IsolatedFilePathData, + }: &IsolatedFilePathData<'_>, ) -> Vec { vec![ file_path::location_id::equals(Some(*location_id)), @@ -294,9 +228,10 @@ pub fn loose_find_existing_file_path_params( ]) } +#[allow(clippy::missing_panics_doc)] // Don't actually panic pub async fn ensure_sub_path_is_in_location( - location_path: impl AsRef, - sub_path: impl AsRef, + location_path: impl AsRef + Send, + sub_path: impl AsRef + Send, ) -> Result { let mut sub_path = sub_path.as_ref(); let location_path = location_path.as_ref(); @@ -311,7 +246,9 @@ pub async fn ensure_sub_path_is_in_location( .expect("we just checked that it starts with the separator"); } - if !sub_path.starts_with(location_path) { + if sub_path.starts_with(location_path) { + Ok(sub_path.to_path_buf()) + } else { // If the sub_path doesn't start with the location_path, we have to check if it's a // materialized path received from the frontend, then we check if the full path exists let full_path = location_path.join(sub_path); @@ -324,24 +261,22 @@ pub async fn ensure_sub_path_is_in_location( }), Err(e) => Err(FileIOError::from((full_path, e)).into()), } - } else { - Ok(sub_path.to_path_buf()) } } pub async fn ensure_file_path_exists( - sub_path: impl AsRef, + sub_path: impl AsRef + Send, iso_file_path: &IsolatedFilePathData<'_>, db: &PrismaClient, - error_fn: impl FnOnce(Box) -> E, + error_fn: impl FnOnce(Box) -> E + Send, ) -> Result<(), E> where E: From, { - if !check_file_path_exists(iso_file_path, db).await? { - Err(error_fn(sub_path.as_ref().into())) - } else { + if check_file_path_exists(iso_file_path, db).await? { Ok(()) + } else { + Err(error_fn(sub_path.as_ref().into())) } } @@ -360,9 +295,10 @@ where .await? 
> 0) } +#[allow(clippy::missing_panics_doc)] // Don't actually panic pub async fn ensure_sub_path_is_directory( - location_path: impl AsRef, - sub_path: impl AsRef, + location_path: impl AsRef + Send, + sub_path: impl AsRef + Send, ) -> Result<(), FilePathError> { let mut sub_path = sub_path.as_ref(); @@ -410,7 +346,7 @@ pub async fn ensure_sub_path_is_directory( } } -#[allow(unused)] // TODO remove this annotation when we can use it on windows +#[must_use] pub fn get_inode(metadata: &Metadata) -> u64 { #[cfg(target_family = "unix")] { @@ -435,8 +371,7 @@ pub fn get_inode(metadata: &Metadata) -> u64 { } } -#[allow(unused)] -pub async fn get_inode_from_path(path: impl AsRef) -> Result { +pub async fn get_inode_from_path(path: impl AsRef + Send) -> Result { #[cfg(target_family = "unix")] { // TODO use this when it's stable and remove winapi-utils dependency @@ -451,9 +386,11 @@ pub async fn get_inode_from_path(path: impl AsRef) -> Result"] +license = { workspace = true } +repository = { workspace = true } +edition = { workspace = true } + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +# Inner Core Sub-crates +sd-core-file-path-helper = { path = "../file-path-helper" } +sd-core-indexer-rules = { path = "../indexer-rules" } +sd-core-prisma-helpers = { path = "../prisma-helpers" } +sd-core-sync = { path = "../sync" } + +# Sub-crates +sd-prisma = { path = "../../../crates/prisma" } +sd-sync = { path = "../../../crates/sync" } +sd-task-system = { path = "../../../crates/task-system" } +sd-utils = { path = "../../../crates/utils" } + + +async-channel = { workspace = true } +async-trait = { workspace = true } +chrono = { workspace = true, features = ["serde"] } +futures = { workspace = true } +futures-concurrency = { workspace = true } +globset = { workspace = true } +itertools = { workspace = true } +lending-stream = { workspace = true } +prisma-client-rust = { workspace = true } +rmp-serde = { workspace 
= true } +rmpv = { workspace = true } +rspc = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +specta = { workspace = true } +strum = { workspace = true, features = ["derive", "phf"] } +thiserror = { workspace = true } +tokio = { workspace = true, features = ["fs", "sync", "parking_lot"] } +tokio-stream = { workspace = true, features = ["fs"] } +tracing = { workspace = true } +uuid = { workspace = true, features = ["v4", "serde"] } + + +[dev-dependencies] +tempfile = { workspace = true } +tracing-test = { workspace.dev-dependencies = true } diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs new file mode 100644 index 000000000..d85f2fd32 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -0,0 +1,738 @@ +use crate::{ + indexer::BATCH_SIZE, + job_system::{ + job::{ + Job, JobContext, JobName, JobReturn, JobTaskDispatcher, ProgressUpdate, ReturnStatus, + }, + report::ReportOutputMetadata, + utils::cancel_pending_tasks, + SerializableJob, SerializedTasks, + }, + Error, NonCriticalJobError, +}; + +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_indexer_rules::{IndexerRule, IndexerRuler}; +use sd_core_prisma_helpers::location_with_indexer_rules; + +use sd_task_system::{ + AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId, + TaskOutput, TaskStatus, +}; +use sd_utils::db::maybe_missing; + +use std::{ + collections::{HashMap, HashSet}, + hash::{Hash, Hasher}, + mem, + path::PathBuf, + sync::Arc, + time::Duration, +}; + +use futures::{stream::FuturesUnordered, StreamExt}; +use futures_concurrency::future::TryJoin; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use tokio::time::Instant; +use tracing::warn; + +use super::{ + determine_initial_walk_path, remove_non_existing_file_paths, reverse_update_directories_sizes, + tasks::{ + saver::{SaveTask, 
SaveTaskOutput}, + updater::{UpdateTask, UpdateTaskOutput}, + walker::{WalkDirTask, WalkTaskOutput, WalkedEntry}, + }, + update_directory_sizes, update_location_size, IndexerError, IsoFilePathFactory, WalkerDBProxy, +}; + +#[derive(Debug)] +pub struct IndexerJob { + location: location_with_indexer_rules::Data, + sub_path: Option, + metadata: Metadata, + + iso_file_path_factory: IsoFilePathFactory, + indexer_ruler: IndexerRuler, + walker_root_path: Option>, + ancestors_needing_indexing: HashSet, + ancestors_already_indexed: HashSet>, + iso_paths_and_sizes: HashMap, u64>, + + errors: Vec, + + pending_tasks_on_resume: Vec>, + tasks_for_shutdown: Vec>>, +} + +impl Job for IndexerJob { + const NAME: JobName = JobName::Indexer; + + async fn run( + mut self, + dispatcher: JobTaskDispatcher, + ctx: impl JobContext, + ) -> Result { + let mut pending_running_tasks = FuturesUnordered::new(); + + self.init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) + .await?; + + if let Some(res) = self + .process_handles(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + return res; + } + + if !self.tasks_for_shutdown.is_empty() { + return Ok(ReturnStatus::Shutdown(self.serialize().await)); + } + + if !self.ancestors_needing_indexing.is_empty() { + let save_tasks = self + .ancestors_needing_indexing + .drain() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + let chunked_saves = chunk.collect::>(); + self.metadata.total_paths += chunked_saves.len() as u64; + self.metadata.total_save_steps += 1; + + SaveTask::new( + self.location.id, + self.location.pub_id.clone(), + chunked_saves, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + ) + }) + .collect::>(); + + pending_running_tasks.extend(dispatcher.dispatch_many(save_tasks).await); + + if let Some(res) = self + .process_handles(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + return res; + } + + if !self.tasks_for_shutdown.is_empty() { + return Ok(ReturnStatus::Shutdown(self.serialize().await)); + } 
+ } + + // From here onward, job will not be interrupted anymore + + let Self { + location, + mut metadata, + iso_file_path_factory, + walker_root_path, + iso_paths_and_sizes, + mut errors, + tasks_for_shutdown, + .. + } = self; + + if metadata.indexed_count > 0 || metadata.removed_count > 0 || metadata.updated_count > 0 { + let start_size_update_time = Instant::now(); + + update_directory_sizes(iso_paths_and_sizes, ctx.db(), ctx.sync()).await?; + + let root_path = walker_root_path.expect("must be set"); + if root_path != iso_file_path_factory.location_path { + reverse_update_directories_sizes( + &*root_path, + location.id, + &*iso_file_path_factory.location_path, + ctx.db(), + ctx.sync(), + &mut errors, + ) + .await?; + } + + update_location_size(location.id, ctx.db(), &ctx.query_invalidator()).await?; + + metadata.db_write_time += start_size_update_time.elapsed(); + } + + if metadata.indexed_count > 0 || metadata.removed_count > 0 { + ctx.invalidate_query("search.paths"); + } + + assert!( + tasks_for_shutdown.is_empty(), + "all tasks must be completed here" + ); + + Ok(ReturnStatus::Completed( + JobReturn::builder() + .with_metadata(metadata) + .with_non_critical_errors(errors) + .build(), + )) + } + + async fn resume_tasks( + &mut self, + dispatcher: &JobTaskDispatcher, + ctx: &impl JobContext, + SerializedTasks(serialized_tasks): SerializedTasks, + ) -> Result<(), Error> { + let location_id = self.location.id; + + self.pending_tasks_on_resume = dispatcher + .dispatch_many_boxed( + rmp_serde::from_slice::)>>(&serialized_tasks) + .map_err(IndexerError::from)? 
+ .into_iter() + .map(|(task_kind, task_bytes)| { + let indexer_ruler = self.indexer_ruler.clone(); + let iso_file_path_factory = self.iso_file_path_factory.clone(); + async move { + match task_kind { + TaskKind::Walk => WalkDirTask::deserialize( + &task_bytes, + ( + indexer_ruler.clone(), + WalkerDBProxy { + location_id, + db: Arc::clone(ctx.db()), + }, + iso_file_path_factory.clone(), + dispatcher.clone(), + ), + ) + .await + .map(IntoTask::into_task), + + TaskKind::Save => SaveTask::deserialize( + &task_bytes, + (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), + ) + .await + .map(IntoTask::into_task), + TaskKind::Update => UpdateTask::deserialize( + &task_bytes, + (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), + ) + .await + .map(IntoTask::into_task), + } + } + }) + .collect::>() + .try_join() + .await + .map_err(IndexerError::from)?, + ) + .await; + + Ok(()) + } +} + +impl IndexerJob { + pub fn new( + location: location_with_indexer_rules::Data, + sub_path: Option, + ) -> Result { + Ok(Self { + indexer_ruler: location + .indexer_rules + .iter() + .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) + .collect::, _>>() + .map(IndexerRuler::new)?, + iso_file_path_factory: IsoFilePathFactory { + location_id: location.id, + location_path: maybe_missing(&location.path, "location.path") + .map(PathBuf::from) + .map(Arc::new)?, + }, + walker_root_path: None, + ancestors_needing_indexing: HashSet::new(), + ancestors_already_indexed: HashSet::new(), + iso_paths_and_sizes: HashMap::new(), + location, + sub_path, + metadata: Metadata::default(), + errors: Vec::new(), + + pending_tasks_on_resume: Vec::new(), + tasks_for_shutdown: Vec::new(), + }) + } + + /// Process output of tasks, according to the downcasted output type + /// + /// # Panics + /// Will panic if another task type is added in the job, but this function wasn't updated to handle it + /// + async fn process_task_output( + &mut self, + task_id: TaskId, + any_task_output: Box, + job_ctx: &impl JobContext, + 
dispatcher: &JobTaskDispatcher, + ) -> Result>, IndexerError> { + if any_task_output.is::() { + return self + .process_walk_output( + *any_task_output + .downcast::() + .expect("just checked"), + job_ctx, + dispatcher, + ) + .await; + } else if any_task_output.is::() { + self.process_save_output( + *any_task_output + .downcast::() + .expect("just checked"), + job_ctx, + ); + } else if any_task_output.is::() { + self.process_update_output( + *any_task_output + .downcast::() + .expect("just checked"), + job_ctx, + ); + } else { + unreachable!("Unexpected task output type: "); + } + + self.metadata.completed_tasks += 1; + + job_ctx.progress(vec![ProgressUpdate::CompletedTaskCount( + self.metadata.completed_tasks, + )]); + + Ok(Vec::new()) + } + + async fn process_walk_output( + &mut self, + WalkTaskOutput { + to_create, + to_update, + to_remove, + accepted_ancestors, + errors, + directory_iso_file_path, + total_size, + mut handles, + scan_time, + }: WalkTaskOutput, + job_ctx: &impl JobContext, + dispatcher: &JobTaskDispatcher, + ) -> Result>, IndexerError> { + self.metadata.scan_read_time += scan_time; + + let (to_create_count, to_update_count) = (to_create.len(), to_update.len()); + + *self + .iso_paths_and_sizes + .entry(directory_iso_file_path) + .or_default() += total_size; + + for ancestor_iso_file_path in accepted_ancestors + .iter() + .map(|ancestor_entry| &ancestor_entry.iso_file_path) + { + if self + .iso_paths_and_sizes + .contains_key(ancestor_iso_file_path) + { + *self + .iso_paths_and_sizes + .get_mut(ancestor_iso_file_path) + .expect("we just checked") += total_size; + } else { + self.iso_paths_and_sizes + .insert(ancestor_iso_file_path.clone(), total_size); + } + } + + // First we add ancestors, filtering out ancestors already indexed in previous iterations + self.ancestors_needing_indexing + .extend(accepted_ancestors.into_iter().filter(|ancestor_entry| { + !self + .ancestors_already_indexed + .contains(&ancestor_entry.iso_file_path) + })); + + // Then 
we add new directories to be indexed as they can be received as ancestors in coming iterations + self.ancestors_already_indexed.extend( + to_create + .iter() + .filter(|&WalkedEntry { iso_file_path, .. }| iso_file_path.is_dir()) + .map(|WalkedEntry { iso_file_path, .. }| iso_file_path.clone()), + ); + + self.errors.extend(errors); + + let db_delete_time = Instant::now(); + self.metadata.removed_count += + remove_non_existing_file_paths(to_remove, job_ctx.db(), job_ctx.sync()).await?; + self.metadata.db_write_time += db_delete_time.elapsed(); + + let save_tasks = to_create + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + let chunked_saves = chunk.collect::>(); + self.metadata.total_paths += chunked_saves.len() as u64; + self.metadata.total_save_steps += 1; + + SaveTask::new( + self.location.id, + self.location.pub_id.clone(), + chunked_saves, + Arc::clone(job_ctx.db()), + Arc::clone(job_ctx.sync()), + ) + }) + .collect::>(); + + let update_tasks = to_update + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + let chunked_updates = chunk.collect::>(); + self.metadata.total_updated_paths += chunked_updates.len() as u64; + self.metadata.total_update_steps += 1; + + UpdateTask::new( + chunked_updates, + Arc::clone(job_ctx.db()), + Arc::clone(job_ctx.sync()), + ) + }) + .collect::>(); + + handles.extend(dispatcher.dispatch_many(save_tasks).await); + handles.extend(dispatcher.dispatch_many(update_tasks).await); + + self.metadata.total_tasks += handles.len() as u64; + + job_ctx.progress(vec![ + ProgressUpdate::TaskCount(handles.len() as u64), + ProgressUpdate::message(format!( + "Found {to_create_count} new files and {to_update_count} to update" + )), + ]); + + Ok(handles) + } + + fn process_save_output( + &mut self, + SaveTaskOutput { + saved_count, + save_duration, + }: SaveTaskOutput, + job_ctx: &impl JobContext, + ) { + self.metadata.indexed_count += saved_count; + self.metadata.db_write_time += save_duration; + + 
job_ctx.progress_msg(format!("Saved {saved_count} files")); + } + + fn process_update_output( + &mut self, + UpdateTaskOutput { + updated_count, + update_duration, + }: UpdateTaskOutput, + job_ctx: &impl JobContext, + ) { + self.metadata.updated_count += updated_count; + self.metadata.db_write_time += update_duration; + + job_ctx.progress_msg(format!("Updated {updated_count} files")); + } + + async fn process_handles( + &mut self, + pending_running_tasks: &mut FuturesUnordered>, + job_ctx: &impl JobContext, + dispatcher: &JobTaskDispatcher, + ) -> Option> { + while let Some(task) = pending_running_tasks.next().await { + match task { + Ok(TaskStatus::Done((task_id, TaskOutput::Out(out)))) => { + let more_handles = match self + .process_task_output(task_id, out, job_ctx, dispatcher) + .await + { + Ok(more_handles) => more_handles, + Err(e) => { + cancel_pending_tasks(&*pending_running_tasks).await; + + return Some(Err(e.into())); + } + }; + + pending_running_tasks.extend(more_handles); + } + + Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { + warn!("Task returned an empty output"); + } + + Ok(TaskStatus::Shutdown(task)) => { + self.tasks_for_shutdown.push(task); + } + + Ok(TaskStatus::Error(e)) => { + cancel_pending_tasks(&*pending_running_tasks).await; + + return Some(Err(e)); + } + + Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => { + cancel_pending_tasks(&*pending_running_tasks).await; + + return Some(Ok(ReturnStatus::Canceled)); + } + + Err(e) => { + cancel_pending_tasks(&*pending_running_tasks).await; + + return Some(Err(e.into())); + } + } + } + + None + } + + async fn init_or_resume( + &mut self, + pending_running_tasks: &mut FuturesUnordered>, + job_ctx: &impl JobContext, + dispatcher: &JobTaskDispatcher, + ) -> Result<(), IndexerError> { + // if we don't have any pending task, then this is a fresh job + if self.pending_tasks_on_resume.is_empty() { + let walker_root_path = Arc::new( + determine_initial_walk_path( + self.location.id, + 
&self.sub_path, + &*self.iso_file_path_factory.location_path, + job_ctx.db(), + ) + .await?, + ); + + pending_running_tasks.push( + dispatcher + .dispatch(WalkDirTask::new( + walker_root_path.as_ref(), + Arc::clone(&walker_root_path), + self.indexer_ruler.clone(), + self.iso_file_path_factory.clone(), + WalkerDBProxy { + location_id: self.location.id, + db: Arc::clone(job_ctx.db()), + }, + Some(dispatcher.clone()), + )?) + .await, + ); + + self.walker_root_path = Some(walker_root_path); + } else { + pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume)); + } + + Ok(()) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct Metadata { + db_write_time: Duration, + scan_read_time: Duration, + total_tasks: u64, + completed_tasks: u64, + total_paths: u64, + total_updated_paths: u64, + total_save_steps: u64, + total_update_steps: u64, + indexed_count: u64, + updated_count: u64, + removed_count: u64, +} + +impl From for ReportOutputMetadata { + fn from(value: Metadata) -> Self { + Self::Metrics(HashMap::from([ + ("db_write_time".into(), json!(value.db_write_time)), + ("scan_read_time".into(), json!(value.scan_read_time)), + ("total_tasks".into(), json!(value.total_tasks)), + ("total_paths".into(), json!(value.total_paths)), + ( + "total_updated_paths".into(), + json!(value.total_updated_paths), + ), + ("total_save_steps".into(), json!(value.total_save_steps)), + ("total_update_steps".into(), json!(value.total_update_steps)), + ("indexed_count".into(), json!(value.indexed_count)), + ("updated_count".into(), json!(value.updated_count)), + ("removed_count".into(), json!(value.removed_count)), + ])) + } +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +enum TaskKind { + Walk, + Save, + Update, +} + +#[derive(Serialize, Deserialize)] +struct SaveState { + location: location_with_indexer_rules::Data, + sub_path: Option, + metadata: Metadata, + + iso_file_path_factory: IsoFilePathFactory, + indexer_ruler_bytes: Vec, + 
walker_root_path: Option>, + ancestors_needing_indexing: HashSet, + ancestors_already_indexed: HashSet>, + paths_and_sizes: HashMap, u64>, + + errors: Vec, + + tasks_for_shutdown_bytes: Option, +} + +impl SerializableJob for IndexerJob { + async fn serialize(self) -> Result>, rmp_serde::encode::Error> { + let Self { + location, + sub_path, + metadata, + iso_file_path_factory, + indexer_ruler, + walker_root_path, + ancestors_needing_indexing, + ancestors_already_indexed, + iso_paths_and_sizes: paths_and_sizes, + errors, + tasks_for_shutdown, + .. + } = self; + + rmp_serde::to_vec_named(&SaveState { + location, + sub_path, + metadata, + iso_file_path_factory, + indexer_ruler_bytes: indexer_ruler.serialize().await?, + walker_root_path, + ancestors_needing_indexing, + ancestors_already_indexed, + paths_and_sizes, + tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named( + &tasks_for_shutdown + .into_iter() + .map(|task| async move { + if task + .is::>( + ) { + task + .downcast::>( + ) + .expect("just checked") + .serialize() + .await + .map(|bytes| (TaskKind::Walk, bytes)) + } else if task.is::() { + task.downcast::() + .expect("just checked") + .serialize() + .await + .map(|bytes| (TaskKind::Save, bytes)) + } else if task.is::() { + task.downcast::() + .expect("just checked") + .serialize() + .await + .map(|bytes| (TaskKind::Update, bytes)) + } else { + unreachable!("Unexpected task type") + } + }) + .collect::>() + .try_join() + .await?, + )?)), + errors, + }) + .map(Some) + } + + async fn deserialize( + serialized_job: &[u8], + _: &impl JobContext, + ) -> Result)>, rmp_serde::decode::Error> { + let SaveState { + location, + sub_path, + metadata, + iso_file_path_factory, + indexer_ruler_bytes, + walker_root_path, + ancestors_needing_indexing, + ancestors_already_indexed, + paths_and_sizes, + errors, + tasks_for_shutdown_bytes, + } = rmp_serde::from_slice::(serialized_job)?; + + let indexer_ruler = IndexerRuler::deserialize(&indexer_ruler_bytes)?; + + 
Ok(Some(( + Self { + location, + sub_path, + metadata, + iso_file_path_factory, + indexer_ruler, + walker_root_path, + ancestors_needing_indexing, + ancestors_already_indexed, + iso_paths_and_sizes: paths_and_sizes, + errors, + pending_tasks_on_resume: Vec::new(), + tasks_for_shutdown: Vec::new(), + }, + tasks_for_shutdown_bytes, + ))) + } +} + +impl Hash for IndexerJob { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + if let Some(ref sub_path) = self.sub_path { + sub_path.hash(state); + } + } +} diff --git a/core/crates/heavy-lifting/src/indexer/mod.rs b/core/crates/heavy-lifting/src/indexer/mod.rs new file mode 100644 index 000000000..12d27b337 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/mod.rs @@ -0,0 +1,542 @@ +use crate::NonCriticalJobError; + +use sd_core_file_path_helper::{ + ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, + FilePathError, IsolatedFilePathData, +}; +use sd_core_indexer_rules::IndexerRuleError; +use sd_core_prisma_helpers::{ + file_path_pub_and_cas_ids, file_path_to_isolate_with_pub_id, file_path_walker, +}; +use sd_core_sync::Manager as SyncManager; + +use sd_prisma::{ + prisma::{file_path, location, PrismaClient, SortOrder}, + prisma_sync, +}; +use sd_sync::OperationFactory; +use sd_utils::{ + db::{size_in_bytes_from_db, size_in_bytes_to_db, MissingFieldError}, + error::{FileIOError, NonUtf8PathError}, + from_bytes_to_uuid, msgpack, +}; + +use std::{ + collections::{HashMap, HashSet}, + hash::BuildHasher, + mem, + path::{Path, PathBuf}, + sync::Arc, +}; + +use itertools::Itertools; +use prisma_client_rust::{operator::or, Select}; +use rspc::ErrorCode; +use serde::{Deserialize, Serialize}; +use specta::Type; +use tracing::warn; + +mod job; +mod shallow; +mod tasks; + +pub use job::IndexerJob; +pub use shallow::shallow; + +use tasks::walker; + +/// `BATCH_SIZE` is the number of files to index at each task, writing the chunk of files metadata in the database. 
+const BATCH_SIZE: usize = 1000; + +#[derive(thiserror::Error, Debug)] +pub enum IndexerError { + // Not Found errors + #[error("indexer rule not found: ")] + IndexerRuleNotFound(i32), + #[error("received sub path not in database: ", .0.display())] + SubPathNotFound(Box), + + // Internal Errors + #[error("database Error: {0}")] + Database(#[from] prisma_client_rust::QueryError), + #[error(transparent)] + FileIO(#[from] FileIOError), + #[error(transparent)] + NonUtf8Path(#[from] NonUtf8PathError), + #[error(transparent)] + IsoFilePath(#[from] FilePathError), + #[error("missing field on database: {0}")] + MissingField(#[from] MissingFieldError), + #[error("failed to deserialized stored tasks for job resume: {0}")] + DeserializeTasks(#[from] rmp_serde::decode::Error), + + // Mixed errors + #[error(transparent)] + Rules(#[from] IndexerRuleError), +} + +impl From for rspc::Error { + fn from(err: IndexerError) -> Self { + match err { + IndexerError::IndexerRuleNotFound(_) | IndexerError::SubPathNotFound(_) => { + Self::with_cause(ErrorCode::NotFound, err.to_string(), err) + } + + IndexerError::Rules(rule_err) => rule_err.into(), + + _ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err), + } + } +} + +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] +pub enum NonCriticalIndexerError { + #[error("failed to read directory entry: {0}")] + FailedDirectoryEntry(String), + #[error("failed to fetch metadata: {0}")] + Metadata(String), + #[error("error applying indexer rule: {0}")] + IndexerRule(String), + #[error("error trying to extract file path metadata from a file: {0}")] + FilePathMetadata(String), + #[error("failed to fetch file paths ids from existing files on database: {0}")] + FetchAlreadyExistingFilePathIds(String), + #[error("failed to fetch file paths to be removed from database: {0}")] + FetchFilePathsToRemove(String), + #[error("error constructing isolated file path: {0}")] + IsoFilePath(String), + #[error("failed to dispatch 
new task to keep walking a directory: {0}")] + DispatchKeepWalking(String), + #[error("missing file_path data on database: {0}")] + MissingFilePathData(String), +} + +async fn determine_initial_walk_path( + location_id: location::id::Type, + sub_path: &Option + Send + Sync>, + location_path: impl AsRef + Send, + db: &PrismaClient, +) -> Result { + let location_path = location_path.as_ref(); + + match sub_path { + Some(sub_path) if sub_path.as_ref() != Path::new("") => { + let sub_path = sub_path.as_ref(); + let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?; + + ensure_sub_path_is_directory(location_path, sub_path).await?; + + ensure_file_path_exists( + sub_path, + &IsolatedFilePathData::new(location_id, location_path, &full_path, true) + .map_err(IndexerError::from)?, + db, + IndexerError::SubPathNotFound, + ) + .await?; + + Ok(full_path) + } + _ => Ok(location_path.to_path_buf()), + } +} + +fn chunk_db_queries<'db, 'iso>( + iso_file_paths: impl IntoIterator>, + db: &'db PrismaClient, +) -> Vec>> { + iso_file_paths + .into_iter() + .chunks(200) + .into_iter() + .map(|paths_chunk| { + db.file_path() + .find_many(vec![or(paths_chunk + .into_iter() + .map(file_path::WhereParam::from) + .collect())]) + .select(file_path_to_isolate_with_pub_id::select()) + }) + .collect::>() +} + +#[allow(clippy::missing_panics_doc)] // Can't actually panic as we use the hashmap to fetch entries from db +async fn update_directory_sizes( + iso_paths_and_sizes: HashMap, u64, impl BuildHasher + Send>, + db: &PrismaClient, + sync: &SyncManager, +) -> Result<(), IndexerError> { + let to_sync_and_update = db + ._batch(chunk_db_queries(iso_paths_and_sizes.keys(), db)) + .await? + .into_iter() + .flatten() + .map(|file_path| { + let size_bytes = iso_paths_and_sizes + .get(&IsolatedFilePathData::try_from(&file_path)?) 
+ .map(|size| size.to_be_bytes().to_vec()) + .expect("must be here"); + + Ok(( + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: file_path.pub_id.clone(), + }, + file_path::size_in_bytes_bytes::NAME, + msgpack!(size_bytes), + ), + db.file_path().update( + file_path::pub_id::equals(file_path.pub_id), + vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], + ), + )) + }) + .collect::, IndexerError>>()? + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(); + + sync.write_ops(db, to_sync_and_update).await?; + + Ok(()) +} + +async fn update_location_size( + location_id: location::id::Type, + db: &PrismaClient, + invalidate_query: &InvalidateQuery, +) -> Result<(), IndexerError> { + let total_size = db + .file_path() + .find_many(vec![ + file_path::location_id::equals(Some(location_id)), + file_path::materialized_path::equals(Some("/".to_string())), + ]) + .select(file_path::select!({ size_in_bytes_bytes })) + .exec() + .await? + .into_iter() + .filter_map(|file_path| { + file_path + .size_in_bytes_bytes + .map(|size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes)) + }) + .sum::(); + + db.location() + .update( + location::id::equals(location_id), + vec![location::size_in_bytes::set(Some( + total_size.to_be_bytes().to_vec(), + ))], + ) + .exec() + .await?; + + invalidate_query("locations.list"); + invalidate_query("locations.get"); + + Ok(()) +} + +async fn remove_non_existing_file_paths( + to_remove: Vec, + db: &PrismaClient, + sync: &sd_core_sync::Manager, +) -> Result { + #[allow(clippy::cast_sign_loss)] + let (sync_params, db_params): (Vec<_>, Vec<_>) = to_remove + .into_iter() + .map(|file_path| { + ( + sync.shared_delete(prisma_sync::file_path::SyncId { + pub_id: file_path.pub_id, + }), + file_path.id, + ) + }) + .unzip(); + + sync.write_ops( + db, + ( + sync_params, + db.file_path() + .delete_many(vec![file_path::id::in_vec(db_params)]), + ), + ) + .await + .map( + #[allow(clippy::cast_sign_loss)] + |count| count as u64, + ) + 
.map_err(Into::into) +} + +#[allow(clippy::missing_panics_doc)] // Can't actually panic as we only deal with directories +async fn reverse_update_directories_sizes( + base_path: impl AsRef + Send, + location_id: location::id::Type, + location_path: impl AsRef + Send, + db: &PrismaClient, + sync: &SyncManager, + errors: &mut Vec, +) -> Result<(), IndexerError> { + let location_path = location_path.as_ref(); + + let ancestors = base_path + .as_ref() + .ancestors() + .take_while(|&ancestor| ancestor != location_path) + .map(|ancestor| { + IsolatedFilePathData::new(location_id, location_path, ancestor, true).map( + |iso_file_path| { + let materialized_path = iso_file_path + .materialized_path_for_children() + .expect("each ancestor is a directory"); + + (iso_file_path, materialized_path) + }, + ) + }) + .collect::, _>>()?; + + let mut pub_id_by_ancestor_materialized_path = db + ._batch(chunk_db_queries(ancestors.keys(), db)) + .await? + .into_iter() + .flatten() + .filter_map(|mut file_path| { + let pub_id = mem::take(&mut file_path.pub_id); + IsolatedFilePathData::try_from(file_path) + .map_err(|e| { + errors.push( + NonCriticalIndexerError::MissingFilePathData(format!( + "Found a file_path missing data: , error: {e:#?}", + from_bytes_to_uuid(&pub_id) + )) + .into(), + ); + }) + .map(|iso_file_path| { + ( + iso_file_path + .materialized_path_for_children() + .expect("we know it's a directory"), + (pub_id, 0), + ) + }) + .ok() + }) + .collect::>(); + + compute_sizes( + location_id, + ancestors.values().cloned().collect(), + &mut pub_id_by_ancestor_materialized_path, + db, + errors, + ) + .await?; + + let to_sync_and_update = ancestors + .into_values() + .filter_map(|materialized_path| { + if let Some((pub_id, size)) = + pub_id_by_ancestor_materialized_path.remove(&materialized_path) + { + let size_bytes = size_in_bytes_to_db(size); + + Some(( + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: pub_id.clone(), + }, + file_path::size_in_bytes_bytes::NAME, 
+ msgpack!(size_bytes), + ), + db.file_path().update( + file_path::pub_id::equals(pub_id), + vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], + ), + )) + } else { + warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption"); + None + } + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + sync.write_ops(db, to_sync_and_update).await?; + + Ok(()) +} + +async fn compute_sizes( + location_id: location::id::Type, + materialized_paths: Vec, + pub_id_by_ancestor_materialized_path: &mut HashMap, + db: &PrismaClient, + errors: &mut Vec, +) -> Result<(), IndexerError> { + db.file_path() + .find_many(vec![ + file_path::location_id::equals(Some(location_id)), + file_path::materialized_path::in_vec(materialized_paths), + ]) + .select(file_path::select!({ pub_id materialized_path size_in_bytes_bytes })) + .exec() + .await? + .into_iter() + .for_each(|file_path| { + if let Some(materialized_path) = file_path.materialized_path { + if let Some((_, size)) = + pub_id_by_ancestor_materialized_path.get_mut(&materialized_path) + { + *size += file_path.size_in_bytes_bytes.map_or_else( + || { + warn!("Got a directory missing its size in bytes"); + 0 + }, + |size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes), + ); + } + } else { + errors.push( + NonCriticalIndexerError::MissingFilePathData(format!( + "Corrupt database possessing a file_path entry without materialized_path: ", + from_bytes_to_uuid(&file_path.pub_id) + )) + .into(), + ); + } + }); + + Ok(()) +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct IsoFilePathFactory { + pub location_id: location::id::Type, + pub location_path: Arc, +} + +impl walker::IsoFilePathFactory for IsoFilePathFactory { + fn build( + &self, + path: impl AsRef, + is_dir: bool, + ) -> Result, FilePathError> { + IsolatedFilePathData::new(self.location_id, self.location_path.as_ref(), path, is_dir) + } +} + +#[derive(Debug, Clone)] +struct WalkerDBProxy { + location_id: location::id::Type, + db: Arc, 
+} + +impl walker::WalkerDBProxy for WalkerDBProxy { + async fn fetch_file_paths( + &self, + found_paths: Vec, + ) -> Result, IndexerError> { + // Each found path is a AND with 4 terms, and SQLite has a expression tree limit of 1000 terms + // so we will use chunks of 200 just to be safe + self.db + ._batch( + found_paths + .into_iter() + .chunks(200) + .into_iter() + .map(|founds| { + self.db + .file_path() + .find_many(vec![or(founds.collect::>())]) + .select(file_path_walker::select()) + }) + .collect::>(), + ) + .await + .map(|fetched| fetched.into_iter().flatten().collect::>()) + .map_err(Into::into) + } + + async fn fetch_file_paths_to_remove( + &self, + parent_iso_file_path: &IsolatedFilePathData<'_>, + unique_location_id_materialized_path_name_extension_params: Vec, + ) -> Result, NonCriticalIndexerError> { + // NOTE: This batch size can be increased if we wish to trade memory for more performance + const BATCH_SIZE: i64 = 1000; + + let founds_ids = self + .db + ._batch( + unique_location_id_materialized_path_name_extension_params + .into_iter() + .chunks(200) + .into_iter() + .map(|unique_params| { + self.db + .file_path() + .find_many(vec![or(unique_params.collect())]) + .select(file_path::select!({ id })) + }) + .collect::>(), + ) + .await + .map(|founds_chunk| { + founds_chunk + .into_iter() + .flat_map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id)) + .collect::>() + }) + .map_err(|e| NonCriticalIndexerError::FetchAlreadyExistingFilePathIds(e.to_string()))?; + + let mut to_remove = vec![]; + let mut cursor = 1; + + loop { + let found = self + .db + .file_path() + .find_many(vec![ + file_path::location_id::equals(Some(self.location_id)), + file_path::materialized_path::equals(Some( + parent_iso_file_path + .materialized_path_for_children() + .expect("the received isolated file path must be from a directory"), + )), + ]) + .order_by(file_path::id::order(SortOrder::Asc)) + .take(BATCH_SIZE) + .cursor(file_path::id::equals(cursor)) + 
.select(file_path_pub_and_cas_ids::select()) + .exec() + .await + .map_err(|e| NonCriticalIndexerError::FetchFilePathsToRemove(e.to_string()))?; + + #[allow(clippy::cast_possible_truncation)] // Safe because we are using a constant + let should_stop = found.len() < BATCH_SIZE as usize; + + if let Some(last) = found.last() { + cursor = last.id; + } else { + break; + } + + to_remove.extend( + found + .into_iter() + .filter(|file_path| !founds_ids.contains(&file_path.id)), + ); + + if should_stop { + break; + } + } + + Ok(to_remove) + } +} diff --git a/core/crates/heavy-lifting/src/indexer/shallow.rs b/core/crates/heavy-lifting/src/indexer/shallow.rs new file mode 100644 index 000000000..a39d37bbf --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/shallow.rs @@ -0,0 +1,261 @@ +use crate::{Error, NonCriticalJobError}; + +use sd_core_indexer_rules::{IndexerRule, IndexerRuler}; +use sd_core_prisma_helpers::location_with_indexer_rules; +use sd_core_sync::Manager as SyncManager; + +use sd_prisma::prisma::PrismaClient; +use sd_task_system::{BaseTaskDispatcher, CancelTaskOnDrop, IntoTask, TaskDispatcher, TaskOutput}; +use sd_utils::db::maybe_missing; + +use std::{ + collections::HashMap, + path::{Path, PathBuf}, + sync::Arc, +}; + +use futures_concurrency::future::TryJoin; +use itertools::Itertools; +use tracing::{debug, warn}; + +use super::{ + determine_initial_walk_path, remove_non_existing_file_paths, reverse_update_directories_sizes, + tasks::{ + saver::{SaveTask, SaveTaskOutput}, + updater::{UpdateTask, UpdateTaskOutput}, + walker::{ToWalkEntry, WalkDirTask, WalkTaskOutput, WalkedEntry}, + }, + update_directory_sizes, update_location_size, IndexerError, IsoFilePathFactory, WalkerDBProxy, + BATCH_SIZE, +}; + +pub async fn shallow( + location: location_with_indexer_rules::Data, + sub_path: impl AsRef + Send, + dispatcher: BaseTaskDispatcher, + db: Arc, + sync: Arc, + invalidate_query: impl Fn(&'static str) + Send + Sync, +) -> Result, Error> { + let sub_path = 
sub_path.as_ref(); + + let location_path = maybe_missing(&location.path, "location.path") + .map(PathBuf::from) + .map(Arc::new) + .map_err(IndexerError::from)?; + + let to_walk_path = Arc::new( + determine_initial_walk_path(location.id, &Some(sub_path), &*location_path, &db).await?, + ); + + let Some(WalkTaskOutput { + to_create, + to_update, + to_remove, + mut errors, + directory_iso_file_path, + total_size, + .. + }) = walk( + &location, + Arc::clone(&location_path), + Arc::clone(&to_walk_path), + Arc::clone(&db), + &dispatcher, + ) + .await? + else { + return Ok(vec![]); + }; + + let removed_count = remove_non_existing_file_paths(to_remove, &db, &sync).await?; + + let Some(Metadata { + indexed_count, + updated_count, + }) = save_and_update( + &location, + to_create, + to_update, + Arc::clone(&db), + Arc::clone(&sync), + &dispatcher, + ) + .await? + else { + return Ok(errors); + }; + + if indexed_count > 0 || removed_count > 0 || updated_count > 0 { + update_directory_sizes( + HashMap::from([(directory_iso_file_path, total_size)]), + &db, + &sync, + ) + .await?; + + if to_walk_path != location_path { + reverse_update_directories_sizes( + &*to_walk_path, + location.id, + &*location_path, + &db, + &sync, + &mut errors, + ) + .await?; + } + + update_location_size(location.id, &db, &invalidate_query).await?; + } + + if indexed_count > 0 || removed_count > 0 { + invalidate_query("search.paths"); + } + + Ok(errors) +} + +async fn walk( + location: &location_with_indexer_rules::Data, + location_path: Arc, + to_walk_path: Arc, + db: Arc, + dispatcher: &BaseTaskDispatcher, +) -> Result, Error> { + match dispatcher + .dispatch(WalkDirTask::new( + ToWalkEntry::from(&*to_walk_path), + to_walk_path, + location + .indexer_rules + .iter() + .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) + .collect::, _>>() + .map(IndexerRuler::new) + .map_err(IndexerError::from)?, + IsoFilePathFactory { + location_id: location.id, + location_path, + }, + WalkerDBProxy { + location_id: 
location.id, + db, + }, + None::>, + )?) + .await + .await? + { + sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => Ok(Some( + *data + .downcast::() + .expect("we just dispatched this task"), + )), + sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => { + warn!("Shallow indexer's walker task finished without any output"); + Ok(None) + } + sd_task_system::TaskStatus::Error(e) => Err(e), + + sd_task_system::TaskStatus::Shutdown(_) => { + debug!("Spacedrive is shuting down while a shallow indexer was in progress"); + Ok(None) + } + sd_task_system::TaskStatus::Canceled | sd_task_system::TaskStatus::ForcedAbortion => { + unreachable!("WalkDirTask on shallow indexer can never be canceled or aborted") + } + } +} + +struct Metadata { + indexed_count: u64, + updated_count: u64, +} + +async fn save_and_update( + location: &location_with_indexer_rules::Data, + to_create: Vec, + to_update: Vec, + db: Arc, + sync: Arc, + dispatcher: &BaseTaskDispatcher, +) -> Result, Error> { + let save_and_update_tasks = to_create + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + SaveTask::new( + location.id, + location.pub_id.clone(), + chunk.collect::>(), + Arc::clone(&db), + Arc::clone(&sync), + ) + }) + .map(IntoTask::into_task) + .chain( + to_update + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + UpdateTask::new( + chunk.collect::>(), + Arc::clone(&db), + Arc::clone(&sync), + ) + }) + .map(IntoTask::into_task), + ) + .collect::>(); + + let mut metadata = Metadata { + indexed_count: 0, + updated_count: 0, + }; + + for task_status in dispatcher + .dispatch_many_boxed(save_and_update_tasks) + .await + .into_iter() + .map(CancelTaskOnDrop) + .collect::>() + .try_join() + .await? 
+ { + match task_status { + sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => { + if data.is::() { + metadata.indexed_count += data + .downcast::() + .expect("just checked") + .saved_count; + } else { + metadata.updated_count += data + .downcast::() + .expect("just checked") + .updated_count; + } + } + sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => { + warn!("Shallow indexer's saver or updater task finished without any output"); + return Ok(None); + } + sd_task_system::TaskStatus::Error(e) => return Err(e), + + sd_task_system::TaskStatus::Shutdown(_) => { + debug!("Spacedrive is shuting down while a shallow indexer was in progress"); + return Ok(None); + } + sd_task_system::TaskStatus::Canceled | sd_task_system::TaskStatus::ForcedAbortion => { + unreachable!( + "Save or Updater tasks on shallow indexer can never be canceled or aborted" + ); + } + } + } + + Ok(Some(metadata)) +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/mod.rs b/core/crates/heavy-lifting/src/indexer/tasks/mod.rs new file mode 100644 index 000000000..eacba8f11 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/mod.rs @@ -0,0 +1,3 @@ +pub mod saver; +pub mod updater; +pub mod walker; diff --git a/core/crates/heavy-lifting/src/indexer/tasks/saver.rs b/core/crates/heavy-lifting/src/indexer/tasks/saver.rs new file mode 100644 index 000000000..2f1f6d433 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/saver.rs @@ -0,0 +1,218 @@ +use crate::{indexer::IndexerError, Error}; + +use sd_core_file_path_helper::IsolatedFilePathDataParts; +use sd_core_sync::Manager as SyncManager; + +use sd_prisma::{ + prisma::{file_path, location, PrismaClient}, + prisma_sync, +}; +use sd_sync::{sync_db_entry, OperationFactory}; +use sd_task_system::{ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId}; +use sd_utils::{db::inode_to_db, msgpack}; + +use std::{sync::Arc, time::Duration}; + +use chrono::Utc; +use serde::{Deserialize, 
Serialize}; +use tokio::time::Instant; +use tracing::trace; + +use super::walker::WalkedEntry; + +#[derive(Debug)] +pub struct SaveTask { + id: TaskId, + location_id: location::id::Type, + location_pub_id: location::pub_id::Type, + walked_entries: Vec, + db: Arc, + sync: Arc, +} + +impl SaveTask { + #[must_use] + pub fn new( + location_id: location::id::Type, + location_pub_id: location::pub_id::Type, + walked_entries: Vec, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + location_id, + location_pub_id, + walked_entries, + db, + sync, + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct SaveTaskSaveState { + id: TaskId, + location_id: location::id::Type, + location_pub_id: location::pub_id::Type, + walked_entries: Vec, +} + +impl SerializableTask for SaveTask { + type SerializeError = rmp_serde::encode::Error; + + type DeserializeError = rmp_serde::decode::Error; + + type DeserializeCtx = (Arc, Arc); + + async fn serialize(self) -> Result, Self::SerializeError> { + let Self { + id, + location_id, + location_pub_id, + walked_entries, + .. 
+ } = self; + rmp_serde::to_vec_named(&SaveTaskSaveState { + id, + location_id, + location_pub_id, + walked_entries, + }) + } + + async fn deserialize( + data: &[u8], + (db, sync): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |SaveTaskSaveState { + id, + location_id, + location_pub_id, + walked_entries, + }| Self { + id, + location_id, + location_pub_id, + walked_entries, + db, + sync, + }, + ) + } +} + +#[derive(Debug)] +pub struct SaveTaskOutput { + pub saved_count: u64, + pub save_duration: Duration, +} + +#[async_trait::async_trait] +impl Task for SaveTask { + fn id(&self) -> TaskId { + self.id + } + + async fn run(&mut self, _: &Interrupter) -> Result { + use file_path::{ + create_unchecked, date_created, date_indexed, date_modified, extension, hidden, inode, + is_dir, location, location_id, materialized_path, name, size_in_bytes_bytes, + }; + + let start_time = Instant::now(); + + let Self { + location_id, + location_pub_id, + walked_entries, + db, + sync, + .. + } = self; + + let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked_entries + .drain(..) + .map(|entry| { + let IsolatedFilePathDataParts { + materialized_path, + is_dir, + name, + extension, + .. 
+ } = entry.iso_file_path.to_parts(); + + let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); + + let (sync_params, db_params): (Vec<_>, Vec<_>) = [ + ( + ( + location::NAME, + msgpack!(prisma_sync::location::SyncId { + pub_id: location_pub_id.clone() + }), + ), + location_id::set(Some(*location_id)), + ), + sync_db_entry!(materialized_path.to_string(), materialized_path), + sync_db_entry!(name.to_string(), name), + sync_db_entry!(is_dir, is_dir), + sync_db_entry!(extension.to_string(), extension), + sync_db_entry!( + entry.metadata.size_in_bytes.to_be_bytes().to_vec(), + size_in_bytes_bytes + ), + sync_db_entry!(inode_to_db(entry.metadata.inode), inode), + { + let v = entry.metadata.created_at.into(); + sync_db_entry!(v, date_created) + }, + { + let v = entry.metadata.modified_at.into(); + sync_db_entry!(v, date_modified) + }, + { + let v = Utc::now().into(); + sync_db_entry!(v, date_indexed) + }, + sync_db_entry!(entry.metadata.hidden, hidden), + ] + .into_iter() + .unzip(); + + ( + sync.shared_create( + prisma_sync::file_path::SyncId { + pub_id: sd_utils::uuid_to_bytes(entry.pub_id), + }, + sync_params, + ), + create_unchecked(pub_id, db_params), + ) + }) + .unzip(); + + #[allow(clippy::cast_sign_loss)] + let saved_count = sync + .write_ops( + db, + ( + sync_stuff.into_iter().flatten().collect(), + db.file_path().create_many(paths).skip_duplicates(), + ), + ) + .await + .map_err(IndexerError::from)? 
as u64; + + trace!("Inserted {saved_count} records"); + + Ok(ExecStatus::Done( + SaveTaskOutput { + saved_count, + save_duration: start_time.elapsed(), + } + .into_output(), + )) + } +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/updater.rs b/core/crates/heavy-lifting/src/indexer/tasks/updater.rs new file mode 100644 index 000000000..f7e99e800 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/updater.rs @@ -0,0 +1,236 @@ +use crate::{indexer::IndexerError, Error}; + +use sd_core_file_path_helper::IsolatedFilePathDataParts; +use sd_core_sync::Manager as SyncManager; + +use sd_prisma::{ + prisma::{file_path, object, PrismaClient}, + prisma_sync, +}; +use sd_sync::{sync_db_entry, OperationFactory}; +use sd_task_system::{ + check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId, +}; +use sd_utils::{chain_optional_iter, db::inode_to_db, msgpack}; + +use std::{collections::HashSet, sync::Arc, time::Duration}; + +use serde::{Deserialize, Serialize}; +use tokio::time::Instant; +use tracing::trace; + +use super::walker::WalkedEntry; + +#[derive(Debug)] +pub struct UpdateTask { + id: TaskId, + walked_entries: Vec, + object_ids_that_should_be_unlinked: HashSet, + db: Arc, + sync: Arc, +} + +impl UpdateTask { + #[must_use] + pub fn new( + walked_entries: Vec, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + walked_entries, + db, + sync, + object_ids_that_should_be_unlinked: HashSet::new(), + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct UpdateTaskSaveState { + id: TaskId, + walked_entries: Vec, + object_ids_that_should_be_unlinked: HashSet, +} + +impl SerializableTask for UpdateTask { + type SerializeError = rmp_serde::encode::Error; + + type DeserializeError = rmp_serde::decode::Error; + + type DeserializeCtx = (Arc, Arc); + + async fn serialize(self) -> Result, Self::SerializeError> { + rmp_serde::to_vec_named(&UpdateTaskSaveState { + id: self.id, + walked_entries: 
self.walked_entries, + object_ids_that_should_be_unlinked: self.object_ids_that_should_be_unlinked, + }) + } + + async fn deserialize( + data: &[u8], + (db, sync): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |UpdateTaskSaveState { + id, + walked_entries, + object_ids_that_should_be_unlinked, + }| Self { + id, + walked_entries, + object_ids_that_should_be_unlinked, + db, + sync, + }, + ) + } +} + +#[derive(Debug)] +pub struct UpdateTaskOutput { + pub updated_count: u64, + pub update_duration: Duration, +} + +#[async_trait::async_trait] +impl Task for UpdateTask { + fn id(&self) -> TaskId { + self.id + } + + async fn run(&mut self, interrupter: &Interrupter) -> Result { + use file_path::{ + cas_id, date_created, date_modified, hidden, inode, is_dir, object, object_id, + size_in_bytes_bytes, + }; + + let start_time = Instant::now(); + + let Self { + walked_entries, + db, + sync, + object_ids_that_should_be_unlinked, + .. + } = self; + + fetch_objects_ids_to_unlink(walked_entries, object_ids_that_should_be_unlinked, db).await?; + + check_interruption!(interrupter); + + let (sync_stuff, paths_to_update) = walked_entries + .drain(..) + .map(|entry| { + let IsolatedFilePathDataParts { is_dir, .. 
} = &entry.iso_file_path.to_parts(); + + let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); + + let should_unlink_object = entry.maybe_object_id.map_or(false, |object_id| { + object_ids_that_should_be_unlinked.contains(&object_id) + }); + + let (sync_params, db_params) = chain_optional_iter( + [ + ((cas_id::NAME, msgpack!(nil)), cas_id::set(None)), + sync_db_entry!(*is_dir, is_dir), + sync_db_entry!( + entry.metadata.size_in_bytes.to_be_bytes().to_vec(), + size_in_bytes_bytes + ), + sync_db_entry!(inode_to_db(entry.metadata.inode), inode), + { + let v = entry.metadata.created_at.into(); + sync_db_entry!(v, date_created) + }, + { + let v = entry.metadata.modified_at.into(); + sync_db_entry!(v, date_modified) + }, + sync_db_entry!(entry.metadata.hidden, hidden), + ], + [ + // As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null + // So this file_path will be updated at file identifier job + should_unlink_object + .then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())), + ], + ) + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(); + + ( + sync_params + .into_iter() + .map(|(field, value)| { + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: pub_id.clone(), + }, + field, + value, + ) + }) + .collect::>(), + db.file_path() + .update(file_path::pub_id::equals(pub_id), db_params) + .select(file_path::select!({ id })), + ) + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + let updated = sync + .write_ops( + db, + (sync_stuff.into_iter().flatten().collect(), paths_to_update), + ) + .await + .map_err(IndexerError::from)?; + + trace!("Updated {updated:?} records"); + + Ok(ExecStatus::Done( + UpdateTaskOutput { + updated_count: updated.len() as u64, + update_duration: start_time.elapsed(), + } + .into_output(), + )) + } +} + +async fn fetch_objects_ids_to_unlink( + walked_entries: &[WalkedEntry], + object_ids_that_should_be_unlinked: &mut HashSet, + db: &PrismaClient, +) -> Result<(), IndexerError> { + if 
object_ids_that_should_be_unlinked.is_empty() { + // First we consult which file paths we should unlink + let object_ids = walked_entries + .iter() + .filter_map(|entry| entry.maybe_object_id) + .collect::>() // Removing possible duplicates + .into_iter() + .collect::>(); + + *object_ids_that_should_be_unlinked = db + ._batch( + object_ids + .iter() + .map(|object_id| { + db.file_path() + .count(vec![file_path::object_id::equals(Some(*object_id))]) + }) + .collect::>(), + ) + .await? + .into_iter() + .zip(object_ids) + .filter_map(|(count, object_id)| (count > 1).then_some(object_id)) + .collect::>(); + } + + Ok(()) +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker.rs new file mode 100644 index 000000000..7b8eefd4d --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker.rs @@ -0,0 +1,1516 @@ +use crate::{ + indexer::{IndexerError, NonCriticalIndexerError}, + Error, NonCriticalJobError, +}; + +use sd_core_file_path_helper::{FilePathError, FilePathMetadata, IsolatedFilePathData}; +use sd_core_indexer_rules::{IndexerRuler, MetadataForIndexerRules, RuleKind}; +use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker}; + +use sd_prisma::prisma::file_path; +use sd_task_system::{ + check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, + TaskDispatcher, TaskHandle, TaskId, +}; +use sd_utils::{db::inode_from_db, error::FileIOError}; + +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + fmt, + fs::Metadata, + future::Future, + hash::{Hash, Hasher}, + mem, + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; + +use chrono::{DateTime, Duration as ChronoDuration, FixedOffset, Utc}; +use futures_concurrency::future::Join; +use serde::{Deserialize, Serialize}; +use tokio::{fs, time::Instant}; +use tokio_stream::{wrappers::ReadDirStream, StreamExt}; +use tracing::trace; +use uuid::Uuid; + +/// `WalkedEntry` represents a 
single path in the filesystem +#[derive(Debug, Serialize, Deserialize)] +pub struct WalkedEntry { + pub pub_id: Uuid, + pub maybe_object_id: file_path::object_id::Type, + pub iso_file_path: IsolatedFilePathData<'static>, + pub metadata: FilePathMetadata, +} + +impl PartialEq for WalkedEntry { + fn eq(&self, other: &Self) -> bool { + self.iso_file_path == other.iso_file_path + } +} + +impl Eq for WalkedEntry {} + +impl Hash for WalkedEntry { + fn hash(&self, state: &mut H) { + self.iso_file_path.hash(state); + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct WalkingEntry { + iso_file_path: IsolatedFilePathData<'static>, + metadata: FilePathMetadata, +} + +impl From for WalkedEntry { + fn from( + WalkingEntry { + iso_file_path, + metadata, + }: WalkingEntry, + ) -> Self { + Self { + pub_id: Uuid::new_v4(), + maybe_object_id: None, + iso_file_path, + metadata, + } + } +} + +impl From<(Uuid, file_path::object_id::Type, WalkingEntry)> for WalkedEntry { + fn from( + ( + pub_id, + maybe_object_id, + WalkingEntry { + iso_file_path, + metadata, + }, + ): (Uuid, file_path::object_id::Type, WalkingEntry), + ) -> Self { + Self { + pub_id, + maybe_object_id, + iso_file_path, + metadata, + } + } +} + +pub trait IsoFilePathFactory: Clone + Send + Sync + fmt::Debug + 'static { + fn build( + &self, + path: impl AsRef, + is_dir: bool, + ) -> Result, FilePathError>; +} + +pub trait WalkerDBProxy: Clone + Send + Sync + fmt::Debug + 'static { + fn fetch_file_paths( + &self, + found_paths: Vec, + ) -> impl Future, IndexerError>> + Send; + + fn fetch_file_paths_to_remove( + &self, + parent_iso_file_path: &IsolatedFilePathData<'_>, + unique_location_id_materialized_path_name_extension_params: Vec, + ) -> impl Future, NonCriticalIndexerError>> + Send; +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct ToWalkEntry { + path: PathBuf, + parent_dir_accepted_by_its_children: Option, +} + +impl> From

for ToWalkEntry { + fn from(path: P) -> Self { + Self { + path: path.as_ref().into(), + parent_dir_accepted_by_its_children: None, + } + } +} + +#[derive(Debug)] +pub struct WalkTaskOutput { + pub to_create: Vec, + pub to_update: Vec, + pub to_remove: Vec, + pub accepted_ancestors: HashSet, + pub errors: Vec, + pub directory_iso_file_path: IsolatedFilePathData<'static>, + pub total_size: u64, + pub handles: Vec>, + pub scan_time: Duration, +} + +#[derive(Debug, Serialize, Deserialize)] +struct InnerMetadata { + pub is_dir: bool, + pub is_symlink: bool, + pub inode: u64, + pub size_in_bytes: u64, + pub hidden: bool, + pub created_at: DateTime, + pub modified_at: DateTime, +} + +impl InnerMetadata { + fn new(path: impl AsRef, metadata: &Metadata) -> Result { + let FilePathMetadata { + inode, + size_in_bytes, + created_at, + modified_at, + hidden, + } = FilePathMetadata::from_path(path, metadata) + .map_err(|e| NonCriticalIndexerError::FilePathMetadata(e.to_string()))?; + + Ok(Self { + is_dir: metadata.is_dir(), + is_symlink: metadata.is_symlink(), + inode, + size_in_bytes, + hidden, + created_at, + modified_at, + }) + } +} + +impl MetadataForIndexerRules for InnerMetadata { + fn is_dir(&self) -> bool { + self.is_dir + } +} + +impl From for FilePathMetadata { + fn from(metadata: InnerMetadata) -> Self { + Self { + inode: metadata.inode, + size_in_bytes: metadata.size_in_bytes, + hidden: metadata.hidden, + created_at: metadata.created_at, + modified_at: metadata.modified_at, + } + } +} + +#[derive(Debug)] +enum WalkerStage { + Start, + Walking { + read_dir_stream: ReadDirStream, + found_paths: Vec, + }, + CollectingMetadata { + found_paths: Vec, + }, + CheckingIndexerRules { + paths_and_metadatas: HashMap, + }, + ProcessingRulesResults { + paths_metadatas_and_acceptance: + HashMap>)>, + }, + GatheringFilePathsToRemove { + accepted_paths: HashMap, + maybe_to_keep_walking: Option>, + accepted_ancestors: HashSet, + }, + Finalize { + walking_entries: Vec, + 
accepted_ancestors: HashSet, + to_remove_entries: Vec, + maybe_to_keep_walking: Option>, + }, +} + +#[derive(Debug, Serialize, Deserialize)] +struct WalkDirSaveState { + id: TaskId, + entry: ToWalkEntry, + root: Arc, + entry_iso_file_path: IsolatedFilePathData<'static>, + stage: WalkerStageSaveState, + errors: Vec, + scan_time: Duration, +} + +#[derive(Debug, Serialize, Deserialize)] +enum WalkerStageSaveState { + Start, + CollectingMetadata { + found_paths: Vec, + }, + CheckingIndexerRules { + paths_and_metadatas: HashMap, + }, + ProcessingRulesResults { + paths_metadatas_and_acceptance: + HashMap>)>, + }, + GatheringFilePathsToRemove { + accepted_paths: HashMap, + maybe_to_keep_walking: Option>, + accepted_ancestors: HashSet, + }, + Finalize { + walking_entries: Vec, + accepted_ancestors: HashSet, + to_remove_entries: Vec, + maybe_to_keep_walking: Option>, + }, +} + +impl From for WalkerStageSaveState { + fn from(stage: WalkerStage) -> Self { + match stage { + // We can't store the current state of `ReadDirStream` so we start again from the beginning + WalkerStage::Start | WalkerStage::Walking { .. 
} => Self::Start, + WalkerStage::CollectingMetadata { found_paths } => { + Self::CollectingMetadata { found_paths } + } + WalkerStage::CheckingIndexerRules { + paths_and_metadatas, + } => Self::CheckingIndexerRules { + paths_and_metadatas, + }, + WalkerStage::ProcessingRulesResults { + paths_metadatas_and_acceptance, + } => Self::ProcessingRulesResults { + paths_metadatas_and_acceptance, + }, + WalkerStage::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + } => Self::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + }, + WalkerStage::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + } => Self::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + }, + } + } +} + +impl From for WalkerStage { + fn from(value: WalkerStageSaveState) -> Self { + match value { + WalkerStageSaveState::Start => Self::Start, + WalkerStageSaveState::CollectingMetadata { found_paths } => { + Self::CollectingMetadata { found_paths } + } + WalkerStageSaveState::CheckingIndexerRules { + paths_and_metadatas, + } => Self::CheckingIndexerRules { + paths_and_metadatas, + }, + WalkerStageSaveState::ProcessingRulesResults { + paths_metadatas_and_acceptance, + } => Self::ProcessingRulesResults { + paths_metadatas_and_acceptance, + }, + WalkerStageSaveState::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + } => Self::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + }, + WalkerStageSaveState::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + } => Self::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + }, + } + } +} + +#[derive(Debug)] +pub struct WalkDirTask +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, + 
Dispatcher: TaskDispatcher, +{ + id: TaskId, + entry: ToWalkEntry, + root: Arc, + entry_iso_file_path: IsolatedFilePathData<'static>, + indexer_ruler: IndexerRuler, + iso_file_path_factory: IsoPathFactory, + db_proxy: DBProxy, + stage: WalkerStage, + maybe_dispatcher: Option, + errors: Vec, + scan_time: Duration, +} + +impl WalkDirTask +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, + Dispatcher: TaskDispatcher, +{ + pub fn new( + entry: impl Into + Send, + root: Arc, + indexer_ruler: IndexerRuler, + iso_file_path_factory: IsoPathFactory, + db_proxy: DBProxy, + maybe_dispatcher: Option, + ) -> Result { + let entry = entry.into(); + Ok(Self { + id: TaskId::new_v4(), + root, + indexer_ruler, + entry_iso_file_path: iso_file_path_factory.build(&entry.path, true)?, + iso_file_path_factory, + db_proxy, + stage: WalkerStage::Start, + entry, + maybe_dispatcher, + errors: Vec::new(), + scan_time: Duration::ZERO, + }) + } +} + +impl SerializableTask + for WalkDirTask +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, + Dispatcher: TaskDispatcher, +{ + type SerializeError = rmp_serde::encode::Error; + type DeserializeError = rmp_serde::decode::Error; + type DeserializeCtx = (IndexerRuler, DBProxy, IsoPathFactory, Dispatcher); + + async fn serialize(self) -> Result, Self::SerializeError> { + rmp_serde::to_vec_named(&WalkDirSaveState { + id: self.id, + entry: self.entry, + root: self.root, + entry_iso_file_path: self.entry_iso_file_path, + stage: self.stage.into(), + errors: self.errors, + scan_time: self.scan_time, + }) + } + + async fn deserialize( + data: &[u8], + (indexer_ruler, db_proxy, iso_file_path_factory, dispatcher): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |WalkDirSaveState { + id, + entry, + root, + entry_iso_file_path, + stage, + errors, + scan_time, + }| Self { + id, + entry, + root, + entry_iso_file_path, + indexer_ruler, + iso_file_path_factory, + db_proxy, + stage: stage.into(), + 
maybe_dispatcher: Some(dispatcher), + errors, + scan_time, + }, + ) + } +} + +#[async_trait::async_trait] +impl Task + for WalkDirTask +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, + Dispatcher: TaskDispatcher, +{ + fn id(&self) -> TaskId { + self.id + } + + #[allow(clippy::too_many_lines)] + async fn run(&mut self, interrupter: &Interrupter) -> Result { + let Self { + root, + entry: ToWalkEntry { + path, + parent_dir_accepted_by_its_children, + }, + entry_iso_file_path, + iso_file_path_factory, + indexer_ruler, + db_proxy, + stage, + maybe_dispatcher, + errors, + scan_time, + .. + } = self; + + let start_time = Instant::now(); + + let (to_create, to_update, total_size, to_remove, accepted_ancestors, handles) = loop { + match stage { + WalkerStage::Start => { + *stage = WalkerStage::Walking { + read_dir_stream: ReadDirStream::new(fs::read_dir(&path).await.map_err( + |e| { + IndexerError::FileIO( + (&path, e, "Failed to open directory to read its entries") + .into(), + ) + }, + )?), + found_paths: Vec::new(), + }; + } + + WalkerStage::Walking { + read_dir_stream, + found_paths, + } => { + while let Some(res) = read_dir_stream.next().await { + match res { + Ok(dir_entry) => { + found_paths.push(dir_entry.path()); + } + Err(e) => { + errors.push(NonCriticalJobError::Indexer( + NonCriticalIndexerError::FailedDirectoryEntry( + FileIOError::from((&path, e)).to_string(), + ), + )); + } + } + + check_interruption!(interrupter, start_time, scan_time); + } + + *stage = WalkerStage::CollectingMetadata { + found_paths: mem::take(found_paths), + }; + + check_interruption!(interrupter, start_time, scan_time); + } + + WalkerStage::CollectingMetadata { found_paths } => { + *stage = WalkerStage::CheckingIndexerRules { + paths_and_metadatas: collect_metadata(found_paths, errors).await, + }; + + check_interruption!(interrupter, start_time, scan_time); + } + + WalkerStage::CheckingIndexerRules { + paths_and_metadatas, + } => { + *stage = 
WalkerStage::ProcessingRulesResults { + paths_metadatas_and_acceptance: apply_indexer_rules( + paths_and_metadatas, + indexer_ruler, + errors, + ) + .await, + }; + + check_interruption!(interrupter, start_time, scan_time); + } + + WalkerStage::ProcessingRulesResults { + paths_metadatas_and_acceptance, + } => { + let mut maybe_to_keep_walking = maybe_dispatcher.is_some().then(Vec::new); + let (accepted_paths, accepted_ancestors) = process_rules_results( + root, + iso_file_path_factory, + *parent_dir_accepted_by_its_children, + paths_metadatas_and_acceptance, + &mut maybe_to_keep_walking, + errors, + ) + .await; + + *stage = WalkerStage::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + }; + + check_interruption!(interrupter, start_time, scan_time); + } + + WalkerStage::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + } => { + let (walking_entries, to_remove_entries) = gather_file_paths_to_remove( + accepted_paths, + entry_iso_file_path, + iso_file_path_factory, + db_proxy, + errors, + ) + .await; + + *stage = WalkerStage::Finalize { + walking_entries, + to_remove_entries, + maybe_to_keep_walking: mem::take(maybe_to_keep_walking), + accepted_ancestors: mem::take(accepted_ancestors), + }; + + check_interruption!(interrupter, start_time, scan_time); + } + + // From this points onwards, we will not allow to be interrupted anymore + WalkerStage::Finalize { + walking_entries, + to_remove_entries, + maybe_to_keep_walking, + accepted_ancestors, + } => { + let (to_create, to_update, total_size) = + segregate_creates_and_updates(walking_entries, db_proxy).await?; + + let handles = keep_walking( + root, + indexer_ruler, + iso_file_path_factory, + db_proxy, + maybe_to_keep_walking, + maybe_dispatcher, + errors, + ) + .await; + + break ( + to_create, + to_update, + total_size, + mem::take(to_remove_entries), + mem::take(accepted_ancestors), + handles, + ); + } + } + }; + + *scan_time += 
start_time.elapsed(); + + // Taking out some data as the task is finally complete + Ok(ExecStatus::Done( + WalkTaskOutput { + to_create, + to_update, + to_remove, + accepted_ancestors, + errors: mem::take(errors), + directory_iso_file_path: mem::take(entry_iso_file_path), + total_size, + handles, + scan_time: *scan_time, + } + .into_output(), + )) + } +} + +async fn segregate_creates_and_updates( + walking_entries: &mut Vec, + db_proxy: &impl WalkerDBProxy, +) -> Result<(Vec, Vec, u64), IndexerError> { + if walking_entries.is_empty() { + Ok((vec![], vec![], 0)) + } else { + let iso_paths_already_in_db = db_proxy + .fetch_file_paths( + walking_entries + .iter() + .map(|entry| file_path::WhereParam::from(&entry.iso_file_path)) + .collect(), + ) + .await? + .into_iter() + .flat_map(|file_path| { + IsolatedFilePathData::try_from(file_path.clone()) + .map(|iso_file_path| (iso_file_path, file_path)) + }) + .collect::>(); + + Ok(walking_entries.drain(..).fold( + (Vec::new(), Vec::new(), 0), + |(mut to_create, mut to_update, mut total_size), entry| { + let WalkingEntry{iso_file_path, metadata} = &entry; + + total_size += metadata.size_in_bytes; + + if let Some(file_path) = iso_paths_already_in_db.get(iso_file_path) { + if let (Some(inode), Some(date_modified)) = ( + &file_path.inode, + &file_path.date_modified, + ) { + if ( + inode_from_db(&inode[0..8]) != metadata.inode + // Datetimes stored in DB loses a bit of precision, so we need to check against a delta + // instead of using != operator + || DateTime::::from(metadata.modified_at) - *date_modified + > ChronoDuration::milliseconds(1) || file_path.hidden.is_none() || metadata.hidden != file_path.hidden.unwrap_or_default() + ) + // We ignore the size of directories because it is not reliable, we need to + // calculate it ourselves later + && !( + iso_file_path.to_parts().is_dir + && metadata.size_in_bytes + != file_path + .size_in_bytes_bytes + .as_ref() + .map(|size_in_bytes_bytes| { + u64::from_be_bytes([ + 
size_in_bytes_bytes[0], + size_in_bytes_bytes[1], + size_in_bytes_bytes[2], + size_in_bytes_bytes[3], + size_in_bytes_bytes[4], + size_in_bytes_bytes[5], + size_in_bytes_bytes[6], + size_in_bytes_bytes[7], + ]) + }) + .unwrap_or_default() + ) { + to_update.push( + WalkedEntry::from((sd_utils::from_bytes_to_uuid(&file_path.pub_id), file_path.object_id, entry)), + ); + } + } + } else { + to_create.push(WalkedEntry::from(entry)); + } + + (to_create, to_update, total_size) + } + )) + } +} + +async fn keep_walking( + root: &Arc, + indexer_ruler: &IndexerRuler, + iso_file_path_factory: &impl IsoFilePathFactory, + db_proxy: &impl WalkerDBProxy, + maybe_to_keep_walking: &mut Option>, + dispatcher: &Option>, + errors: &mut Vec, +) -> Vec> { + if let (Some(dispatcher), Some(to_keep_walking)) = (dispatcher, maybe_to_keep_walking) { + dispatcher + .dispatch_many( + to_keep_walking + .drain(..) + .map(|entry| { + WalkDirTask::new( + entry, + Arc::clone(root), + indexer_ruler.clone(), + iso_file_path_factory.clone(), + db_proxy.clone(), + Some(dispatcher.clone()), + ) + .map_err(|e| NonCriticalIndexerError::DispatchKeepWalking(e.to_string())) + }) + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()), + ) + .await + } else { + Vec::new() + } +} + +async fn collect_metadata( + found_paths: &mut Vec, + errors: &mut Vec, +) -> HashMap { + found_paths + .drain(..) 
+ .map(|current_path| async move { + fs::metadata(¤t_path) + .await + .map_err(|e| { + NonCriticalIndexerError::Metadata( + FileIOError::from((¤t_path, e)).to_string(), + ) + }) + .and_then(|metadata| { + InnerMetadata::new(¤t_path, &metadata) + .map(|metadata| (current_path, metadata)) + }) + }) + .collect::>() + .join() + .await + .into_iter() + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) + .collect() +} + +async fn apply_indexer_rules( + paths_and_metadatas: &mut HashMap, + indexer_ruler: &IndexerRuler, + errors: &mut Vec, +) -> HashMap>)> { + paths_and_metadatas + .drain() + // TODO: Hard ignoring symlinks for now, but this should be configurable + .filter(|(_, metadata)| !metadata.is_symlink) + .map(|(current_path, metadata)| async { + indexer_ruler + .apply_all(¤t_path, &metadata) + .await + .map(|acceptance_per_rule_kind| { + (current_path, (metadata, acceptance_per_rule_kind)) + }) + .map_err(|e| NonCriticalIndexerError::IndexerRule(e.to_string())) + }) + .collect::>() + .join() + .await + .into_iter() + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) + .collect() +} + +async fn process_rules_results( + root: &Arc, + iso_file_path_factory: &impl IsoFilePathFactory, + parent_dir_accepted_by_its_children: Option, + paths_metadatas_and_acceptance: &mut HashMap< + PathBuf, + (InnerMetadata, HashMap>), + >, + maybe_to_keep_walking: &mut Option>, + errors: &mut Vec, +) -> (HashMap, HashSet) { + let root = root.as_ref(); + + let (accepted, accepted_ancestors) = paths_metadatas_and_acceptance.drain().fold( + (HashMap::new(), HashMap::new()), + |(mut accepted, mut accepted_ancestors), + (current_path, (metadata, acceptance_per_rule_kind))| { + // Accept by children has three states, + // None if we don't now yet or if this check doesn't apply + // Some(true) if this check applies and it passes + // Some(false) if this check applies and it was rejected + // and we pass the current parent state to its children + let mut 
accept_by_children_dir = parent_dir_accepted_by_its_children; + + if rejected_by_reject_glob(&acceptance_per_rule_kind) { + trace!( + "Path {} rejected by `RuleKind::RejectFilesByGlob`", + current_path.display() + ); + + return (accepted, accepted_ancestors); + } + + let is_dir = metadata.is_dir(); + + if is_dir + && process_and_maybe_reject_by_directory_rules( + &current_path, + &acceptance_per_rule_kind, + &mut accept_by_children_dir, + maybe_to_keep_walking, + ) { + trace!( + "Path {} rejected by rule `RuleKind::RejectIfChildrenDirectoriesArePresent`", + current_path.display(), + ); + return (accepted, accepted_ancestors); + } + + if rejected_by_accept_glob(&acceptance_per_rule_kind) { + trace!( + "Path {} reject because it didn't passed in any AcceptFilesByGlob rules", + current_path.display() + ); + return (accepted, accepted_ancestors); + } + + if accept_by_children_dir.unwrap_or(true) { + accept_ancestors( + current_path, + metadata, + root, + &mut accepted, + iso_file_path_factory, + &mut accepted_ancestors, + errors, + ); + } + + (accepted, accepted_ancestors) + }, + ); + + ( + accepted, + accepted_ancestors + .into_iter() + .map(|(ancestor_iso_file_path, ancestor_path)| async move { + fs::metadata(&ancestor_path) + .await + .map_err(|e| { + NonCriticalIndexerError::Metadata( + FileIOError::from((&ancestor_path, e)).to_string(), + ) + }) + .and_then(|metadata| { + FilePathMetadata::from_path(&ancestor_path, &metadata) + .map(|metadata| { + WalkingEntry { + iso_file_path: ancestor_iso_file_path, + metadata, + } + .into() + }) + .map_err(|e| NonCriticalIndexerError::FilePathMetadata(e.to_string())) + }) + }) + .collect::>() + .join() + .await + .into_iter() + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) + .collect(), + ) +} + +fn process_and_maybe_reject_by_directory_rules( + current_path: &Path, + acceptance_per_rule_kind: &HashMap>, + accept_by_children_dir: &mut Option, + maybe_to_keep_walking: &mut Option>, +) -> bool { + // If it is a 
directory, first we check if we must reject it and its children entirely + if rejected_by_children_directories(acceptance_per_rule_kind) { + return true; + } + + // Then we check if we must accept it and its children + if let Some(accepted_by_children_rules) = + acceptance_per_rule_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent) + { + if accepted_by_children_rules.iter().any(|accept| *accept) { + *accept_by_children_dir = Some(true); + } + + // If it wasn't accepted then we mark as rejected + if accept_by_children_dir.is_none() { + trace!( + "Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule", + current_path.display() + ); + *accept_by_children_dir = Some(false); + } + } + + // Then we mark this directory to maybe be walked in too + if let Some(ref mut to_keep_walking) = maybe_to_keep_walking { + to_keep_walking.push(ToWalkEntry { + path: current_path.to_path_buf(), + parent_dir_accepted_by_its_children: *accept_by_children_dir, + }); + } + + false +} + +fn accept_ancestors( + current_path: PathBuf, + metadata: InnerMetadata, + root: &Path, + accepted: &mut HashMap, + iso_file_path_factory: &impl IsoFilePathFactory, + accepted_ancestors: &mut HashMap, PathBuf>, + errors: &mut Vec, +) { + // If the ancestor directories weren't indexed before, we index them now + for ancestor in current_path + .ancestors() + .skip(1) // Skip the current directory as it was already indexed + .take_while(|&ancestor| ancestor != root) + { + if let Ok(iso_file_path) = iso_file_path_factory + .build(ancestor, true) + .map_err(|e| errors.push(NonCriticalIndexerError::IsoFilePath(e.to_string()).into())) + { + match accepted_ancestors.entry(iso_file_path) { + Entry::Occupied(_) => { + // If we already accepted this ancestor, then it will contain + // also all of its ancestors too, so we can stop here + break; + } + Entry::Vacant(entry) => { + trace!("Accepted ancestor {}", ancestor.display()); + entry.insert(ancestor.to_path_buf()); + } + } + } + 
} + + accepted.insert(current_path, metadata); +} + +fn rejected_by_accept_glob(acceptance_per_rule_kind: &HashMap>) -> bool { + acceptance_per_rule_kind + .get(&RuleKind::AcceptFilesByGlob) + .map_or(false, |accept_rules| { + accept_rules.iter().all(|accept| !accept) + }) +} + +fn rejected_by_children_directories( + acceptance_per_rule_kind: &HashMap>, +) -> bool { + acceptance_per_rule_kind + .get(&RuleKind::RejectIfChildrenDirectoriesArePresent) + .map_or(false, |reject_results| { + reject_results.iter().any(|reject| !reject) + }) +} + +fn rejected_by_reject_glob(acceptance_per_rule_kind: &HashMap>) -> bool { + acceptance_per_rule_kind + .get(&RuleKind::RejectFilesByGlob) + .map_or(false, |reject_results| { + reject_results.iter().any(|reject| !reject) + }) +} + +async fn gather_file_paths_to_remove( + accepted_paths: &mut HashMap, + entry_iso_file_path: &IsolatedFilePathData<'_>, + iso_file_path_factory: &impl IsoFilePathFactory, + db_proxy: &impl WalkerDBProxy, + errors: &mut Vec, +) -> (Vec, Vec) { + let (walking, to_delete_params) = accepted_paths + .drain() + .filter_map(|(path, metadata)| { + iso_file_path_factory + .build(&path, metadata.is_dir()) + .map(|iso_file_path| { + let params = file_path::WhereParam::from(&iso_file_path); + + ( + WalkingEntry { + iso_file_path, + metadata: FilePathMetadata::from(metadata), + }, + params, + ) + }) + .map_err(|e| { + errors.push(NonCriticalIndexerError::IsoFilePath(e.to_string()).into()); + }) + .ok() + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + // We continue the function even if we fail to fetch `file_path`s to remove, + // the DB will have old `file_path`s but at least this is better than + // not adding the newly indexed paths + let to_remove_entries = db_proxy + .fetch_file_paths_to_remove(entry_iso_file_path, to_delete_params) + .await + .map_err(|e| errors.push(e.into())) + .unwrap_or_default(); + + (walking, to_remove_entries) +} + +#[cfg(test)] +mod tests { + use super::*; + + use 
sd_core_indexer_rules::{IndexerRule, RulePerKind}; + use sd_task_system::{TaskOutput, TaskStatus, TaskSystem}; + + use chrono::Utc; + use futures_concurrency::future::FutureGroup; + use globset::{Glob, GlobSetBuilder}; + use lending_stream::{LendingStream, StreamExt}; + use tempfile::{tempdir, TempDir}; + use tokio::fs; + use tracing::debug; + use tracing_test::traced_test; + + #[derive(Debug, Clone)] + struct DummyIsoPathFactory { + root_path: Arc, + } + + impl IsoFilePathFactory for DummyIsoPathFactory { + fn build( + &self, + path: impl AsRef, + is_dir: bool, + ) -> Result, FilePathError> { + IsolatedFilePathData::new(0, self.root_path.as_ref(), path, is_dir).map_err(Into::into) + } + } + + #[derive(Debug, Clone)] + struct DummyDBProxy; + + impl WalkerDBProxy for DummyDBProxy { + async fn fetch_file_paths( + &self, + _: Vec, + ) -> Result, IndexerError> { + Ok(vec![]) + } + + async fn fetch_file_paths_to_remove( + &self, + _: &IsolatedFilePathData<'_>, + _: Vec, + ) -> Result, NonCriticalIndexerError> { + Ok(vec![]) + } + } + + fn new_indexer_rule( + name: impl Into, + default: bool, + rules: Vec, + ) -> IndexerRule { + IndexerRule { + id: None, + name: name.into(), + default, + rules, + date_created: Utc::now(), + date_modified: Utc::now(), + } + } + + async fn prepare_location() -> TempDir { + // root + // |__ rust_project + // | |__ .git + // | |__ + // | |__ Cargo.toml + // | |__ src + // | | |__ main.rs + // | |__ target + // | |__ debug + // | |__ main + // |__ inner + // | |__ node_project + // | |__ .git + // | |__ + // | |__ package.json + // | |__ src + // | | |__ App.tsx + // | |__ node_modules + // | |__ react + // | |__ package.json + // |__ photos + // |__ photo1.png + // |__ photo2.jpg + // |__ photo3.jpeg + // |__ text.txt + + let root = tempdir().unwrap(); + let root_path = root.path(); + let rust_project = root_path.join("rust_project"); + let inner_project = root_path.join("inner"); + let node_project = inner_project.join("node_project"); + 
let photos = root_path.join("photos"); + + fs::create_dir(&rust_project).await.unwrap(); + fs::create_dir(&inner_project).await.unwrap(); + fs::create_dir(&node_project).await.unwrap(); + fs::create_dir(&photos).await.unwrap(); + + // Making rust and node projects a git repository + fs::create_dir(rust_project.join(".git")).await.unwrap(); + fs::create_dir(node_project.join(".git")).await.unwrap(); + + // Populating rust project + fs::File::create(rust_project.join("Cargo.toml")) + .await + .unwrap(); + let rust_src_dir = rust_project.join("src"); + fs::create_dir(&rust_src_dir).await.unwrap(); + fs::File::create(rust_src_dir.join("main.rs")) + .await + .unwrap(); + let rust_target_dir = rust_project.join("target"); + fs::create_dir(&rust_target_dir).await.unwrap(); + let rust_build_dir = rust_target_dir.join("debug"); + fs::create_dir(&rust_build_dir).await.unwrap(); + fs::File::create(rust_build_dir.join("main")).await.unwrap(); + + // Populating node project + fs::File::create(node_project.join("package.json")) + .await + .unwrap(); + let node_src_dir = node_project.join("src"); + fs::create_dir(&node_src_dir).await.unwrap(); + fs::File::create(node_src_dir.join("App.tsx")) + .await + .unwrap(); + let node_modules = node_project.join("node_modules"); + fs::create_dir(&node_modules).await.unwrap(); + let node_modules_dep = node_modules.join("react"); + fs::create_dir(&node_modules_dep).await.unwrap(); + fs::File::create(node_modules_dep.join("package.json")) + .await + .unwrap(); + + // Photos directory + for photo in ["photo1.png", "photo2.jpg", "photo3.jpeg", "text.txt"] { + fs::File::create(photos.join(photo)).await.unwrap(); + } + + root + } + + async fn run_test( + root_path: &Path, + indexer_ruler: IndexerRuler, + expected: HashSet, + ) { + let system = TaskSystem::new(); + + let handle = system + .dispatch( + WalkDirTask::new( + root_path.to_path_buf(), + Arc::new(root_path.to_path_buf()), + indexer_ruler, + DummyIsoPathFactory { + root_path: 
Arc::new(root_path.to_path_buf()), + }, + DummyDBProxy, + Some(system.get_dispatcher()), + ) + .unwrap(), + ) + .await; + + let mut group = FutureGroup::new(); + + group.insert(handle); + + let mut group = group.lend_mut(); + + let mut actual_set = HashSet::new(); + + let mut ancestors = HashSet::new(); + + while let Some((group, task_result)) = group.next().await { + let TaskStatus::Done((_task_id, TaskOutput::Out(output))) = task_result.unwrap() else { + panic!("unexpected task output") + }; + + let WalkTaskOutput { + to_create, + accepted_ancestors, + errors, + handles, + .. + } = *output.downcast::().unwrap(); + + assert!(errors.is_empty(), "errors: {errors:#?}"); + + actual_set.extend(to_create); + ancestors.extend(accepted_ancestors); + + for handle in handles { + group.insert(handle); + } + } + + for actual in &actual_set { + ancestors.remove(actual); + } + + if !ancestors.is_empty() { + debug!("Adding ancestors to actual: {:#?}", ancestors); + actual_set.extend(ancestors); + } + + assert_eq!( + actual_set, + expected, + "Expected \\ Actual: {:#?};\n Actual \\ Expected: {:#?}", + expected.difference(&actual_set), + actual_set.difference(&expected) + ); + } + + #[tokio::test] + #[traced_test] + async fn test_walk_without_rules() { + let root = prepare_location().await; + let root_path = root.path(); + + let metadata = FilePathMetadata { + inode: 0, + size_in_bytes: 0, + created_at: Utc::now(), + modified_at: Utc::now(), + hidden: false, + }; + + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + let maybe_object_id = None; + + #[rustfmt::skip] + let expected = [ + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), 
metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, 
iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/text.txt"), false), metadata }, + ] + .into_iter() + .collect::>(); + + run_test(root_path, IndexerRuler::default(), expected).await; + } + + #[tokio::test] + #[traced_test] + async fn test_only_photos() { + let root = prepare_location().await; + let root_path = root.path(); + + let metadata = FilePathMetadata { + inode: 0, + size_in_bytes: 0, + created_at: Utc::now(), + modified_at: Utc::now(), + hidden: false, + }; + + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + let maybe_object_id = None; + + #[rustfmt::skip] + let expected = [ + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, + ] + .into_iter() + .collect::>(); + + run_test( + root_path, + IndexerRuler::new(vec![new_indexer_rule( + "only photos", + false, + vec![RulePerKind::AcceptFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{*.png,*.jpg,*.jpeg}").unwrap()) + .build() + .unwrap(), + )], + )]), + expected, + ) + .await; + } + + #[tokio::test] + #[traced_test] + async fn test_git_repos() { + let root = prepare_location().await; + let root_path = root.path(); + + let metadata = FilePathMetadata { + inode: 0, + size_in_bytes: 0, + created_at: Utc::now(), + modified_at: Utc::now(), + hidden: false, + }; + + let f = |path, is_dir| IsolatedFilePathData::new(0, 
root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + let maybe_object_id = None; + + #[rustfmt::skip] + let expected = [ + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: 
f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, + ] + .into_iter() + .collect::>(); + + run_test( + root_path, + IndexerRuler::new(vec![new_indexer_rule( + "git repos", + false, + vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent( + HashSet::from([".git".to_string()]), + )], + )]), + expected, + ) + .await; + } + + #[tokio::test] + #[traced_test] + async fn git_repos_without_deps_or_build_dirs() { + let root = prepare_location().await; + let root_path = root.path(); + + let metadata = FilePathMetadata { + inode: 0, + size_in_bytes: 0, + created_at: Utc::now(), + modified_at: Utc::now(), + hidden: false, + }; + + let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); + let pub_id = Uuid::new_v4(); + let maybe_object_id = None; + + #[rustfmt::skip] + let expected = [ + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: 
f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + ] + .into_iter() + .collect::>(); + + run_test( + root_path, + IndexerRuler::new(vec![ + new_indexer_rule( + "git repos", + false, + vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent( + HashSet::from([".git".into()]), + )], + ), + new_indexer_rule( + "reject node_modules", + false, + vec![RulePerKind::RejectFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{**/node_modules/*,**/node_modules}").unwrap()) + .build() + .unwrap(), + )], + ), + new_indexer_rule( + "reject rust build dir", + false, + vec![RulePerKind::RejectFilesByGlob( + vec![], + GlobSetBuilder::new() + .add(Glob::new("{**/target/*,**/target}").unwrap()) + .build() + .unwrap(), + )], + ), + ]), + expected, + ) + .await; + } +} diff --git a/core/crates/heavy-lifting/src/job_system/error.rs b/core/crates/heavy-lifting/src/job_system/error.rs new file mode 100644 index 000000000..af212ef4e --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/error.rs @@ -0,0 +1,61 @@ +use crate::Error; + +use sd_utils::error::FileIOError; + +use prisma_client_rust::QueryError; + +use super::{job::JobName, report::ReportError, JobId}; + +#[derive(thiserror::Error, Debug)] +pub enum JobSystemError { + #[error("job not found: ")] + NotFound(JobId), + #[error("job already running: ")] + AlreadyRunning { + new_id: JobId, + job_name: JobName, + already_running_id: JobId, + }, + + #[error("job canceled: ")] + Canceled(JobId), + + #[error("failed to load job reports from database to resume jobs: {0}")] + LoadReportsForResume(#[from] QueryError), + + #[error("failed to serialize job to be saved and resumed later: {0}")] + Serialize(#[from] rmp_serde::encode::Error), + + #[error("failed 
to deserialize job to be resumed: {0}")] + Deserialize(#[from] rmp_serde::decode::Error), + + #[error("failed to save or load jobs on disk: {0}")] + StoredJobs(FileIOError), + + #[error(transparent)] + Report(#[from] ReportError), + + #[error(transparent)] + Processing(#[from] Error), +} + +impl From for rspc::Error { + fn from(e: JobSystemError) -> Self { + match e { + JobSystemError::NotFound(_) => { + Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e) + } + JobSystemError::AlreadyRunning { .. } => { + Self::with_cause(rspc::ErrorCode::Conflict, e.to_string(), e) + } + + JobSystemError::Canceled(_) => { + Self::with_cause(rspc::ErrorCode::ClientClosedRequest, e.to_string(), e) + } + JobSystemError::Processing(e) => e.into(), + JobSystemError::Report(e) => e.into(), + + _ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e), + } + } +} diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs new file mode 100644 index 000000000..5dfdddfcb --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -0,0 +1,784 @@ +use crate::{Error, NonCriticalJobError}; + +use sd_core_sync::Manager as SyncManager; + +use sd_prisma::prisma::PrismaClient; +use sd_task_system::{ + BaseTaskDispatcher, Task, TaskDispatcher, TaskHandle, TaskRemoteController, TaskSystemError, +}; + +use std::{ + collections::VecDeque, + hash::{DefaultHasher, Hash, Hasher}, + marker::PhantomData, + pin::pin, + sync::Arc, +}; + +use async_channel as chan; +use chrono::{DateTime, Utc}; +use futures::{stream, Future, StreamExt}; +use futures_concurrency::{ + future::{Join, TryJoin}, + stream::Merge, +}; +use serde::{Deserialize, Serialize}; +use specta::Type; +use strum::{Display, EnumString}; +use tokio::spawn; +use tracing::{debug, error, info, warn}; +use uuid::Uuid; + +use super::{ + report::{ + Report, ReportBuilder, ReportInputMetadata, ReportMetadata, ReportOutputMetadata, Status, + }, + Command, JobId, 
JobSystemError, SerializableJob, SerializedTasks, +}; + +#[derive( + Debug, Serialize, Deserialize, EnumString, Display, Clone, Copy, Type, Hash, PartialEq, Eq, +)] +#[strum(use_phf, serialize_all = "snake_case")] +pub enum JobName { + Indexer, + // TODO: Add more job names as needed +} + +pub enum ReturnStatus { + Completed(JobReturn), + Shutdown(Result>, rmp_serde::encode::Error>), + Canceled, +} + +pub enum ProgressUpdate { + TaskCount(u64), + CompletedTaskCount(u64), + Message(String), + Phase(String), +} + +impl ProgressUpdate { + pub fn message(message: impl Into) -> Self { + Self::Message(message.into()) + } + + pub fn phase(phase: impl Into) -> Self { + Self::Phase(phase.into()) + } +} + +pub trait JobContext: Send + Sync + Clone + 'static { + fn id(&self) -> Uuid; + fn db(&self) -> &Arc; + fn sync(&self) -> &Arc; + fn invalidate_query(&self, query: &'static str); + fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync; + fn progress(&self, updates: Vec); + fn progress_msg(&self, msg: impl Into) { + self.progress(vec![ProgressUpdate::Message(msg.into())]); + } +} + +pub trait Job: Send + Sync + Hash + 'static { + const NAME: JobName; + + #[allow(unused_variables)] + fn resume_tasks( + &mut self, + dispatcher: &JobTaskDispatcher, + ctx: &impl JobContext, + serialized_tasks: SerializedTasks, + ) -> impl Future> + Send { + async move { Ok(()) } + } + + fn run( + self, + dispatcher: JobTaskDispatcher, + ctx: impl JobContext, + ) -> impl Future> + Send; +} + +pub trait IntoJob +where + J: Job + SerializableJob, + Ctx: JobContext, +{ + fn into_job(self) -> Box>; +} + +impl IntoJob for J +where + J: Job + SerializableJob, + Ctx: JobContext, +{ + fn into_job(self) -> Box> { + let id = JobId::new_v4(); + + Box::new(JobHolder { + id, + job: self, + report: ReportBuilder::new(id, J::NAME).build(), + next_jobs: VecDeque::new(), + _ctx: PhantomData, + }) + } +} + +impl IntoJob for JobBuilder +where + J: Job + SerializableJob, + Ctx: JobContext, +{ + fn 
into_job(self) -> Box> { + self.build() + } +} + +#[derive(Debug)] +pub struct JobReturn { + data: JobOutputData, + metadata: Option, + non_critical_errors: Vec, +} + +impl JobReturn { + #[must_use] + pub fn builder() -> JobReturnBuilder { + JobReturnBuilder { + job_return: Self::default(), + } + } +} + +impl Default for JobReturn { + fn default() -> Self { + Self { + data: JobOutputData::Empty, + metadata: None, + non_critical_errors: vec![], + } + } +} + +#[derive(Debug, Default)] +pub struct JobReturnBuilder { + job_return: JobReturn, +} + +impl JobReturnBuilder { + #[must_use] + pub const fn with_data(mut self, data: JobOutputData) -> Self { + self.job_return.data = data; + self + } + + #[must_use] + pub fn with_metadata(mut self, metadata: impl Into) -> Self { + self.job_return.metadata = Some(metadata.into()); + self + } + + #[must_use] + pub fn with_non_critical_errors(mut self, errors: Vec) -> Self { + if self.job_return.non_critical_errors.is_empty() { + self.job_return.non_critical_errors = errors; + } else { + self.job_return.non_critical_errors.extend(errors); + } + self + } + + #[must_use] + pub fn build(self) -> JobReturn { + self.job_return + } +} + +#[derive(Serialize, Type)] +pub struct JobOutput { + id: JobId, + status: Status, + job_name: JobName, + data: JobOutputData, + metadata: Vec, + non_critical_errors: Vec, +} + +impl JobOutput { + pub fn prepare_output_and_report( + JobReturn { + data, + metadata, + non_critical_errors, + }: JobReturn, + report: &mut Report, + ) -> Self { + if non_critical_errors.is_empty() { + report.status = Status::Completed; + debug!("Job completed", report.id, report.name); + } else { + report.status = Status::CompletedWithErrors; + report.non_critical_errors = non_critical_errors + .iter() + .map(ToString::to_string) + .collect(); + + warn!( + "Job completed with errors: {non_critical_errors:#?}", + report.id, report.name + ); + } + + if let Some(metadata) = metadata { + 
report.metadata.push(ReportMetadata::Output(metadata)); + } + + report.completed_at = Some(Utc::now()); + + Self { + id: report.id, + status: report.status, + job_name: report.name, + data, + metadata: report.metadata.clone(), + non_critical_errors, + } + } +} + +#[derive(Debug, Serialize, Type)] +pub enum JobOutputData { + Empty, + // TODO: Add more types +} + +pub struct JobBuilder +where + J: Job + SerializableJob, + Ctx: JobContext, +{ + id: JobId, + job: J, + report_builder: ReportBuilder, + next_jobs: VecDeque>>, + _ctx: PhantomData, +} + +impl JobBuilder +where + J: Job + SerializableJob, + Ctx: JobContext, +{ + pub fn build(self) -> Box> { + Box::new(JobHolder { + id: self.id, + job: self.job, + report: self.report_builder.build(), + next_jobs: self.next_jobs, // hand over the jobs queued by `enqueue_next` instead of dropping them + _ctx: PhantomData, + }) + } + + pub fn new(job: J) -> Self { + let id = JobId::new_v4(); + Self { + id, + job, + report_builder: ReportBuilder::new(id, J::NAME), + next_jobs: VecDeque::new(), + _ctx: PhantomData, + } + } + + #[must_use] + pub fn with_action(mut self, action: impl Into) -> Self { + self.report_builder = self.report_builder.with_action(action); + self + } + + #[must_use] + pub fn with_parent_id(mut self, parent_id: JobId) -> Self { + self.report_builder = self.report_builder.with_parent_id(parent_id); + self + } + + #[must_use] + pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self { + self.report_builder = self.report_builder.with_metadata(metadata); + self + } + + #[must_use] + pub fn enqueue_next(mut self, next: impl Job + SerializableJob) -> Self { + let next_job_order = self.next_jobs.len() + 1; + + let mut child_job_builder = JobBuilder::new(next).with_parent_id(self.id); + + if let Some(parent_action) = &self.report_builder.action { + child_job_builder = + child_job_builder.with_action(format!("{parent_action}-{next_job_order}")); + } + + self.next_jobs.push_back(child_job_builder.build()); + + self + } +} + +pub struct JobHolder +where + J: Job + 
SerializableJob, + Ctx: JobContext, +{ + pub(super) id: JobId, + pub(super) job: J, + pub(super) report: Report, + pub(super) next_jobs: VecDeque>>, + pub(super) _ctx: PhantomData, +} + +pub struct JobHandle { + pub(crate) next_jobs: VecDeque>>, + pub(crate) job_ctx: Ctx, + pub(crate) report: Report, + pub(crate) commands_tx: chan::Sender, +} + +impl JobHandle { + pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> { + if self.commands_tx.send(command).await.is_err() { + warn!("Tried to send a {command:?} to a job that was already completed"); + + Ok(()) + } else { + self.command_children(command).await + } + } + + pub async fn command_children(&mut self, command: Command) -> Result<(), JobSystemError> { + let (new_status, completed_at) = match command { + Command::Pause => (Status::Paused, None), + Command::Resume => return Ok(()), + Command::Cancel => (Status::Canceled, Some(Utc::now())), + }; + + self.next_jobs + .iter_mut() + .map(|dyn_job| dyn_job.report_mut()) + .map(|next_job_report| async { + next_job_report.status = new_status; + next_job_report.completed_at = completed_at; + + next_job_report.update(self.job_ctx.db()).await + }) + .collect::>() + .try_join() + .await + .map(|_| ()) + .map_err(Into::into) + } + + pub async fn register_start( + &mut self, + start_time: DateTime, + ) -> Result<(), JobSystemError> { + let Self { + next_jobs, + report, + job_ctx, + .. 
+ } = self; + + report.status = Status::Running; + if report.started_at.is_none() { + report.started_at = Some(start_time); + } + + let db = job_ctx.db(); + + // If the report doesn't have a created_at date, it's a new report + if report.created_at.is_none() { + report.create(db).await?; + } else { + // Otherwise it can be a job being resumed or a child job that has already been created + report.update(db).await?; + } + + // Registering children jobs + next_jobs + .iter_mut() + .map(|dyn_job| dyn_job.report_mut()) + .map(|next_job_report| async { + if next_job_report.created_at.is_none() { + next_job_report.create(db).await + } else { + Ok(()) + } + }) + .collect::>() + .try_join() + .await + .map(|_| ()) + .map_err(Into::into) + } + + pub async fn complete_job( + &mut self, + job_return: JobReturn, + ) -> Result { + let Self { + report, job_ctx, .. + } = self; + + let output = JobOutput::prepare_output_and_report(job_return, report); + + report.update(job_ctx.db()).await?; + + Ok(output) + } + + pub async fn failed_job(&mut self, e: &Error) -> Result<(), JobSystemError> { + let Self { + report, job_ctx, .. + } = self; + error!( + "Job failed with a critical error: {e:#?};", + report.id, report.name + ); + + report.status = Status::Failed; + report.critical_error = Some(e.to_string()); + report.completed_at = Some(Utc::now()); + + report.update(job_ctx.db()).await?; + + self.command_children(Command::Cancel).await + } + + pub async fn shutdown_pause_job(&mut self) -> Result<(), JobSystemError> { + let Self { + report, job_ctx, .. + } = self; + info!( + "Job paused due to system shutdown, we will pause all children jobs", + report.id, report.name + ); + + report.status = Status::Paused; + + report.update(job_ctx.db()).await?; + + self.command_children(Command::Pause).await + } + + pub async fn cancel_job(&mut self) -> Result<(), JobSystemError> { + let Self { + report, job_ctx, .. 
+ } = self; + info!( + "Job canceled, we will cancel all children jobs", + report.id, report.name + ); + + report.status = Status::Canceled; + report.completed_at = Some(Utc::now()); + + report.update(job_ctx.db()).await?; + + self.command_children(Command::Cancel).await + } +} + +#[async_trait::async_trait] +pub trait DynJob: Send + Sync + 'static { + fn id(&self) -> JobId; + + fn job_name(&self) -> JobName; + + fn hash(&self) -> u64; + + fn report_mut(&mut self) -> &mut Report; + + fn set_next_jobs(&mut self, next_jobs: VecDeque>>); + + fn next_jobs(&self) -> &VecDeque>>; + + async fn serialize(self: Box) -> Result>, rmp_serde::encode::Error>; + + fn dispatch( + self: Box, + base_dispatcher: BaseTaskDispatcher, + job_ctx: Ctx, + done_tx: chan::Sender<(JobId, Result)>, + ) -> JobHandle; + + fn resume( + self: Box, + base_dispatcher: BaseTaskDispatcher, + job_ctx: Ctx, + serialized_tasks: Option, + done_tx: chan::Sender<(JobId, Result)>, + ) -> JobHandle; +} + +#[async_trait::async_trait] +impl DynJob for JobHolder +where + J: Job + SerializableJob, + Ctx: JobContext, +{ + fn id(&self) -> JobId { + self.id + } + + fn job_name(&self) -> JobName { + J::NAME + } + + fn hash(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + J::NAME.hash(&mut hasher); + self.job.hash(&mut hasher); + hasher.finish() + } + + fn report_mut(&mut self) -> &mut Report { + &mut self.report + } + + fn set_next_jobs(&mut self, next_jobs: VecDeque>>) { + self.next_jobs = next_jobs; + } + + fn next_jobs(&self) -> &VecDeque>> { + &self.next_jobs + } + + async fn serialize(self: Box) -> Result>, rmp_serde::encode::Error> { + self.job.serialize().await + } + + fn dispatch( + self: Box, + base_dispatcher: BaseTaskDispatcher, + job_ctx: Ctx, + done_tx: chan::Sender<(JobId, Result)>, + ) -> JobHandle { + let (commands_tx, commands_rx) = chan::bounded(8); + + spawn(to_spawn_job( + self.id, + self.job, + job_ctx.clone(), + None, + base_dispatcher, + commands_rx, + done_tx, + )); + + JobHandle { + 
next_jobs: self.next_jobs, + job_ctx, + report: self.report, + commands_tx, + } + } + + fn resume( + self: Box, + base_dispatcher: BaseTaskDispatcher, + job_ctx: Ctx, + serialized_tasks: Option, + done_tx: chan::Sender<(JobId, Result)>, + ) -> JobHandle { + let (commands_tx, commands_rx) = chan::bounded(8); + + spawn(to_spawn_job( + self.id, + self.job, + job_ctx.clone(), + serialized_tasks, + base_dispatcher, + commands_rx, + done_tx, + )); + + JobHandle { + next_jobs: self.next_jobs, + job_ctx, + report: self.report, + commands_tx, + } + } +} + +async fn to_spawn_job( + id: JobId, + mut job: impl Job, + job_ctx: Ctx, + existing_tasks: Option, + base_dispatcher: BaseTaskDispatcher, + commands_rx: chan::Receiver, + done_tx: chan::Sender<(JobId, Result)>, +) { + enum StreamMessage { + Commands(Command), + NewRemoteController(TaskRemoteController), + Done(Result), + } + + let mut remote_controllers = vec![]; + + let (dispatcher, remote_controllers_rx) = JobTaskDispatcher::new(base_dispatcher); + + if let Some(existing_tasks) = existing_tasks { + if let Err(e) = job + .resume_tasks(&dispatcher, &job_ctx, existing_tasks) + .await + { + done_tx + .send((id, Err(e))) + .await + .expect("jobs done tx closed on error at resume_tasks"); + + return; + } + } + + let mut msgs_stream = pin!(( + commands_rx.map(StreamMessage::Commands), + remote_controllers_rx.map(StreamMessage::NewRemoteController), + stream::once(job.run(dispatcher, job_ctx)).map(StreamMessage::Done), + ) + .merge()); + + while let Some(msg) = msgs_stream.next().await { + match msg { + StreamMessage::NewRemoteController(remote_controller) => { + remote_controllers.push(remote_controller); + } + StreamMessage::Commands(command) => { + remote_controllers.retain(|controller| !controller.is_done()); + + match command { + Command::Pause => { + remote_controllers + .iter() + .map(TaskRemoteController::pause) + .collect::>() + .join() + .await + .into_iter() + .for_each(|res| { + if let Err(e) = res { + 
assert!(matches!(e, TaskSystemError::TaskNotFound(_))); + + warn!("Tried to pause a task that was already completed"); + } + }); + } + Command::Resume => { + remote_controllers + .iter() + .map(TaskRemoteController::resume) + .collect::>() + .join() + .await + .into_iter() + .for_each(|res| { + if let Err(e) = res { + assert!(matches!(e, TaskSystemError::TaskNotFound(_))); + + warn!("Tried to pause a task that was already completed"); + } + }); + } + Command::Cancel => { + remote_controllers + .iter() + .map(TaskRemoteController::cancel) + .collect::>() + .join() + .await; + + return done_tx + .send((id, Ok(ReturnStatus::Canceled))) + .await + .expect("jobs done tx closed"); + } + } + } + + StreamMessage::Done(res) => { + #[cfg(debug_assertions)] + { + // Just a sanity check to make sure we don't have any pending tasks left + remote_controllers.retain(|controller| !controller.is_done()); + assert!(remote_controllers.is_empty()); + // Using #[cfg(debug_assertions)] to don't pay this retain cost in release builds + } + + return done_tx.send((id, res)).await.expect("jobs done tx closed"); + } + } + } +} + +#[derive(Debug, Clone)] +pub struct JobTaskDispatcher { + dispatcher: BaseTaskDispatcher, + remote_controllers_tx: chan::Sender, +} + +impl TaskDispatcher for JobTaskDispatcher { + async fn dispatch_boxed(&self, boxed_task: Box>) -> TaskHandle { + let handle = self.dispatcher.dispatch_boxed(boxed_task).await; + + self.remote_controllers_tx + .send(handle.remote_controller()) + .await + .expect("remote controllers tx closed"); + + handle + } + + async fn dispatch_many_boxed( + &self, + boxed_tasks: impl IntoIterator>> + Send, + ) -> Vec> { + let handles = self.dispatcher.dispatch_many_boxed(boxed_tasks).await; + + for handle in &handles { + self.remote_controllers_tx + .send(handle.remote_controller()) + .await + .expect("remote controllers tx closed"); + } + + handles + .iter() + .map(|handle| self.remote_controllers_tx.send(handle.remote_controller())) + 
.collect::>() + .try_join() + .await + .expect("remote controllers tx closed"); + + handles + } +} + +impl JobTaskDispatcher { + fn new(dispatcher: BaseTaskDispatcher) -> (Self, chan::Receiver) { + let (remote_controllers_tx, remote_controllers_rx) = chan::unbounded(); + + ( + Self { + dispatcher, + remote_controllers_tx, + }, + remote_controllers_rx, + ) + } +} diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs new file mode 100644 index 000000000..9f8c6c15b --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -0,0 +1,313 @@ +use crate::Error; + +use sd_prisma::prisma::location; +use sd_task_system::BaseTaskDispatcher; +use sd_utils::error::FileIOError; + +use std::{cell::RefCell, collections::hash_map::HashMap, path::Path, sync::Arc}; + +use async_channel as chan; +use futures::Stream; +use futures_concurrency::future::{Join, TryJoin}; +use tokio::{fs, spawn, sync::oneshot, task::JoinHandle}; +use tracing::{error, info, trace, warn}; +use uuid::Uuid; + +mod error; +pub mod job; +pub mod report; +mod runner; +mod store; +pub mod utils; + +use error::JobSystemError; +use job::{IntoJob, Job, JobContext, JobName, JobOutput}; +use runner::{run, JobSystemRunner, RunnerMessage}; +use store::{load_jobs, StoredJobEntry}; + +pub use store::{SerializableJob, SerializedTasks}; + +const PENDING_JOBS_FILE: &str = "pending_jobs.bin"; + +pub type JobId = Uuid; + +#[derive(Debug, Clone, Copy)] +pub enum Command { + Pause, + Resume, + Cancel, +} + +pub struct JobSystem { + msgs_tx: chan::Sender>, + job_outputs_rx: chan::Receiver<(JobId, Result)>, + runner_handle: RefCell>>, +} + +impl JobSystem { + pub async fn new( + base_dispatcher: BaseTaskDispatcher, + data_directory: impl AsRef + Send, + previously_existing_contexts: &HashMap, + ) -> Result { + let (job_outputs_tx, job_outputs_rx) = chan::unbounded(); + let (job_return_status_tx, job_return_status_rx) = chan::bounded(16); + let (msgs_tx, msgs_rx) = 
chan::bounded(8); + + let store_jobs_file = Arc::new(data_directory.as_ref().join(PENDING_JOBS_FILE)); + + let runner_handle = RefCell::new(Some(spawn({ + let store_jobs_file = Arc::clone(&store_jobs_file); + async move { + trace!("Job System Runner starting..."); + while let Err(e) = spawn({ + let store_jobs_file = Arc::clone(&store_jobs_file); + let base_dispatcher = base_dispatcher.clone(); + let job_return_status_tx = job_return_status_tx.clone(); + let job_return_status_rx = job_return_status_rx.clone(); + let job_outputs_tx = job_outputs_tx.clone(); + let msgs_rx = msgs_rx.clone(); + + async move { + run( + JobSystemRunner::new( + base_dispatcher, + job_return_status_tx, + job_outputs_tx, + ), + store_jobs_file.as_ref(), + msgs_rx, + job_return_status_rx, + ) + .await; + } + }) + .await + { + if e.is_panic() { + error!("Job system panicked: {e:#?}"); + } else { + trace!("JobSystemRunner received shutdown signal and will exit..."); + break; + } + trace!("Restarting JobSystemRunner processing task..."); + } + + info!("JobSystemRunner gracefully shutdown"); + } + }))); + + load_stored_job_entries( + store_jobs_file.as_ref(), + previously_existing_contexts, + &msgs_tx, + ) + .await?; + + Ok(Self { + msgs_tx, + job_outputs_rx, + runner_handle, + }) + } + + /// Checks if *any* of the desired jobs is running for the desired location + /// # Panics + /// Panics only happen if internal channels are unexpectedly closed + pub async fn check_running_jobs( + &self, + job_names: Vec, + location_id: location::id::Type, + ) -> bool { + let (ack_tx, ack_rx) = oneshot::channel(); + + self.msgs_tx + .send(RunnerMessage::CheckIfJobAreRunning { + job_names, + location_id, + ack_tx, + }) + .await + .expect("runner msgs channel unexpectedly closed on check running job request"); + + ack_rx + .await + .expect("ack channel closed before receiving check running job response") + } + + /// Shutdown the job system + /// # Panics + /// Panics only happen if internal channels are 
unexpectedly closed + pub async fn shutdown(&self) { + if let Some(handle) = self + .runner_handle + .try_borrow_mut() + .ok() + .and_then(|mut maybe_handle| maybe_handle.take()) + { + self.msgs_tx + .send(RunnerMessage::Shutdown) + .await + .expect("runner msgs channel unexpectedly closed on shutdown request"); + + if let Err(e) = handle.await { + if e.is_panic() { + error!("JobSystem panicked: {e:#?}"); + } + } + info!("JobSystem gracefully shutdown"); + } else { + warn!("JobSystem already shutdown"); + } + } + + /// Dispatch a new job to the system + /// # Panics + /// Panics only happen if internal channels are unexpectedly closed + pub async fn dispatch( + &mut self, + job: impl IntoJob + Send, + location_id: location::id::Type, + job_ctx: Ctx, + ) -> Result { + let dyn_job = job.into_job(); + let id = dyn_job.id(); + + let (ack_tx, ack_rx) = oneshot::channel(); + self.msgs_tx + .send(RunnerMessage::NewJob { + id, + location_id, + dyn_job, + job_ctx, + ack_tx, + }) + .await + .expect("runner msgs channel unexpectedly closed on new job request"); + + ack_rx + .await + .expect("ack channel closed before receiving new job request") + .map(|()| id) + } + + pub fn receive_job_outputs( + &self, + ) -> impl Stream)> { + self.job_outputs_rx.clone() + } + + async fn send_command(&self, id: JobId, command: Command) -> Result<(), JobSystemError> { + let (ack_tx, ack_rx) = oneshot::channel(); + self.msgs_tx + .send(RunnerMessage::Command { + id, + command, + ack_tx, + }) + .await + .unwrap_or_else(|_| { + panic!("runner msgs channel unexpectedly closed on {command:?} request") + }); + + ack_rx + .await + .unwrap_or_else(|_| panic!("ack channel closed before receiving {command:?} response")) + } + + pub async fn pause(&self, id: JobId) -> Result<(), JobSystemError> { + self.send_command(id, Command::Pause).await + } + + pub async fn resume(&self, id: JobId) -> Result<(), JobSystemError> { + self.send_command(id, Command::Resume).await + } + + pub async fn cancel(&self, id: 
JobId) -> Result<(), JobSystemError> { + self.send_command(id, Command::Cancel).await + } +} + +/// SAFETY: Due to usage of refcell we lost `Sync` impl, but we only use it to have a shutdown method +/// receiving `&self` which is called once, and we also use `try_borrow_mut` so we never panic +unsafe impl Sync for JobSystem {} + +async fn load_stored_job_entries( + store_jobs_file: impl AsRef + Send, + previously_existing_job_contexts: &HashMap, + msgs_tx: &chan::Sender>, +) -> Result<(), JobSystemError> { + let store_jobs_file = store_jobs_file.as_ref(); + + let stores_jobs_by_db = rmp_serde::from_slice::>>( + &fs::read(store_jobs_file).await.map_err(|e| { + JobSystemError::StoredJobs(FileIOError::from(( + store_jobs_file, + e, + "Failed to load jobs from disk", + ))) + })?, + )?; + + stores_jobs_by_db + .into_iter() + .filter_map(|(ctx_id, entries)| { + previously_existing_job_contexts.get(&ctx_id).map_or_else( + || { + warn!("Found stored jobs for a database that doesn't exist anymore: "); + None + }, + |ctx| Some((entries, ctx.clone())), + ) + }) + .map(|(entries, ctx)| async move { + load_jobs(entries, &ctx) + .await + .map(|stored_jobs| (stored_jobs, ctx)) + }) + .collect::>() + .join() + .await + .into_iter() + .filter_map(|res| { + res.map_err(|e| error!("Failed to load stored jobs: {e:#?}")) + .ok() + }) + .flat_map(|(stored_jobs, job_ctx)| { + stored_jobs + .into_iter() + .map(move |(location_id, dyn_job, serialized_tasks)| { + let job_ctx = job_ctx.clone(); + async move { + let (ack_tx, ack_rx) = oneshot::channel(); + + msgs_tx + .send(RunnerMessage::ResumeStoredJob { + id: dyn_job.id(), + location_id, + dyn_job, + job_ctx, + serialized_tasks, + ack_tx, + }) + .await + .expect("runner msgs channel unexpectedly closed on stored job resume"); + + ack_rx.await.expect( + "ack channel closed before receiving stored job resume response", + ) + } + }) + }) + .collect::>() + .try_join() + .await?; + + fs::remove_file(store_jobs_file).await.map_err(|e| { + 
JobSystemError::StoredJobs(FileIOError::from(( + store_jobs_file, + e, + "Failed to clean stored jobs file", + ))) + }) +} diff --git a/core/crates/heavy-lifting/src/job_system/report.rs b/core/crates/heavy-lifting/src/job_system/report.rs new file mode 100644 index 000000000..dbb9af221 --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/report.rs @@ -0,0 +1,359 @@ +use sd_prisma::prisma::{job, PrismaClient}; +use sd_utils::db::{maybe_missing, MissingFieldError}; + +use std::{collections::HashMap, fmt, str::FromStr}; + +use chrono::{DateTime, Utc}; +use prisma_client_rust::QueryError; +use serde::{Deserialize, Serialize}; +use specta::Type; +use strum::ParseError; +use tracing::error; + +use super::{job::JobName, JobId}; + +#[derive(thiserror::Error, Debug)] +pub enum ReportError { + #[error("failed to create job report in database: {0}")] + Create(QueryError), + #[error("failed to update job report in database: {0}")] + Update(QueryError), + #[error("invalid job status integer: {0}")] + InvalidJobStatusInt(i32), + #[error("job not found in database: ")] + MissingReport(JobId), + #[error("serialization error: {0}")] + Serialization(#[from] rmp_serde::encode::Error), + #[error("deserialization error: {0}")] + Deserialization(#[from] rmp_serde::decode::Error), + #[error(transparent)] + MissingField(#[from] MissingFieldError), + #[error("failed to parse job name from database: {0}")] + JobNameParse(#[from] ParseError), +} + +impl From for rspc::Error { + fn from(e: ReportError) -> Self { + match e { + ReportError::Create(_) + | ReportError::Update(_) + | ReportError::InvalidJobStatusInt(_) => { + Self::with_cause(rspc::ErrorCode::BadRequest, e.to_string(), e) + } + + ReportError::MissingReport(_) => { + Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e) + } + ReportError::Serialization(_) + | ReportError::Deserialization(_) + | ReportError::MissingField(_) + | ReportError::JobNameParse(_) => { + 
Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e) + } + } + } +} + +#[derive(Debug, Serialize, Deserialize, Type, Clone)] +pub enum ReportMetadata { + Input(ReportInputMetadata), + Output(ReportOutputMetadata), +} + +#[derive(Debug, Serialize, Deserialize, Type, Clone)] +pub enum ReportInputMetadata { + Placeholder, + // TODO: Add more types +} + +#[derive(Debug, Serialize, Deserialize, Type, Clone)] +pub enum ReportOutputMetadata { + Metrics(HashMap), + // TODO: Add more types +} + +#[derive(Debug, Serialize, Type, Clone)] +pub struct Report { + pub id: JobId, + pub name: JobName, + pub action: Option, + + pub metadata: Vec, + pub critical_error: Option, + pub non_critical_errors: Vec, + + pub created_at: Option>, + pub started_at: Option>, + pub completed_at: Option>, + + pub parent_id: Option, + + pub status: Status, + pub task_count: i32, + pub completed_task_count: i32, + + pub phase: String, + pub message: String, + pub estimated_completion: DateTime, +} + +impl fmt::Display for Report { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Job {:#?}", + self.name, self.id, self.status + ) + } +} + +// convert database struct into a resource struct +impl TryFrom for Report { + type Error = ReportError; + + fn try_from(data: job::Data) -> Result { + Ok(Self { + id: JobId::from_slice(&data.id).expect("corrupted database"), + name: JobName::from_str(&maybe_missing(data.name, "job.name")?)?, + action: data.action, + + metadata: data + .metadata + .map(|m| { + rmp_serde::from_slice(&m).unwrap_or_else(|e| { + error!("Failed to deserialize job metadata: {e:#?}"); + vec![] + }) + }) + .unwrap_or_default(), + critical_error: data.critical_error, + non_critical_errors: data.non_critical_errors.map_or_else( + Default::default, + |non_critical_errors| { + serde_json::from_slice(&non_critical_errors).unwrap_or_else(|e| { + error!("Failed to deserialize job non-critical errors: {e:#?}"); + vec![] + }) + }, + ), + created_at: 
data.date_created.map(DateTime::into), + started_at: data.date_started.map(DateTime::into), + completed_at: data.date_completed.map(DateTime::into), + parent_id: data + .parent_id + .map(|id| JobId::from_slice(&id).expect("corrupted database")), + status: Status::try_from(maybe_missing(data.status, "job.status")?) + .expect("corrupted database"), + task_count: data.task_count.unwrap_or(0), + completed_task_count: data.completed_task_count.unwrap_or(0), + phase: String::new(), + message: String::new(), + estimated_completion: data + .date_estimated_completion + .map_or_else(Utc::now, DateTime::into), + }) + } +} + +impl Report { + #[must_use] + pub fn new(uuid: JobId, name: JobName) -> Self { + Self { + id: uuid, + name, + action: None, + created_at: None, + started_at: None, + completed_at: None, + status: Status::Queued, + critical_error: None, + non_critical_errors: vec![], + task_count: 0, + metadata: vec![], + parent_id: None, + completed_task_count: 0, + phase: String::new(), + message: String::new(), + estimated_completion: Utc::now(), + } + } + + #[must_use] + pub fn get_action_name_and_group_key(&self) -> (String, Option) { + // actions are formatted like "added_location" or "added_location-1" + let Some(action_name) = self + .action + .as_ref() + .and_then(|action| action.split('-').next().map(str::to_string)) + else { + return (self.id.to_string(), None); + }; + // create a unique group_key, EG: "added_location-" + let group_key = self.parent_id.map_or_else( + || format!("{action_name}-{}", self.id), + |parent_id| format!("{action_name}-{parent_id}"), + ); + + (action_name, Some(group_key)) + } + + pub async fn create(&mut self, db: &PrismaClient) -> Result<(), ReportError> { + let now = Utc::now(); + + db.job() + .create( + self.id.as_bytes().to_vec(), + sd_utils::chain_optional_iter( + [ + job::name::set(Some(self.name.to_string())), + job::action::set(self.action.clone()), + job::date_created::set(Some(now.into())), + 
job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)), + job::status::set(Some(self.status as i32)), + job::date_started::set(self.started_at.map(Into::into)), + job::task_count::set(Some(1)), + job::completed_task_count::set(Some(0)), + ], + [self + .parent_id + .map(|id| job::parent::connect(job::id::equals(id.as_bytes().to_vec())))], + ), + ) + .exec() + .await + .map_err(ReportError::Create)?; + + // Only setting created_at after we successfully created the job in DB + self.created_at = Some(now); + + Ok(()) + } + + pub async fn update(&mut self, db: &PrismaClient) -> Result<(), ReportError> { + db.job() + .update( + job::id::equals(self.id.as_bytes().to_vec()), + vec![ + job::status::set(Some(self.status as i32)), + job::critical_error::set(self.critical_error.clone()), + job::non_critical_errors::set(Some(rmp_serde::to_vec( + &self.non_critical_errors, + )?)), + job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)), + job::task_count::set(Some(self.task_count)), + job::completed_task_count::set(Some(self.completed_task_count)), + job::date_started::set(self.started_at.map(Into::into)), + job::date_completed::set(self.completed_at.map(Into::into)), + ], + ) + .exec() + .await + .map_err(ReportError::Update)?; + + Ok(()) + } +} + +#[repr(i32)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, Type, Eq, PartialEq)] +pub enum Status { + Queued = 0, + Running = 1, + Completed = 2, + Canceled = 3, + Failed = 4, + Paused = 5, + CompletedWithErrors = 6, +} + +impl Status { + #[must_use] + pub const fn is_finished(self) -> bool { + matches!( + self, + Self::Completed + | Self::Canceled | Self::Paused + | Self::Failed | Self::CompletedWithErrors + ) + } +} + +impl TryFrom for Status { + type Error = ReportError; + + fn try_from(value: i32) -> Result { + let s = match value { + 0 => Self::Queued, + 1 => Self::Running, + 2 => Self::Completed, + 3 => Self::Canceled, + 4 => Self::Failed, + 5 => Self::Paused, + 6 => Self::CompletedWithErrors, + _ => return 
Err(Self::Error::InvalidJobStatusInt(value)), + }; + + Ok(s) + } +} + +pub struct ReportBuilder { + pub id: JobId, + pub name: JobName, + pub action: Option, + pub metadata: Vec, + pub parent_id: Option, +} + +impl ReportBuilder { + #[must_use] + pub fn build(self) -> Report { + Report { + id: self.id, + name: self.name, + action: self.action, + created_at: None, + started_at: None, + completed_at: None, + status: Status::Queued, + critical_error: None, + task_count: 0, + non_critical_errors: vec![], + metadata: self.metadata, + parent_id: self.parent_id, + completed_task_count: 0, + phase: String::new(), + message: String::new(), + estimated_completion: Utc::now(), + } + } + + #[must_use] + pub fn new(id: JobId, name: JobName) -> Self { + Self { + id, + name, + action: None, + metadata: vec![], + parent_id: None, + } + } + + #[must_use] + pub fn with_action(mut self, action: impl Into) -> Self { + self.action = Some(action.into()); + self + } + + #[must_use] + pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self { + self.metadata.push(ReportMetadata::Input(metadata)); + self + } + + #[must_use] + pub const fn with_parent_id(mut self, parent_id: JobId) -> Self { + self.parent_id = Some(parent_id); + self + } +} diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs new file mode 100644 index 000000000..0f257f259 --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -0,0 +1,535 @@ +use crate::Error; + +use sd_prisma::prisma::location; +use sd_task_system::BaseTaskDispatcher; +use sd_utils::error::FileIOError; + +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + mem, + path::Path, + pin::pin, + time::Duration, +}; + +use async_channel as chan; +use chrono::Utc; +use futures::StreamExt; +use futures_concurrency::{future::TryJoin, stream::Merge}; +use tokio::{ + fs, + sync::oneshot, + time::{interval_at, Instant}, +}; +use 
tokio_stream::wrappers::IntervalStream; +use tracing::{debug, error, info, warn}; +use uuid::Uuid; + +use super::{ + job::{DynJob, JobContext, JobHandle, JobName, JobOutput, ReturnStatus}, + report, + store::{StoredJob, StoredJobEntry}, + Command, JobId, JobSystemError, SerializedTasks, +}; + +const JOBS_INITIAL_CAPACITY: usize = 32; +const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60); + +pub(super) enum RunnerMessage { + NewJob { + id: JobId, + location_id: location::id::Type, + dyn_job: Box>, + job_ctx: Ctx, + ack_tx: oneshot::Sender>, + }, + ResumeStoredJob { + id: JobId, + location_id: location::id::Type, + dyn_job: Box>, + job_ctx: Ctx, + serialized_tasks: Option, + ack_tx: oneshot::Sender>, + }, + Command { + id: JobId, + command: Command, + ack_tx: oneshot::Sender>, + }, + CheckIfJobAreRunning { + job_names: Vec, + location_id: location::id::Type, + ack_tx: oneshot::Sender, + }, + Shutdown, +} + +pub(super) struct JobSystemRunner { + base_dispatcher: BaseTaskDispatcher, + handles: HashMap>, + job_hashes: HashMap, + job_hashes_by_id: HashMap, + running_jobs_by_job_id: HashMap, + running_jobs_set: HashSet<(JobName, location::id::Type)>, + jobs_to_store_by_ctx_id: HashMap>, + job_return_status_tx: chan::Sender<(JobId, Result)>, + job_outputs_tx: chan::Sender<(JobId, Result)>, +} + +impl JobSystemRunner { + pub(super) fn new( + base_dispatcher: BaseTaskDispatcher, + job_return_status_tx: chan::Sender<(JobId, Result)>, + job_outputs_tx: chan::Sender<(JobId, Result)>, + ) -> Self { + Self { + base_dispatcher, + handles: HashMap::with_capacity(JOBS_INITIAL_CAPACITY), + job_hashes: HashMap::with_capacity(JOBS_INITIAL_CAPACITY), + job_hashes_by_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY), + running_jobs_by_job_id: HashMap::with_capacity(JOBS_INITIAL_CAPACITY), + running_jobs_set: HashSet::with_capacity(JOBS_INITIAL_CAPACITY), + jobs_to_store_by_ctx_id: HashMap::new(), + job_return_status_tx, + job_outputs_tx, + } + } + + async fn new_job( + &mut self, + 
id: JobId, + location_id: location::id::Type, + dyn_job: Box>, + job_ctx: Ctx, + maybe_existing_tasks: Option, + ) -> Result<(), JobSystemError> { + let Self { + base_dispatcher, + handles, + job_hashes, + job_hashes_by_id, + job_return_status_tx, + running_jobs_by_job_id, + running_jobs_set, + .. + } = self; + + let db = job_ctx.db(); + let job_name = dyn_job.job_name(); + + let job_hash = dyn_job.hash(); + if let Some(&already_running_id) = job_hashes.get(&job_hash) { + return Err(JobSystemError::AlreadyRunning { + new_id: id, + already_running_id, + job_name, + }); + } + + running_jobs_by_job_id.insert(id, (job_name, location_id)); + running_jobs_set.insert((job_name, location_id)); + + job_hashes.insert(job_hash, id); + job_hashes_by_id.insert(id, job_hash); + + let start_time = Utc::now(); + + let mut handle = if maybe_existing_tasks.is_some() { + dyn_job.resume( + base_dispatcher.clone(), + job_ctx.clone(), + maybe_existing_tasks, + job_return_status_tx.clone(), + ) + } else { + dyn_job.dispatch( + base_dispatcher.clone(), + job_ctx.clone(), + job_return_status_tx.clone(), + ) + }; + + handle.report.status = report::Status::Running; + if handle.report.started_at.is_none() { + handle.report.started_at = Some(start_time); + } + + // If the report doesn't have a created_at date, it's a new report + if handle.report.created_at.is_none() { + handle.report.create(db).await?; + } else { + // Otherwise it can be a job being resumed or a children job that was already been created + handle.report.update(db).await?; + } + + // Registering children jobs + handle + .next_jobs + .iter_mut() + .map(|dyn_job| dyn_job.report_mut()) + .map(|next_job_report| async { + if next_job_report.created_at.is_none() { + next_job_report.create(job_ctx.db()).await + } else { + Ok(()) + } + }) + .collect::>() + .try_join() + .await?; + + handles.insert(id, handle); + + Ok(()) + } + + async fn process_command(&mut self, id: JobId, command: Command) -> Result<(), JobSystemError> { + if let 
Some(handle) = self.handles.get_mut(&id) { + handle.send_command(command).await?; + Ok(()) + } else { + Err(JobSystemError::NotFound(id)) + } + } + + fn is_empty(&self) -> bool { + self.handles.is_empty() && self.job_hashes.is_empty() && self.job_hashes_by_id.is_empty() + } + + fn check_if_job_are_running( + &self, + job_names: Vec, + location_id: location::id::Type, + ) -> bool { + job_names + .into_iter() + .any(|job_name| self.running_jobs_set.contains(&(job_name, location_id))) + } + + async fn process_return_status(&mut self, job_id: JobId, status: Result) { + let Self { + handles, + job_hashes, + job_hashes_by_id, + job_outputs_tx, + job_return_status_tx, + base_dispatcher, + jobs_to_store_by_ctx_id, + running_jobs_by_job_id, + running_jobs_set, + .. + } = self; + + let job_hash = job_hashes_by_id.remove(&job_id).expect("it must be here"); + let (job_name, location_id) = running_jobs_by_job_id + .remove(&job_id) + .expect("a JobName and location_id must've been inserted in the map with the job id"); + assert!(running_jobs_set.remove(&(job_name, location_id))); + + assert!(job_hashes.remove(&job_hash).is_some()); + let mut handle = handles.remove(&job_id).expect("it must be here"); + + let res = match status { + Ok(ReturnStatus::Completed(job_return)) => { + try_dispatch_next_job( + &mut handle, + base_dispatcher.clone(), + (job_hashes, job_hashes_by_id), + handles, + job_return_status_tx.clone(), + ); + + handle.complete_job(job_return).await + } + + Ok(ReturnStatus::Shutdown(Ok(Some(serialized_job)))) => { + let name = handle.report.name; + + let Ok(next_jobs) = handle + .next_jobs + .into_iter() + .map(|next_job| async move { + let next_id = next_job.id(); + let next_name = next_job.job_name(); + next_job + .serialize() + .await + .map(|maybe_serialized_job| { + maybe_serialized_job.map(|serialized_job| StoredJob { + id: next_id, + name: next_name, + serialized_job, + }) + }) + .map_err(|e| { + error!( + "Failed to serialize next job: \ + : {e:#?}" + ); + 
}) + }) + .collect::>() + .try_join() + .await + else { + return; + }; + + jobs_to_store_by_ctx_id + .entry(handle.job_ctx.id()) + .or_default() + .push(StoredJobEntry { + location_id, + root_job: StoredJob { + id: job_id, + name, + serialized_job, + }, + next_jobs: next_jobs.into_iter().flatten().collect(), + }); + + return; + } + + Ok(ReturnStatus::Shutdown(Ok(None))) => { + debug!( + "Job was shutdown but didn't returned any serialized data, \ + probably it isn't resumable job: " + ); + return; + } + + Ok(ReturnStatus::Shutdown(Err(e))) => { + error!("Failed to serialize job: {e:#?}"); + return; + } + + Ok(ReturnStatus::Canceled) => handle + .cancel_job() + .await + .and_then(|()| Err(JobSystemError::Canceled(job_id))), + + Err(e) => handle.failed_job(&e).await.and_then(|()| Err(e.into())), + }; + + job_outputs_tx + .send((job_id, res)) + .await + .expect("job outputs channel unexpectedly closed on job completion"); + } + + fn clean_memory(&mut self) { + if self.handles.capacity() > JOBS_INITIAL_CAPACITY + && self.handles.len() < JOBS_INITIAL_CAPACITY + { + self.handles.shrink_to(JOBS_INITIAL_CAPACITY); + } + + if self.job_hashes.capacity() > JOBS_INITIAL_CAPACITY + && self.job_hashes.len() < JOBS_INITIAL_CAPACITY + { + self.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY); + } + + if self.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY + && self.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY + { + self.job_hashes_by_id.shrink_to(JOBS_INITIAL_CAPACITY); + } + + if self.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY + && self.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY + { + self.running_jobs_by_job_id.shrink_to(JOBS_INITIAL_CAPACITY); + } + + if self.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY + && self.running_jobs_set.len() < JOBS_INITIAL_CAPACITY + { + self.running_jobs_set.shrink_to(JOBS_INITIAL_CAPACITY); + } + } + + async fn save_jobs( + self, + store_jobs_file: impl AsRef + Send, + ) -> Result<(), JobSystemError> { + let 
store_jobs_file = store_jobs_file.as_ref(); + + let Self { + handles, + job_hashes, + job_hashes_by_id, + jobs_to_store_by_ctx_id, + .. + } = self; + + assert!( + handles.is_empty() && job_hashes.is_empty() && job_hashes_by_id.is_empty(), + "All jobs must be completed before saving" + ); + + if jobs_to_store_by_ctx_id.is_empty() { + info!("No jobs to store in disk for job system shutdown!"); + return Ok(()); + } + + fs::write( + store_jobs_file, + rmp_serde::to_vec_named(&jobs_to_store_by_ctx_id)?, + ) + .await + .map_err(|e| JobSystemError::StoredJobs(FileIOError::from((store_jobs_file, e)))) + } +} + +fn try_dispatch_next_job( + handle: &mut JobHandle, + base_dispatcher: BaseTaskDispatcher, + (job_hashes, job_hashes_by_id): (&mut HashMap, &mut HashMap), + handles: &mut HashMap>, + job_return_status_tx: chan::Sender<(JobId, Result)>, +) { + if let Some(next) = handle.next_jobs.pop_front() { + let next_id = next.id(); + let next_hash = next.hash(); + if let Entry::Vacant(e) = job_hashes.entry(next_hash) { + e.insert(next_id); + job_hashes_by_id.insert(next_id, next_hash); + let mut next_handle = next.dispatch( + base_dispatcher, + handle.job_ctx.clone(), + job_return_status_tx, + ); + + assert!( + next_handle.next_jobs.is_empty(), + "Only the root job will have next jobs, the rest will be empty and \ + we will swap with remaining ones from the previous job" + ); + + next_handle.next_jobs = mem::take(&mut handle.next_jobs); + + handles.insert(next_id, next_handle); + } else { + warn!("Unexpectedly found a job with the same hash as the next job: ", next.job_name()); + } + } +} + +pub(super) async fn run( + mut runner: JobSystemRunner, + store_jobs_file: impl AsRef + Send, + msgs_rx: chan::Receiver>, + job_return_status_rx: chan::Receiver<(JobId, Result)>, +) { + enum StreamMessage { + ReturnStatus((JobId, Result)), + RunnerMessage(RunnerMessage), + CleanMemoryTick, + } + + let memory_cleanup_interval = interval_at(Instant::now() + FIVE_MINUTES, FIVE_MINUTES); + + let 
job_return_status_rx_to_shutdown = job_return_status_rx.clone(); + + let mut msg_stream = pin!(( + msgs_rx.map(StreamMessage::RunnerMessage), + job_return_status_rx.map(StreamMessage::ReturnStatus), + IntervalStream::new(memory_cleanup_interval).map(|_| StreamMessage::CleanMemoryTick), + ) + .merge()); + + while let Some(msg) = msg_stream.next().await { + match msg { + // Job return status messages + StreamMessage::ReturnStatus((job_id, status)) => { + runner.process_return_status(job_id, status).await; + } + + // Runner messages + StreamMessage::RunnerMessage(RunnerMessage::NewJob { + id, + location_id, + dyn_job, + job_ctx, + ack_tx, + }) => { + ack_tx + .send( + runner + .new_job(id, location_id, dyn_job, job_ctx, None) + .await, + ) + .expect("ack channel closed before sending new job response"); + } + + StreamMessage::RunnerMessage(RunnerMessage::ResumeStoredJob { + id, + location_id, + dyn_job, + job_ctx, + serialized_tasks, + ack_tx, + }) => { + ack_tx + .send( + runner + .new_job(id, location_id, dyn_job, job_ctx, serialized_tasks) + .await, + ) + .expect("ack channel closed before sending resume job response"); + } + + StreamMessage::RunnerMessage(RunnerMessage::Command { + id, + command, + ack_tx, + }) => { + ack_tx + .send(runner.process_command(id, command).await) + .unwrap_or_else(|_| { + panic!("ack channel closed before sending {command:?} response") + }); + } + + StreamMessage::RunnerMessage(RunnerMessage::Shutdown) => { + // Consuming all pending return status messages + loop { + while let Ok((job_id, status)) = job_return_status_rx_to_shutdown.try_recv() { + runner.process_return_status(job_id, status).await; + } + + if runner.is_empty() { + break; + } + + debug!("Waiting for all jobs to complete before shutting down..."); + } + + // Now the runner can shutdown + if let Err(e) = runner.save_jobs(store_jobs_file).await { + error!("Failed to save jobs before shutting down: {e:#?}"); + } + + return; + } + + 
StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobAreRunning { + job_names, + location_id, + ack_tx, + }) => { + ack_tx + .send(runner.check_if_job_are_running(job_names, location_id)) + .expect("ack channel closed before sending resume job response"); + } + + // Memory cleanup tick + StreamMessage::CleanMemoryTick => { + runner.clean_memory(); + } + } + } +} diff --git a/core/crates/heavy-lifting/src/job_system/store.rs b/core/crates/heavy-lifting/src/job_system/store.rs new file mode 100644 index 000000000..93728030c --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/store.rs @@ -0,0 +1,219 @@ +use crate::indexer::IndexerJob; + +use sd_prisma::prisma::{job, location}; +use sd_utils::uuid_to_bytes; + +use std::{ + collections::{HashMap, VecDeque}, + future::Future, + iter, + marker::PhantomData, +}; + +use futures_concurrency::future::TryJoin; +use serde::{Deserialize, Serialize}; + +use super::{ + job::{DynJob, Job, JobContext, JobHolder, JobName}, + report::{Report, ReportError}, + JobId, JobSystemError, +}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct SerializedTasks(pub Vec); + +pub trait SerializableJob: 'static +where + Self: Sized, +{ + fn serialize( + self, + ) -> impl Future>, rmp_serde::encode::Error>> + Send { + async move { Ok(None) } + } + + #[allow(unused_variables)] + fn deserialize( + serialized_job: &[u8], + ctx: &impl JobContext, + ) -> impl Future< + Output = Result)>, rmp_serde::decode::Error>, + > + Send { + async move { Ok(None) } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct StoredJob { + pub(super) id: JobId, + pub(super) name: JobName, + pub(super) serialized_job: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct StoredJobEntry { + pub(super) location_id: location::id::Type, + pub(super) root_job: StoredJob, + pub(super) next_jobs: Vec, +} + +pub async fn load_jobs( + entries: Vec, + job_ctx: &Ctx, +) -> Result< + Vec<( + location::id::Type, + Box>, + Option, + )>, + JobSystemError, 
+> { + let mut reports = job_ctx + .db() + .job() + .find_many(vec![job::id::in_vec( + entries + .iter() + .flat_map( + |StoredJobEntry { + root_job: StoredJob { id, .. }, + next_jobs, + .. + }| { iter::once(*id).chain(next_jobs.iter().map(|StoredJob { id, .. }| *id)) }, + ) + .map(uuid_to_bytes) + .collect::>(), + )]) + .exec() + .await + .map_err(JobSystemError::LoadReportsForResume)? + .into_iter() + .map(Report::try_from) + .map(|report_res| report_res.map(|report| (report.id, report))) + .collect::, _>>()?; + + entries + .into_iter() + .map( + |StoredJobEntry { + location_id, + root_job, + next_jobs, + }| { + let report = reports + .remove(&root_job.id) + .ok_or(ReportError::MissingReport(root_job.id))?; + + Ok(async move { + load_job(root_job, report, job_ctx) + .await + .map(|maybe_loaded_job| { + maybe_loaded_job + .map(|(dyn_job, tasks)| (location_id, dyn_job, tasks, next_jobs)) + }) + }) + }, + ) + .collect::, JobSystemError>>()? + .try_join() + .await? + .into_iter() + .flatten() + .map(|(location_id, mut dyn_job, tasks, next_jobs)| { + let next_jobs_and_reports = next_jobs + .into_iter() + .map(|next_job| { + let next_job_id = next_job.id; + reports + .remove(&next_job.id) + .map(|report| (next_job, report)) + .ok_or(ReportError::MissingReport(next_job_id)) + }) + .collect::, _>>()?; + + Ok(async move { + next_jobs_and_reports + .into_iter() + .map(|(next_job, report)| async move { + load_job(next_job, report, job_ctx) + .await + .map(|maybe_loaded_next_job| { + maybe_loaded_next_job.map(|(next_dyn_job, next_tasks)| { + assert!( + next_tasks.is_none(), + "Next jobs must not have tasks as they haven't run yet" + ); + assert!( + next_dyn_job.next_jobs().is_empty(), + "Next jobs must not have next jobs" + ); + next_dyn_job + }) + }) + }) + .collect::>() + .try_join() + .await + .map(|maybe_next_dyn_jobs| { + dyn_job.set_next_jobs(maybe_next_dyn_jobs.into_iter().flatten().collect()); + (location_id, dyn_job, tasks) + }) + }) + }) + .collect::, 
JobSystemError>>()? + .try_join() + .await +} + +macro_rules! match_deserialize_job { + ($stored_job:ident, $report:ident, $job_ctx:ident, $ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{ + let StoredJob { + id, + name, + serialized_job, + } = $stored_job; + + + match name { + $(<$job_type as Job>::NAME => <$job_type as SerializableJob>::deserialize( + &serialized_job, + $job_ctx, + ).await + .map(|maybe_job| maybe_job.map(|(job, tasks)| -> ( + Box>, + Option + ) { + ( + Box::new(JobHolder { + id, + job, + report: $report, + next_jobs: VecDeque::new(), + _ctx: PhantomData, + }), + tasks, + ) + } + )) + .map_err(Into::into),)+ + } + }}; +} + +async fn load_job( + stored_job: StoredJob, + report: Report, + job_ctx: &Ctx, +) -> Result>, Option)>, JobSystemError> { + match_deserialize_job!( + stored_job, + report, + job_ctx, + Ctx, + [ + IndexerJob, + // TODO: Add more jobs here + // e.g.: FileIdentifierJob, MediaProcessorJob, etc., + ] + ) +} diff --git a/core/crates/heavy-lifting/src/job_system/utils.rs b/core/crates/heavy-lifting/src/job_system/utils.rs new file mode 100644 index 000000000..afa8ce56f --- /dev/null +++ b/core/crates/heavy-lifting/src/job_system/utils.rs @@ -0,0 +1,16 @@ +use crate::Error; + +use sd_task_system::TaskHandle; + +use futures_concurrency::future::Join; + +pub async fn cancel_pending_tasks( + pending_tasks: impl IntoIterator> + Send, +) { + pending_tasks + .into_iter() + .map(TaskHandle::cancel) + .collect::>() + .join() + .await; +} diff --git a/core/crates/heavy-lifting/src/lib.rs b/core/crates/heavy-lifting/src/lib.rs new file mode 100644 index 000000000..3675cdedb --- /dev/null +++ b/core/crates/heavy-lifting/src/lib.rs @@ -0,0 +1,71 @@ +#![warn( + clippy::all, + clippy::pedantic, + clippy::correctness, + clippy::perf, + clippy::style, + clippy::suspicious, + clippy::complexity, + clippy::nursery, + clippy::unwrap_used, + unused_qualifications, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unused_allocation, + 
clippy::unnecessary_cast, + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::dbg_macro, + clippy::deprecated_cfg_attr, + clippy::separated_literal_suffix, + deprecated +)] +#![forbid(deprecated_in_future)] +#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] + +use sd_task_system::TaskSystemError; + +use serde::{Deserialize, Serialize}; +use specta::Type; +use thiserror::Error; + +pub mod indexer; +pub mod job_system; + +use indexer::{IndexerError, NonCriticalIndexerError}; + +pub use job_system::{ + job::{IntoJob, JobBuilder, JobContext, JobName, JobOutput, JobOutputData, ProgressUpdate}, + JobId, JobSystem, +}; + +#[derive(Error, Debug)] +pub enum Error { + #[error(transparent)] + Indexer(#[from] IndexerError), + + #[error(transparent)] + TaskSystem(#[from] TaskSystemError), +} + +impl From for rspc::Error { + fn from(e: Error) -> Self { + match e { + Error::Indexer(e) => e.into(), + Error::TaskSystem(e) => { + Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e) + } + } + } +} + +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] +pub enum NonCriticalJobError { + // TODO: Add variants as needed + #[error(transparent)] + Indexer(#[from] NonCriticalIndexerError), +} diff --git a/core/crates/indexer-rules/Cargo.toml b/core/crates/indexer-rules/Cargo.toml new file mode 100644 index 000000000..5ac05ea8f --- /dev/null +++ b/core/crates/indexer-rules/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "sd-core-indexer-rules" +version = "0.1.0" +authors = ["Ericson Soares "] +license = { workspace = true } +repository = { workspace = true } +edition = { workspace = true } + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +# Spacedrive Sub-crates +sd-prisma = { path = "../../../crates/prisma" } +sd-utils = { path = "../../../crates/utils" } + +chrono = 
{ workspace = true } +futures-concurrency = { workspace = true } +globset = { workspace = true, features = ["serde1"] } +prisma-client-rust = { workspace = true } +rmp-serde = { workspace = true } +rspc = { workspace = true } +serde = { workspace = true, features = ["derive"] } +specta = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true, features = ["fs"] } +tracing = { workspace = true } +uuid = { workspace = true, features = ["v4", "serde"] } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/core/src/location/indexer/rules/mod.rs b/core/crates/indexer-rules/src/lib.rs similarity index 67% rename from core/src/location/indexer/rules/mod.rs rename to core/crates/indexer-rules/src/lib.rs index c86e8f533..7c40b2eb1 100644 --- a/core/src/location/indexer/rules/mod.rs +++ b/core/crates/indexer-rules/src/lib.rs @@ -1,30 +1,60 @@ -use crate::library::Library; +#![warn( + clippy::all, + clippy::pedantic, + clippy::correctness, + clippy::perf, + clippy::style, + clippy::suspicious, + clippy::complexity, + clippy::nursery, + clippy::unwrap_used, + unused_qualifications, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unused_allocation, + clippy::unnecessary_cast, + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::dbg_macro, + clippy::deprecated_cfg_attr, + clippy::separated_literal_suffix, + deprecated +)] +#![forbid(deprecated_in_future)] +#![allow(clippy::missing_errors_doc)] -use sd_prisma::prisma::indexer_rule; +use sd_prisma::prisma::{indexer_rule, PrismaClient}; use sd_utils::{ db::{maybe_missing, MissingFieldError}, error::{FileIOError, NonUtf8PathError}, }; +use serde::{Deserialize, Serialize}; use std::{ collections::{HashMap, HashSet}, - marker::PhantomData, + fs::Metadata, path::Path, + sync::Arc, }; use chrono::{DateTime, Utc}; -use futures::future::try_join_all; +use 
futures_concurrency::future::TryJoin; use globset::{Glob, GlobSet, GlobSetBuilder}; use rmp_serde::{decode, encode}; use rspc::ErrorCode; -use serde::{de, ser, Deserialize, Serialize}; + use specta::Type; use thiserror::Error; -use tokio::fs; +use tokio::{fs, sync::RwLock}; use tracing::debug; use uuid::Uuid; pub mod seed; +mod serde_impl; #[derive(Error, Debug)] pub enum IndexerRuleError { @@ -57,10 +87,10 @@ impl From for rspc::Error { IndexerRuleError::InvalidRuleKindInt(_) | IndexerRuleError::Glob(_) | IndexerRuleError::NonUtf8Path(_) => { - rspc::Error::with_cause(ErrorCode::BadRequest, err.to_string(), err) + Self::with_cause(ErrorCode::BadRequest, err.to_string(), err) } - _ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err), + _ => Self::with_cause(ErrorCode::InternalServerError, err.to_string(), err), } } } @@ -83,8 +113,10 @@ pub struct IndexerRuleCreateArgs { impl IndexerRuleCreateArgs { pub async fn create( self, - library: &Library, + db: &PrismaClient, ) -> Result, IndexerRuleError> { + use indexer_rule::{date_created, date_modified, name, rules_per_kind}; + debug!( "{} a new indexer rule (name = {}, params = {:?})", if self.dry_run { @@ -127,12 +159,8 @@ impl IndexerRuleCreateArgs { let date_created = Utc::now(); - use indexer_rule::*; - Ok(Some( - library - .db - .indexer_rule() + db.indexer_rule() .create( sd_utils::uuid_to_bytes(generate_pub_id()), vec![ @@ -159,6 +187,7 @@ pub enum RuleKind { } impl RuleKind { + #[must_use] pub const fn variant_count() -> usize { // TODO: Use https://doc.rust-lang.org/std/mem/fn.variant_count.html if it ever gets stabilized 4 @@ -168,9 +197,10 @@ impl RuleKind { /// `ParametersPerKind` is a mapping from `RuleKind` to the parameters required for each kind of rule. /// In case of doubt about globs, consult /// -/// We store directly globs in the database, serialized using rmp_serde. +/// We store directly globs in the database, serialized using [rmp_serde](https://docs.rs/rmp-serde). 
/// -/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or `ParametersPerKind::RejectIfChildrenDirectoriesArePresent` +/// In case of `ParametersPerKind::AcceptIfChildrenDirectoriesArePresent` or +/// `ParametersPerKind::RejectIfChildrenDirectoriesArePresent` /// first we change the data structure to a vector, then we serialize it. #[derive(Debug)] pub enum RulePerKind { @@ -219,232 +249,67 @@ impl RulePerKind { } } -/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on -/// serialization -impl Serialize for RulePerKind { - fn serialize(&self, serializer: S) -> Result - where - S: ser::Serializer, - { - match *self { - RulePerKind::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer - .serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs), - RulePerKind::RejectFilesByGlob(ref globs, ref _glob_set) => serializer - .serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs), - RulePerKind::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer - .serialize_newtype_variant( - "ParametersPerKind", - 2, - "AcceptIfChildrenDirectoriesArePresent", - children, - ), - RulePerKind::RejectIfChildrenDirectoriesArePresent(ref children) => serializer - .serialize_newtype_variant( - "ParametersPerKind", - 3, - "RejectIfChildrenDirectoriesArePresent", - children, - ), - } - } +pub trait MetadataForIndexerRules: Send + Sync + 'static { + fn is_dir(&self) -> bool; } -impl<'de> Deserialize<'de> for RulePerKind { - fn deserialize(deserializer: D) -> Result - where - D: de::Deserializer<'de>, - { - const VARIANTS: &[&str] = &[ - "AcceptFilesByGlob", - "RejectFilesByGlob", - "AcceptIfChildrenDirectoriesArePresent", - "RejectIfChildrenDirectoriesArePresent", - ]; - - enum Fields { - AcceptFilesByGlob, - RejectFilesByGlob, - AcceptIfChildrenDirectoriesArePresent, - RejectIfChildrenDirectoriesArePresent, - } - - struct FieldsVisitor; - - impl<'de> de::Visitor<'de> 
for FieldsVisitor { - type Value = Fields; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str( - "`AcceptFilesByGlob` \ - or `RejectFilesByGlob` \ - or `AcceptIfChildrenDirectoriesArePresent` \ - or `RejectIfChildrenDirectoriesArePresent`", - ) - } - - fn visit_u64(self, value: u64) -> Result - where - E: de::Error, - { - match value { - 0 => Ok(Fields::AcceptFilesByGlob), - 1 => Ok(Fields::RejectFilesByGlob), - 2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent), - 3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent), - _ => Err(de::Error::invalid_value( - de::Unexpected::Unsigned(value), - &"variant index 0 <= i < 3", - )), - } - } - fn visit_str(self, value: &str) -> Result - where - E: de::Error, - { - match value { - "AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob), - "RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob), - "AcceptIfChildrenDirectoriesArePresent" => { - Ok(Fields::AcceptIfChildrenDirectoriesArePresent) - } - "RejectIfChildrenDirectoriesArePresent" => { - Ok(Fields::RejectIfChildrenDirectoriesArePresent) - } - _ => Err(de::Error::unknown_variant(value, VARIANTS)), - } - } - fn visit_bytes(self, bytes: &[u8]) -> Result - where - E: de::Error, - { - match bytes { - b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob), - b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob), - b"AcceptIfChildrenDirectoriesArePresent" => { - Ok(Fields::AcceptIfChildrenDirectoriesArePresent) - } - b"RejectIfChildrenDirectoriesArePresent" => { - Ok(Fields::RejectIfChildrenDirectoriesArePresent) - } - _ => Err(de::Error::unknown_variant( - &String::from_utf8_lossy(bytes), - VARIANTS, - )), - } - } - } - - impl<'de> Deserialize<'de> for Fields { - #[inline] - fn deserialize(deserializer: D) -> Result - where - D: de::Deserializer<'de>, - { - deserializer.deserialize_identifier(FieldsVisitor) - } - } - - struct ParametersPerKindVisitor<'de> { - marker: PhantomData, - lifetime: PhantomData<&'de ()>, - } - - 
impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> { - type Value = RulePerKind; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("enum ParametersPerKind") - } - - fn visit_enum(self, data: PPK) -> Result - where - PPK: de::EnumAccess<'de>, - { - use de::Error; - - de::EnumAccess::variant(data).and_then(|value| match value { - (Fields::AcceptFilesByGlob, accept_files_by_glob) => { - de::VariantAccess::newtype_variant::>(accept_files_by_glob) - .and_then(|globs| { - globs - .iter() - .fold(&mut GlobSetBuilder::new(), |builder, glob| { - builder.add(glob.to_owned()) - }) - .build() - .map_or_else( - |e| Err(PPK::Error::custom(e)), - |glob_set| { - Ok(Self::Value::AcceptFilesByGlob(globs, glob_set)) - }, - ) - }) - } - (Fields::RejectFilesByGlob, reject_files_by_glob) => { - de::VariantAccess::newtype_variant::>(reject_files_by_glob) - .and_then(|globs| { - globs - .iter() - .fold(&mut GlobSetBuilder::new(), |builder, glob| { - builder.add(glob.to_owned()) - }) - .build() - .map_or_else( - |e| Err(PPK::Error::custom(e)), - |glob_set| { - Ok(Self::Value::RejectFilesByGlob(globs, glob_set)) - }, - ) - }) - } - ( - Fields::AcceptIfChildrenDirectoriesArePresent, - accept_if_children_directories_are_present, - ) => de::VariantAccess::newtype_variant::>( - accept_if_children_directories_are_present, - ) - .map(Self::Value::AcceptIfChildrenDirectoriesArePresent), - ( - Fields::RejectIfChildrenDirectoriesArePresent, - reject_if_children_directories_are_present, - ) => de::VariantAccess::newtype_variant::>( - reject_if_children_directories_are_present, - ) - .map(Self::Value::RejectIfChildrenDirectoriesArePresent), - }) - } - } - - deserializer.deserialize_enum( - "ParametersPerKind", - VARIANTS, - ParametersPerKindVisitor { - marker: PhantomData::, - lifetime: PhantomData, - }, - ) +impl MetadataForIndexerRules for Metadata { + fn is_dir(&self) -> bool { + self.is_dir() } } impl RulePerKind { - async fn 
apply(&self, source: impl AsRef) -> Result<(RuleKind, bool), IndexerRuleError> { + #[deprecated] + async fn apply( + &self, + source: impl AsRef + Send, + ) -> Result<(RuleKind, bool), IndexerRuleError> { match self { - RulePerKind::AcceptIfChildrenDirectoriesArePresent(children) => { + Self::AcceptIfChildrenDirectoriesArePresent(children) => { accept_dir_for_its_children(source, children) .await .map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted)) } - RulePerKind::RejectIfChildrenDirectoriesArePresent(children) => { + Self::RejectIfChildrenDirectoriesArePresent(children) => { reject_dir_for_its_children(source, children) .await .map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected)) } - RulePerKind::AcceptFilesByGlob(_globs, accept_glob_set) => Ok(( + Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok(( RuleKind::AcceptFilesByGlob, accept_by_glob(source, accept_glob_set), )), - RulePerKind::RejectFilesByGlob(_globs, reject_glob_set) => Ok(( + Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok(( + RuleKind::RejectFilesByGlob, + reject_by_glob(source, reject_glob_set), + )), + } + } + + async fn apply_with_metadata( + &self, + source: impl AsRef + Send, + metadata: &impl MetadataForIndexerRules, + ) -> Result<(RuleKind, bool), IndexerRuleError> { + match self { + Self::AcceptIfChildrenDirectoriesArePresent(children) => { + accept_dir_for_its_children_with_metadata(source, metadata, children) + .await + .map(|accepted| (RuleKind::AcceptIfChildrenDirectoriesArePresent, accepted)) + } + Self::RejectIfChildrenDirectoriesArePresent(children) => { + reject_dir_for_its_children_with_metadata(source, metadata, children) + .await + .map(|rejected| (RuleKind::RejectIfChildrenDirectoriesArePresent, rejected)) + } + + Self::AcceptFilesByGlob(_globs, accept_glob_set) => Ok(( + RuleKind::AcceptFilesByGlob, + accept_by_glob(source, accept_glob_set), + )), + Self::RejectFilesByGlob(_globs, reject_glob_set) => Ok(( 
RuleKind::RejectFilesByGlob, reject_by_glob(source, reject_glob_set), )), @@ -463,18 +328,50 @@ pub struct IndexerRule { } impl IndexerRule { + #[deprecated] pub async fn apply( &self, - source: impl AsRef, + source: impl AsRef + Send, ) -> Result, IndexerRuleError> { - try_join_all(self.rules.iter().map(|rule| rule.apply(source.as_ref()))).await + self.rules + .iter() + .map(|rule| rule.apply(source.as_ref())) + .collect::>() + .try_join() + .await } + pub async fn apply_with_metadata( + &self, + source: impl AsRef + Send, + metadata: &impl MetadataForIndexerRules, + ) -> Result, IndexerRuleError> { + async fn inner( + rules: &[RulePerKind], + source: &Path, + metadata: &impl MetadataForIndexerRules, + ) -> Result, IndexerRuleError> { + rules + .iter() + .map(|rule| rule.apply_with_metadata(source, metadata)) + .collect::>() + .try_join() + .await + } + + inner(&self.rules, source.as_ref(), metadata).await + } + + #[deprecated] pub async fn apply_all( - rules: &[IndexerRule], - source: impl AsRef, + rules: &[Self], + source: impl AsRef + Send, ) -> Result>, IndexerRuleError> { - try_join_all(rules.iter().map(|rule| rule.apply(source.as_ref()))) + rules + .iter() + .map(|rule| rule.apply(source.as_ref())) + .collect::>() + .try_join() .await .map(|results| { results.into_iter().flatten().fold( @@ -488,6 +385,59 @@ impl IndexerRule { } } +#[derive(Debug, Clone, Default)] +pub struct IndexerRuler { + // TODO(fogodev): Use this RwLock later to acquire new rules while applying rules, like from a .gitignore file + rules: Arc>>, +} + +impl IndexerRuler { + #[must_use] + pub fn new(rules: Vec) -> Self { + Self { + rules: Arc::new(RwLock::new(rules)), + } + } + + pub async fn serialize(&self) -> Result, rmp_serde::encode::Error> { + rmp_serde::to_vec_named(&*self.rules.read().await) + } + + pub fn deserialize(data: &[u8]) -> Result { + rmp_serde::from_slice(data).map(Self::new) + } + + pub async fn apply_all( + &self, + source: impl AsRef + Send, + metadata: &impl 
MetadataForIndexerRules, + ) -> Result>, IndexerRuleError> { + async fn inner( + rules: &[IndexerRule], + source: &Path, + metadata: &impl MetadataForIndexerRules, + ) -> Result>, IndexerRuleError> { + rules + .iter() + .map(|rule| rule.apply_with_metadata(source, metadata)) + .collect::>() + .try_join() + .await + .map(|results| { + results.into_iter().flatten().fold( + HashMap::<_, Vec<_>>::with_capacity(RuleKind::variant_count()), + |mut map, (kind, result)| { + map.entry(kind).or_default().push(result); + map + }, + ) + }) + } + + inner(&self.rules.read().await, source.as_ref(), metadata).await + } +} + impl TryFrom<&indexer_rule::Data> for IndexerRule { type Error = IndexerRuleError; @@ -522,8 +472,9 @@ fn reject_by_glob(source: impl AsRef, reject_glob_set: &GlobSet) -> bool { !accept_by_glob(source.as_ref(), reject_glob_set) } +#[deprecated] async fn accept_dir_for_its_children( - source: impl AsRef, + source: impl AsRef + Send, children: &HashSet, ) -> Result { let source = source.as_ref(); @@ -566,8 +517,50 @@ async fn accept_dir_for_its_children( Ok(false) } +async fn accept_dir_for_its_children_with_metadata( + source: impl AsRef + Send, + metadata: &impl MetadataForIndexerRules, + children: &HashSet, +) -> Result { + let source = source.as_ref(); + + // FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442") + if !metadata.is_dir() { + return Ok(false); + } + + let mut read_dir = fs::read_dir(source) + .await // TODO: Check NotADirectory error here when available + .map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))?; + while let Some(entry) = read_dir + .next_entry() + .await + .map_err(|e| IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))))? + { + let entry_name = entry + .file_name() + .to_str() + .ok_or_else(|| NonUtf8PathError(entry.path().into()))? 
+ .to_string(); + + if entry + .metadata() + .await + .map_err(|e| { + IndexerRuleError::AcceptByItsChildrenFileIO(FileIOError::from((source, e))) + })? + .is_dir() && children.contains(&entry_name) + { + return Ok(true); + } + } + + Ok(false) +} + +#[deprecated] async fn reject_dir_for_its_children( - source: impl AsRef, + source: impl AsRef + Send, children: &HashSet, ) -> Result { let source = source.as_ref(); @@ -608,6 +601,46 @@ async fn reject_dir_for_its_children( Ok(true) } +async fn reject_dir_for_its_children_with_metadata( + source: impl AsRef + Send, + metadata: &impl MetadataForIndexerRules, + children: &HashSet, +) -> Result { + let source = source.as_ref(); + + // FIXME(fogodev): Just check for io::ErrorKind::NotADirectory error instead (feature = "io_error_more", issue = "86442") + if !metadata.is_dir() { + return Ok(true); + } + + let mut read_dir = fs::read_dir(source) + .await // TODO: Check NotADirectory error here when available + .map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))?; + while let Some(entry) = read_dir + .next_entry() + .await + .map_err(|e| IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))))? + { + if entry + .metadata() + .await + .map_err(|e| { + IndexerRuleError::RejectByItsChildrenFileIO(FileIOError::from((source, e))) + })? 
+ .is_dir() && children.contains( + entry + .file_name() + .to_str() + .ok_or_else(|| NonUtf8PathError(entry.path().into()))?, + ) { + return Ok(false); + } + } + + Ok(true) +} + +#[must_use] pub fn generate_pub_id() -> Uuid { loop { let pub_id = Uuid::new_v4(); @@ -624,6 +657,7 @@ mod tests { use tempfile::tempdir; impl IndexerRule { + #[must_use] pub fn new(name: String, default: bool, rules: Vec) -> Self { Self { id: None, @@ -636,7 +670,7 @@ mod tests { } } - async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef) -> bool { + async fn check_rule(indexer_rule: &IndexerRule, path: impl AsRef + Send) -> bool { indexer_rule .apply(path) .await @@ -697,6 +731,7 @@ mod tests { } #[tokio::test] + #[allow(clippy::similar_names)] async fn test_only_photos() { let text = Path::new("file.txt"); let png = Path::new("photo1.png"); @@ -748,7 +783,7 @@ mod tests { fs::create_dir(project2.join(".git")).await.unwrap(); fs::create_dir(project2.join("books")).await.unwrap(); - let childrens = [".git".to_string()].into_iter().collect::>(); + let childrens = HashSet::from([".git".to_string()]); let rule = IndexerRule::new( "git projects".to_string(), @@ -779,7 +814,7 @@ mod tests { fs::create_dir(project2.join(".git")).await.unwrap(); fs::create_dir(project2.join("books")).await.unwrap(); - let childrens = [".git".to_string()].into_iter().collect::>(); + let childrens = HashSet::from([".git".to_string()]); let rule = IndexerRule::new( "git projects".to_string(), @@ -798,21 +833,23 @@ mod tests { fn eq(&self, other: &Self) -> bool { match (self, other) { ( - RulePerKind::AcceptFilesByGlob(self_globs, _), - RulePerKind::AcceptFilesByGlob(other_globs, _), + Self::AcceptFilesByGlob(self_globs, _), + Self::AcceptFilesByGlob(other_globs, _), + ) + | ( + Self::RejectFilesByGlob(self_globs, _), + Self::RejectFilesByGlob(other_globs, _), ) => self_globs == other_globs, + ( - RulePerKind::RejectFilesByGlob(self_globs, _), - RulePerKind::RejectFilesByGlob(other_globs, _), - ) => 
self_globs == other_globs, - ( - RulePerKind::AcceptIfChildrenDirectoriesArePresent(self_childrens), - RulePerKind::AcceptIfChildrenDirectoriesArePresent(other_childrens), - ) => self_childrens == other_childrens, - ( - RulePerKind::RejectIfChildrenDirectoriesArePresent(self_childrens), - RulePerKind::RejectIfChildrenDirectoriesArePresent(other_childrens), + Self::AcceptIfChildrenDirectoriesArePresent(self_childrens), + Self::AcceptIfChildrenDirectoriesArePresent(other_childrens), + ) + | ( + Self::RejectIfChildrenDirectoriesArePresent(self_childrens), + Self::RejectIfChildrenDirectoriesArePresent(other_childrens), ) => self_childrens == other_childrens, + _ => false, } } diff --git a/core/src/location/indexer/rules/seed.rs b/core/crates/indexer-rules/src/seed.rs similarity index 94% rename from core/src/location/indexer/rules/seed.rs rename to core/crates/indexer-rules/src/seed.rs index 9fff82c1e..4205f1caa 100644 --- a/core/src/location/indexer/rules/seed.rs +++ b/core/crates/indexer-rules/src/seed.rs @@ -1,14 +1,11 @@ -use crate::{ - library::Library, - location::indexer::rules::{IndexerRule, IndexerRuleError, RulePerKind}, -}; - -use sd_prisma::prisma::indexer_rule; +use sd_prisma::prisma::{indexer_rule, PrismaClient}; use chrono::Utc; use thiserror::Error; use uuid::Uuid; +use super::{IndexerRule, IndexerRuleError, RulePerKind}; + #[derive(Error, Debug)] pub enum SeederError { #[error("Failed to run indexer rules seeder: {0}")] @@ -37,7 +34,9 @@ impl From for IndexerRule { } /// Seeds system indexer rules into a new or existing library, -pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederError> { +pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederError> { + use indexer_rule::{date_created, date_modified, default, name, rules_per_kind}; + // DO NOT REORDER THIS ARRAY! 
for (i, rule) in [no_os_protected(), no_hidden(), no_git(), only_images()] .into_iter() @@ -46,8 +45,6 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128)); let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?; - use indexer_rule::*; - let data = vec![ name::set(Some(rule.name.to_string())), rules_per_kind::set(Some(rules.clone())), @@ -56,9 +53,7 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro date_modified::set(Some(Utc::now().into())), ]; - library - .db - .indexer_rule() + db.indexer_rule() .upsert( indexer_rule::pub_id::equals(pub_id.clone()), indexer_rule::create(pub_id.clone(), data.clone()), @@ -71,6 +66,8 @@ pub async fn new_or_existing_library(library: &Library) -> Result<(), SeederErro Ok(()) } +#[must_use] +#[allow(clippy::missing_panics_doc)] pub fn no_os_protected() -> SystemIndexerRule { SystemIndexerRule { // TODO: On windows, beside the listed files, any file with the FILE_ATTRIBUTE_SYSTEM should be considered a system file @@ -105,7 +102,7 @@ pub fn no_os_protected() -> SystemIndexerRule { "C:/Users/*/NTUSER.DAT*", "C:/Users/*/ntuser.dat*", "C:/Users/*/{ntuser.ini,ntuser.dat,NTUSER.DAT}", - // User special folders (most of these the user dont even have permission to access) + // User special folders (most of these the user don't even have permission to access) "C:/Users/*/{Cookies,AppData,NetHood,Recent,PrintHood,SendTo,Templates,Start Menu,Application Data,Local Settings,My Documents}", // System special folders "C:/{$Recycle.Bin,$WinREAgent,Documents and Settings,Program Files,Program Files (x86),ProgramData,Recovery,PerfLogs,Windows,Windows.old}", @@ -177,6 +174,8 @@ pub fn no_os_protected() -> SystemIndexerRule { } } +#[must_use] +#[allow(clippy::missing_panics_doc)] pub fn no_hidden() -> SystemIndexerRule { SystemIndexerRule { name: "No Hidden", @@ -186,6 +185,8 @@ pub fn no_hidden() 
-> SystemIndexerRule { } } +#[must_use] +#[allow(clippy::missing_panics_doc)] fn no_git() -> SystemIndexerRule { SystemIndexerRule { name: "No Git", @@ -197,6 +198,8 @@ fn no_git() -> SystemIndexerRule { } } +#[must_use] +#[allow(clippy::missing_panics_doc)] fn only_images() -> SystemIndexerRule { SystemIndexerRule { name: "Only Images", diff --git a/core/crates/indexer-rules/src/serde_impl.rs b/core/crates/indexer-rules/src/serde_impl.rs new file mode 100644 index 000000000..7abe9d87b --- /dev/null +++ b/core/crates/indexer-rules/src/serde_impl.rs @@ -0,0 +1,214 @@ +use std::{collections::HashSet, marker::PhantomData}; + +use globset::{Glob, GlobSetBuilder}; +use serde::{de, ser, Deserialize, Serialize}; + +use super::RulePerKind; + +/// We're implementing `Serialize` by hand as `GlobSet`s aren't serializable, so we ignore them on +/// serialization +impl Serialize for RulePerKind { + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + match *self { + Self::AcceptFilesByGlob(ref globs, ref _glob_set) => serializer + .serialize_newtype_variant("ParametersPerKind", 0, "AcceptFilesByGlob", globs), + Self::RejectFilesByGlob(ref globs, ref _glob_set) => serializer + .serialize_newtype_variant("ParametersPerKind", 1, "RejectFilesByGlob", globs), + Self::AcceptIfChildrenDirectoriesArePresent(ref children) => serializer + .serialize_newtype_variant( + "ParametersPerKind", + 2, + "AcceptIfChildrenDirectoriesArePresent", + children, + ), + Self::RejectIfChildrenDirectoriesArePresent(ref children) => serializer + .serialize_newtype_variant( + "ParametersPerKind", + 3, + "RejectIfChildrenDirectoriesArePresent", + children, + ), + } + } +} + +impl<'de> Deserialize<'de> for RulePerKind { + #[allow(clippy::too_many_lines)] + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + const VARIANTS: &[&str] = &[ + "AcceptFilesByGlob", + "RejectFilesByGlob", + "AcceptIfChildrenDirectoriesArePresent", + 
"RejectIfChildrenDirectoriesArePresent", + ]; + + enum Fields { + AcceptFilesByGlob, + RejectFilesByGlob, + AcceptIfChildrenDirectoriesArePresent, + RejectIfChildrenDirectoriesArePresent, + } + + struct FieldsVisitor; + + impl<'de> de::Visitor<'de> for FieldsVisitor { + type Value = Fields; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str( + "`AcceptFilesByGlob` \ + or `RejectFilesByGlob` \ + or `AcceptIfChildrenDirectoriesArePresent` \ + or `RejectIfChildrenDirectoriesArePresent`", + ) + } + + fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> + where + E: de::Error, + { + match value { + 0 => Ok(Fields::AcceptFilesByGlob), + 1 => Ok(Fields::RejectFilesByGlob), + 2 => Ok(Fields::AcceptIfChildrenDirectoriesArePresent), + 3 => Ok(Fields::RejectIfChildrenDirectoriesArePresent), + _ => Err(de::Error::invalid_value( + de::Unexpected::Unsigned(value), + &"variant index 0 <= i < 4", + )), + } + } + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + match value { + "AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob), + "RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob), + "AcceptIfChildrenDirectoriesArePresent" => { + Ok(Fields::AcceptIfChildrenDirectoriesArePresent) + } + "RejectIfChildrenDirectoriesArePresent" => { + Ok(Fields::RejectIfChildrenDirectoriesArePresent) + } + _ => Err(de::Error::unknown_variant(value, VARIANTS)), + } + } + fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + match bytes { + b"AcceptFilesByGlob" => Ok(Fields::AcceptFilesByGlob), + b"RejectFilesByGlob" => Ok(Fields::RejectFilesByGlob), + b"AcceptIfChildrenDirectoriesArePresent" => { + Ok(Fields::AcceptIfChildrenDirectoriesArePresent) + } + b"RejectIfChildrenDirectoriesArePresent" => { + Ok(Fields::RejectIfChildrenDirectoriesArePresent) + } + _ => Err(de::Error::unknown_variant( + &String::from_utf8_lossy(bytes), + VARIANTS, + )), + } + } + } + + impl<'de> Deserialize<'de> for Fields { + #[inline] +
fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + deserializer.deserialize_identifier(FieldsVisitor) + } + } + + struct ParametersPerKindVisitor<'de> { + marker: PhantomData, + lifetime: PhantomData<&'de ()>, + } + + impl<'de> de::Visitor<'de> for ParametersPerKindVisitor<'de> { + type Value = RulePerKind; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("enum ParametersPerKind") + } + + fn visit_enum(self, data: PPK) -> Result + where + PPK: de::EnumAccess<'de>, + { + use de::Error; + + de::EnumAccess::variant(data).and_then(|value| match value { + (Fields::AcceptFilesByGlob, accept_files_by_glob) => { + de::VariantAccess::newtype_variant::>(accept_files_by_glob) + .and_then(|globs| { + globs + .iter() + .fold(&mut GlobSetBuilder::new(), |builder, glob| { + builder.add(glob.to_owned()) + }) + .build() + .map_or_else( + |e| Err(PPK::Error::custom(e)), + |glob_set| { + Ok(Self::Value::AcceptFilesByGlob(globs, glob_set)) + }, + ) + }) + } + (Fields::RejectFilesByGlob, reject_files_by_glob) => { + de::VariantAccess::newtype_variant::>(reject_files_by_glob) + .and_then(|globs| { + globs + .iter() + .fold(&mut GlobSetBuilder::new(), |builder, glob| { + builder.add(glob.to_owned()) + }) + .build() + .map_or_else( + |e| Err(PPK::Error::custom(e)), + |glob_set| { + Ok(Self::Value::RejectFilesByGlob(globs, glob_set)) + }, + ) + }) + } + ( + Fields::AcceptIfChildrenDirectoriesArePresent, + accept_if_children_directories_are_present, + ) => de::VariantAccess::newtype_variant::>( + accept_if_children_directories_are_present, + ) + .map(Self::Value::AcceptIfChildrenDirectoriesArePresent), + ( + Fields::RejectIfChildrenDirectoriesArePresent, + reject_if_children_directories_are_present, + ) => de::VariantAccess::newtype_variant::>( + reject_if_children_directories_are_present, + ) + .map(Self::Value::RejectIfChildrenDirectoriesArePresent), + }) + } + } + + deserializer.deserialize_enum( 
+ "ParametersPerKind", + VARIANTS, + ParametersPerKindVisitor { + marker: PhantomData::, + lifetime: PhantomData, + }, + ) + } +} diff --git a/core/crates/prisma-helpers/Cargo.toml b/core/crates/prisma-helpers/Cargo.toml new file mode 100644 index 000000000..6271e754b --- /dev/null +++ b/core/crates/prisma-helpers/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "sd-core-prisma-helpers" +version = "0.1.0" +authors = ["Ericson Soares "] +license = { workspace = true } +repository = { workspace = true } +edition = { workspace = true } + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +# Spacedrive Sub-crates +sd-prisma = { path = "../../../crates/prisma" } + +prisma-client-rust = { workspace = true } +serde = { workspace = true } diff --git a/core/crates/prisma-helpers/src/lib.rs b/core/crates/prisma-helpers/src/lib.rs new file mode 100644 index 000000000..9f04b609e --- /dev/null +++ b/core/crates/prisma-helpers/src/lib.rs @@ -0,0 +1,226 @@ +#![warn( + clippy::all, + clippy::pedantic, + clippy::correctness, + clippy::perf, + clippy::style, + clippy::suspicious, + clippy::complexity, + clippy::nursery, + clippy::unwrap_used, + unused_qualifications, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unused_allocation, + clippy::unnecessary_cast, + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::dbg_macro, + clippy::deprecated_cfg_attr, + clippy::separated_literal_suffix, + deprecated +)] +#![forbid(deprecated_in_future)] +#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] + +use sd_prisma::prisma::{file_path, job, label, location, object}; + +// File Path selectables! 
+file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id }); +file_path::select!(file_path_just_pub_id_materialized_path { + pub_id + materialized_path +}); +file_path::select!(file_path_for_file_identifier { + id + pub_id + materialized_path + date_created + is_dir + name + extension + object_id +}); +file_path::select!(file_path_for_object_validator { + pub_id + materialized_path + is_dir + name + extension + integrity_checksum +}); +file_path::select!(file_path_for_media_processor { + id + materialized_path + is_dir + name + extension + cas_id + object_id +}); +file_path::select!(file_path_to_isolate { + location_id + materialized_path + is_dir + name + extension +}); +file_path::select!(file_path_to_isolate_with_pub_id { + pub_id + location_id + materialized_path + is_dir + name + extension +}); +file_path::select!(file_path_to_isolate_with_id { + id + location_id + materialized_path + is_dir + name + extension +}); +file_path::select!(file_path_walker { + pub_id + location_id + object_id + materialized_path + is_dir + name + extension + date_modified + inode + size_in_bytes_bytes + hidden +}); +file_path::select!(file_path_to_handle_custom_uri { + pub_id + materialized_path + is_dir + name + extension + location: select { + id + path + instance: select { + identity + remote_identity + } + } +}); +file_path::select!(file_path_to_handle_p2p_serve_file { + materialized_path + name + extension + is_dir // For isolated file path + location: select { + id + path + } +}); +file_path::select!(file_path_to_full_path { + id + materialized_path + is_dir + name + extension + location: select { + id + path + } +}); + +// File Path includes! +file_path::include!(file_path_with_object { object }); + +// Object selectables! +object::select!(object_for_file_identifier { + pub_id + file_paths: select { pub_id cas_id extension is_dir materialized_path name } +}); + +// Object includes! +object::include!(object_with_file_paths { file_paths }); + +// Job selectables! 
+job::select!(job_without_data { + id + name + action + status + parent_id + errors_text + metadata + date_created + date_started + date_completed + task_count + completed_task_count + date_estimated_completion +}); + +// Location includes! +location::include!(location_with_indexer_rules { + indexer_rules: select { indexer_rule } +}); + +impl From<location_with_indexer_rules::Data> for location::Data { + fn from(data: location_with_indexer_rules::Data) -> Self { + Self { + id: data.id, + pub_id: data.pub_id, + path: data.path, + instance_id: data.instance_id, + name: data.name, + total_capacity: data.total_capacity, + available_capacity: data.available_capacity, + is_archived: data.is_archived, + size_in_bytes: data.size_in_bytes, + generate_preview_media: data.generate_preview_media, + sync_preview_media: data.sync_preview_media, + hidden: data.hidden, + date_created: data.date_created, + file_paths: None, + indexer_rules: None, + instance: None, + } + } +} + +impl From<&location_with_indexer_rules::Data> for location::Data { + fn from(data: &location_with_indexer_rules::Data) -> Self { + Self { + id: data.id, + pub_id: data.pub_id.clone(), + path: data.path.clone(), + instance_id: data.instance_id, + name: data.name.clone(), + total_capacity: data.total_capacity, + available_capacity: data.available_capacity, + size_in_bytes: data.size_in_bytes.clone(), + is_archived: data.is_archived, + generate_preview_media: data.generate_preview_media, + sync_preview_media: data.sync_preview_media, + hidden: data.hidden, + date_created: data.date_created, + file_paths: None, + indexer_rules: None, + instance: None, + } + } +} + +// Label includes!
+label::include!((take: i64) => label_with_objects { + label_objects(vec![]).take(take): select { + object: select { + id + file_paths(vec![]).take(1) + } + } +}); diff --git a/core/crates/sync/Cargo.toml b/core/crates/sync/Cargo.toml index c164aade1..d5796f8e0 100644 --- a/core/crates/sync/Cargo.toml +++ b/core/crates/sync/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" default = [] [dependencies] +# Spacedrive Sub-crates sd-prisma = { path = "../../../crates/prisma" } sd-sync = { path = "../../../crates/sync" } sd-utils = { path = "../../../crates/utils" } diff --git a/core/crates/sync/src/manager.rs b/core/crates/sync/src/manager.rs index 72b54a618..a25581738 100644 --- a/core/crates/sync/src/manager.rs +++ b/core/crates/sync/src/manager.rs @@ -7,6 +7,7 @@ use sd_utils::uuid_to_bytes; use std::{ cmp::Ordering, collections::HashMap, + fmt, ops::Deref, sync::{ atomic::{self, AtomicBool}, @@ -25,6 +26,12 @@ pub struct Manager { pub shared: Arc, } +impl fmt::Debug for Manager { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SyncManager").finish() + } +} + #[derive(serde::Serialize, serde::Deserialize, Debug, PartialEq, Eq)] pub struct GetOpsArgs { pub clocks: Vec<(Uuid, NTP64)>, diff --git a/core/crates/sync/tests/lib.rs b/core/crates/sync/tests/lib.rs index 11c9ee768..7f2698868 100644 --- a/core/crates/sync/tests/lib.rs +++ b/core/crates/sync/tests/lib.rs @@ -1,6 +1,7 @@ mod mock_instance; use sd_core_sync::*; + use sd_prisma::{prisma, prisma_sync}; use sd_sync::*; use sd_utils::uuid_to_bytes; diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma index de058f8a2..b7f4cf6bb 100644 --- a/core/prisma/schema.prisma +++ b/core/prisma/schema.prisma @@ -403,10 +403,13 @@ model Job { // Enum: sd_core::job::job_manager:JobStatus status Int? // 0 = Queued - // List of errors, separated by "\n\n" in case of failed jobs or completed with errors - errors_text String? - data Bytes? 
// Serialized data to be used on pause/resume + // List of errors, separated by "\n\n" in case of failed jobs or completed with errors + errors_text String? // Deprecated, use `critical_error` or `non_critical_errors` instead + critical_error String? // Serialized error field with info about the failed job after completion + non_critical_errors Bytes? // Serialized non-critical errors field with info about the completed job with errors after completion + + data Bytes? // Deprecated metadata Bytes? // Serialized metadata field with info about the job after completion parent_id Bytes? diff --git a/core/src/api/ephemeral_files.rs b/core/src/api/ephemeral_files.rs index a7eaa0f08..17eaa4114 100644 --- a/core/src/api/ephemeral_files.rs +++ b/core/src/api/ephemeral_files.rs @@ -10,8 +10,9 @@ use crate::{ }, }; +use sd_core_file_path_helper::IsolatedFilePathData; + use sd_file_ext::extensions::ImageExtension; -use sd_file_path_helper::IsolatedFilePathData; use sd_media_metadata::MediaMetadata; use sd_utils::error::FileIOError; @@ -37,9 +38,10 @@ const UNTITLED_FILE_STR: &str = "Untitled"; const UNTITLED_TEXT_FILE_STR: &str = "Untitled.txt"; #[derive(Type, Deserialize)] +#[serde(rename_all = "camelCase")] enum EphemeralFileCreateContextTypes { - empty, - text, + Empty, + Text, } pub(crate) fn mount() -> AlphaRouter { @@ -103,10 +105,10 @@ pub(crate) fn mount() -> AlphaRouter { context, }: CreateEphemeralFileArgs| async move { match context { - EphemeralFileCreateContextTypes::empty => { + EphemeralFileCreateContextTypes::Empty => { path.push(name.as_deref().unwrap_or(UNTITLED_FILE_STR)); } - EphemeralFileCreateContextTypes::text => { + EphemeralFileCreateContextTypes::Text => { path.push(name.as_deref().unwrap_or(UNTITLED_TEXT_FILE_STR)); } } diff --git a/core/src/api/files.rs b/core/src/api/files.rs index 5cf3cf0e5..1ea794d30 100644 --- a/core/src/api/files.rs +++ b/core/src/api/files.rs @@ -1,5 +1,5 @@ use crate::{ - api::{locations::object_with_file_paths, 
utils::library}, + api::utils::library, invalidate_query, library::Library, location::{get_location_path_from_location_id, LocationError}, @@ -14,11 +14,13 @@ use crate::{ old_job::Job, }; +use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData}; +use sd_core_prisma_helpers::{ + file_path_to_isolate, file_path_to_isolate_with_id, object_with_file_paths, +}; + use sd_cache::{CacheNode, Model, NormalisedResult, Reference}; use sd_file_ext::kind::ObjectKind; -use sd_file_path_helper::{ - file_path_to_isolate, file_path_to_isolate_with_id, FilePathError, IsolatedFilePathData, -}; use sd_images::ConvertibleExtension; use sd_media_metadata::MediaMetadata; use sd_prisma::{ @@ -50,9 +52,10 @@ const UNTITLED_FILE_STR: &str = "Untitled"; const UNTITLED_TEXT_FILE_STR: &str = "Untitled.txt"; #[derive(Type, Deserialize)] +#[serde(rename_all = "camelCase")] enum FileCreateContextTypes { - empty, - text, + Empty, + Text, } pub(crate) fn mount() -> AlphaRouter { @@ -329,10 +332,10 @@ pub(crate) fn mount() -> AlphaRouter { } match context { - FileCreateContextTypes::empty => { + FileCreateContextTypes::Empty => { path.push(name.as_deref().unwrap_or(UNTITLED_FILE_STR)) } - FileCreateContextTypes::text => { + FileCreateContextTypes::Text => { path.push(name.as_deref().unwrap_or(UNTITLED_TEXT_FILE_STR)) } } @@ -645,7 +648,7 @@ pub(crate) fn mount() -> AlphaRouter { Ok(()) }) }) - .procedure("getConvertableImageExtensions", { + .procedure("getConvertibleImageExtensions", { R.query(|_, _: ()| async move { Ok(sd_images::all_compatible_extensions()) }) }) .procedure("eraseFiles", { diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 16a4b741f..8144b4c12 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -6,9 +6,11 @@ use crate::{ old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, validation::old_validator_job::OldObjectValidatorJobInit, }, - old_job::{job_without_data, Job, JobReport, JobStatus, OldJobs}, + old_job::{Job, JobReport, 
JobStatus, OldJobs}, }; +use sd_core_prisma_helpers::job_without_data; + use sd_prisma::prisma::{job, location, SortOrder}; use std::{ diff --git a/core/src/api/labels.rs b/core/src/api/labels.rs index 4e8fd2efb..0e5249c59 100644 --- a/core/src/api/labels.rs +++ b/core/src/api/labels.rs @@ -2,6 +2,8 @@ use crate::{ invalidate_query, library::Library, object::media::old_thumbnail::get_indexed_thumb_key, }; +use sd_core_prisma_helpers::label_with_objects; + use sd_prisma::{ prisma::{label, label_on_object, object, SortOrder}, prisma_sync, @@ -14,15 +16,6 @@ use rspc::alpha::AlphaRouter; use super::{locations::ExplorerItem, utils::library, Ctx, R}; -label::include!((take: i64) => label_with_objects { - label_objects(vec![]).take(take): select { - object: select { - id - file_paths(vec![]).take(1) - } - } -}); - pub(crate) fn mount() -> AlphaRouter { R.router() .procedure("list", { diff --git a/core/src/api/locations.rs b/core/src/api/locations.rs index cd2bc58dd..ed421ce8d 100644 --- a/core/src/api/locations.rs +++ b/core/src/api/locations.rs @@ -1,12 +1,9 @@ use crate::{ invalidate_query, location::{ - delete_location, find_location, - indexer::{rules::IndexerRuleCreateArgs, OldIndexerJobInit}, - light_scan_location, location_with_indexer_rules, - non_indexed::NonIndexedPathItem, - relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError, - LocationUpdateArgs, + delete_location, find_location, indexer::OldIndexerJobInit, light_scan_location, + non_indexed::NonIndexedPathItem, relink_location, scan_location, scan_location_sub_path, + LocationCreateArgs, LocationError, LocationUpdateArgs, }, object::old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, old_job::StatefulJob, @@ -14,11 +11,14 @@ use crate::{ util::AbortOnDrop, }; -use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference}; -use sd_prisma::prisma::{ - file_path, indexer_rule, indexer_rules_in_location, location, object, 
SortOrder, +use sd_core_indexer_rules::IndexerRuleCreateArgs; +use sd_core_prisma_helpers::{ + file_path_with_object, label_with_objects, location_with_indexer_rules, object_with_file_paths, }; +use sd_cache::{CacheNode, Model, Normalise, NormalisedResult, NormalisedResults, Reference}; +use sd_prisma::prisma::{file_path, indexer_rule, indexer_rules_in_location, location, SortOrder}; + use std::path::{Path, PathBuf}; use chrono::{DateTime, FixedOffset, Utc}; @@ -28,7 +28,7 @@ use serde::{Deserialize, Serialize}; use specta::Type; use tracing::{debug, error}; -use super::{labels::label_with_objects, utils::library, Ctx, R}; +use super::{utils::library, Ctx, R}; // it includes the shard hex formatted as ([["f02", "cab34a76fbf3469f"]]) // Will be None if no thumbnail exists @@ -197,9 +197,6 @@ impl ExplorerItem { } } -file_path::include!(file_path_with_object { object }); -object::include!(object_with_file_paths { file_paths }); - pub(crate) fn mount() -> AlphaRouter { R.router() .procedure("list", { @@ -518,7 +515,7 @@ fn mount_indexer_rule_routes() -> AlphaRouter { .procedure("create", { R.with2(library()) .mutation(|(_, library), args: IndexerRuleCreateArgs| async move { - if args.create(&library).await?.is_some() { + if args.create(&library.db).await?.is_some() { invalidate_query!(library, "locations.indexer_rules.list"); } diff --git a/core/src/api/search/file_path.rs b/core/src/api/search/file_path.rs index bfb682ccc..7e7a4f29f 100644 --- a/core/src/api/search/file_path.rs +++ b/core/src/api/search/file_path.rs @@ -1,6 +1,7 @@ use crate::location::LocationError; -use sd_file_path_helper::{check_file_path_exists, IsolatedFilePathData}; +use sd_core_file_path_helper::{check_file_path_exists, IsolatedFilePathData}; + use sd_prisma::prisma::{self, file_path}; use chrono::{DateTime, FixedOffset, Utc}; diff --git a/core/src/api/search/mod.rs b/core/src/api/search/mod.rs index f976918b1..b34576308 100644 --- a/core/src/api/search/mod.rs +++ b/core/src/api/search/mod.rs 
@@ -1,14 +1,13 @@ use crate::{ - api::{ - locations::{file_path_with_object, object_with_file_paths, ExplorerItem}, - utils::library, - }, + api::{locations::ExplorerItem, utils::library}, library::Library, location::{non_indexed, LocationError}, object::media::old_thumbnail::get_indexed_thumb_key, util::{unsafe_streamed_query, BatchedStream}, }; +use sd_core_prisma_helpers::{file_path_with_object, object_with_file_paths}; + use sd_cache::{CacheNode, Model, Normalise, Reference}; use sd_prisma::prisma::{self, PrismaClient}; diff --git a/core/src/cloud/sync/ingest.rs b/core/src/cloud/sync/ingest.rs index 8c489f0f7..1d08db285 100644 --- a/core/src/cloud/sync/ingest.rs +++ b/core/src/cloud/sync/ingest.rs @@ -56,10 +56,10 @@ pub async fn run_actor( } debug!( - "Sending {} messages ({} to {}) to ingester", + "Sending {} messages ({:?} to {:?}) to ingester", ops.len(), - ops.first().unwrap().timestamp.as_u64(), - ops.last().unwrap().timestamp.as_u64(), + ops.first().map(|operation| operation.timestamp.as_u64()), + ops.last().map(|operation| operation.timestamp.as_u64()), ); err_break!( diff --git a/core/src/cloud/sync/receive.rs b/core/src/cloud/sync/receive.rs index a5ff0b8fe..f435358cc 100644 --- a/core/src/cloud/sync/receive.rs +++ b/core/src/cloud/sync/receive.rs @@ -1,12 +1,10 @@ use crate::{library::Libraries, Node}; -use super::{err_break, CompressedCRDTOperations}; use sd_cloud_api::RequestConfigProvider; use sd_p2p::RemoteIdentity; use sd_prisma::prisma::{cloud_crdt_operation, instance, PrismaClient, SortOrder}; use sd_sync::CRDTOperation; use sd_utils::uuid_to_bytes; -use tracing::{debug, info}; use std::{ collections::{hash_map::Entry, HashMap}, @@ -21,8 +19,11 @@ use base64::prelude::*; use chrono::Utc; use serde_json::to_vec; use tokio::{sync::Notify, time::sleep}; +use tracing::{debug, info}; use uuid::Uuid; +use super::{err_break, CompressedCRDTOperations}; + // Responsible for downloading sync operations from the cloud to be processed by the ingester 
#[allow(clippy::too_many_arguments)] @@ -42,7 +43,7 @@ pub async fn run_actor( active_notify.notify_waiters(); loop { - // We need to know the lastest operations we should be retrieving + // We need to know the latest operations we should be retrieving let mut cloud_timestamps = { let timestamps = sync.timestamps.read().await; @@ -181,10 +182,14 @@ pub async fn run_actor( let operations = compressed_operations.into_ops(); debug!( - "Processing collection. Instance {}, Start {}, End {}", + "Processing collection. Instance {}, Start {:?}, End {:?}", &collection.instance_uuid, - operations.first().unwrap().timestamp.as_u64(), - operations.last().unwrap().timestamp.as_u64(), + operations + .first() + .map(|operation| operation.timestamp.as_u64()), + operations + .last() + .map(|operation| operation.timestamp.as_u64()), ); err_break!(write_cloud_ops_to_db(operations, &db).await); @@ -233,6 +238,7 @@ fn crdt_op_db(op: &CRDTOperation) -> cloud_crdt_operation::Create { } } +#[allow(clippy::too_many_arguments)] pub async fn upsert_instance( library_id: Uuid, db: &PrismaClient, diff --git a/core/src/cloud/sync/send.rs b/core/src/cloud/sync/send.rs index 120211fb6..e4744f306 100644 --- a/core/src/cloud/sync/send.rs +++ b/core/src/cloud/sync/send.rs @@ -1,9 +1,6 @@ -use super::CompressedCRDTOperations; +use sd_core_sync::{SyncMessage, NTP64}; use sd_cloud_api::RequestConfigProvider; -use sd_core_sync::{SyncMessage, NTP64}; -use tracing::debug; -use uuid::Uuid; use std::{ sync::{ @@ -14,10 +11,10 @@ use std::{ }; use tokio::{sync::Notify, time::sleep}; +use tracing::debug; +use uuid::Uuid; -use super::err_break; - -// Responsible for sending its instance's sync operations to the cloud. 
+use super::{err_break, CompressedCRDTOperations}; pub async fn run_actor( library_id: Uuid, diff --git a/core/src/custom_uri/mod.rs b/core/src/custom_uri/mod.rs index d54c6d918..37ad328f4 100644 --- a/core/src/custom_uri/mod.rs +++ b/core/src/custom_uri/mod.rs @@ -7,10 +7,10 @@ use crate::{ Node, }; -use http_body::combinators::UnsyncBoxBody; -use hyper::{header, upgrade::OnUpgrade}; +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_prisma_helpers::file_path_to_handle_custom_uri; + use sd_file_ext::text::is_text; -use sd_file_path_helper::{file_path_to_handle_custom_uri, IsolatedFilePathData}; use sd_p2p::{RemoteIdentity, P2P}; use sd_prisma::prisma::{file_path, location}; use sd_utils::db::maybe_missing; @@ -34,6 +34,8 @@ use axum::{ routing::get, Router, }; +use http_body::combinators::UnsyncBoxBody; +use hyper::{header, upgrade::OnUpgrade}; use mini_moka::sync::Cache; use tokio::{ fs::{self, File}, @@ -353,7 +355,7 @@ pub fn with_state(node: Arc) -> LocalState { if let CoreEvent::InvalidateOperation(e) = event { match e { InvalidateOperationEvent::Single(event) => { - // TODO: This is inefficent as any change will invalidate who cache. We need the new invalidation system!!! + // TODO: This is inefficient as any change will invalidate the whole cache. We need the new invalidation system!!! // TODO: It's also error prone and a fine-grained resource based invalidation system would avoid that. if event.key == "search.objects" || event.key == "search.paths" { file_metadata_cache.invalidate_all(); diff --git a/core/src/library/config.rs b/core/src/library/config.rs index d1a0c8c59..4a7bad016 100644 --- a/core/src/library/config.rs +++ b/core/src/library/config.rs @@ -405,9 +405,7 @@ impl LibraryConfig { .await?
.into_iter() .filter_map(|i| { - let Some(identity) = i.identity else { - return None; - }; + let identity = i.identity?; let (remote_identity, identity) = if identity[0] == b'I' { // We have an `IdentityOrRemoteIdentity::Identity` diff --git a/core/src/library/library.rs b/core/src/library/library.rs index 96ee7606b..57ad5ef04 100644 --- a/core/src/library/library.rs +++ b/core/src/library/library.rs @@ -2,7 +2,9 @@ use crate::{ api::CoreEvent, cloud, object::media::old_thumbnail::get_indexed_thumbnail_path, sync, Node, }; -use sd_file_path_helper::{file_path_to_full_path, IsolatedFilePathData}; +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_prisma_helpers::file_path_to_full_path; + use sd_p2p::Identity; use sd_prisma::prisma::{file_path, location, PrismaClient}; use sd_utils::{db::maybe_missing, error::FileIOError}; diff --git a/core/src/library/manager/error.rs b/core/src/library/manager/error.rs index 134781bbd..d53b85687 100644 --- a/core/src/library/manager/error.rs +++ b/core/src/library/manager/error.rs @@ -1,7 +1,6 @@ -use crate::{ - library::LibraryConfigError, - location::{indexer, LocationManagerError}, -}; +use crate::{library::LibraryConfigError, location::LocationManagerError}; + +use sd_core_indexer_rules::seed::SeederError; use sd_p2p::IdentityErr; use sd_utils::{ @@ -23,7 +22,7 @@ pub enum LibraryManagerError { #[error("failed to parse uuid: {0}")] Uuid(#[from] uuid::Error), #[error("failed to run indexer rules seeder: {0}")] - IndexerRulesSeeder(#[from] indexer::rules::seed::SeederError), + IndexerRulesSeeder(#[from] SeederError), // #[error("failed to initialize the key manager: {0}")] // KeyManager(#[from] sd_crypto::Error), #[error("error migrating the library: {0}")] diff --git a/core/src/library/manager/mod.rs b/core/src/library/manager/mod.rs index ee3ec8c91..0cac2c132 100644 --- a/core/src/library/manager/mod.rs +++ b/core/src/library/manager/mod.rs @@ -1,10 +1,7 @@ use crate::{ api::{utils::InvalidateOperationEvent, 
CoreEvent}, cloud, invalidate_query, - location::{ - indexer, - metadata::{LocationMetadataError, SpacedriveLocationMetadataFile}, - }, + location::metadata::{LocationMetadataError, SpacedriveLocationMetadataFile}, object::tag, p2p, sync, util::{mpscrr, MaybeUndefined}, @@ -160,6 +157,7 @@ impl Libraries { .await } + #[allow(clippy::too_many_arguments)] pub(crate) async fn create_with_uuid( self: &Arc, id: Uuid, @@ -230,7 +228,7 @@ impl Libraries { if should_seed { tag::seed::new_library(&library).await?; - indexer::rules::seed::new_or_existing_library(&library).await?; + sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?; debug!("Seeded library '{id:?}'"); } @@ -452,7 +450,7 @@ impl Libraries { instance::node_id::set(node_config.id.as_bytes().to_vec()), instance::metadata::set(Some( serde_json::to_vec(&node.p2p.peer_metadata()) - .expect("invalid peer metdata"), + .expect("invalid peer metadata"), )), ], ) @@ -525,7 +523,7 @@ impl Libraries { if should_seed { // library.orphan_remover.invoke().await; - indexer::rules::seed::new_or_existing_library(&library).await?; + sd_core_indexer_rules::seed::new_or_existing_library(&library.db).await?; } for location in library diff --git a/core/src/location/error.rs b/core/src/location/error.rs index a0f5b1e20..23115cc87 100644 --- a/core/src/location/error.rs +++ b/core/src/location/error.rs @@ -1,4 +1,5 @@ -use sd_file_path_helper::FilePathError; +use sd_core_file_path_helper::FilePathError; + use sd_prisma::prisma::location; use sd_utils::{ db::MissingFieldError, diff --git a/core/src/location/indexer/mod.rs b/core/src/location/indexer/mod.rs index 5cf0e84f2..ef1dff558 100644 --- a/core/src/location/indexer/mod.rs +++ b/core/src/location/indexer/mod.rs @@ -1,10 +1,11 @@ use crate::library::Library; -use sd_file_path_helper::{ - file_path_pub_and_cas_ids, FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts, -}; +use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData, 
IsolatedFilePathDataParts}; +use sd_core_indexer_rules::IndexerRuleError; +use sd_core_prisma_helpers::file_path_pub_and_cas_ids; + use sd_prisma::{ - prisma::{file_path, location, object as prisma_object, PrismaClient}, + prisma::{file_path, location, PrismaClient}, prisma_sync, }; use sd_sync::*; @@ -26,10 +27,8 @@ use super::location_with_indexer_rules; pub mod old_indexer_job; mod old_shallow; mod old_walk; -pub mod rules; use old_walk::WalkedEntry; -use rules::IndexerRuleError; pub use old_indexer_job::OldIndexerJobInit; pub use old_shallow::*; @@ -84,13 +83,12 @@ impl From for rspc::Error { async fn execute_indexer_save_step( location: &location_with_indexer_rules::Data, - save_step: &OldIndexerJobSaveStep, + OldIndexerJobSaveStep { walked, .. }: &OldIndexerJobSaveStep, library: &Library, ) -> Result { let Library { sync, db, .. } = library; - let (sync_stuff, paths): (Vec<_>, Vec<_>) = save_step - .walked + let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked .iter() .map(|entry| { let IsolatedFilePathDataParts { @@ -181,8 +179,8 @@ async fn execute_indexer_update_step( let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); let should_unlink_object = if let Some(object_id) = entry.maybe_object_id { - db.object() - .count(vec![prisma_object::id::equals(object_id)]) + db.file_path() + .count(vec![file_path::object_id::equals(Some(object_id))]) .exec() .await? > 1 } else { @@ -310,7 +308,7 @@ macro_rules! file_paths_db_fetcher_fn { .find_many(vec![::prisma_client_rust::operator::or( founds.collect::>(), )]) - .select(::sd_file_path_helper::file_path_walker::select()) + .select(::sd_core_prisma_helpers::file_path_walker::select()) }) .collect::>(); @@ -332,7 +330,7 @@ macro_rules! 
to_remove_db_fetcher_fn { |parent_iso_file_path, unique_location_id_materialized_path_name_extension_params| async { let location_id: ::sd_prisma::prisma::location::id::Type = $location_id; let db: &::sd_prisma::prisma::PrismaClient = $db; - let parent_iso_file_path: ::sd_file_path_helper::IsolatedFilePathData< + let parent_iso_file_path: ::sd_core_file_path_helper::IsolatedFilePathData< 'static, > = parent_iso_file_path; let unique_location_id_materialized_path_name_extension_params: ::std::vec::Vec< @@ -396,7 +394,7 @@ macro_rules! to_remove_db_fetcher_fn { found .into_iter() .filter(|file_path| !founds_ids.contains(&file_path.id)) - .map(|file_path| ::sd_file_path_helper::file_path_pub_and_cas_ids::Data { + .map(|file_path| ::sd_core_prisma_helpers::file_path_pub_and_cas_ids::Data { id: file_path.id, pub_id: file_path.pub_id, cas_id: file_path.cas_id, diff --git a/core/src/location/indexer/old_indexer_job.rs b/core/src/location/indexer/old_indexer_job.rs index 0c7c96a38..84edfddbd 100644 --- a/core/src/location/indexer/old_indexer_job.rs +++ b/core/src/location/indexer/old_indexer_job.rs @@ -9,10 +9,12 @@ use crate::{ to_remove_db_fetcher_fn, }; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, IsolatedFilePathData, }; +use sd_core_indexer_rules::IndexerRule; + use sd_prisma::{ prisma::{file_path, location}, prisma_sync, @@ -38,9 +40,8 @@ use tracing::{debug, info, warn}; use super::{ execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory, old_walk::{keep_walking, walk, ToWalkEntry, WalkResult}, - remove_non_existing_file_paths, reverse_update_directories_sizes, - rules::IndexerRule, - IndexerError, OldIndexerJobSaveStep, OldIndexerJobUpdateStep, + remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError, + OldIndexerJobSaveStep, OldIndexerJobUpdateStep, }; /// BATCH_SIZE is the number of files to index at each step, 
writing the chunk of files metadata in the database. diff --git a/core/src/location/indexer/old_shallow.rs b/core/src/location/indexer/old_shallow.rs index 9882b7398..5d5b48b7f 100644 --- a/core/src/location/indexer/old_shallow.rs +++ b/core/src/location/indexer/old_shallow.rs @@ -11,10 +11,12 @@ use crate::{ to_remove_db_fetcher_fn, Node, }; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, IsolatedFilePathData, }; +use sd_core_indexer_rules::IndexerRule; + use sd_utils::db::maybe_missing; use std::{ @@ -29,8 +31,7 @@ use tracing::{debug, error}; use super::{ execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules, - old_walk::walk_single_dir, remove_non_existing_file_paths, rules::IndexerRule, IndexerError, - OldIndexerJobSaveStep, + old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep, }; /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. 
diff --git a/core/src/location/indexer/old_walk.rs b/core/src/location/indexer/old_walk.rs index 1b66ef96c..b7db99c7c 100644 --- a/core/src/location/indexer/old_walk.rs +++ b/core/src/location/indexer/old_walk.rs @@ -1,6 +1,7 @@ -use sd_file_path_helper::{ - file_path_pub_and_cas_ids, file_path_walker, FilePathMetadata, IsolatedFilePathData, -}; +use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData}; +use sd_core_indexer_rules::{IndexerRule, RuleKind}; +use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker}; + use sd_prisma::prisma::file_path; use sd_utils::{db::inode_from_db, error::FileIOError}; @@ -17,10 +18,7 @@ use tokio::fs; use tracing::trace; use uuid::Uuid; -use super::{ - rules::{IndexerRule, RuleKind}, - IndexerError, -}; +use super::IndexerError; const TO_WALK_QUEUE_INITIAL_CAPACITY: usize = 32; const WALKER_PATHS_BUFFER_INITIAL_CAPACITY: usize = 256; @@ -299,7 +297,7 @@ where indexed_paths.insert(WalkingEntry { iso_file_path: iso_file_path_factory(root, true)?, - maybe_metadata: Some(FilePathMetadata::from_path(&root, &metadata).await?), + maybe_metadata: Some(FilePathMetadata::from_path(root, &metadata)?), }); } @@ -605,7 +603,6 @@ where }; let Ok(metadata) = FilePathMetadata::from_path(¤t_path, &metadata) - .await .map_err(|e| errors.push(e.into())) else { continue; @@ -643,8 +640,7 @@ where continue; }; - let Ok(metadata) = FilePathMetadata::from_path(&ancestor, &metadata) - .await + let Ok(metadata) = FilePathMetadata::from_path(ancestor, &metadata) .map_err(|e| errors.push(e.into())) else { continue; @@ -696,10 +692,10 @@ where #[cfg(test)] #[allow(clippy::unwrap_used, clippy::panic)] mod tests { - use super::super::rules::RulePerKind; use super::*; use chrono::Utc; use globset::{Glob, GlobSetBuilder}; + use sd_core_indexer_rules::RulePerKind; use tempfile::{tempdir, TempDir}; // use tracing_test::traced_test; @@ -717,6 +713,21 @@ mod tests { } } + fn new_indexer_rule( + name: impl Into, + default: bool, + 
rules: Vec, + ) -> IndexerRule { + IndexerRule { + id: None, + name: name.into(), + default, + rules, + date_created: Utc::now(), + date_modified: Utc::now(), + } + } + async fn prepare_location() -> TempDir { let root = tempdir().unwrap(); let root_path = root.path(); @@ -872,7 +883,7 @@ mod tests { .into_iter() .collect::>(); - let only_photos_rule = &[IndexerRule::new( + let only_photos_rule = &[new_indexer_rule( "only photos".to_string(), false, vec![RulePerKind::AcceptFilesByGlob( @@ -950,7 +961,7 @@ mod tests { .into_iter() .collect::>(); - let git_repos = &[IndexerRule::new( + let git_repos = &[new_indexer_rule( "git repos".to_string(), false, vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent( @@ -1019,14 +1030,14 @@ mod tests { .collect::>(); let git_repos_no_deps_no_build_dirs = &[ - IndexerRule::new( + new_indexer_rule( "git repos".to_string(), false, vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent( [".git".to_string()].into_iter().collect(), )], ), - IndexerRule::new( + new_indexer_rule( "reject node_modules".to_string(), false, vec![RulePerKind::RejectFilesByGlob( @@ -1037,7 +1048,7 @@ mod tests { .unwrap(), )], ), - IndexerRule::new( + new_indexer_rule( "reject rust build dir".to_string(), false, vec![RulePerKind::RejectFilesByGlob( diff --git a/core/src/location/manager/mod.rs b/core/src/location/manager/mod.rs index 7867b9065..93e1e69ee 100644 --- a/core/src/location/manager/mod.rs +++ b/core/src/location/manager/mod.rs @@ -4,7 +4,8 @@ use crate::{ Node, }; -use sd_file_path_helper::FilePathError; +use sd_core_file_path_helper::FilePathError; + use sd_prisma::prisma::location; use sd_utils::{db::MissingFieldError, error::FileIOError}; @@ -18,11 +19,9 @@ use futures::executor::block_on; use thiserror::Error; use tokio::sync::{ broadcast::{self, Receiver}, - oneshot, RwLock, + mpsc, oneshot, RwLock, }; use tracing::{debug, error}; - -use tokio::sync::mpsc; use uuid::Uuid; mod watcher; diff --git a/core/src/location/manager/watcher/ios.rs 
b/core/src/location/manager/watcher/ios.rs index a29925b96..63f512e52 100644 --- a/core/src/location/manager/watcher/ios.rs +++ b/core/src/location/manager/watcher/ios.rs @@ -2,7 +2,10 @@ use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node}; -use sd_file_path_helper::{check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData}; +use sd_core_file_path_helper::{ + check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData, +}; + use sd_prisma::prisma::location; use sd_utils::error::FileIOError; diff --git a/core/src/location/manager/watcher/macos.rs b/core/src/location/manager/watcher/macos.rs index 18ae00500..99107e375 100644 --- a/core/src/location/manager/watcher/macos.rs +++ b/core/src/location/manager/watcher/macos.rs @@ -11,7 +11,10 @@ use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node}; -use sd_file_path_helper::{check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData}; +use sd_core_file_path_helper::{ + check_file_path_exists, get_inode, FilePathError, IsolatedFilePathData, +}; + use sd_prisma::prisma::location; use sd_utils::error::FileIOError; diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index e7d166079..0df4d10a0 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -18,13 +18,15 @@ use crate::{ Node, }; -use sd_file_ext::{extensions::ImageExtension, kind::ObjectKind}; -use sd_file_path_helper::{ - check_file_path_exists, file_path_with_object, filter_existing_file_path_params, +use sd_core_file_path_helper::{ + check_file_path_exists, filter_existing_file_path_params, isolated_file_path_data::extract_normalized_materialized_path_str, loose_find_existing_file_path_params, path_is_hidden, FilePathError, FilePathMetadata, IsolatedFilePathData, MetadataExt, }; +use sd_core_prisma_helpers::file_path_with_object; + +use 
sd_file_ext::{extensions::ImageExtension, kind::ObjectKind}; use sd_prisma::{ prisma::{file_path, location, media_data, object}, prisma_sync, @@ -37,10 +39,10 @@ use sd_utils::{ }; #[cfg(target_family = "unix")] -use sd_file_path_helper::get_inode; +use sd_core_file_path_helper::get_inode; #[cfg(target_family = "windows")] -use sd_file_path_helper::get_inode_from_path; +use sd_core_file_path_helper::get_inode_from_path; use std::{ collections::{HashMap, HashSet}, @@ -120,7 +122,7 @@ pub(super) async fn create_dir( library, iso_file_path.to_parts(), None, - FilePathMetadata::from_path(&path, metadata).await?, + FilePathMetadata::from_path(path, metadata)?, ) .await?; @@ -177,7 +179,7 @@ async fn inner_create_file( let iso_file_path_parts = iso_file_path.to_parts(); let extension = iso_file_path_parts.extension.to_string(); - let metadata = FilePathMetadata::from_path(&path, metadata).await?; + let metadata = FilePathMetadata::from_path(path, metadata)?; // First we check if already exist a file with this same inode number // if it does, we just update it diff --git a/core/src/location/manager/watcher/windows.rs b/core/src/location/manager/watcher/windows.rs index 1f60b729f..f926f76b9 100644 --- a/core/src/location/manager/watcher/windows.rs +++ b/core/src/location/manager/watcher/windows.rs @@ -9,7 +9,8 @@ use crate::{invalidate_query, library::Library, location::manager::LocationManagerError, Node}; -use sd_file_path_helper::{get_inode_from_path, FilePathError}; +use sd_core_file_path_helper::{get_inode_from_path, FilePathError}; + use sd_prisma::prisma::location; use sd_utils::error::FileIOError; diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index 950650e43..9846f98f9 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -9,7 +9,11 @@ use crate::{ Node, }; -use sd_file_path_helper::{filter_existing_file_path_params, IsolatedFilePathData}; +use sd_core_file_path_helper::{ + filter_existing_file_path_params, IsolatedFilePathData, 
IsolatedFilePathDataParts, +}; +use sd_core_prisma_helpers::location_with_indexer_rules; + use sd_prisma::{ prisma::{file_path, indexer_rules_in_location, location, PrismaClient}, prisma_sync, @@ -21,8 +25,6 @@ use sd_utils::{ msgpack, uuid_to_bytes, }; -use sd_file_path_helper::IsolatedFilePathDataParts; - use std::{ collections::HashSet, path::{Component, Path, PathBuf}, @@ -53,11 +55,6 @@ use metadata::SpacedriveLocationMetadataFile; pub type LocationPubId = Uuid; -// Location includes! -location::include!(location_with_indexer_rules { - indexer_rules: select { indexer_rule } -}); - /// `LocationCreateArgs` is the argument received from the client using `rspc` to create a new location. /// It has the actual path and a vector of indexer rules ids, to create many-to-many relationships /// between the location and indexer rules. @@ -867,52 +864,6 @@ pub async fn delete_directory( Ok(()) } -impl From for location::Data { - fn from(data: location_with_indexer_rules::Data) -> Self { - Self { - id: data.id, - pub_id: data.pub_id, - path: data.path, - instance_id: data.instance_id, - name: data.name, - total_capacity: data.total_capacity, - available_capacity: data.available_capacity, - is_archived: data.is_archived, - size_in_bytes: data.size_in_bytes, - generate_preview_media: data.generate_preview_media, - sync_preview_media: data.sync_preview_media, - hidden: data.hidden, - date_created: data.date_created, - file_paths: None, - indexer_rules: None, - instance: None, - } - } -} - -impl From<&location_with_indexer_rules::Data> for location::Data { - fn from(data: &location_with_indexer_rules::Data) -> Self { - Self { - id: data.id, - pub_id: data.pub_id.clone(), - path: data.path.clone(), - instance_id: data.instance_id, - name: data.name.clone(), - total_capacity: data.total_capacity, - available_capacity: data.available_capacity, - size_in_bytes: data.size_in_bytes.clone(), - is_archived: data.is_archived, - generate_preview_media: data.generate_preview_media, - 
sync_preview_media: data.sync_preview_media, - hidden: data.hidden, - date_created: data.date_created, - file_paths: None, - indexer_rules: None, - instance: None, - } - } -} - async fn check_nested_location( location_path: impl AsRef, db: &PrismaClient, @@ -1049,8 +1000,8 @@ pub async fn create_file_path( .. }: IsolatedFilePathDataParts<'_>, cas_id: Option, - metadata: sd_file_path_helper::FilePathMetadata, -) -> Result { + metadata: sd_core_file_path_helper::FilePathMetadata, +) -> Result { use sd_utils::db::inode_to_db; use sd_prisma::prisma; @@ -1063,7 +1014,7 @@ pub async fn create_file_path( .select(location::select!({ id pub_id })) .exec() .await? - .ok_or(sd_file_path_helper::FilePathError::LocationNotFound( + .ok_or(sd_core_file_path_helper::FilePathError::LocationNotFound( location_id, ))?; diff --git a/core/src/location/non_indexed.rs b/core/src/location/non_indexed.rs index fd1ad32f9..79f0f8ce8 100644 --- a/core/src/location/non_indexed.rs +++ b/core/src/location/non_indexed.rs @@ -8,10 +8,13 @@ use crate::{ Node, }; -use futures::Stream; -use itertools::Either; +use sd_core_file_path_helper::{path_is_hidden, MetadataExt}; +use sd_core_indexer_rules::{ + seed::{no_hidden, no_os_protected}, + IndexerRule, RuleKind, +}; + use sd_file_ext::{extensions::Extension, kind::ObjectKind}; -use sd_file_path_helper::{path_is_hidden, MetadataExt}; use sd_prisma::prisma::location; use sd_utils::{chain_optional_iter, error::FileIOError}; @@ -23,6 +26,8 @@ use std::{ }; use chrono::{DateTime, Utc}; +use futures::Stream; +use itertools::Either; use rspc::ErrorCode; use serde::Serialize; use specta::Type; @@ -31,13 +36,7 @@ use tokio::{io, sync::mpsc, task::JoinError}; use tokio_stream::wrappers::ReceiverStream; use tracing::{error, span, warn, Level}; -use super::{ - indexer::rules::{ - seed::{no_hidden, no_os_protected}, - IndexerRule, RuleKind, - }, - normalize_path, -}; +use super::normalize_path; #[derive(Debug, Error)] pub enum NonIndexedLocationError { diff --git 
a/core/src/object/fs/error.rs b/core/src/object/fs/error.rs index d221d7169..2f610eab7 100644 --- a/core/src/object/fs/error.rs +++ b/core/src/object/fs/error.rs @@ -1,6 +1,7 @@ use crate::location::LocationError; -use sd_file_path_helper::FilePathError; +use sd_core_file_path_helper::FilePathError; + use sd_prisma::prisma::file_path; use sd_utils::{ db::MissingFieldError, diff --git a/core/src/object/fs/mod.rs b/core/src/object/fs/mod.rs index be2e32afe..ba2d68916 100644 --- a/core/src/object/fs/mod.rs +++ b/core/src/object/fs/mod.rs @@ -1,6 +1,8 @@ use crate::location::LocationError; -use sd_file_path_helper::{file_path_with_object, IsolatedFilePathData}; +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_prisma_helpers::file_path_with_object; + use sd_prisma::prisma::{file_path, location, PrismaClient}; use sd_utils::{ db::maybe_missing, diff --git a/core/src/object/fs/old_copy.rs b/core/src/object/fs/old_copy.rs index 602a4e5d9..cbe4c4f0a 100644 --- a/core/src/object/fs/old_copy.rs +++ b/core/src/object/fs/old_copy.rs @@ -7,7 +7,8 @@ use crate::{ }, }; -use sd_file_path_helper::{join_location_relative_path, IsolatedFilePathData}; +use sd_core_file_path_helper::{join_location_relative_path, IsolatedFilePathData}; + use sd_prisma::prisma::{file_path, location}; use sd_utils::{db::maybe_missing, error::FileIOError}; diff --git a/core/src/object/fs/old_cut.rs b/core/src/object/fs/old_cut.rs index 0335a0ff6..4135dc631 100644 --- a/core/src/object/fs/old_cut.rs +++ b/core/src/object/fs/old_cut.rs @@ -8,7 +8,8 @@ use crate::{ }, }; -use sd_file_path_helper::push_location_relative_path; +use sd_core_file_path_helper::push_location_relative_path; + use sd_prisma::prisma::{file_path, location}; use sd_utils::error::FileIOError; diff --git a/core/src/object/fs/old_erase.rs b/core/src/object/fs/old_erase.rs index ba20637fe..05c49d8e3 100644 --- a/core/src/object/fs/old_erase.rs +++ b/core/src/object/fs/old_erase.rs @@ -8,7 +8,8 @@ use crate::{ }, }; -use 
sd_file_path_helper::IsolatedFilePathData; +use sd_core_file_path_helper::IsolatedFilePathData; + use sd_prisma::prisma::{file_path, location}; use sd_utils::{db::maybe_missing, error::FileIOError}; diff --git a/core/src/object/media/media_data_extractor.rs b/core/src/object/media/media_data_extractor.rs index 9a0999f4c..7fd7b2b3c 100644 --- a/core/src/object/media/media_data_extractor.rs +++ b/core/src/object/media/media_data_extractor.rs @@ -1,7 +1,9 @@ use crate::old_job::JobRunErrors; +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_prisma_helpers::file_path_for_media_processor; + use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS}; -use sd_file_path_helper::{file_path_for_media_processor, IsolatedFilePathData}; use sd_media_metadata::ImageMetadata; use sd_prisma::prisma::{location, media_data, PrismaClient}; use sd_utils::error::FileIOError; diff --git a/core/src/object/media/old_media_processor/job.rs b/core/src/object/media/old_media_processor/job.rs index 746c7d09f..444dbbab0 100644 --- a/core/src/object/media/old_media_processor/job.rs +++ b/core/src/object/media/old_media_processor/job.rs @@ -11,11 +11,13 @@ use crate::{ #[cfg(feature = "ai")] use crate::old_job::JobRunErrors; -use sd_file_ext::extensions::Extension; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_media_processor, IsolatedFilePathData, + IsolatedFilePathData, }; +use sd_core_prisma_helpers::file_path_for_media_processor; + +use sd_file_ext::extensions::Extension; use sd_prisma::prisma::{location, PrismaClient}; use sd_utils::db::maybe_missing; diff --git a/core/src/object/media/old_media_processor/mod.rs b/core/src/object/media/old_media_processor/mod.rs index 8a3485d43..6fefbb3a5 100644 --- a/core/src/object/media/old_media_processor/mod.rs +++ b/core/src/object/media/old_media_processor/mod.rs @@ -1,6 +1,8 @@ use 
crate::old_job::{JobRunErrors, JobRunMetadata}; -use sd_file_path_helper::{file_path_for_media_processor, FilePathError}; +use sd_core_file_path_helper::FilePathError; +use sd_core_prisma_helpers::file_path_for_media_processor; + use sd_prisma::prisma::{location, PrismaClient}; use std::path::Path; diff --git a/core/src/object/media/old_media_processor/shallow.rs b/core/src/object/media/old_media_processor/shallow.rs index c34ace220..59d89ca2f 100644 --- a/core/src/object/media/old_media_processor/shallow.rs +++ b/core/src/object/media/old_media_processor/shallow.rs @@ -6,11 +6,13 @@ use crate::{ Node, }; -use sd_file_ext::extensions::Extension; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_media_processor, IsolatedFilePathData, + IsolatedFilePathData, }; +use sd_core_prisma_helpers::file_path_for_media_processor; + +use sd_file_ext::extensions::Extension; use sd_prisma::prisma::{location, PrismaClient}; use sd_utils::db::maybe_missing; diff --git a/core/src/object/mod.rs b/core/src/object/mod.rs index d29d8d91e..08c41f1a4 100644 --- a/core/src/object/mod.rs +++ b/core/src/object/mod.rs @@ -15,12 +15,6 @@ pub mod validation; // Some Objects are purely virtual, unless they have one or more associated Paths, which refer to a file found in a Location // Objects are what can be added to Spaces -// Object selectables! 
-object::select!(object_for_file_identifier { - pub_id - file_paths: select { pub_id cas_id extension is_dir materialized_path name } -}); - // The response to provide the Explorer when looking at Objects #[derive(Debug, Serialize, Deserialize, Type)] pub struct ObjectsForExplorer { diff --git a/core/src/object/old_file_identifier/mod.rs b/core/src/object/old_file_identifier/mod.rs index e8a0c988f..a98495420 100644 --- a/core/src/object/old_file_identifier/mod.rs +++ b/core/src/object/old_file_identifier/mod.rs @@ -1,11 +1,9 @@ -use crate::{ - library::Library, - object::{cas::generate_cas_id, object_for_file_identifier}, - old_job::JobError, -}; +use crate::{library::Library, object::cas::generate_cas_id, old_job::JobError}; + +use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData}; +use sd_core_prisma_helpers::{file_path_for_file_identifier, object_for_file_identifier}; use sd_file_ext::{extensions::Extension, kind::ObjectKind}; -use sd_file_path_helper::{file_path_for_file_identifier, FilePathError, IsolatedFilePathData}; use sd_prisma::{ prisma::{file_path, location, object, PrismaClient}, prisma_sync, diff --git a/core/src/object/old_file_identifier/old_file_identifier_job.rs b/core/src/object/old_file_identifier/old_file_identifier_job.rs index 1abfa70f3..f862a49a4 100644 --- a/core/src/object/old_file_identifier/old_file_identifier_job.rs +++ b/core/src/object/old_file_identifier/old_file_identifier_job.rs @@ -6,10 +6,12 @@ use crate::{ }, }; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_file_identifier, IsolatedFilePathData, + IsolatedFilePathData, }; +use sd_core_prisma_helpers::file_path_for_file_identifier; + use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder}; use sd_utils::db::maybe_missing; diff --git a/core/src/object/old_file_identifier/shallow.rs b/core/src/object/old_file_identifier/shallow.rs index 
5ddb40104..04355be15 100644 --- a/core/src/object/old_file_identifier/shallow.rs +++ b/core/src/object/old_file_identifier/shallow.rs @@ -1,9 +1,11 @@ use crate::{invalidate_query, library::Library, old_job::JobError}; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_file_identifier, IsolatedFilePathData, + IsolatedFilePathData, }; +use sd_core_prisma_helpers::file_path_for_file_identifier; + use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder}; use sd_utils::db::maybe_missing; diff --git a/core/src/object/validation/mod.rs b/core/src/object/validation/mod.rs index da82d102b..cda0b12c6 100644 --- a/core/src/object/validation/mod.rs +++ b/core/src/object/validation/mod.rs @@ -1,4 +1,4 @@ -use sd_file_path_helper::FilePathError; +use sd_core_file_path_helper::FilePathError; use sd_utils::error::FileIOError; use std::path::Path; diff --git a/core/src/object/validation/old_validator_job.rs b/core/src/object/validation/old_validator_job.rs index f8e2fbff2..4cbafcfab 100644 --- a/core/src/object/validation/old_validator_job.rs +++ b/core/src/object/validation/old_validator_job.rs @@ -5,10 +5,12 @@ use crate::{ }, }; -use sd_file_path_helper::{ +use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - file_path_for_object_validator, IsolatedFilePathData, + IsolatedFilePathData, }; +use sd_core_prisma_helpers::file_path_for_object_validator; + use sd_prisma::{ prisma::{file_path, location}, prisma_sync, diff --git a/core/src/old_job/report.rs b/core/src/old_job/report.rs index a6f100f87..1e620290f 100644 --- a/core/src/old_job/report.rs +++ b/core/src/old_job/report.rs @@ -1,5 +1,7 @@ use crate::library::Library; +use sd_core_prisma_helpers::job_without_data; + use sd_prisma::prisma::job; use sd_utils::db::{maybe_missing, MissingFieldError}; @@ -24,22 +26,6 @@ pub enum 
JobReportUpdate { Phase(String), } -job::select!(job_without_data { - id - name - action - status - parent_id - errors_text - metadata - date_created - date_started - date_completed - task_count - completed_task_count - date_estimated_completion -}); - #[derive(Debug, Serialize, Deserialize, Type, Clone)] pub struct JobReport { pub id: Uuid, diff --git a/crates/ai/Cargo.toml b/crates/ai/Cargo.toml index ca16579c8..f31fd9b8e 100644 --- a/crates/ai/Cargo.toml +++ b/crates/ai/Cargo.toml @@ -10,11 +10,15 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] -sd-prisma = { path = "../prisma" } +# Inner Core Sub-crates +sd-core-file-path-helper = { path = "../../core/crates/file-path-helper" } +sd-core-prisma-helpers = { path = "../../core/crates/prisma-helpers" } sd-core-sync = { path = "../../core/crates/sync" } + +# Spacedrive Sub-crates +sd-prisma = { path = "../prisma" } sd-sync = { path = "../sync" } sd-utils = { path = "../utils" } -sd-file-path-helper = { path = "../file-path-helper" } async-channel = { workspace = true } chrono = { workspace = true, features = ["serde"] } diff --git a/crates/ai/src/old_image_labeler/old_actor.rs b/crates/ai/src/old_image_labeler/old_actor.rs index 24b367f42..257f69986 100644 --- a/crates/ai/src/old_image_labeler/old_actor.rs +++ b/crates/ai/src/old_image_labeler/old_actor.rs @@ -1,4 +1,5 @@ -use sd_file_path_helper::file_path_for_media_processor; +use sd_core_prisma_helpers::file_path_for_media_processor; + use sd_prisma::prisma::{location, PrismaClient}; use sd_utils::error::FileIOError; diff --git a/crates/ai/src/old_image_labeler/process.rs b/crates/ai/src/old_image_labeler/process.rs index f5dfdcf17..8e674f3be 100644 --- a/crates/ai/src/old_image_labeler/process.rs +++ b/crates/ai/src/old_image_labeler/process.rs @@ -1,6 +1,6 @@ -#![allow(non_camel_case_types)] +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_prisma_helpers::file_path_for_media_processor; -use 
sd_file_path_helper::{file_path_for_media_processor, IsolatedFilePathData}; use sd_prisma::{ prisma::{file_path, label, label_on_object, object, PrismaClient}, prisma_sync, diff --git a/crates/cloud-api/Cargo.toml b/crates/cloud-api/Cargo.toml index 5a61dc58d..709bbe93c 100644 --- a/crates/cloud-api/Cargo.toml +++ b/crates/cloud-api/Cargo.toml @@ -6,14 +6,18 @@ edition.workspace = true repository.workspace = true [dependencies] +# Spacedrive Sub-crates sd-p2p = { path = "../p2p" } -reqwest = "0.11.22" -serde.workspace = true -serde_json.workspace = true -thiserror = "1.0.50" -uuid.workspace = true + +base64 = { workspace = true } +rmpv = { workspace = true } rspc = { workspace = true } -specta.workspace = true -base64.workspace = true -rmpv.workspace = true -tracing.workspace = true + +serde = { workspace = true } +serde_json = { workspace = true } +specta = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } + +reqwest = "0.11.22" diff --git a/crates/p2p-block/Cargo.toml b/crates/p2p-block/Cargo.toml index bc0b4ab32..7e23b35d3 100644 --- a/crates/p2p-block/Cargo.toml +++ b/crates/p2p-block/Cargo.toml @@ -7,9 +7,11 @@ edition.workspace = true repository.workspace = true [dependencies] +# Spacedrive Sub-crates sd-p2p = { path = "../p2p" } sd-p2p-proto = { path = "../p2p-proto" } -thiserror.workspace = true -tokio.workspace = true -tracing.workspace = true -uuid.workspace = true + +thiserror = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } diff --git a/crates/p2p-proto/Cargo.toml b/crates/p2p-proto/Cargo.toml index 84d79bc7f..4d7b43135 100644 --- a/crates/p2p-proto/Cargo.toml +++ b/crates/p2p-proto/Cargo.toml @@ -7,7 +7,8 @@ edition.workspace = true repository.workspace = true [dependencies] -ed25519-dalek = "2.1.1" -thiserror.workspace = true +thiserror = { workspace = true } tokio = { workspace = true, features = ["io-util"] } 
-uuid.workspace = true +uuid = { workspace = true } + +ed25519-dalek = "2.1.1" diff --git a/crates/p2p-tunnel/Cargo.toml b/crates/p2p-tunnel/Cargo.toml index 6b53dc8bc..de13cbab9 100644 --- a/crates/p2p-tunnel/Cargo.toml +++ b/crates/p2p-tunnel/Cargo.toml @@ -7,5 +7,7 @@ edition.workspace = true repository.workspace = true [dependencies] +# Spacedrive Sub-crates sd-p2p = { path = "../p2p" } + tokio = { workspace = true, features = ["io-util"] } diff --git a/crates/prisma-cli/Cargo.toml b/crates/prisma-cli/Cargo.toml index c8f4be971..4c531ff61 100644 --- a/crates/prisma-cli/Cargo.toml +++ b/crates/prisma-cli/Cargo.toml @@ -6,6 +6,7 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] +# Spacedrive Sub-crates sd-sync-generator = { path = "../sync-generator" } prisma-client-rust-cli = { workspace = true } diff --git a/crates/prisma/Cargo.toml b/crates/prisma/Cargo.toml index 79c0f51c3..d83158cd1 100644 --- a/crates/prisma/Cargo.toml +++ b/crates/prisma/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +# Spacedrive Sub-crates sd-cache = { path = "../cache" } sd-sync = { path = "../sync" } diff --git a/crates/sync/example/Cargo.toml b/crates/sync/example/Cargo.toml index 400d833fa..993a15369 100644 --- a/crates/sync/example/Cargo.toml +++ b/crates/sync/example/Cargo.toml @@ -8,14 +8,17 @@ repository = { workspace = true } edition = { workspace = true } [dependencies] -serde_json = "1.0.85" -serde = { version = "1.0.145", features = ["derive"] } +# Spacedrive Sub-crates +sd-sync = { path = ".." } + axum = { workspace = true } rspc = { workspace = true, features = ["axum"] } tokio = { workspace = true, features = ["full"] } prisma-client-rust = { workspace = true } +serde_json = { workspace = true } +serde = { workspace = true, features = ["derive"] } +uuid = { workspace = true, features = ["v4"] } + dotenv = "0.15.0" tower-http = { version = "0.3.4", features = ["cors"] } -sd-sync = { path = ".." 
} -uuid = { workspace = true, features = ["v4"] } http = "0.2.8" diff --git a/crates/task-system/Cargo.toml b/crates/task-system/Cargo.toml index 964076683..062983eac 100644 --- a/crates/task-system/Cargo.toml +++ b/crates/task-system/Cargo.toml @@ -34,9 +34,11 @@ pin-project = "1.1.4" tokio = { workspace = true, features = ["macros", "test-util", "fs"] } tempfile = { workspace = true } rand = "0.8.5" -tracing-test = { version = "^0.2.4", features = ["no-env-filter"] } +tracing-test = { workspace.dev-dependencies = true, features = [ + "no-env-filter", +] } thiserror = { workspace = true } -lending-stream = "1.0.0" +lending-stream = { workspace = true } serde = { workspace = true, features = ["derive"] } rmp-serde = { workspace = true } uuid = { workspace = true, features = ["serde"] } diff --git a/crates/task-system/src/lib.rs b/crates/task-system/src/lib.rs index b8dffe8e7..ef2ed8eb7 100644 --- a/crates/task-system/src/lib.rs +++ b/crates/task-system/src/lib.rs @@ -94,8 +94,11 @@ mod task; mod worker; pub use error::{RunError, SystemError as TaskSystemError}; -pub use system::{Dispatcher as TaskDispatcher, System as TaskSystem}; -pub use task::{ - AnyTaskOutput, ExecStatus, Interrupter, InterrupterFuture, InterruptionKind, IntoAnyTaskOutput, - IntoTask, Task, TaskHandle, TaskId, TaskOutput, TaskStatus, +pub use system::{ + BaseDispatcher as BaseTaskDispatcher, Dispatcher as TaskDispatcher, System as TaskSystem, +}; +pub use task::{ + AnyTaskOutput, CancelTaskOnDrop, ExecStatus, Interrupter, InterrupterFuture, InterruptionKind, + IntoAnyTaskOutput, IntoTask, SerializableTask, Task, TaskHandle, TaskId, TaskOutput, + TaskRemoteController, TaskStatus, }; diff --git a/crates/task-system/src/message.rs b/crates/task-system/src/message.rs index d993b4cb7..f6f8265c7 100644 --- a/crates/task-system/src/message.rs +++ b/crates/task-system/src/message.rs @@ -23,7 +23,7 @@ pub enum SystemMessage { CancelNotRunningTask { task_id: TaskId, worker_id: WorkerId, - ack: 
oneshot::Sender>, + ack: oneshot::Sender<()>, }, ForceAbortion { task_id: TaskId, @@ -51,7 +51,7 @@ pub enum WorkerMessage { }, CancelNotRunningTask { task_id: TaskId, - ack: oneshot::Sender>, + ack: oneshot::Sender<()>, }, ForceAbortion { task_id: TaskId, diff --git a/crates/task-system/src/system.rs b/crates/task-system/src/system.rs index 3d51bfa09..41a2c802d 100644 --- a/crates/task-system/src/system.rs +++ b/crates/task-system/src/system.rs @@ -1,6 +1,8 @@ use std::{ cell::RefCell, collections::HashSet, + fmt, + future::Future, num::NonZeroUsize, pin::pin, sync::{ @@ -30,7 +32,7 @@ use super::{ pub struct System { workers: Arc>>, msgs_tx: chan::Sender, - dispatcher: Dispatcher, + dispatcher: BaseDispatcher, handle: RefCell>>, } @@ -94,7 +96,7 @@ impl System { Self { workers: Arc::clone(&workers), msgs_tx, - dispatcher: Dispatcher { + dispatcher: BaseDispatcher { workers, idle_workers, last_worker_id: Arc::new(AtomicWorkerId::new(0)), @@ -115,12 +117,18 @@ impl System { } /// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible. - pub async fn dispatch_many(&self, into_tasks: Vec>) -> Vec> { + pub async fn dispatch_many> + Send>( + &self, + into_tasks: I, + ) -> Vec> + where + ::IntoIter: Send, + { self.dispatcher.dispatch_many(into_tasks).await } /// Returns a dispatcher that can be used to remotely dispatch tasks to the system. 
- pub fn get_dispatcher(&self) -> Dispatcher { + pub fn get_dispatcher(&self) -> BaseDispatcher { self.dispatcher.clone() } @@ -314,11 +322,7 @@ impl SystemComm { .expect("System channel closed trying receive pause not running task response") } - pub async fn cancel_not_running_task( - &self, - task_id: TaskId, - worker_id: WorkerId, - ) -> Result<(), SystemError> { + pub async fn cancel_not_running_task(&self, task_id: TaskId, worker_id: WorkerId) { let (tx, rx) = oneshot::channel(); self.0 @@ -331,7 +335,7 @@ impl SystemComm { .expect("System channel closed trying to cancel a not running task"); rx.await - .expect("System channel closed trying receive cancel a not running task response") + .expect("System channel closed trying receive cancel a not running task response"); } pub async fn request_help(&self, worker_id: WorkerId, task_count: usize) { @@ -390,13 +394,45 @@ impl SystemComm { /// It can be used to dispatch tasks to the system from other threads or tasks. /// It uses [`Arc`] internally so it can be cheaply cloned and put inside tasks so tasks can dispatch other tasks. #[derive(Debug)] -pub struct Dispatcher { +pub struct BaseDispatcher { workers: Arc>>, idle_workers: Arc>, last_worker_id: Arc, } -impl Clone for Dispatcher { +pub trait Dispatcher: fmt::Debug + Clone + Send + Sync + 'static { + /// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible. + fn dispatch(&self, into_task: impl IntoTask) -> impl Future> + Send { + self.dispatch_boxed(into_task.into_task()) + } + + /// Dispatches an already boxed task to the system, the task will be assigned to a worker and executed as + /// soon as possible. + fn dispatch_boxed( + &self, + boxed_task: Box>, + ) -> impl Future> + Send; + + /// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible. 
+ fn dispatch_many> + Send>( + &self, + into_tasks: I, + ) -> impl Future>> + Send + where + ::IntoIter: Send, + { + self.dispatch_many_boxed(into_tasks.into_iter().map(IntoTask::into_task)) + } + + /// Dispatches many already boxed tasks to the system, the tasks will be assigned to workers and executed as + /// soon as possible. + fn dispatch_many_boxed( + &self, + boxed_tasks: impl IntoIterator>> + Send, + ) -> impl Future>> + Send; +} + +impl Clone for BaseDispatcher { fn clone(&self) -> Self { Self { workers: Arc::clone(&self.workers), @@ -406,33 +442,35 @@ impl Clone for Dispatcher { } } -impl Dispatcher { - /// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible. - pub async fn dispatch(&self, into_task: impl IntoTask) -> TaskHandle { - async fn inner(this: &Dispatcher, task: Box>) -> TaskHandle { - let worker_id = this +impl Dispatcher for BaseDispatcher { + async fn dispatch(&self, into_task: impl IntoTask) -> TaskHandle { + self.dispatch_boxed(into_task.into_task()).await + } + + #[allow(clippy::missing_panics_doc)] + async fn dispatch_boxed(&self, task: Box>) -> TaskHandle { + let worker_id = self .last_worker_id .fetch_update(Ordering::Release, Ordering::Acquire, |last_worker_id| { - Some((last_worker_id + 1) % this.workers.len()) + Some((last_worker_id + 1) % self.workers.len()) }) .expect("we hardcoded the update function to always return Some(next_worker_id) through dispatcher"); - trace!( - "Dispatching task to worker: ", - task.id() - ); - let handle = this.workers[worker_id].add_task(task).await; + trace!( + "Dispatching task to worker: ", + task.id() + ); + let handle = self.workers[worker_id].add_task(task).await; - this.idle_workers[worker_id].store(false, Ordering::Relaxed); + self.idle_workers[worker_id].store(false, Ordering::Relaxed); - handle - } - - inner(self, into_task.into_task()).await + handle } - /// Dispatches many tasks to the system, the tasks will be assigned to workers and 
executed as soon as possible. - pub async fn dispatch_many(&self, into_tasks: Vec>) -> Vec> { + async fn dispatch_many_boxed( + &self, + into_tasks: impl IntoIterator>> + Send, + ) -> Vec> { let mut workers_task_count = self .workers .iter() @@ -445,7 +483,6 @@ impl Dispatcher { let (handles, workers_ids_set) = into_tasks .into_iter() - .map(IntoTask::into_task) .zip(workers_task_count.into_iter().cycle()) .map(|(task, (worker_id, _))| async move { (self.workers[worker_id].add_task(task).await, worker_id) @@ -462,7 +499,9 @@ impl Dispatcher { handles } +} +impl BaseDispatcher { /// Returns the number of workers in the system. #[must_use] pub fn workers_count(&self) -> usize { diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 68279af34..bf7c18b49 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -13,6 +13,7 @@ use async_channel as chan; use async_trait::async_trait; use chan::{Recv, RecvError}; use downcast_rs::{impl_downcast, Downcast}; +use futures::executor::block_on; use tokio::sync::oneshot; use tracing::{trace, warn}; use uuid::Uuid; @@ -61,7 +62,7 @@ pub enum TaskOutput { #[derive(Debug)] pub enum TaskStatus { /// The task has finished successfully and maybe has some output for the user. - Done(TaskOutput), + Done((TaskId, TaskOutput)), /// Task was gracefully cancelled by the user. Canceled, /// Task was forcefully aborted by the user. @@ -123,7 +124,7 @@ impl + 'static, E: RunError> IntoTask for T { /// We're currently using the [`async_trait`](https://docs.rs/async-trait) crate to allow dyn async traits, /// due to a limitation in the Rust language. #[async_trait] -pub trait Task: fmt::Debug + Downcast + Send + 'static { +pub trait Task: fmt::Debug + Downcast + Send + Sync + 'static { /// This method represent the work that should be done by the worker, it will be called by the /// worker when there is a slot available in its internal queue. 
/// We receive a `&mut self` so any internal data can be mutated on each `run` invocation. @@ -147,6 +148,21 @@ pub trait Task: fmt::Debug + Downcast + Send + 'static { impl_downcast!(Task where E: RunError); +pub trait SerializableTask: Task +where + Self: Sized, +{ + type SerializeError: std::error::Error + 'static; + type DeserializeError: std::error::Error + 'static; + type DeserializeCtx: 'static; + + fn serialize(self) -> impl Future, Self::SerializeError>> + Send; + fn deserialize( + data: &[u8], + ctx: Self::DeserializeCtx, + ) -> impl Future> + Send; +} + /// Intermediate struct to wait until a pause or a cancel commands are sent by the user. #[must_use = "`InterrupterFuture` does nothing unless polled"] #[pin_project::pin_project] @@ -164,7 +180,7 @@ impl Future for InterrupterFuture<'_> { match this.fut.poll(cx) { Poll::Ready(Ok(InterruptionRequest { kind, ack })) => { - if ack.send(Ok(())).is_err() { + if ack.send(()).is_err() { warn!("TaskInterrupter ack channel closed"); } this.has_interrupted.store(kind as u8, Ordering::Relaxed); @@ -218,7 +234,7 @@ impl Interrupter { InterruptionKind::load(&self.has_interrupted).map_or_else( || { if let Ok(InterruptionRequest { kind, ack }) = self.interrupt_rx.try_recv() { - if ack.send(Ok(())).is_err() { + if ack.send(()).is_err() { warn!("TaskInterrupter ack channel closed"); } @@ -245,6 +261,39 @@ impl Interrupter { } } +#[macro_export] +macro_rules! check_interruption { + ($interrupter:ident) => { + let interrupter: &Interrupter = $interrupter; + + match interrupter.try_check_interrupt() { + Some($crate::InterruptionKind::Cancel) => return Ok($crate::ExecStatus::Canceled), + Some($crate::InterruptionKind::Pause) => return Ok($crate::ExecStatus::Paused), + None => { /* Everything is Awesome! 
*/ } + } + }; + + ($interrupter:ident, $instant:ident, $duration_accumulator:ident) => { + let interrupter: &Interrupter = $interrupter; + let instant: Instant = $instant; + let duration_accumulator: &mut Duration = $duration_accumulator; + + match interrupter.try_check_interrupt() { + Some($crate::InterruptionKind::Cancel) => { + *duration_accumulator += instant.elapsed(); + + return Ok($crate::ExecStatus::Canceled); + } + Some($crate::InterruptionKind::Pause) => { + *duration_accumulator += instant.elapsed(); + + return Ok($crate::ExecStatus::Paused); + } + None => { /* Everything is Awesome! */ } + } + }; +} + /// The kind of interruption that can be requested by the user, a pause or a cancel #[derive(Debug, Clone, Copy)] #[repr(u8)] @@ -266,30 +315,18 @@ impl InterruptionKind { #[derive(Debug)] pub struct InterruptionRequest { kind: InterruptionKind, - ack: oneshot::Sender>, + ack: oneshot::Sender<()>, } -/// A handle returned when a task is dispatched to the task system, it can be used to pause, cancel, resume, or wait -/// until the task gets completed. -#[derive(Debug)] -pub struct TaskHandle { +/// A remote controller of a task that can be used to pause, cancel, resume, or force abortion. 
+#[derive(Debug, Clone)] +pub struct TaskRemoteController { pub(crate) worktable: Arc, - pub(crate) done_rx: oneshot::Receiver, SystemError>>, pub(crate) system_comm: SystemComm, pub(crate) task_id: TaskId, } -impl Future for TaskHandle { - type Output = Result, SystemError>; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - Pin::new(&mut self.done_rx) - .poll(cx) - .map(|res| res.expect("TaskHandle done channel unexpectedly closed")) - } -} - -impl TaskHandle { +impl TaskRemoteController { /// Get the unique identifier of the task #[must_use] pub const fn task_id(&self) -> TaskId { @@ -316,7 +353,7 @@ impl TaskHandle { self.worktable.pause(tx).await; - rx.await.expect("Worker failed to ack pause request")?; + rx.await.expect("Worker failed to ack pause request"); } else { trace!("Task is not running, setting is_paused flag"); self.worktable.is_paused.store(true, Ordering::Relaxed); @@ -338,7 +375,7 @@ impl TaskHandle { /// # Panics /// /// Will panic if the worker failed to ack the cancel request - pub async fn cancel(&self) -> Result<(), SystemError> { + pub async fn cancel(&self) { let is_canceled = self.worktable.is_canceled.load(Ordering::Relaxed); let is_done = self.worktable.is_done.load(Ordering::Relaxed); @@ -352,12 +389,11 @@ impl TaskHandle { self.worktable.cancel(tx).await; - rx.await.expect("Worker failed to ack cancel request")?; + rx.await.expect("Worker failed to ack cancel request"); } else { trace!("Task is not running, setting is_canceled flag"); self.worktable.is_canceled.store(true, Ordering::Relaxed); - return self - .system_comm + self.system_comm .cancel_not_running_task( self.task_id, self.worktable.current_worker_id.load(Ordering::Relaxed), @@ -365,8 +401,6 @@ impl TaskHandle { .await; } } - - Ok(()) } /// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution. 
@@ -390,6 +424,92 @@ impl TaskHandle { ) .await } + + /// Verify if the task was already completed + #[must_use] + pub fn is_done(&self) -> bool { + self.worktable.is_done.load(Ordering::Relaxed) + } +} + +/// A handle returned when a task is dispatched to the task system, it can be used to pause, cancel, resume, or wait +/// until the task gets completed. +#[derive(Debug)] +pub struct TaskHandle { + pub(crate) done_rx: oneshot::Receiver, SystemError>>, + pub(crate) controller: TaskRemoteController, +} + +impl Future for TaskHandle { + type Output = Result, SystemError>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + Pin::new(&mut self.done_rx) + .poll(cx) + .map(|res| res.expect("TaskHandle done channel unexpectedly closed")) + } +} + +impl TaskHandle { + /// Get the unique identifier of the task + #[must_use] + pub const fn task_id(&self) -> TaskId { + self.controller.task_id + } + + /// Gracefully pause the task at a safe point defined by the user using the [`Interrupter`] + /// + /// # Panics + /// + /// Will panic if the worker failed to ack the pause request + pub async fn pause(&self) -> Result<(), SystemError> { + self.controller.pause().await + } + + /// Gracefully cancel the task at a safe point defined by the user using the [`Interrupter`] + /// + /// # Panics + /// + /// Will panic if the worker failed to ack the cancel request + pub async fn cancel(&self) { + self.controller.cancel().await; + } + + /// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution. + pub async fn force_abortion(&self) -> Result<(), SystemError> { + self.controller.force_abortion().await + } + + /// Marks the task to be resumed by the task system, the worker will start processing it if there is a slot + /// available or will be enqueued otherwise. 
+ pub async fn resume(&self) -> Result<(), SystemError> { + self.controller.resume().await + } + + /// Gets the [`TaskRemoteController`] object that can be used to control the task remotely, to + /// pause, cancel, resume, or force abortion. + #[must_use] + pub fn remote_controller(&self) -> TaskRemoteController { + self.controller.clone() + } +} + +/// A helper struct when you just want to cancel a task if its `TaskHandle` gets dropped. +pub struct CancelTaskOnDrop(pub TaskHandle); + +impl Future for CancelTaskOnDrop { + type Output = Result, SystemError>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + Pin::new(&mut self.0).poll(cx) + } +} + +impl Drop for CancelTaskOnDrop { + fn drop(&mut self) { + // FIXME: We should use async drop when it becomes stable + block_on(self.0.cancel()); + } } #[derive(Debug)] @@ -436,7 +556,7 @@ impl TaskWorktable { self.is_aborted.store(true, Ordering::Relaxed); } - pub async fn pause(&self, tx: oneshot::Sender>) { + pub async fn pause(&self, tx: oneshot::Sender<()>) { self.is_paused.store(true, Ordering::Relaxed); self.is_running.store(false, Ordering::Relaxed); @@ -451,7 +571,7 @@ impl TaskWorktable { .expect("Worker channel closed trying to pause task"); } - pub async fn cancel(&self, tx: oneshot::Sender>) { + pub async fn cancel(&self, tx: oneshot::Sender<()>) { self.is_canceled.store(true, Ordering::Relaxed); self.is_running.store(false, Ordering::Relaxed); diff --git a/crates/task-system/src/worker/mod.rs b/crates/task-system/src/worker/mod.rs index 6fc1072e3..cdeae4ddc 100644 --- a/crates/task-system/src/worker/mod.rs +++ b/crates/task-system/src/worker/mod.rs @@ -8,6 +8,8 @@ use async_channel as chan; use tokio::{spawn, sync::oneshot, task::JoinHandle}; use tracing::{error, info, trace, warn}; +use crate::task::TaskRemoteController; + use super::{ error::{RunError, SystemError}, message::WorkerMessage, @@ -127,10 +129,12 @@ impl Worker { .expect("Worker channel closed trying to add task"); 
TaskHandle { - worktable, done_rx, - system_comm: self.system_comm.clone(), - task_id, + controller: TaskRemoteController { + worktable, + system_comm: self.system_comm.clone(), + task_id, + }, } } @@ -168,11 +172,7 @@ impl Worker { .expect("Worker channel closed trying to pause a not running task"); } - pub async fn cancel_not_running_task( - &self, - task_id: TaskId, - ack: oneshot::Sender>, - ) { + pub async fn cancel_not_running_task(&self, task_id: TaskId, ack: oneshot::Sender<()>) { self.msgs_tx .send(WorkerMessage::CancelNotRunningTask { task_id, ack }) .await diff --git a/crates/task-system/src/worker/run.rs b/crates/task-system/src/worker/run.rs index d176b3310..70de8c65c 100644 --- a/crates/task-system/src/worker/run.rs +++ b/crates/task-system/src/worker/run.rs @@ -65,7 +65,7 @@ pub(super) async fn run( StreamMessage::Commands(WorkerMessage::CancelNotRunningTask { task_id, ack }) => { runner.cancel_not_running_task(task_id); - if ack.send(Ok(())).is_err() { + if ack.send(()).is_err() { warn!("Resume task channel closed before sending ack"); } } diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index 099ffd85e..d3cc3d91e 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -965,6 +965,7 @@ impl Runner { ) { match status { InternalTaskExecStatus::Done(out) => { + self.task_kinds.remove(&task_id); send_complete_task_response(self.worker_id, task_id, task_work_state, out); } @@ -977,10 +978,12 @@ impl Runner { } InternalTaskExecStatus::Canceled => { + self.task_kinds.remove(&task_id); send_cancel_task_response(self.worker_id, task_id, task_work_state); } InternalTaskExecStatus::Error(e) => { + self.task_kinds.remove(&task_id); send_error_task_response(self.worker_id, task_id, task_work_state, e); } @@ -1057,7 +1060,7 @@ impl Runner { } if self.task_kinds.capacity() > TASK_QUEUE_INITIAL_SIZE { - assert_eq!(self.task_kinds.len(), 0); + 
assert_eq!(self.task_kinds.len(), self.paused_tasks.len()); self.task_kinds.shrink_to(TASK_QUEUE_INITIAL_SIZE); } @@ -1190,15 +1193,10 @@ fn handle_task_suspension( worktable.pause(tx).await; match rx.await { - Ok(Ok(())) => { + Ok(()) => { trace!("Suspending: "); has_suspended.store(true, Ordering::Relaxed); } - Ok(Err(e)) => { - error!( - "Task failed to suspend: {e:#?}", - ); - } Err(_) => { // The task probably finished before we could suspend it so the channel was dropped trace!( @@ -1408,7 +1406,7 @@ fn send_complete_task_response( out: TaskOutput, ) { worktable.set_completed(); - if done_tx.send(Ok(TaskStatus::Done(out))).is_err() { + if done_tx.send(Ok(TaskStatus::Done((task_id, out)))).is_err() { warn!( "Task done channel closed before sending done response for task: \ " diff --git a/crates/task-system/tests/common/actors.rs b/crates/task-system/tests/common/actors.rs index f166b70c1..37bcbcefc 100644 --- a/crates/task-system/tests/common/actors.rs +++ b/crates/task-system/tests/common/actors.rs @@ -1,5 +1,6 @@ use sd_task_system::{ - ExecStatus, Interrupter, Task, TaskDispatcher, TaskHandle, TaskId, TaskOutput, TaskStatus, + BaseTaskDispatcher, ExecStatus, Interrupter, Task, TaskDispatcher, TaskHandle, TaskId, + TaskOutput, TaskStatus, }; use std::{ @@ -24,7 +25,7 @@ const SAMPLE_ACTOR_SAVE_STATE_FILE_NAME: &str = "sample_actor_save_state.bin"; pub struct SampleActor { data: Arc, // Can hold any kind of actor data, like an AI model - task_dispatcher: TaskDispatcher, + task_dispatcher: BaseTaskDispatcher, task_handles_tx: chan::Sender>, } @@ -32,7 +33,7 @@ impl SampleActor { pub async fn new( data_directory: impl AsRef, data: String, - task_dispatcher: TaskDispatcher, + task_dispatcher: BaseTaskDispatcher, ) -> (Self, broadcast::Receiver<()>) { let (task_handles_tx, task_handles_rx) = chan::bounded(8); @@ -162,7 +163,7 @@ impl SampleActor { async { if let Some(out) = handles.next().await { match out { - Ok(TaskStatus::Done(maybe_out)) => { + 
Ok(TaskStatus::Done((_task_id, maybe_out))) => { if let TaskOutput::Out(out) = maybe_out { info!( "Task completed: {:?}", @@ -226,7 +227,7 @@ impl SampleActor { ) .chain(handles.filter_map(|handle| async move { match handle { - Ok(TaskStatus::Done(maybe_out)) => { + Ok(TaskStatus::Done((_task_id, maybe_out))) => { if let TaskOutput::Out(out) = maybe_out { info!( "Task completed: {:?}", diff --git a/crates/task-system/tests/common/jobs.rs b/crates/task-system/tests/common/jobs.rs index 9792fa943..7055c3473 100644 --- a/crates/task-system/tests/common/jobs.rs +++ b/crates/task-system/tests/common/jobs.rs @@ -2,8 +2,8 @@ use async_trait::async_trait; use futures_concurrency::future::FutureGroup; use lending_stream::{LendingStream, StreamExt}; use sd_task_system::{ - ExecStatus, Interrupter, IntoAnyTaskOutput, Task, TaskDispatcher, TaskHandle, TaskId, - TaskOutput, TaskStatus, + BaseTaskDispatcher, ExecStatus, Interrupter, IntoAnyTaskOutput, Task, TaskDispatcher, + TaskHandle, TaskId, TaskOutput, TaskStatus, }; use tracing::trace; @@ -12,11 +12,11 @@ use super::tasks::SampleError; #[derive(Debug)] pub struct SampleJob { total_steps: u32, - task_dispatcher: TaskDispatcher, + task_dispatcher: BaseTaskDispatcher, } impl SampleJob { - pub fn new(total_steps: u32, task_dispatcher: TaskDispatcher) -> Self { + pub fn new(total_steps: u32, task_dispatcher: BaseTaskDispatcher) -> Self { Self { total_steps, task_dispatcher, @@ -47,7 +47,7 @@ impl SampleJob { while let Some((group, res)) = group.next().await { match res.unwrap() { - TaskStatus::Done(TaskOutput::Out(out)) => { + TaskStatus::Done((_task_id, TaskOutput::Out(out))) => { group.insert( out.downcast::() .expect("we know the output type") @@ -55,7 +55,7 @@ impl SampleJob { ); trace!("Received more tasks to wait for ({} left)", group.len()); } - TaskStatus::Done(TaskOutput::Empty) => { + TaskStatus::Done((_task_id, TaskOutput::Empty)) => { trace!( "Step done, waiting for all children to finish ({} left)", group.len() @@ 
-83,7 +83,7 @@ impl SampleJob { struct SampleJobTask { id: TaskId, expected_children: u32, - task_dispatcher: TaskDispatcher, + task_dispatcher: BaseTaskDispatcher, } #[derive(Debug)] diff --git a/crates/task-system/tests/integration_test.rs b/crates/task-system/tests/integration_test.rs index bf3ce697b..db563754d 100644 --- a/crates/task-system/tests/integration_test.rs +++ b/crates/task-system/tests/integration_test.rs @@ -72,7 +72,7 @@ async fn cancel_test() { let handle = system.dispatch(NeverTask::default()).await; info!("issuing cancel"); - handle.cancel().await.unwrap(); + handle.cancel().await; assert!(matches!(handle.await, Ok(TaskStatus::Canceled))); @@ -88,7 +88,7 @@ async fn done_test() { assert!(matches!( handle.await, - Ok(TaskStatus::Done(TaskOutput::Empty)) + Ok(TaskStatus::Done((_task_id, TaskOutput::Empty))) )); system.shutdown().await; @@ -150,7 +150,7 @@ async fn pause_test() { assert!(matches!( handle.await, - Ok(TaskStatus::Done(TaskOutput::Empty)) + Ok(TaskStatus::Done((_task_id, TaskOutput::Empty))) )); system.shutdown().await; @@ -185,7 +185,7 @@ async fn steal_test() { let mut pause_handles = VecDeque::from(system.dispatch_many(pause_tasks).await); let ready_handles = system - .dispatch_many((0..100).map(|_| ReadyTask::default()).collect()) + .dispatch_many((0..100).map(|_| ReadyTask::default())) .await; pause_begans diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 53f18aa1a..5114f756f 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +# Spacedrive Sub-crates sd-prisma = { path = "../prisma" } prisma-client-rust = { workspace = true } diff --git a/crates/utils/src/db.rs b/crates/utils/src/db.rs index 221d58c9a..e02df080d 100644 --- a/crates/utils/src/db.rs +++ b/crates/utils/src/db.rs @@ -63,6 +63,23 @@ pub fn inode_to_db(inode: u64) -> Vec { 
inode.to_le_bytes().to_vec() } +pub fn size_in_bytes_from_db(db_size_in_bytes: &[u8]) -> u64 { + u64::from_be_bytes([ + db_size_in_bytes[0], + db_size_in_bytes[1], + db_size_in_bytes[2], + db_size_in_bytes[3], + db_size_in_bytes[4], + db_size_in_bytes[5], + db_size_in_bytes[6], + db_size_in_bytes[7], + ]) +} + +pub fn size_in_bytes_to_db(size: u64) -> Vec { + size.to_be_bytes().to_vec() +} + #[derive(Error, Debug)] #[error("Missing field {0}")] pub struct MissingFieldError(&'static str); diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 3815110fe..e46b68061 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -12,7 +12,7 @@ export type Procedures = { { key: "cloud.locations.list", input: never, result: CloudLocation[] } | { key: "ephemeralFiles.getMediaData", input: string, result: ({ type: "Image" } & ImageMetadata) | ({ type: "Video" } & VideoMetadata) | ({ type: "Audio" } & AudioMetadata) | null } | { key: "files.get", input: LibraryArgs, result: { item: Reference; nodes: CacheNode[] } | null } | - { key: "files.getConvertableImageExtensions", input: never, result: string[] } | + { key: "files.getConvertibleImageExtensions", input: never, result: string[] } | { key: "files.getMediaData", input: LibraryArgs, result: MediaMetadata } | { key: "files.getPath", input: LibraryArgs, result: string | null } | { key: "invalidation.test-invalidate", input: never, result: number } |