From edf60394bc3ab6d75bb22dc8c7c830b9b77bf466 Mon Sep 17 00:00:00 2001 From: Jamie Pine Date: Mon, 13 Oct 2025 20:34:56 -0700 Subject: [PATCH] Add cloud volume management operations - Introduced VolumeCmd for handling cloud volume operations in the CLI. - Implemented VolumeAddCloudArgs and VolumeRemoveCloudArgs for adding and removing cloud storage volumes. - Created VolumeAddCloudAction and VolumeRemoveCloudAction for managing cloud volume actions. - Updated directory listing and file query operations to support cloud paths. - Enhanced volume backend integration to accommodate cloud storage services. --- apps/cli/src/domains/mod.rs | 1 + apps/cli/src/domains/volume/args.rs | 127 +++++++++ apps/cli/src/domains/volume/mod.rs | 65 +++++ apps/cli/src/main.rs | 5 + core/src/crypto/cloud_credentials.rs | 7 +- core/src/domain/content_identity.rs | 259 ++++++++---------- core/src/domain/entry.rs | 11 +- core/src/domain/file.rs | 1 - core/src/domain/mod.rs | 4 +- core/src/filetype/builtin.rs | 8 + core/src/filetype/definitions/calendar.toml | 48 ++++ core/src/filetype/definitions/contacts.toml | 47 ++++ core/src/filetype/definitions/email.toml | 97 +++++++ core/src/filetype/definitions/misc.toml | 96 +------ core/src/filetype/definitions/packages.toml | 185 +++++++++++++ .../filetype/definitions/presentations.toml | 111 ++++++++ core/src/filetype/definitions/shortcuts.toml | 87 ++++++ .../filetype/definitions/spreadsheets.toml | 131 +++++++++ core/src/filetype/definitions/web.toml | 110 ++++++++ core/src/filetype/registry.rs | 8 + .../src/infra/db/entities/content_identity.rs | 3 +- .../m20240101_000001_initial_schema.rs | 2 - core/src/ops/files/query/directory_listing.rs | 124 +++++---- core/src/ops/files/query/file_by_path.rs | 34 +-- core/src/ops/indexing/entry.rs | 1 - core/src/ops/indexing/phases/processing.rs | 2 - core/src/ops/media/live_photo.rs | 2 + core/src/ops/volumes/add_cloud/action.rs | 158 +++++++++++ core/src/ops/volumes/add_cloud/mod.rs | 7 + core/src/ops/volumes/add_cloud/output.rs | 22 ++ core/src/ops/volumes/mod.rs | 5 + core/src/ops/volumes/remove_cloud/action.rs | 70 +++++ core/src/ops/volumes/remove_cloud/mod.rs | 7 + core/src/ops/volumes/remove_cloud/output.rs | 16 ++ core/src/volume/backend/mod.rs | 2 +- 35 files changed, 1532 insertions(+), 331 deletions(-) create mode 100644 apps/cli/src/domains/volume/args.rs create mode 100644 apps/cli/src/domains/volume/mod.rs create mode 100644 core/src/filetype/definitions/calendar.toml create mode 100644 core/src/filetype/definitions/contacts.toml create mode 100644 core/src/filetype/definitions/email.toml create mode 100644 core/src/filetype/definitions/packages.toml create mode 100644 core/src/filetype/definitions/presentations.toml create mode 100644 core/src/filetype/definitions/shortcuts.toml create mode 100644 core/src/filetype/definitions/spreadsheets.toml create mode 100644 core/src/filetype/definitions/web.toml create mode 100644 core/src/ops/volumes/add_cloud/action.rs create mode 100644 core/src/ops/volumes/add_cloud/mod.rs create mode 100644 core/src/ops/volumes/add_cloud/output.rs create mode 100644 core/src/ops/volumes/remove_cloud/action.rs create mode 100644 core/src/ops/volumes/remove_cloud/mod.rs create mode 100644 core/src/ops/volumes/remove_cloud/output.rs diff --git a/apps/cli/src/domains/mod.rs b/apps/cli/src/domains/mod.rs index 291c788b0..0a653ec5b 100644 --- a/apps/cli/src/domains/mod.rs +++ b/apps/cli/src/domains/mod.rs @@ -8,3 +8,4 @@ pub mod logs; pub mod network; pub mod search; pub mod tag; +pub mod volume; diff --git a/apps/cli/src/domains/volume/args.rs b/apps/cli/src/domains/volume/args.rs new file mode 100644 index 000000000..f572cda6e --- /dev/null +++ b/apps/cli/src/domains/volume/args.rs @@ -0,0 +1,127 @@ +use clap::Args; +use sd_core::{ + ops::volumes::{ + add_cloud::{CloudStorageConfig, VolumeAddCloudInput}, + remove_cloud::VolumeRemoveCloudInput, + }, + volume::{backend::CloudServiceType, VolumeFingerprint}, +}; + +#[derive(Args, Debug)] +pub struct VolumeAddCloudArgs { + /// Display name for the cloud volume + pub name: String, + + /// Cloud service type + #[arg(long, value_enum)] + pub service: CloudServiceArg, + + /// S3 bucket name (for S3 service) + #[arg(long, required_if_eq("service", "s3"))] + pub bucket: Option, + + /// S3 region (for S3 service) + #[arg(long, required_if_eq("service", "s3"))] + pub region: Option, + + /// S3 access key ID (for S3 service) + #[arg(long, required_if_eq("service", "s3"))] + pub access_key_id: Option, + + /// S3 secret access key (for S3 service) + #[arg(long, required_if_eq("service", "s3"))] + pub secret_access_key: Option, + + /// Custom S3 endpoint (optional, for S3-compatible services like MinIO, R2, etc.) + #[arg(long)] + pub endpoint: Option, +} + +#[derive(clap::ValueEnum, Clone, Debug)] +pub enum CloudServiceArg { + S3, + GoogleDrive, + Dropbox, + OneDrive, + GoogleCloudStorage, + AzureBlob, + BackblazeB2, + Wasabi, + DigitalOceanSpaces, +} + +impl From for CloudServiceType { + fn from(arg: CloudServiceArg) -> Self { + match arg { + CloudServiceArg::S3 => CloudServiceType::S3, + CloudServiceArg::GoogleDrive => CloudServiceType::GoogleDrive, + CloudServiceArg::Dropbox => CloudServiceType::Dropbox, + CloudServiceArg::OneDrive => CloudServiceType::OneDrive, + CloudServiceArg::GoogleCloudStorage => CloudServiceType::GoogleCloudStorage, + CloudServiceArg::AzureBlob => CloudServiceType::AzureBlob, + CloudServiceArg::BackblazeB2 => CloudServiceType::BackblazeB2, + CloudServiceArg::Wasabi => CloudServiceType::Wasabi, + CloudServiceArg::DigitalOceanSpaces => CloudServiceType::DigitalOceanSpaces, + } + } +} + +impl VolumeAddCloudArgs { + pub fn validate_and_build(self) -> Result { + let service = CloudServiceType::from(self.service.clone()); + + let config = match self.service { + CloudServiceArg::S3 => { + let bucket = self.bucket.ok_or("--bucket is required for S3")?; + let region = self.region.ok_or("--region is required for S3")?; + let access_key_id = self + .access_key_id + .ok_or("--access-key-id is required for S3")?; + let secret_access_key = self + .secret_access_key + .ok_or("--secret-access-key is required for S3")?; + + CloudStorageConfig::S3 { + bucket, + region, + access_key_id, + secret_access_key, + endpoint: self.endpoint, + } + } + _ => { + return Err(format!( + "Service {:?} is not yet supported. Only S3 is currently available.", + self.service + )) + } + }; + + Ok(VolumeAddCloudInput { + service, + display_name: self.name, + config, + }) + } +} + +#[derive(Args, Debug)] +pub struct VolumeRemoveCloudArgs { + /// Volume fingerprint (from volume list) + pub fingerprint: String, + + /// Skip confirmation prompt + #[arg(long, short = 'y', default_value_t = false)] + pub yes: bool, +} + +impl TryFrom for VolumeRemoveCloudInput { + type Error = String; + + fn try_from(args: VolumeRemoveCloudArgs) -> Result { + let fingerprint = VolumeFingerprint::from_string(&args.fingerprint) + .map_err(|e| format!("Invalid fingerprint: {}", e))?; + + Ok(Self { fingerprint }) + } +} diff --git a/apps/cli/src/domains/volume/mod.rs b/apps/cli/src/domains/volume/mod.rs new file mode 100644 index 000000000..cd9a627df --- /dev/null +++ b/apps/cli/src/domains/volume/mod.rs @@ -0,0 +1,65 @@ +mod args; + +use anyhow::Result; +use clap::Subcommand; + +use crate::util::prelude::*; + +use crate::context::Context; +use sd_core::ops::volumes::{ + add_cloud::VolumeAddCloudOutput, remove_cloud::VolumeRemoveCloudOutput, +}; + +use self::args::*; + +#[derive(Subcommand, Debug)] +pub enum VolumeCmd { + /// Add a cloud storage volume to the library + AddCloud(VolumeAddCloudArgs), + /// Remove a cloud storage volume from the library + RemoveCloud(VolumeRemoveCloudArgs), +} + +pub async fn run(ctx: &Context, cmd: VolumeCmd) -> Result<()> { + match cmd { + VolumeCmd::AddCloud(args) => { + let display_name = args.name.clone(); + let service = format!("{:?}", args.service); + + let input = args.validate_and_build().map_err(|e| anyhow::anyhow!(e))?; + + let out: VolumeAddCloudOutput = execute_action!(ctx, input); + + print_output!(ctx, &out, |o: &VolumeAddCloudOutput| { + println!( + "Added cloud volume '{}' ({})", + o.volume_name, + o.fingerprint.short_id() + ); + println!("Service: {:?}", o.service); + println!("Fingerprint: {}", o.fingerprint); + }); + } + VolumeCmd::RemoveCloud(args) => { + let fingerprint_display = args.fingerprint.clone(); + + confirm_or_abort( + &format!( + "This will remove cloud volume {} from the library. Credentials will be deleted. Continue?", + fingerprint_display + ), + args.yes, + )?; + + let input: sd_core::ops::volumes::remove_cloud::VolumeRemoveCloudInput = + args.try_into().map_err(|e: String| anyhow::anyhow!(e))?; + + let out: VolumeRemoveCloudOutput = execute_action!(ctx, input); + + print_output!(ctx, &out, |o: &VolumeRemoveCloudOutput| { + println!("Removed cloud volume {}", o.fingerprint); + }); + } + } + Ok(()) +} diff --git a/apps/cli/src/main.rs b/apps/cli/src/main.rs index e6736db0e..18a0c35d9 100644 --- a/apps/cli/src/main.rs +++ b/apps/cli/src/main.rs @@ -57,6 +57,7 @@ use crate::domains::{ network::{self, NetworkCmd}, search::{self, SearchCmd}, tag::{self, TagCmd}, + volume::{self, VolumeCmd}, }; // OutputFormat is defined in context.rs and shared across domains @@ -195,6 +196,9 @@ enum Commands { /// Tag operations #[command(subcommand)] Tag(TagCmd), + /// Volume operations + #[command(subcommand)] + Volume(VolumeCmd), } #[tokio::main] @@ -658,6 +662,7 @@ async fn run_client_command( Commands::Logs(cmd) => logs::run(&ctx, cmd).await?, Commands::Search(cmd) => search::run(&ctx, cmd).await?, Commands::Tag(cmd) => tag::run(&ctx, cmd).await?, + Commands::Volume(cmd) => volume::run(&ctx, cmd).await?, _ => {} // Start and Stop are handled in main } Ok(()) diff --git a/core/src/crypto/cloud_credentials.rs b/core/src/crypto/cloud_credentials.rs index c74127c20..722b57d0e 100644 --- a/core/src/crypto/cloud_credentials.rs +++ b/core/src/crypto/cloud_credentials.rs @@ -14,6 +14,7 @@ use thiserror::Error; use uuid::Uuid; use super::library_key_manager::LibraryKeyManager; +use std::sync::Arc; const KEYRING_SERVICE: &str = "SpacedriveCloudCredentials"; @@ -43,11 +44,11 @@ pub enum CloudCredentialError { /// Manages cloud service credentials encrypted with library keys pub struct CloudCredentialManager { - library_key_manager: LibraryKeyManager, + library_key_manager: Arc, } impl CloudCredentialManager { - pub fn new(library_key_manager: LibraryKeyManager) -> Self { + pub fn new(library_key_manager: Arc) -> Self { Self { library_key_manager, } @@ -274,7 +275,7 @@ mod tests { #[test] fn test_encrypt_decrypt_credential() { - let library_key_manager = LibraryKeyManager::new().unwrap(); + let library_key_manager = Arc::new(LibraryKeyManager::new().unwrap()); let manager = CloudCredentialManager::new(library_key_manager); let library_id = Uuid::new_v4(); diff --git a/core/src/domain/content_identity.rs b/core/src/domain/content_identity.rs index 72c0931a1..ce786ea41 100644 --- a/core/src/domain/content_identity.rs +++ b/core/src/domain/content_identity.rs @@ -10,8 +10,24 @@ use serde_json::Value as JsonValue; use specta::Type; use uuid::Uuid; +use crate::infra::db::entities::*; use crate::volume::VolumeBackend; +/// Domain representation of content identity +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +pub struct ContentIdentity { + pub uuid: Uuid, + pub kind: ContentKind, + pub content_hash: String, + pub integrity_hash: Option, + pub mime_type_id: Option, + pub text_content: Option, + pub total_size: i64, + pub entry_count: i32, + pub first_seen_at: DateTime, + pub last_verified_at: DateTime, +} + /// Type of content #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, IntEnum, Type)] #[serde(rename_all = "snake_case")] @@ -34,138 +50,36 @@ pub enum ContentKind { Key = 14, Executable = 15, Binary = 16, + Spreadsheet = 17, + Presentation = 18, + Email = 19, + Calendar = 20, + Contact = 21, + Web = 22, + Shortcut = 23, + Package = 24, + ModelEntry = 25, } -impl std::fmt::Display for ContentKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let s = match self { - ContentKind::Unknown => "unknown", - ContentKind::Image => "image", - ContentKind::Video => "video", - ContentKind::Audio => "audio", - ContentKind::Document => "document", - ContentKind::Archive => "archive", - ContentKind::Code => "code", - ContentKind::Text => "text", - ContentKind::Database => "database", - ContentKind::Book => "book", - ContentKind::Font => "font", - ContentKind::Mesh => "mesh", - ContentKind::Config => "config", - ContentKind::Encrypted => "encrypted", - ContentKind::Key => "key", - ContentKind::Executable => "executable", - ContentKind::Binary => "binary", - }; - write!(f, "{}", s) - } -} - -impl From<&str> for ContentKind { - fn from(name: &str) -> Self { - match name { - "image" => ContentKind::Image, - "video" => ContentKind::Video, - "audio" => ContentKind::Audio, - "document" => ContentKind::Document, - "archive" => ContentKind::Archive, - "code" => ContentKind::Code, - "text" => ContentKind::Text, - "database" => ContentKind::Database, - "book" => ContentKind::Book, - "font" => ContentKind::Font, - "mesh" => ContentKind::Mesh, - "config" => ContentKind::Config, - "encrypted" => ContentKind::Encrypted, - "key" => ContentKind::Key, - "executable" => ContentKind::Executable, - "binary" => ContentKind::Binary, - _ => ContentKind::Unknown, +// Translate database entity into domain model +impl From for ContentIdentity { + fn from(model: content_identity::Model) -> Self { + Self { + uuid: model.uuid.unwrap_or_else(Uuid::new_v4), + kind: ContentKind::try_from(model.kind_id).unwrap_or(ContentKind::Unknown), + content_hash: model.content_hash, + integrity_hash: model.integrity_hash, + mime_type_id: model.mime_type_id, + text_content: model.text_content, + total_size: model.total_size, + entry_count: model.entry_count, + first_seen_at: model.first_seen_at, + last_verified_at: model.last_verified_at, } } } -impl From for ContentKind { - fn from(name: String) -> Self { - Self::from(name.as_str()) - } -} - -/// Media-specific metadata -#[derive(Debug, Clone, Serialize, Deserialize, Type)] -pub struct MediaData { - /// Width in pixels (for images/video) - pub width: Option, - - /// Height in pixels (for images/video) - pub height: Option, - - /// Duration in seconds (for audio/video) - pub duration: Option, - - /// Bitrate in bits per second - pub bitrate: Option, - - /// Frame rate (for video) - pub fps: Option, - - /// EXIF data (for images) - pub exif: Option, - - /// Additional metadata as JSON - pub extra: JsonValue, -} - -/// EXIF metadata for images -#[derive(Debug, Clone, Serialize, Deserialize, Type)] -pub struct ExifData { - /// Camera make - pub make: Option, - - /// Camera model - pub model: Option, - - /// Date taken - pub date_taken: Option>, - - /// GPS coordinates - pub gps: Option, - - /// ISO speed - pub iso: Option, - - /// Aperture (f-stop) - pub aperture: Option, - - /// Shutter speed in seconds - pub shutter_speed: Option, - - /// Focal length in mm - pub focal_length: Option, -} - -/// GPS coordinates -#[derive(Debug, Clone, Serialize, Deserialize, Type)] -pub struct GpsCoordinates { - pub latitude: f64, - pub longitude: f64, - pub altitude: Option, -} - impl ContentKind { - /// Determine content kind from MIME type - pub fn from_mime_type(mime_type: &str) -> Self { - match mime_type.split('/').next() { - Some("image") => ContentKind::Image, - Some("video") => ContentKind::Video, - Some("audio") => ContentKind::Audio, - Some("text") => ContentKind::Text, - _ if mime_type.contains("pdf") => ContentKind::Document, - _ if mime_type.contains("zip") || mime_type.contains("tar") => ContentKind::Archive, - _ => ContentKind::Unknown, - } - } - /// Get content kind from file type pub fn from_file_type(file_type: &crate::filetype::FileType) -> Self { file_type.category @@ -213,7 +127,7 @@ impl ContentHashGenerator { hasher.finalize().to_hex()[..16].to_string() } - /// Generate content hash using a volume backend (supports cloud storage) + /// Generate content hash using a volume backend /// /// This uses the same sampling algorithm but works with any VolumeBackend, /// enabling efficient content hashing for cloud files without full downloads. @@ -321,34 +235,75 @@ pub enum ContentHashError { FileTooLarge, } -/// Domain representation of content identity -#[derive(Debug, Clone, Serialize, Deserialize, Type)] -pub struct ContentIdentity { - pub uuid: Uuid, - pub kind: ContentKind, - pub hash: String, - pub media_data: Option, - pub created_at: DateTime, +impl std::fmt::Display for ContentKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + ContentKind::Unknown => "unknown", + ContentKind::Image => "image", + ContentKind::Video => "video", + ContentKind::Audio => "audio", + ContentKind::Document => "document", + ContentKind::Archive => "archive", + ContentKind::Code => "code", + ContentKind::Text => "text", + ContentKind::Database => "database", + ContentKind::Book => "book", + ContentKind::Font => "font", + ContentKind::Mesh => "mesh", + ContentKind::Config => "config", + ContentKind::Encrypted => "encrypted", + ContentKind::Key => "key", + ContentKind::Executable => "executable", + ContentKind::Binary => "binary", + ContentKind::Spreadsheet => "spreadsheet", + ContentKind::Presentation => "presentation", + ContentKind::Email => "email", + ContentKind::Calendar => "calendar", + ContentKind::Contact => "contact", + ContentKind::Web => "web", + ContentKind::Shortcut => "shortcut", + ContentKind::Package => "package", + ContentKind::ModelEntry => "model_entry", + }; + write!(f, "{}", s) + } } -impl From for ContentIdentity { - fn from(model: crate::infra::db::entities::content_identity::Model) -> Self { - Self { - uuid: model.uuid.unwrap_or_else(Uuid::new_v4), - kind: ContentKind::Unknown, // TODO: Implement proper conversion from kind_id - hash: model.content_hash, - media_data: model.media_data.map(|json| { - serde_json::from_value(json).unwrap_or_else(|_| MediaData { - width: None, - height: None, - duration: None, - bitrate: None, - fps: None, - exif: None, - extra: serde_json::Value::Null, - }) - }), - created_at: model.first_seen_at, +impl From<&str> for ContentKind { + fn from(name: &str) -> Self { + match name { + "image" => ContentKind::Image, + "video" => ContentKind::Video, + "audio" => ContentKind::Audio, + "document" => ContentKind::Document, + "archive" => ContentKind::Archive, + "code" => ContentKind::Code, + "text" => ContentKind::Text, + "database" => ContentKind::Database, + "book" => ContentKind::Book, + "font" => ContentKind::Font, + "mesh" => ContentKind::Mesh, + "config" => ContentKind::Config, + "encrypted" => ContentKind::Encrypted, + "key" => ContentKind::Key, + "executable" => ContentKind::Executable, + "binary" => ContentKind::Binary, + "spreadsheet" => ContentKind::Spreadsheet, + "presentation" => ContentKind::Presentation, + "email" => ContentKind::Email, + "calendar" => ContentKind::Calendar, + "contact" => ContentKind::Contact, + "web" => ContentKind::Web, + "shortcut" => ContentKind::Shortcut, + "package" => ContentKind::Package, + "model_entry" => ContentKind::ModelEntry, + _ => ContentKind::Unknown, } } } + +impl From for ContentKind { + fn from(name: String) -> Self { + Self::from(name.as_str()) + } +} diff --git a/core/src/domain/entry.rs b/core/src/domain/entry.rs index e27b13ef0..9687922eb 100644 --- a/core/src/domain/entry.rs +++ b/core/src/domain/entry.rs @@ -90,7 +90,10 @@ impl SdPathSerialized { device_id: *device_id, path: path.to_string_lossy().to_string(), }), - SdPath::Cloud { .. } => None, // Can't serialize cloud paths to this format + SdPath::Cloud { volume_id, path } => Some(Self { + device_id: *volume_id, // Use volume_id as device_id for cloud paths + path: path.clone(), + }), SdPath::Content { .. } => None, // Can't serialize content paths to this format } } @@ -184,11 +187,7 @@ impl TryFrom<(crate::infra::db::entities::entry::Model, SdPath)> for Entry { ) -> Result { let device_uuid = match &parent_sd_path { SdPath::Physical { device_id, .. } => *device_id, - SdPath::Cloud { .. } => { - return Err(anyhow::anyhow!( - "Cloud storage paths not yet supported for directory listing" - )) - } + SdPath::Cloud { volume_id, .. } => *volume_id, SdPath::Content { .. } => { return Err(anyhow::anyhow!( "Content-addressed paths not supported for directory listing" diff --git a/core/src/domain/file.rs b/core/src/domain/file.rs index e139cd7d6..93ceac0d6 100644 --- a/core/src/domain/file.rs +++ b/core/src/domain/file.rs @@ -347,7 +347,6 @@ mod tests { uuid: Uuid::new_v4(), kind: ContentKind::Image, hash: "abc123".to_string(), - media_data: None, created_at: Utc::now(), }; diff --git a/core/src/domain/mod.rs b/core/src/domain/mod.rs index 0c6bc9bc1..40bfe3d41 100644 --- a/core/src/domain/mod.rs +++ b/core/src/domain/mod.rs @@ -17,9 +17,7 @@ pub mod volume; // Re-export commonly used types pub use addressing::{PathResolutionError, SdPath, SdPathBatch, SdPathParseError}; -pub use content_identity::{ - ContentHashError, ContentHashGenerator, ContentIdentity, ContentKind, MediaData, -}; +pub use content_identity::{ContentHashError, ContentHashGenerator, ContentIdentity, ContentKind}; pub use device::{Device, OperatingSystem}; pub use entry::{Entry, EntryKind, SdPathSerialized}; pub use file::{File, FileConstructionData, Sidecar}; diff --git a/core/src/filetype/builtin.rs b/core/src/filetype/builtin.rs index 9f8f0c2f4..c63a340b8 100644 --- a/core/src/filetype/builtin.rs +++ b/core/src/filetype/builtin.rs @@ -13,6 +13,14 @@ pub static BUILTIN_DEFINITIONS: Lazy> = Lazy::new(|| { include_str!("definitions/documents.toml"), include_str!("definitions/code.toml"), include_str!("definitions/archives.toml"), + include_str!("definitions/spreadsheets.toml"), + include_str!("definitions/presentations.toml"), + include_str!("definitions/email.toml"), + include_str!("definitions/calendar.toml"), + include_str!("definitions/contacts.toml"), + include_str!("definitions/web.toml"), + include_str!("definitions/shortcuts.toml"), + include_str!("definitions/packages.toml"), include_str!("definitions/misc.toml"), ] }); diff --git a/core/src/filetype/definitions/calendar.toml b/core/src/filetype/definitions/calendar.toml new file mode 100644 index 000000000..9fdcc0ee6 --- /dev/null +++ b/core/src/filetype/definitions/calendar.toml @@ -0,0 +1,48 @@ +# Calendar file type definitions + +[[file_types]] +id = "text/calendar" +name = "iCalendar" +extensions = ["ics", "ical", "ifb", "icalendar"] +mime_types = ["text/calendar"] +uti = "com.apple.ical.ics" +category = "calendar" +priority = 100 + +[file_types.metadata] +text_file = true + +[[file_types]] +id = "text/x-vcalendar" +name = "vCalendar" +extensions = ["vcs"] +mime_types = ["text/x-vcalendar"] +category = "calendar" +priority = 90 + +[file_types.metadata] +text_file = true +legacy = true + +[[file_types]] +id = "application/vnd.google-apps.calendar" +name = "Google Calendar" +extensions = ["gcalendar"] +mime_types = ["application/vnd.google-apps.calendar"] +category = "calendar" +priority = 100 + +[file_types.metadata] +google_workspace = true +cloud_native = true + +[[file_types]] +id = "application/x-outlook-calendar" +name = "Outlook Calendar Item" +extensions = ["icalendar"] +mime_types = ["text/calendar"] +category = "calendar" +priority = 95 + +[file_types.metadata] +outlook = true diff --git a/core/src/filetype/definitions/contacts.toml b/core/src/filetype/definitions/contacts.toml new file mode 100644 index 000000000..4f242de9a --- /dev/null +++ b/core/src/filetype/definitions/contacts.toml @@ -0,0 +1,47 @@ +# Contact file type definitions + +[[file_types]] +id = "text/vcard" +name = "vCard" +extensions = ["vcf", "vcard"] +mime_types = ["text/vcard", "text/x-vcard"] +uti = "public.vcard" +category = "contact" +priority = 100 + +[file_types.metadata] +text_file = true + +[[file_types]] +id = "text/directory" +name = "vCard Directory" +extensions = ["vcf"] +mime_types = ["text/directory"] +category = "contact" +priority = 95 + +[file_types.metadata] +text_file = true + +[[file_types]] +id = "application/vnd.apple.contacts" +name = "Apple Contacts" +extensions = ["abcdp"] +mime_types = ["application/vnd.apple.contacts"] +category = "contact" +priority = 100 + +[file_types.metadata] +apple = true + +[[file_types]] +id = "application/x-ldif" +name = "LDAP Data Interchange Format" +extensions = ["ldif"] +mime_types = ["application/x-ldif"] +category = "contact" +priority = 90 + +[file_types.metadata] +text_file = true +directory_service = true diff --git a/core/src/filetype/definitions/email.toml b/core/src/filetype/definitions/email.toml new file mode 100644 index 000000000..6c4c0f90e --- /dev/null +++ b/core/src/filetype/definitions/email.toml @@ -0,0 +1,97 @@ +# Email file type definitions + +[[file_types]] +id = "message/rfc822" +name = "Email Message" +extensions = ["eml", "emlx"] +mime_types = ["message/rfc822"] +category = "email" +priority = 100 + +[file_types.metadata] +text_based = true + +[[file_types]] +id = "application/vnd.ms-outlook" +name = "Outlook Message" +extensions = ["msg"] +mime_types = ["application/vnd.ms-outlook"] +uti = "com.microsoft.outlook.msg" +category = "email" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "D0 CF 11 E0 A1 B1 1A E1" +offset = 0 +priority = 100 + +[[file_types]] +id = "application/mbox" +name = "MBOX Mailbox" +extensions = ["mbox"] +mime_types = ["application/mbox"] +category = "email" +priority = 100 + +[file_types.metadata] +text_based = true +container = true + +[[file_types]] +id = "application/vnd.ms-outlook.pst" +name = "Outlook Personal Folders" +extensions = ["pst"] +mime_types = ["application/vnd.ms-outlook.pst"] +category = "email" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "21 42 44 4E" +offset = 0 +priority = 100 + +[file_types.metadata] +container = true +database = true + +[[file_types]] +id = "application/vnd.ms-outlook.ost" +name = "Outlook Offline Folders" +extensions = ["ost"] +mime_types = ["application/vnd.ms-outlook.ost"] +category = "email" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "21 42 44 4E" +offset = 0 +priority = 100 + +[file_types.metadata] +container = true +database = true + +[[file_types]] +id = "application/vnd.apple.mail" +name = "Apple Mail" +extensions = ["mailbundle"] +mime_types = ["application/vnd.apple.mail"] +category = "email" +priority = 100 + +[file_types.metadata] +apple = true +bundle = true + +[[file_types]] +id = "application/x-gmail-archive" +name = "Gmail Archive" +extensions = ["mbox"] +mime_types = ["application/x-gmail-archive"] +category = "email" +priority = 95 + +[file_types.metadata] +text_based = true +container = true +google = true diff --git a/core/src/filetype/definitions/misc.toml b/core/src/filetype/definitions/misc.toml index 80cc50c28..86d48c9f9 100644 --- a/core/src/filetype/definitions/misc.toml +++ b/core/src/filetype/definitions/misc.toml @@ -59,75 +59,6 @@ pattern = "4D 5A" offset = 0 priority = 100 -[[file_types]] -id = "application/x-apple-diskimage" -name = "Apple Disk Image" -extensions = ["dmg"] -mime_types = ["application/x-apple-diskimage"] -uti = "com.apple.disk-image" -category = "executable" -priority = 100 - -[[file_types]] -id = "application/vnd.android.package-archive" -name = "Android Package" -extensions = ["apk"] -mime_types = ["application/vnd.android.package-archive"] -category = "executable" -priority = 100 - -[[file_types.magic_bytes]] -pattern = "50 4B 03 04" -offset = 0 -priority = 90 - -[[file_types]] -id = "application/x-debian-package" -name = "Debian Package" -extensions = ["deb"] -mime_types = ["application/x-debian-package", "application/vnd.debian.binary-package"] -category = "executable" -priority = 100 - -[[file_types.magic_bytes]] -pattern = "21 3C 61 72 63 68 3E" -offset = 0 -priority = 100 - -[[file_types]] -id = "application/x-redhat-package" -name = "RPM Package" -extensions = ["rpm"] -mime_types = ["application/x-rpm", "application/x-redhat-package-manager"] -category = "executable" -priority = 100 - -[[file_types.magic_bytes]] -pattern = "ED AB EE DB" -offset = 0 -priority = 100 - -[[file_types]] -id = "application/x-apple-installer" -name = "macOS Package" -extensions = ["pkg"] -mime_types = ["application/x-apple-installer"] -category = "executable" -priority = 100 - -[[file_types]] -id = "application/x-msi" -name = "Windows Installer" -extensions = ["msi"] -mime_types = ["application/x-msi", "application/x-msdownload"] -category = "executable" -priority = 100 - -[[file_types.magic_bytes]] -pattern = "D0 CF 11 E0 A1 B1 1A E1" -offset = 0 -priority = 90 - [[file_types]] id = "application/java-archive" name = "Java Archive" @@ -157,18 +88,6 @@ priority = 80 text_file = true windows_script = true -[[file_types]] -id = "application/x-apple-application" -name = "macOS Application" -extensions = ["app"] -mime_types = ["application/x-apple-application"] -uti = "com.apple.application-bundle" -category = "executable" -priority = 100 - -[file_types.metadata] -bundle = true - # Fonts [[file_types]] id = "font/ttf" @@ -377,19 +296,6 @@ priority = 80 text_file = true feed = true -[[file_types]] -id = "text/csv" -name = "CSV" -extensions = ["csv"] -mime_types = ["text/csv"] -uti = "public.comma-separated-values-text" -category = "config" -priority = 70 - -[file_types.metadata] -text_file = true -tabular = true - [[file_types]] id = "application/x-config" name = "Configuration File" @@ -523,4 +429,4 @@ name = "Apple Keychain" extensions = ["keychain"] mime_types = ["application/x-apple-keychain"] category = "key" -priority = 100 \ No newline at end of file +priority = 100 diff --git a/core/src/filetype/definitions/packages.toml b/core/src/filetype/definitions/packages.toml new file mode 100644 index 000000000..fa49328eb --- /dev/null +++ b/core/src/filetype/definitions/packages.toml @@ -0,0 +1,185 @@ +# Package and installer file type definitions + +[[file_types]] +id = "application/x-debian-package" +name = "Debian Package" +extensions = ["deb"] +mime_types = ["application/x-debian-package", "application/vnd.debian.binary-package"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "21 3C 61 72 63 68 3E" +offset = 0 +priority = 100 + +[file_types.metadata] +linux = true +installer = true + +[[file_types]] +id = "application/x-redhat-package" +name = "RPM Package" +extensions = ["rpm"] +mime_types = ["application/x-rpm", "application/x-redhat-package-manager"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "ED AB EE DB" +offset = 0 +priority = 100 + +[file_types.metadata] +linux = true +installer = true + +[[file_types]] +id = "application/x-apple-installer" +name = "macOS Package" +extensions = ["pkg"] +mime_types = ["application/x-apple-installer"] +uti = "com.apple.installer-package-archive" +category = "package" +priority = 100 + +[file_types.metadata] +apple = true +installer = true + +[[file_types]] +id = "application/x-apple-diskimage" +name = "Apple Disk Image" +extensions = ["dmg"] +mime_types = ["application/x-apple-diskimage"] +uti = "com.apple.disk-image" +category = "package" +priority = 100 + +[file_types.metadata] +apple = true +installer = true +disk_image = true + +[[file_types]] +id = "application/x-msi" +name = "Windows Installer" +extensions = ["msi"] +mime_types = ["application/x-msi", "application/x-msdownload"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "D0 CF 11 E0 A1 B1 1A E1" +offset = 0 +priority = 90 + +[file_types.metadata] +windows = true +installer = true + +[[file_types]] +id = "application/vnd.android.package-archive" +name = "Android Package" +extensions = ["apk"] +mime_types = ["application/vnd.android.package-archive"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[file_types.metadata] +android = true +installer = true + +[[file_types]] +id = "application/x-xpinstall" +name = "Firefox Extension" +extensions = ["xpi"] +mime_types = ["application/x-xpinstall"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[file_types.metadata] +firefox = true +extension = true + +[[file_types]] +id = "application/x-chrome-extension" +name = "Chrome Extension" +extensions = ["crx"] +mime_types = ["application/x-chrome-extension"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "43 72 32 34" +offset = 0 +priority = 100 + +[file_types.metadata] +chrome = true +extension = true + +[[file_types]] +id = "application/vnd.snap" +name = "Snap Package" +extensions = ["snap"] +mime_types = ["application/vnd.snap"] +category = "package" +priority = 100 + +[file_types.metadata] +linux = true +installer = true + +[[file_types]] +id = "application/x-flatpak" +name = "Flatpak Package" +extensions = ["flatpak", "flatpakref"] +mime_types = ["application/x-flatpak"] +category = "package" +priority = 100 + +[file_types.metadata] +linux = true +installer = true + +[[file_types]] +id = "application/vnd.microsoft.portable-executable" +name = "Windows Installer (MSI)" +extensions = ["msix", "appx"] +mime_types = ["application/vnd.microsoft.portable-executable"] +category = "package" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[file_types.metadata] +windows = true +installer = true +uwp = true + +[[file_types]] +id = "application/x-apple-bundle" +name = "macOS Application Bundle" +extensions = ["app"] +mime_types = ["application/x-apple-application"] +uti = "com.apple.application-bundle" +category = "package" +priority = 100 + +[file_types.metadata] +apple = true +bundle = true diff --git a/core/src/filetype/definitions/presentations.toml b/core/src/filetype/definitions/presentations.toml new file mode 100644 index 000000000..73bfa4d91 --- /dev/null +++ b/core/src/filetype/definitions/presentations.toml @@ -0,0 +1,111 @@ +# Presentation file type definitions + +[[file_types]] +id = "application/vnd.ms-powerpoint" +name = "Microsoft PowerPoint" +extensions = ["ppt"] +mime_types = ["application/vnd.ms-powerpoint"] +uti = "com.microsoft.powerpoint.ppt" +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "D0 CF 11 E0 A1 B1 1A E1" +offset = 0 +priority = 100 + +[[file_types]] +id = "application/vnd.openxmlformats-officedocument.presentationml.presentation" +name = "Microsoft PowerPoint (OpenXML)" +extensions = ["pptx"] +mime_types = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] +uti = "org.openxmlformats.presentationml.presentation" +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.openxmlformats-officedocument.presentationml.template" +name = "PowerPoint Template (OpenXML)" +extensions = ["potx"] +mime_types = ["application/vnd.openxmlformats-officedocument.presentationml.template"] +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.ms-powerpoint.presentation.macroEnabled.12" +name = "PowerPoint Macro-Enabled" +extensions = ["pptm"] +mime_types = ["application/vnd.ms-powerpoint.presentation.macroEnabled.12"] +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.openxmlformats-officedocument.presentationml.slideshow" +name = "PowerPoint Slideshow" +extensions = ["ppsx"] +mime_types = ["application/vnd.openxmlformats-officedocument.presentationml.slideshow"] +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.oasis.opendocument.presentation" +name = "OpenDocument Presentation" +extensions = ["odp"] +mime_types = ["application/vnd.oasis.opendocument.presentation"] +uti = "org.oasis-open.opendocument.presentation" +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/x-iwork-keynote-sffkey" +name = "Apple Keynote" +extensions = ["key"] +mime_types = ["application/x-iwork-keynote-sffkey"] +uti = "com.apple.iwork.keynote.key" +category = "presentation" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[file_types.metadata] +iwork = true + +[[file_types]] +id = "application/vnd.google-apps.presentation" +name = "Google Slides" +extensions = ["gslides"] +mime_types = ["application/vnd.google-apps.presentation"] +category = "presentation" +priority = 100 + +[file_types.metadata] +google_workspace = true +cloud_native = true diff --git a/core/src/filetype/definitions/shortcuts.toml b/core/src/filetype/definitions/shortcuts.toml new file mode 100644 index 000000000..e623a6bf2 --- /dev/null +++ b/core/src/filetype/definitions/shortcuts.toml @@ -0,0 +1,87 @@ +# Shortcut and link file type definitions + +[[file_types]] +id = "application/x-ms-shortcut" +name = "Windows Shortcut" +extensions = ["lnk"] +mime_types = ["application/x-ms-shortcut"] +uti = "com.microsoft.windows-shortcut" +category = "shortcut" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "4C 00 00 00 01 14 02 00" +offset = 0 +priority = 100 + +[[file_types]] +id = "application/internet-shortcut" +name = "Windows Internet Shortcut" +extensions = ["url"] +mime_types = ["application/internet-shortcut"] +category = "shortcut" +priority = 100 + +[file_types.metadata] +text_file = true +internet = true + +[[file_types]] +id = "application/x-apple-alias" +name = "macOS Alias" +extensions = ["alias"] +mime_types = ["application/x-apple-alias"] +uti = "com.apple.alias-file" +category = "shortcut" +priority = 100 + +[file_types.metadata] +apple = true + +[[file_types]] +id = "application/x-apple-webloc" +name = "macOS Web Location" +extensions = ["webloc"] +mime_types = ["application/x-apple-webloc"] +uti = "com.apple.web-internet-location" +category = "shortcut" +priority = 100 + +[file_types.metadata] +apple = true +internet = true + +[[file_types]] +id = "application/x-desktop" +name = "Linux Desktop Entry" +extensions = ["desktop"] +mime_types = ["application/x-desktop"] +category = "shortcut" +priority = 100 + +[file_types.metadata] +text_file = true +linux = true + +[[file_types]] +id = "inode/symlink" +name = "Symbolic Link" +extensions = [] +mime_types = ["inode/symlink"] +category = "shortcut" +priority = 100 + +[file_types.metadata] +filesystem = true + +[[file_types]] +id = "application/x-wine-extension-ini" +name = "Wine Desktop Entry" +extensions = ["lnk"] +mime_types = ["application/x-wine-extension-ini"] +category = "shortcut" +priority = 90 + +[file_types.metadata] +text_file = true +wine = true diff --git a/core/src/filetype/definitions/spreadsheets.toml b/core/src/filetype/definitions/spreadsheets.toml new file mode 100644 index 000000000..4f871c9b2 --- /dev/null +++ b/core/src/filetype/definitions/spreadsheets.toml @@ -0,0 +1,131 @@ +# Spreadsheet file type definitions + +[[file_types]] +id = "application/vnd.ms-excel" +name = "Microsoft Excel" +extensions = ["xls"] +mime_types = ["application/vnd.ms-excel"] +uti = "com.microsoft.excel.xls" +category = "spreadsheet" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "D0 CF 11 E0 A1 B1 1A E1" +offset = 0 +priority = 100 + +[[file_types]] +id = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" +name = "Microsoft Excel (OpenXML)" +extensions = ["xlsx"] +mime_types = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] +uti = "org.openxmlformats.spreadsheetml.sheet" +category = "spreadsheet" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.openxmlformats-officedocument.spreadsheetml.template" +name = "Excel Template (OpenXML)" +extensions = ["xltx"] +mime_types = ["application/vnd.openxmlformats-officedocument.spreadsheetml.template"] +category = "spreadsheet" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.ms-excel.sheet.macroEnabled.12" +name = "Excel Macro-Enabled" +extensions = ["xlsm"] +mime_types = ["application/vnd.ms-excel.sheet.macroEnabled.12"] +category = "spreadsheet" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/vnd.ms-excel.sheet.binary.macroEnabled.12" +name = "Excel Binary Workbook" +extensions = ["xlsb"] +mime_types = ["application/vnd.ms-excel.sheet.binary.macroEnabled.12"] +category = "spreadsheet" +priority = 100 + +[[file_types]] +id = "application/vnd.oasis.opendocument.spreadsheet" +name = "OpenDocument Spreadsheet" +extensions = ["ods"] +mime_types = ["application/vnd.oasis.opendocument.spreadsheet"] +uti = "org.oasis-open.opendocument.spreadsheet" +category = "spreadsheet" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[[file_types]] +id = "application/x-iwork-numbers-sffnumbers" +name = "Apple Numbers" +extensions = ["numbers"] +mime_types = ["application/x-iwork-numbers-sffnumbers"] +uti = "com.apple.iwork.numbers.numbers" +category = "spreadsheet" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "50 4B 03 04" +offset = 0 +priority = 90 + +[file_types.metadata] +iwork = true + +[[file_types]] +id = "text/csv" +name = "CSV" +extensions = ["csv"] +mime_types = ["text/csv"] +uti = "public.comma-separated-values-text" +category = "spreadsheet" +priority = 90 + +[file_types.metadata] +text_file = true +tabular = true + +[[file_types]] +id = "text/tab-separated-values" +name = "TSV" +extensions = ["tsv"] +mime_types = ["text/tab-separated-values"] +category = "spreadsheet" +priority = 90 + +[file_types.metadata] +text_file = true +tabular = true + +[[file_types]] +id = "application/vnd.google-apps.spreadsheet" +name = "Google Sheets" +extensions = ["gsheet"] +mime_types = ["application/vnd.google-apps.spreadsheet"] +category = "spreadsheet" +priority = 100 + +[file_types.metadata] +google_workspace = true +cloud_native = true diff --git a/core/src/filetype/definitions/web.toml b/core/src/filetype/definitions/web.toml new file mode 100644 index 000000000..b59d9f180 --- /dev/null +++ b/core/src/filetype/definitions/web.toml @@ -0,0 +1,110 @@ +# Web file type definitions + +[[file_types]] +id = "text/html" +name = "HTML Document" +extensions = ["html", "htm"] +mime_types = ["text/html"] +uti = "public.html" +category = "web" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "3C 21 44 4F 43 54 59 50 45 20 68 74 6D 6C" +offset = 0 +priority = 100 + +[file_types.metadata] +text_file = true +markup = true + +[[file_types]] +id = "application/xhtml+xml" +name = "XHTML Document" +extensions = ["xhtml", "xht"] +mime_types = ["application/xhtml+xml"] +uti = "public.xhtml" +category = "web" +priority = 100 + +[[file_types.magic_bytes]] +pattern = "3C 3F 78 6D 6C" +offset = 0 +priority = 90 + +[file_types.metadata] +text_file = true +markup = true + +[[file_types]] +id = "message/rfc822-html" +name = "MHTML Web Archive" +extensions = ["mhtml", "mht"] +mime_types = ["message/rfc822", "multipart/related"] +category = "web" +priority = 100 + +[file_types.metadata] +archive = true +web_archive = true + +[[file_types]] +id = "application/x-webarchive" +name = "Safari Web Archive" +extensions = ["webarchive"] +mime_types = ["application/x-webarchive"] +uti = "com.apple.webarchive" +category = "web" +priority = 100 + +[file_types.metadata] +apple = true +archive = true +web_archive = true + +[[file_types]] +id = "application/x-httpd-php" +name = "PHP Script" +extensions = ["php", "phtml"] +mime_types = ["application/x-httpd-php", "text/x-php"] +category = "web" +priority = 90 + +[file_types.metadata] +text_file = true +server_side = true + +[[file_types]] +id = "text/asp" +name = "ASP Script" +extensions = ["asp", "aspx"] +mime_types = ["text/asp"] +category = "web" +priority = 90 + +[file_types.metadata] +text_file = true +server_side = true + +[[file_types]] +id = "application/x-jsp" +name = "JavaServer Pages" +extensions = ["jsp"] +mime_types = ["application/x-jsp"] +category = "web" +priority = 90 + +[file_types.metadata] +text_file = true +server_side = true + +[[file_types]] +id = "text/x-component" +name = "Web Component" +extensions = ["htc"] +mime_types = ["text/x-component"] +category = "web" +priority = 80 + +[file_types.metadata] +text_file = true diff --git a/core/src/filetype/registry.rs b/core/src/filetype/registry.rs index 83553789c..98653c9ce 100644 --- a/core/src/filetype/registry.rs +++ b/core/src/filetype/registry.rs @@ -331,6 +331,14 @@ impl FileTypeRegistry { "config" => ContentKind::Config, "encrypted" => ContentKind::Encrypted, "key" => ContentKind::Key, + "spreadsheet" => ContentKind::Spreadsheet, + "presentation" => ContentKind::Presentation, + "email" => ContentKind::Email, + "calendar" => ContentKind::Calendar, + "contact" => ContentKind::Contact, + "web" => ContentKind::Web, + "shortcut" => ContentKind::Shortcut, + "package" => ContentKind::Package, _ => ContentKind::Unknown, }; diff --git a/core/src/infra/db/entities/content_identity.rs b/core/src/infra/db/entities/content_identity.rs index 07f488780..4cac54fd7 100644 --- a/core/src/infra/db/entities/content_identity.rs +++ b/core/src/infra/db/entities/content_identity.rs @@ -12,8 +12,7 @@ pub struct Model { pub integrity_hash: Option, // Full hash for file validation (generated by validate job) pub content_hash: String, // Fast sampled hash for deduplication (generated during content identification) pub mime_type_id: Option, - pub kind_id: i32, // ContentKind foreign key - pub media_data: Option, // MediaData as JSON + pub kind_id: i32, // ContentKind foreign key pub text_content: Option, pub total_size: i64, // Size of one instance of this content pub entry_count: i32, // Entries in THIS library only diff --git a/core/src/infra/db/migration/m20240101_000001_initial_schema.rs b/core/src/infra/db/migration/m20240101_000001_initial_schema.rs index 816f5b4d2..b2f267140 100644 --- a/core/src/infra/db/migration/m20240101_000001_initial_schema.rs +++ b/core/src/infra/db/migration/m20240101_000001_initial_schema.rs @@ -214,7 +214,6 @@ impl MigrationTrait for Migration { .integer() .not_null(), ) - .col(ColumnDef::new(ContentIdentities::MediaData).json()) .col(ColumnDef::new(ContentIdentities::TextContent).text()) .col( ColumnDef::new(ContentIdentities::TotalSize) @@ -835,7 +834,6 @@ enum ContentIdentities { ContentHash, MimeTypeId, KindId, - MediaData, TextContent, TotalSize, EntryCount, diff --git a/core/src/ops/files/query/directory_listing.rs b/core/src/ops/files/query/directory_listing.rs index ee5be1998..1e7a5a800 100644 --- a/core/src/ops/files/query/directory_listing.rs +++ b/core/src/ops/files/query/directory_listing.rs @@ -156,9 +156,14 @@ impl LibraryQuery for DirectoryListingQuery { e.parent_id as entry_parent_id, ci.id as content_identity_id, ci.uuid as content_identity_uuid, - ci.content_hash as content_identity_hash, - ci.media_data as content_identity_media_data, - ci.first_seen_at as content_identity_first_seen_at, + ci.content_hash as content_hash, + ci.integrity_hash as integrity_hash, + ci.mime_type_id as mime_type_id, + ci.text_content as text_content, + ci.total_size as total_size, + ci.entry_count as entry_count, + ci.first_seen_at as first_seen_at, + ci.last_verified_at as last_verified_at, ck.id as content_kind_id, ck.name as content_kind_name FROM entries e @@ -233,17 +238,19 @@ impl LibraryQuery for DirectoryListingQuery { let entry_inode: Option = row.try_get("", "entry_inode").ok(); // Content identity data - let content_identity_id: Option = row.try_get("", "content_identity_id").ok(); let content_identity_uuid: Option = row.try_get("", "content_identity_uuid").ok(); - let content_identity_hash: Option = - row.try_get("", "content_identity_hash").ok(); - let content_identity_media_data: Option = - row.try_get("", "content_identity_media_data").ok(); - let content_identity_first_seen_at: Option> = - row.try_get("", "content_identity_first_seen_at").ok(); + let content_hash: Option = row.try_get("", "content_hash").ok(); + let integrity_hash: Option = row.try_get("", "integrity_hash").ok(); + let mime_type_id: Option = row.try_get("", "mime_type_id").ok(); + let text_content: Option = row.try_get("", "text_content").ok(); + let total_size: Option = row.try_get("", "total_size").ok(); + let entry_count: Option = row.try_get("", "entry_count").ok(); + let first_seen_at: Option> = + row.try_get("", "first_seen_at").ok(); + let last_verified_at: Option> = + row.try_get("", "last_verified_at").ok(); // Content kind data - let content_kind_id: Option = row.try_get("", "content_kind_id").ok(); let content_kind_name: Option = row.try_get("", "content_kind_name").ok(); // Use entry ID as UUID if uuid is None @@ -291,37 +298,31 @@ impl LibraryQuery for DirectoryListingQuery { }; // Create content identity if available - let content_identity = if let (Some(ci_uuid), Some(ci_hash), Some(ci_first_seen)) = ( - content_identity_uuid, - content_identity_hash, - content_identity_first_seen_at, - ) { - // Convert content_kind name to ContentKind enum - let kind = content_kind_name - .as_ref() - .map(|name| crate::domain::ContentKind::from(name.as_str())) - .unwrap_or(crate::domain::ContentKind::Unknown); + let content_identity = + if let (Some(ci_uuid), Some(ci_hash), Some(ci_first_seen), Some(ci_last_verified)) = + (content_identity_uuid, content_hash, first_seen_at, last_verified_at) + { + // Convert content_kind name to ContentKind enum + let kind = content_kind_name + .as_ref() + .map(|name| crate::domain::ContentKind::from(name.as_str())) + .unwrap_or(crate::domain::ContentKind::Unknown); - Some(crate::domain::ContentIdentity { - uuid: ci_uuid, - kind, - hash: ci_hash, - media_data: content_identity_media_data.map(|json| { - serde_json::from_value(json).unwrap_or_else(|_| crate::domain::MediaData { - width: None, - height: None, - duration: None, - bitrate: None, - fps: None, - exif: None, - extra: serde_json::Value::Null, - }) - }), - created_at: ci_first_seen, - }) - } else { - None - }; + Some(crate::domain::ContentIdentity { + uuid: ci_uuid, + kind, + content_hash: ci_hash, + integrity_hash, + mime_type_id, + text_content, + total_size: total_size.unwrap_or(0), + entry_count: entry_count.unwrap_or(0), + first_seen_at: ci_first_seen, + last_verified_at: ci_last_verified, + }) + } else { + None + }; // Create file construction data let file_data = FileConstructionData { @@ -400,11 +401,42 @@ impl DirectoryListingQuery { } } } - SdPath::Cloud { .. } => { - // Cloud storage directory browsing is not yet implemented - Err(QueryError::Internal( - "Cloud storage directory browsing is not yet implemented".to_string(), - )) + SdPath::Cloud { volume_id, path } => { + // Cloud storage directory browsing + tracing::debug!(" Looking for cloud directory: volume={}, path='{}'", volume_id, path); + + // Find directory entry by path in directory_paths table + // Cloud paths are stored the same way as physical paths + tracing::debug!(" Querying directory_paths table..."); + let directory_path = directory_paths::Entity::find() + .filter(directory_paths::Column::Path.eq(path)) + .one(db) + .await?; + tracing::debug!(" Directory path query result: {:?}", directory_path); + + match directory_path { + Some(dp) => { + tracing::debug!(" Found directory path entry: {:?}", dp); + tracing::debug!(" Looking for entry with ID: {}", dp.entry_id); + + // Get the entry for this directory + let entry_result = entry::Entity::find_by_id(dp.entry_id).one(db).await?; + tracing::debug!(" Entry query result: {:?}", entry_result); + + entry_result.ok_or_else(|| { + QueryError::Internal(format!( + "Entry not found for cloud directory: {}", + dp.entry_id + )) + }) + } + None => { + tracing::debug!(" Cloud directory not found in directory_paths table"); + Err(QueryError::Internal( + format!("Cloud directory '{}' has not been indexed yet. Please ensure the cloud volume is connected and indexing is complete.", path) + )) + } + } } SdPath::Content { .. } => { // Content-addressed paths are not supported for directory browsing diff --git a/core/src/ops/files/query/file_by_path.rs b/core/src/ops/files/query/file_by_path.rs index 1fc728a34..dc4d05d1b 100644 --- a/core/src/ops/files/query/file_by_path.rs +++ b/core/src/ops/files/query/file_by_path.rs @@ -132,21 +132,20 @@ impl FileByPathQuery { db: &DatabaseConnection, ) -> QueryResult { match sd_path { - SdPath::Physical { device_id, path } => { - // More efficient approach: find the file by its filename and parent directory - let file_name = path + SdPath::Physical { .. } | SdPath::Cloud { .. } => { + // Use SdPath API for consistent path handling + let file_name = sd_path .file_name() - .and_then(|n| n.to_str()) .ok_or_else(|| QueryError::Internal("Invalid file name in path".to_string()))?; - let parent_path = path + let parent_sd_path = sd_path .parent() .ok_or_else(|| QueryError::Internal("No parent directory".to_string()))?; - // Extract filename without extension for the database query - let (name, extension) = if let Some(ext) = path.extension().and_then(|e| e.to_str()) - { - let name_without_ext = file_name.trim_end_matches(&format!(".{}", ext)); + // Parse extension from filename + let (name, extension) = if let Some(dot_idx) = file_name.rfind('.') { + let name_without_ext = &file_name[..dot_idx]; + let ext = &file_name[dot_idx + 1..]; (name_without_ext.to_string(), Some(ext.to_string())) } else { (file_name.to_string(), None) @@ -163,6 +162,13 @@ impl FileByPathQuery { let entries = query.all(db).await?; + // Get parent path string for comparison + let parent_path_str = match &parent_sd_path { + SdPath::Physical { path, .. } => path.to_string_lossy().to_string(), + SdPath::Cloud { path, .. } => path.clone(), + _ => return Err(QueryError::Internal("Invalid parent path".to_string())), + }; + // For each matching entry, check if its parent directory path matches for entry_model in entries { if let Some(parent_id) = entry_model.parent_id { @@ -176,7 +182,7 @@ impl FileByPathQuery { { if let Some(parent_path_model) = parent_path_model { // Check if the parent directory path matches - if PathBuf::from(&parent_path_model.path) == parent_path { + if parent_path_model.path == parent_path_str { return Ok(entry_model); } } @@ -186,15 +192,9 @@ impl FileByPathQuery { Err(QueryError::Internal(format!( "File not found at path: {}", - path.display() + sd_path.display() ))) } - SdPath::Cloud { .. } => { - // Cloud storage file queries are not yet implemented - Err(QueryError::Internal( - "Cloud storage file queries are not yet implemented".to_string(), - )) - } SdPath::Content { content_id } => { // For content-addressed paths, find any entry with this content_id // First we need to find the content_identity with this UUID diff --git a/core/src/ops/indexing/entry.rs b/core/src/ops/indexing/entry.rs index 7d9bb458f..63c88bf0b 100644 --- a/core/src/ops/indexing/entry.rs +++ b/core/src/ops/indexing/entry.rs @@ -709,7 +709,6 @@ impl EntryProcessor { content_hash: Set(content_hash.clone()), mime_type_id: Set(mime_type_id), kind_id: Set(kind_id), - media_data: Set(None), // Set during media analysis text_content: Set(None), // TODO: Extract text content for indexing total_size: Set(file_size), entry_count: Set(1), diff --git a/core/src/ops/indexing/phases/processing.rs b/core/src/ops/indexing/phases/processing.rs index 15e63763c..aeca57463 100644 --- a/core/src/ops/indexing/phases/processing.rs +++ b/core/src/ops/indexing/phases/processing.rs @@ -15,7 +15,6 @@ use crate::{ }; use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, TransactionTrait}; use std::path::Path; -use std::sync::Arc; use tracing::warn; use uuid::Uuid; @@ -26,7 +25,6 @@ pub async fn run_processing_phase( ctx: &JobContext<'_>, mode: IndexMode, location_root_path: &Path, - volume_backend: Option<&Arc>, ) -> Result<(), JobError> { let total_batches = state.entry_batches.len(); ctx.log(format!( diff --git a/core/src/ops/media/live_photo.rs b/core/src/ops/media/live_photo.rs index 514f1f087..3ea6cf2dc 100644 --- a/core/src/ops/media/live_photo.rs +++ b/core/src/ops/media/live_photo.rs @@ -1,5 +1,7 @@ //! Live Photo detection and handling //! +//! NOTE: This should be moved to the Photos extension +//! //! When enabled, Live Photos are handled as follows: //! 1. During indexing, when we encounter an image file (HEIC/JPEG), we check for a matching video (MOV/MP4) //! 2. If found, the video becomes a virtual sidecar of the image diff --git a/core/src/ops/volumes/add_cloud/action.rs b/core/src/ops/volumes/add_cloud/action.rs new file mode 100644 index 000000000..ef0dfee0b --- /dev/null +++ b/core/src/ops/volumes/add_cloud/action.rs @@ -0,0 +1,158 @@ +//! Add cloud volume action +//! +//! This action adds a cloud storage volume (S3, Google Drive, etc.) to a library, +//! storing encrypted credentials and creating a virtual volume for indexing. + +use super::output::VolumeAddCloudOutput; +use crate::{ + context::CoreContext, + crypto::cloud_credentials::{CloudCredential, CloudCredentialManager}, + infra::action::{error::ActionError, LibraryAction}, + volume::{backend::CloudServiceType, CloudBackend, Volume, VolumeFingerprint}, +}; +use serde::{Deserialize, Serialize}; +use specta::Type; +use std::{path::PathBuf, sync::Arc}; +use uuid::Uuid; + +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +pub struct VolumeAddCloudInput { + pub service: CloudServiceType, + pub display_name: String, + pub config: CloudStorageConfig, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +#[serde(tag = "type")] +pub enum CloudStorageConfig { + S3 { + bucket: String, + region: String, + access_key_id: String, + secret_access_key: String, + endpoint: Option, + }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VolumeAddCloudAction { + input: VolumeAddCloudInput, +} + +impl VolumeAddCloudAction { + pub fn new(input: VolumeAddCloudInput) -> Self { + Self { input } + } +} + +impl LibraryAction for VolumeAddCloudAction { + type Input = VolumeAddCloudInput; + type Output = VolumeAddCloudOutput; + + fn from_input(input: VolumeAddCloudInput) -> Result { + Ok(VolumeAddCloudAction::new(input)) + } + + async fn execute( + self, + library: Arc, + context: Arc, + ) -> Result { + let device_id = context + .device_manager + .device_id() + .map_err(|e| ActionError::InvalidInput(format!("Failed to get device ID: {}", e)))?; + let library_id = library.id(); + + let (backend, credential, mount_point) = match &self.input.config { + CloudStorageConfig::S3 { + bucket, + region, + access_key_id, + secret_access_key, + endpoint, + } => { + let backend = CloudBackend::new_s3( + bucket, + region, + access_key_id, + secret_access_key, + endpoint.clone(), + ) + .await + .map_err(|e| ActionError::InvalidInput(format!("Failed to create S3 backend: {}", e)))?; + + let credential = CloudCredential::new_access_key( + CloudServiceType::S3, + access_key_id.clone(), + secret_access_key.clone(), + None, + ); + + let mount_point = PathBuf::from(format!("cloud://s3/{}", bucket)); + + (backend, credential, mount_point) + } + }; + + let fingerprint = VolumeFingerprint::new( + &self.input.display_name, + 0, // Cloud volumes don't have a fixed size + &format!("{:?}", self.input.service), + ); + + let backend_arc: Arc = Arc::new(backend); + + let volume = Volume { + fingerprint: fingerprint.clone(), + device_id, + name: self.input.display_name.clone(), + mount_type: crate::volume::types::MountType::Network, + volume_type: crate::volume::types::VolumeType::Network, + mount_point: mount_point.clone(), + mount_points: vec![mount_point], + is_mounted: true, + disk_type: crate::volume::types::DiskType::Unknown, + file_system: crate::volume::types::FileSystem::Other(format!("{:?}", self.input.service)), + total_bytes_capacity: 0, + total_bytes_available: 0, + read_only: false, + hardware_id: None, + error_status: None, + apfs_container: None, + container_volume_id: None, + path_mappings: Vec::new(), + backend: Some(backend_arc), + read_speed_mbps: None, + write_speed_mbps: None, + auto_track_eligible: false, + is_user_visible: true, + last_updated: chrono::Utc::now(), + }; + + let credential_manager = CloudCredentialManager::new(context.library_key_manager.clone()); + credential_manager + .store_credential(library_id, &fingerprint.0, &credential) + .map_err(|e| { + ActionError::InvalidInput(format!("Failed to store credentials: {}", e)) + })?; + + let tracked = context + .volume_manager + .track_volume(&library, &fingerprint, Some(self.input.display_name.clone())) + .await + .map_err(|e| ActionError::InvalidInput(format!("Volume tracking failed: {}", e)))?; + + Ok(VolumeAddCloudOutput::new( + fingerprint, + self.input.display_name, + self.input.service, + )) + } + + fn action_kind(&self) -> &'static str { + "volumes.add_cloud" + } +} + +crate::register_library_action!(VolumeAddCloudAction, "volumes.add_cloud"); diff --git a/core/src/ops/volumes/add_cloud/mod.rs b/core/src/ops/volumes/add_cloud/mod.rs new file mode 100644 index 000000000..9b1e5979f --- /dev/null +++ b/core/src/ops/volumes/add_cloud/mod.rs @@ -0,0 +1,7 @@ +//! Add cloud volume operation + +pub mod action; +pub mod output; + +pub use action::{CloudStorageConfig, VolumeAddCloudAction, VolumeAddCloudInput}; +pub use output::VolumeAddCloudOutput; diff --git a/core/src/ops/volumes/add_cloud/output.rs b/core/src/ops/volumes/add_cloud/output.rs new file mode 100644 index 000000000..056bb72dc --- /dev/null +++ b/core/src/ops/volumes/add_cloud/output.rs @@ -0,0 +1,22 @@ +//! Volume add cloud operation output types + +use crate::volume::{backend::CloudServiceType, VolumeFingerprint}; +use serde::{Deserialize, Serialize}; +use specta::Type; + +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +pub struct VolumeAddCloudOutput { + pub fingerprint: VolumeFingerprint, + pub volume_name: String, + pub service: CloudServiceType, +} + +impl VolumeAddCloudOutput { + pub fn new(fingerprint: VolumeFingerprint, volume_name: String, service: CloudServiceType) -> Self { + Self { + fingerprint, + volume_name, + service, + } + } +} diff --git a/core/src/ops/volumes/mod.rs b/core/src/ops/volumes/mod.rs index b0994d82b..e277a2e2d 100644 --- a/core/src/ops/volumes/mod.rs +++ b/core/src/ops/volumes/mod.rs @@ -3,11 +3,16 @@ //! This module provides operations for managing volumes in Spacedrive: //! - Tracking/untracking volumes in libraries //! - Speed testing volume performance +//! - Adding/removing cloud volumes +pub mod add_cloud; +pub mod remove_cloud; pub mod speed_test; pub mod track; pub mod untrack; +pub use add_cloud::{action::VolumeAddCloudAction, VolumeAddCloudOutput}; +pub use remove_cloud::{action::VolumeRemoveCloudAction, VolumeRemoveCloudOutput}; pub use speed_test::{action::VolumeSpeedTestAction, VolumeSpeedTestOutput}; pub use track::{action::VolumeTrackAction, VolumeTrackOutput}; pub use untrack::{action::VolumeUntrackAction, VolumeUntrackOutput}; diff --git a/core/src/ops/volumes/remove_cloud/action.rs b/core/src/ops/volumes/remove_cloud/action.rs new file mode 100644 index 000000000..eb72d1b87 --- /dev/null +++ b/core/src/ops/volumes/remove_cloud/action.rs @@ -0,0 +1,70 @@ +//! Remove cloud volume action +//! +//! This action removes a cloud storage volume from a library, deleting encrypted +//! credentials and untracking the volume. + +use super::output::VolumeRemoveCloudOutput; +use crate::{ + context::CoreContext, + crypto::cloud_credentials::CloudCredentialManager, + infra::action::{error::ActionError, LibraryAction}, + volume::VolumeFingerprint, +}; +use serde::{Deserialize, Serialize}; +use specta::Type; + +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +pub struct VolumeRemoveCloudInput { + pub fingerprint: VolumeFingerprint, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +pub struct VolumeRemoveCloudAction { + input: VolumeRemoveCloudInput, +} + +impl VolumeRemoveCloudAction { + pub fn new(input: VolumeRemoveCloudInput) -> Self { + Self { input } + } +} + +impl LibraryAction for VolumeRemoveCloudAction { + type Input = VolumeRemoveCloudInput; + type Output = VolumeRemoveCloudOutput; + + fn from_input(input: VolumeRemoveCloudInput) -> Result { + Ok(VolumeRemoveCloudAction::new(input)) + } + + async fn execute( + self, + library: std::sync::Arc, + context: std::sync::Arc, + ) -> Result { + let library_id = library.id(); + + context + .volume_manager + .untrack_volume(&library, &self.input.fingerprint) + .await + .map_err(|e| ActionError::InvalidInput(format!("Volume untracking failed: {}", e)))?; + + let credential_manager = CloudCredentialManager::new(context.library_key_manager.clone()); + if let Err(e) = credential_manager.delete_credential(library_id, &self.input.fingerprint.0) { + tracing::warn!( + "Failed to delete credentials for volume {}: {}", + self.input.fingerprint.0, + e + ); + } + + Ok(VolumeRemoveCloudOutput::new(self.input.fingerprint)) + } + + fn action_kind(&self) -> &'static str { + "volumes.remove_cloud" + } +} + +crate::register_library_action!(VolumeRemoveCloudAction, "volumes.remove_cloud"); diff --git a/core/src/ops/volumes/remove_cloud/mod.rs b/core/src/ops/volumes/remove_cloud/mod.rs new file mode 100644 index 000000000..9fdb7a75e --- /dev/null +++ b/core/src/ops/volumes/remove_cloud/mod.rs @@ -0,0 +1,7 @@ +//! Remove cloud volume operation + +pub mod action; +pub mod output; + +pub use action::{VolumeRemoveCloudAction, VolumeRemoveCloudInput}; +pub use output::VolumeRemoveCloudOutput; diff --git a/core/src/ops/volumes/remove_cloud/output.rs b/core/src/ops/volumes/remove_cloud/output.rs new file mode 100644 index 000000000..5d8399ecc --- /dev/null +++ b/core/src/ops/volumes/remove_cloud/output.rs @@ -0,0 +1,16 @@ +//! Volume remove cloud operation output types + +use crate::volume::VolumeFingerprint; +use serde::{Deserialize, Serialize}; +use specta::Type; + +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +pub struct VolumeRemoveCloudOutput { + pub fingerprint: VolumeFingerprint, +} + +impl VolumeRemoveCloudOutput { + pub fn new(fingerprint: VolumeFingerprint) -> Self { + Self { fingerprint } + } +} diff --git a/core/src/volume/backend/mod.rs b/core/src/volume/backend/mod.rs index d4c398c13..21b020b5c 100644 --- a/core/src/volume/backend/mod.rs +++ b/core/src/volume/backend/mod.rs @@ -58,7 +58,7 @@ pub enum BackendType { } /// Cloud service type identifier -#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize, specta::Type)] pub enum CloudServiceType { S3, GoogleDrive,