mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-05-18 13:26:00 -04:00
cleanup
This commit is contained in:
@@ -45,7 +45,7 @@ pub async fn boot_isolated_with_core(
|
||||
.save()
|
||||
.map_err(|e| anyhow::anyhow!("save bench config: {}", e))?;
|
||||
|
||||
let core = sd_core::Core::new_with_config(bench_data_dir.clone())
|
||||
let core = sd_core::Core::new(bench_data_dir.clone())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("init core: {}", e))?;
|
||||
let core = Arc::new(core);
|
||||
|
||||
@@ -56,7 +56,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
);
|
||||
}
|
||||
|
||||
let core = Core::new_with_config(data_dir.clone()).await?;
|
||||
let core = Core::new(data_dir.clone()).await?;
|
||||
println!(" Core initialized with job logging");
|
||||
println!(" Device ID: {}", core.device.device_id()?);
|
||||
println!(" Data directory: {:?}", data_dir);
|
||||
|
||||
@@ -39,7 +39,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!(" Job logging enabled");
|
||||
}
|
||||
|
||||
let core = Core::new_with_config(data_dir.clone()).await?;
|
||||
let core = Core::new(data_dir.clone()).await?;
|
||||
let job_logs_dir = data_dir.join("job_logs");
|
||||
println!(" Job logs directory: {:?}", job_logs_dir);
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// 1. Initialize core with custom data directory
|
||||
println!("1. Initializing Spacedrive Core...");
|
||||
let data_dir = PathBuf::from("./data/spacedrive-demo-data");
|
||||
let core = Core::new_with_config(data_dir.clone()).await?;
|
||||
let core = Core::new(data_dir.clone()).await?;
|
||||
println!(" ✓ Core initialized with data directory: {:?}", data_dir);
|
||||
println!(" ✓ Device UUID: {}", core.device.device_id()?);
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
// Setup test environment
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// Create library
|
||||
println!("1. Creating library...");
|
||||
|
||||
@@ -25,7 +25,7 @@ async fn main() -> Result<()> {
|
||||
|
||||
// Initialize core
|
||||
let data_dir = PathBuf::from("./data/spacedrive-search-test");
|
||||
let core = Core::new_with_config(data_dir.clone())
|
||||
let core = Core::new(data_dir.clone())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Failed to initialize core: {}", e))?;
|
||||
println!("✓ Core initialized");
|
||||
|
||||
@@ -13,7 +13,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
// Create Core instance
|
||||
let data_dir = std::env::temp_dir().join("spacedrive-shutdown-demo");
|
||||
let core = Core::new_with_config(data_dir).await?;
|
||||
let core = Core::new(data_dir).await?;
|
||||
|
||||
// Get open libraries
|
||||
let libraries = core.libraries.get_open_libraries().await;
|
||||
|
||||
@@ -16,7 +16,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
// Create Core instance
|
||||
let data_dir = std::env::temp_dir().join("spacedrive-simple-pause-resume");
|
||||
let core = Core::new_with_config(data_dir).await?;
|
||||
let core = Core::new(data_dir).await?;
|
||||
|
||||
// Get open libraries
|
||||
let libraries = core.libraries.get_open_libraries().await;
|
||||
|
||||
@@ -20,7 +20,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Initialize core (which includes volume manager)
|
||||
println!("1. Initializing Spacedrive Core with volume detection...");
|
||||
let data_dir = std::env::temp_dir().join("spacedrive-volume-demo");
|
||||
let core = Core::new_with_config(data_dir).await?;
|
||||
let core = Core::new(data_dir).await?;
|
||||
println!(" ✓ Core initialized");
|
||||
|
||||
// Get volume statistics
|
||||
|
||||
@@ -15,7 +15,7 @@ pub async fn start_default_server(
|
||||
initialize_tracing_with_file_logging(&data_dir)?;
|
||||
|
||||
// Create a single Core instance
|
||||
let mut core = Core::new_with_config(data_dir.clone())
|
||||
let mut core = Core::new(data_dir.clone())
|
||||
.await
|
||||
.map_err(|e| format!("Failed to create core: {}", e))?;
|
||||
|
||||
|
||||
143
core/src/lib.rs
143
core/src/lib.rs
@@ -43,73 +43,6 @@ use tokio::sync::{mpsc, RwLock};
|
||||
use tracing::{error, info, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Pending pairing request information
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PendingPairingRequest {
|
||||
pub request_id: uuid::Uuid,
|
||||
pub device_id: uuid::Uuid,
|
||||
pub device_name: String,
|
||||
pub received_at: chrono::DateTime<chrono::Utc>,
|
||||
}
|
||||
|
||||
/// Spacedrop request message
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
struct SpacedropRequest {
|
||||
transfer_id: uuid::Uuid,
|
||||
file_path: String,
|
||||
sender_name: String,
|
||||
message: Option<String>,
|
||||
file_size: u64,
|
||||
}
|
||||
/// Bridge between networking events and core events
|
||||
/// TODO: why? - james
|
||||
pub struct NetworkEventBridge {
|
||||
network_events: mpsc::UnboundedReceiver<service::network::NetworkEvent>,
|
||||
core_events: Arc<EventBus>,
|
||||
}
|
||||
|
||||
impl NetworkEventBridge {
|
||||
pub fn new(
|
||||
network_events: mpsc::UnboundedReceiver<service::network::NetworkEvent>,
|
||||
core_events: Arc<EventBus>,
|
||||
) -> Self {
|
||||
Self {
|
||||
network_events,
|
||||
core_events,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
while let Some(event) = self.network_events.recv().await {
|
||||
if let Some(core_event) = self.translate_event(event) {
|
||||
self.core_events.emit(core_event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn translate_event(&self, event: service::network::NetworkEvent) -> Option<Event> {
|
||||
match event {
|
||||
service::network::NetworkEvent::ConnectionEstablished { device_id, .. } => {
|
||||
Some(Event::DeviceConnected {
|
||||
device_id,
|
||||
device_name: "Connected Device".to_string(),
|
||||
})
|
||||
}
|
||||
service::network::NetworkEvent::ConnectionLost { device_id, .. } => {
|
||||
Some(Event::DeviceDisconnected { device_id })
|
||||
}
|
||||
service::network::NetworkEvent::PairingCompleted {
|
||||
device_id,
|
||||
device_info,
|
||||
} => Some(Event::DeviceConnected {
|
||||
device_id,
|
||||
device_name: device_info.device_name,
|
||||
}),
|
||||
_ => None, // Some events don't map to core events
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The main context for all core operations
|
||||
#[derive(Clone)]
|
||||
pub struct Core {
|
||||
@@ -143,48 +76,38 @@ pub struct Core {
|
||||
|
||||
impl Core {
|
||||
/// Initialize a new Core instance with custom data directory
|
||||
pub async fn new_with_config(data_dir: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Self::new_with_config_and_device_name(data_dir, None).await
|
||||
pub async fn new(data_dir: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
Self::new_with_device_name(data_dir, None).await
|
||||
}
|
||||
|
||||
/// Initialize a new Core instance with custom data directory and optional device name
|
||||
///
|
||||
/// This is primarily for mobile platforms (iOS, Android) where the device name
|
||||
/// should be provided by the native platform APIs (e.g., UIDevice.name on iOS)
|
||||
pub async fn new_with_config_and_device_name(
|
||||
pub async fn new_with_device_name(
|
||||
data_dir: PathBuf,
|
||||
device_name: Option<String>,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
info!("Initializing Spacedrive Core at {:?}", data_dir);
|
||||
info!("Initializing Spacedrive at {:?}", data_dir);
|
||||
|
||||
// Load or create app config
|
||||
info!("Loading app config...");
|
||||
let config = AppConfig::load_or_create(&data_dir)?;
|
||||
info!("App config loaded");
|
||||
|
||||
info!("Ensuring directories... GAYYY");
|
||||
config.ensure_directories()?;
|
||||
info!("Directories ensured");
|
||||
|
||||
let config = Arc::new(RwLock::new(config));
|
||||
|
||||
// Initialize device manager
|
||||
info!("Initializing device manager...");
|
||||
let device = Arc::new(DeviceManager::init_with_path_and_name(
|
||||
&data_dir,
|
||||
device_name,
|
||||
)?);
|
||||
info!("Device manager initialized");
|
||||
|
||||
// Set a global device ID for convenience
|
||||
info!("Setting current device ID...");
|
||||
crate::device::set_current_device_id(device.device_id()?);
|
||||
info!("Current device ID set");
|
||||
|
||||
// Create event bus
|
||||
info!("Creating event bus...");
|
||||
let events = Arc::new(EventBus::default());
|
||||
info!("Event bus created");
|
||||
|
||||
// Initialize volume manager
|
||||
let volume_config = VolumeDetectionConfig::default();
|
||||
@@ -363,19 +286,19 @@ impl Core {
|
||||
Err(e) => error!("Failed to start services: {}", e),
|
||||
}
|
||||
|
||||
// 12. Initialize ActionManager and set it in context
|
||||
//Initialize ActionManager and set it in context
|
||||
let action_manager = Arc::new(crate::infra::action::manager::ActionManager::new(
|
||||
context.clone(),
|
||||
));
|
||||
context.set_action_manager(action_manager).await;
|
||||
|
||||
// 13. Set up log event emitter
|
||||
// Set up log event emitter
|
||||
setup_log_event_emitter(events.clone());
|
||||
|
||||
// 14. Initialize API dispatcher
|
||||
// Initialize API dispatcher
|
||||
let api_dispatcher = ApiDispatcher::new(context.clone());
|
||||
|
||||
// 15. Initialize plugin manager (WASM extensions)
|
||||
// Initialize plugin manager (WASM extensions)
|
||||
let plugin_dir = data_dir.join("extensions");
|
||||
let _ = std::fs::create_dir_all(&plugin_dir); // Ensure directory exists
|
||||
|
||||
@@ -388,7 +311,6 @@ impl Core {
|
||||
// Set in context so jobs can access it
|
||||
context.set_plugin_manager(plugin_manager.clone()).await;
|
||||
|
||||
// 16. Emit startup event
|
||||
events.emit(Event::CoreStarted);
|
||||
|
||||
Ok(Self {
|
||||
@@ -631,3 +553,52 @@ fn setup_log_event_emitter(event_bus: Arc<crate::infra::event::EventBus>) {
|
||||
pub mod networking {
|
||||
pub use crate::service::network::*;
|
||||
}
|
||||
|
||||
/// Bridge between networking events and core events
|
||||
/// TODO: why? - james
|
||||
pub struct NetworkEventBridge {
|
||||
network_events: mpsc::UnboundedReceiver<service::network::NetworkEvent>,
|
||||
core_events: Arc<EventBus>,
|
||||
}
|
||||
|
||||
impl NetworkEventBridge {
|
||||
pub fn new(
|
||||
network_events: mpsc::UnboundedReceiver<service::network::NetworkEvent>,
|
||||
core_events: Arc<EventBus>,
|
||||
) -> Self {
|
||||
Self {
|
||||
network_events,
|
||||
core_events,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
while let Some(event) = self.network_events.recv().await {
|
||||
if let Some(core_event) = self.translate_event(event) {
|
||||
self.core_events.emit(core_event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn translate_event(&self, event: service::network::NetworkEvent) -> Option<Event> {
|
||||
match event {
|
||||
service::network::NetworkEvent::ConnectionEstablished { device_id, .. } => {
|
||||
Some(Event::DeviceConnected {
|
||||
device_id,
|
||||
device_name: "Connected Device".to_string(),
|
||||
})
|
||||
}
|
||||
service::network::NetworkEvent::ConnectionLost { device_id, .. } => {
|
||||
Some(Event::DeviceDisconnected { device_id })
|
||||
}
|
||||
service::network::NetworkEvent::PairingCompleted {
|
||||
device_id,
|
||||
device_info,
|
||||
} => Some(Event::DeviceConnected {
|
||||
device_id,
|
||||
device_name: device_info.device_name,
|
||||
}),
|
||||
_ => None, // Some events don't map to core events
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ impl TestConfigBuilder {
|
||||
// Ensure the data directory exists
|
||||
std::fs::create_dir_all(&config.data_dir)?;
|
||||
|
||||
// Save the config so Core::new_with_config() will load our custom settings
|
||||
// Save the config so Core::new() will load our custom settings
|
||||
config.save()?;
|
||||
info!(
|
||||
"Created test configuration at: {} with custom settings",
|
||||
@@ -405,8 +405,8 @@ impl IntegrationTestSetup {
|
||||
self.data_dir().display()
|
||||
);
|
||||
|
||||
// Core::new_with_config() will load our saved AppConfig from disk
|
||||
let core = crate::Core::new_with_config(self.data_dir().clone()).await?;
|
||||
// Core::new() will load our saved AppConfig from disk
|
||||
let core = crate::Core::new(self.data_dir().clone()).await?;
|
||||
|
||||
// Verify our config was loaded correctly
|
||||
{
|
||||
|
||||
@@ -96,7 +96,7 @@ async fn test_copy_progress_monitoring_large_file() {
|
||||
|
||||
// Initialize core with custom data directory
|
||||
let core_data_dir = test_root.join("core_data");
|
||||
let core = Core::new_with_config(core_data_dir).await.unwrap();
|
||||
let core = Core::new(core_data_dir).await.unwrap();
|
||||
|
||||
// Create a test library
|
||||
let library = core
|
||||
@@ -397,7 +397,7 @@ async fn test_copy_progress_multiple_files() {
|
||||
|
||||
// Initialize core and library
|
||||
let core_data_dir = test_root.join("core_data");
|
||||
let core = Core::new_with_config(core_data_dir).await.unwrap();
|
||||
let core = Core::new(core_data_dir).await.unwrap();
|
||||
let library = core
|
||||
.libraries
|
||||
.create_library("Multi-file Progress Test", None, core.context.clone())
|
||||
|
||||
@@ -36,13 +36,10 @@ async fn alice_cross_device_copy_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Alice: Initializing Core...");
|
||||
let mut core = timeout(
|
||||
Duration::from_secs(10),
|
||||
Core::new_with_config(data_dir.clone()),
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir.clone()))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
println!("Alice: Core initialized successfully");
|
||||
|
||||
// Set device name
|
||||
@@ -309,7 +306,7 @@ async fn bob_cross_device_copy_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Bob: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -30,7 +30,7 @@ async fn alice_pairing_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Alice: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
@@ -156,7 +156,7 @@ async fn bob_pairing_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Bob: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -37,7 +37,7 @@ async fn alice_persistence_scenario() {
|
||||
|
||||
// Initialize Core - this should load persisted devices
|
||||
println!("Alice: Initializing Core after restart...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
@@ -125,7 +125,7 @@ async fn alice_persistence_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Alice: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
@@ -246,7 +246,7 @@ async fn bob_persistence_scenario() {
|
||||
|
||||
// Initialize Core - this should load persisted devices
|
||||
println!("Bob: Initializing Core after restart...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
@@ -333,7 +333,7 @@ async fn bob_persistence_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Bob: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -62,7 +62,7 @@ async fn test_entry_metadata_preservation_on_move() {
|
||||
std::fs::create_dir_all(&data_dir).unwrap();
|
||||
println!("Created fresh data directory: {:?}", data_dir);
|
||||
|
||||
let core = Arc::new(Core::new_with_config(data_dir.clone()).await.unwrap());
|
||||
let core = Arc::new(Core::new(data_dir.clone()).await.unwrap());
|
||||
println!("Core initialized successfully");
|
||||
|
||||
// Create fresh library
|
||||
@@ -390,7 +390,7 @@ async fn test_child_entry_metadata_preservation_on_parent_move() {
|
||||
}
|
||||
std::fs::create_dir_all(&data_dir).unwrap();
|
||||
|
||||
let core = Arc::new(Core::new_with_config(data_dir.clone()).await.unwrap());
|
||||
let core = Arc::new(Core::new(data_dir.clone()).await.unwrap());
|
||||
|
||||
// Create fresh library
|
||||
let library = core
|
||||
|
||||
@@ -33,7 +33,7 @@ async fn test_core_and_library_events() -> Result<(), Box<dyn std::error::Error>
|
||||
let events_clone = collected_events.clone();
|
||||
|
||||
// Initialize core
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// Note: CoreStarted is emitted during core initialization, so we won't catch it
|
||||
// Start collecting events from now on
|
||||
@@ -124,7 +124,7 @@ async fn test_core_and_library_events() -> Result<(), Box<dyn std::error::Error>
|
||||
#[tokio::test]
|
||||
async fn test_location_and_job_events() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// Create library
|
||||
let library = core
|
||||
@@ -243,7 +243,7 @@ async fn test_location_and_job_events() -> Result<(), Box<dyn std::error::Error>
|
||||
#[tokio::test]
|
||||
async fn test_event_filtering() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// Create two libraries
|
||||
let library1 = core
|
||||
@@ -310,7 +310,7 @@ async fn test_event_filtering() -> Result<(), Box<dyn std::error::Error>> {
|
||||
#[tokio::test]
|
||||
async fn test_concurrent_event_subscribers() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// Create multiple subscribers
|
||||
let subscriber1_events = Arc::new(Mutex::new(Vec::new()));
|
||||
@@ -393,7 +393,7 @@ async fn test_concurrent_event_subscribers() -> Result<(), Box<dyn std::error::E
|
||||
#[tokio::test]
|
||||
async fn test_custom_events() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
let collected_events = Arc::new(Mutex::new(Vec::new()));
|
||||
let events_clone = collected_events.clone();
|
||||
|
||||
@@ -30,13 +30,10 @@ async fn alice_file_transfer_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Alice: Initializing Core...");
|
||||
let mut core = timeout(
|
||||
Duration::from_secs(10),
|
||||
Core::new_with_config(data_dir.clone()),
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir.clone()))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
println!("Alice: Core initialized successfully");
|
||||
|
||||
// Set device name
|
||||
@@ -360,7 +357,7 @@ async fn bob_file_transfer_scenario() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Bob: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -23,7 +23,7 @@ use tokio::time::Duration;
|
||||
async fn test_location_indexing() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// 1. Setup test environment
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// 2. Create library
|
||||
let library = core
|
||||
@@ -246,7 +246,7 @@ async fn test_location_indexing() -> Result<(), Box<dyn std::error::Error>> {
|
||||
async fn test_incremental_indexing() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// 1. Setup
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
let library = core
|
||||
.libraries
|
||||
@@ -369,7 +369,7 @@ async fn test_incremental_indexing() -> Result<(), Box<dyn std::error::Error>> {
|
||||
#[tokio::test]
|
||||
async fn test_indexing_error_handling() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
let library = core
|
||||
.libraries
|
||||
|
||||
@@ -18,7 +18,7 @@ async fn test_jobs_paused_on_shutdown() -> Result<(), Box<dyn std::error::Error>
|
||||
let core_dir = temp_dir.path().join("core");
|
||||
tokio::fs::create_dir_all(&core_dir).await?;
|
||||
|
||||
let core = Core::new_with_config(core_dir).await?;
|
||||
let core = Core::new(core_dir).await?;
|
||||
|
||||
// Create library
|
||||
let library = core
|
||||
@@ -106,7 +106,7 @@ async fn test_jobs_paused_on_shutdown() -> Result<(), Box<dyn std::error::Error>
|
||||
async fn test_shutdown_with_no_running_jobs() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// This test ensures shutdown works correctly when no jobs are running
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
let library = core
|
||||
.libraries
|
||||
|
||||
@@ -9,9 +9,7 @@ async fn test_library_lifecycle() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
|
||||
// Initialize core with custom data directory
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
// Create library (will be created in the libraries directory)
|
||||
let library = core
|
||||
@@ -82,9 +80,7 @@ async fn test_library_lifecycle() {
|
||||
#[tokio::test]
|
||||
async fn test_library_locking() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
// Create library
|
||||
let library = core
|
||||
@@ -121,9 +117,7 @@ async fn test_library_locking() {
|
||||
#[tokio::test]
|
||||
async fn test_library_discovery() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
// Create multiple libraries
|
||||
let lib1 = core
|
||||
@@ -164,9 +158,7 @@ async fn test_library_discovery() {
|
||||
#[tokio::test]
|
||||
async fn test_library_name_sanitization() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
// Create library with problematic name
|
||||
let library = core
|
||||
@@ -188,9 +180,7 @@ async fn test_default_library_creation() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
|
||||
// Initialize core with fresh temporary directory (no existing libraries)
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
// Check that a default library was created automatically
|
||||
let open_libraries = core.libraries.list().await;
|
||||
|
||||
@@ -169,7 +169,7 @@ impl TestHarness {
|
||||
|
||||
// Create core
|
||||
let temp_dir = TempDir::new()?;
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf()).await?;
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await?;
|
||||
|
||||
// Create library
|
||||
let library = core
|
||||
|
||||
@@ -30,7 +30,7 @@ async fn alice_relay_only_pairing() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Alice: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
@@ -182,7 +182,7 @@ async fn bob_relay_only_pairing() {
|
||||
|
||||
// Initialize Core
|
||||
println!("Bob: Initializing Core...");
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new_with_config(data_dir))
|
||||
let mut core = timeout(Duration::from_secs(10), Core::new(data_dir))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
@@ -11,7 +11,7 @@ async fn test_enhanced_pairing_code_with_relay_info() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut core = timeout(
|
||||
Duration::from_secs(10),
|
||||
Core::new_with_config(temp_dir.path().to_path_buf()),
|
||||
Core::new(temp_dir.path().to_path_buf()),
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
@@ -66,7 +66,7 @@ async fn test_enhanced_pairing_codes_always_have_relay_info() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut core = timeout(
|
||||
Duration::from_secs(10),
|
||||
Core::new_with_config(temp_dir.path().to_path_buf()),
|
||||
Core::new(temp_dir.path().to_path_buf()),
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
@@ -111,7 +111,7 @@ async fn test_relay_discovery_flow() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut core = timeout(
|
||||
Duration::from_secs(10),
|
||||
Core::new_with_config(temp_dir.path().to_path_buf()),
|
||||
Core::new(temp_dir.path().to_path_buf()),
|
||||
)
|
||||
.await
|
||||
.unwrap()
|
||||
|
||||
@@ -511,14 +511,14 @@ impl SyncTestSetup {
|
||||
config_b.save()?;
|
||||
|
||||
// Initialize Core A (will load config from disk with networking disabled)
|
||||
let core_a = Core::new_with_config(temp_dir_a.path().to_path_buf())
|
||||
let core_a = Core::new(temp_dir_a.path().to_path_buf())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
let device_a_id = core_a.device.device_id()?;
|
||||
info!("️ Device A ID: {}", device_a_id);
|
||||
|
||||
// Initialize Core B
|
||||
let core_b = Core::new_with_config(temp_dir_b.path().to_path_buf())
|
||||
let core_b = Core::new(temp_dir_b.path().to_path_buf())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
let device_b_id = core_b.device.device_id()?;
|
||||
@@ -1358,19 +1358,19 @@ async fn test_sync_transitive_three_devices() -> anyhow::Result<()> {
|
||||
config_c.save()?;
|
||||
|
||||
// Initialize cores
|
||||
let core_a = Core::new_with_config(temp_dir_a.path().to_path_buf())
|
||||
let core_a = Core::new(temp_dir_a.path().to_path_buf())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
let device_a_id = core_a.device.device_id()?;
|
||||
info!("️ Device A ID: {}", device_a_id);
|
||||
|
||||
let core_b = Core::new_with_config(temp_dir_b.path().to_path_buf())
|
||||
let core_b = Core::new(temp_dir_b.path().to_path_buf())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
let device_b_id = core_b.device.device_id()?;
|
||||
info!("️ Device B ID: {}", device_b_id);
|
||||
|
||||
let core_c = Core::new_with_config(temp_dir_c.path().to_path_buf())
|
||||
let core_c = Core::new(temp_dir_c.path().to_path_buf())
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("{}", e))?;
|
||||
let device_c_id = core_c.device.device_id()?;
|
||||
|
||||
@@ -49,7 +49,7 @@ async fn test_tagging_persists_to_database() {
|
||||
std::fs::create_dir_all(&data_dir).unwrap();
|
||||
|
||||
// Init Core and a fresh library
|
||||
let core = Arc::new(Core::new_with_config(data_dir.clone()).await.unwrap());
|
||||
let core = Arc::new(Core::new(data_dir.clone()).await.unwrap());
|
||||
let library = core
|
||||
.libraries
|
||||
.create_library(
|
||||
|
||||
@@ -25,7 +25,7 @@ async fn test_volume_tracking_lifecycle() {
|
||||
|
||||
// Initialize core - this handles all the setup automatically
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -221,7 +221,7 @@ async fn test_volume_tracking_multiple_libraries() {
|
||||
|
||||
// Initialize core - this handles all the setup automatically
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -406,7 +406,7 @@ async fn test_automatic_system_volume_tracking() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -467,7 +467,7 @@ async fn test_auto_tracking_disabled() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -552,7 +552,7 @@ async fn test_volume_state_updates() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -643,7 +643,7 @@ async fn test_volume_speed_test() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -722,7 +722,7 @@ async fn test_volume_types_and_properties() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -798,7 +798,7 @@ async fn test_volume_tracking_persistence() {
|
||||
|
||||
// Create core and library
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -883,7 +883,7 @@ async fn test_volume_tracking_persistence() {
|
||||
|
||||
// Create new core instance
|
||||
let core2 = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create second core"),
|
||||
);
|
||||
@@ -931,7 +931,7 @@ async fn test_volume_tracking_edge_cases() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -1046,7 +1046,7 @@ async fn test_volume_refresh_and_detection() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -1108,7 +1108,7 @@ async fn test_volume_monitor_service() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
|
||||
@@ -38,7 +38,7 @@ async fn test_real_volume_tracking_lifecycle() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -155,7 +155,7 @@ async fn test_different_filesystems() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -265,7 +265,7 @@ async fn test_volume_capacity_scenarios() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -377,7 +377,7 @@ async fn test_ram_disk_performance() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -474,7 +474,7 @@ async fn test_volume_mount_unmount_tracking() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
@@ -585,7 +585,7 @@ async fn test_concurrent_volume_operations() {
|
||||
let data_path = data_dir.path().to_path_buf();
|
||||
|
||||
let core = Arc::new(
|
||||
Core::new_with_config(data_path.clone())
|
||||
Core::new(data_path.clone())
|
||||
.await
|
||||
.expect("Failed to create core"),
|
||||
);
|
||||
|
||||
@@ -18,9 +18,7 @@ async fn test_load_wasm_extension() {
|
||||
|
||||
// 1. Initialize Core (same as other tests)
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
tracing::info!("Core initialized");
|
||||
|
||||
|
||||
@@ -18,9 +18,7 @@ async fn test_dispatch_wasm_job() {
|
||||
|
||||
// 1. Initialize Core
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let core = Core::new_with_config(temp_dir.path().to_path_buf())
|
||||
.await
|
||||
.unwrap();
|
||||
let core = Core::new(temp_dir.path().to_path_buf()).await.unwrap();
|
||||
|
||||
// 2. Get the default library that Core creates
|
||||
// (Avoids database migration issues in tests)
|
||||
|
||||
@@ -1,389 +0,0 @@
|
||||
# Photos Extension Architecture
|
||||
|
||||
## Overview
|
||||
|
||||
This extension demonstrates **every major SDK feature** through a real-world use case: intelligent photo management mirroring Apple Photos and Google Photos.
|
||||
|
||||
---
|
||||
|
||||
## Core Design Patterns
|
||||
|
||||
### Pattern 1: Core Does Generic, Extension Does Specialized
|
||||
|
||||
**Core extracts:**
|
||||
- EXIF metadata (camera, GPS, date)
|
||||
- Thumbnails (for quick preview)
|
||||
- Basic embeddings (for semantic search)
|
||||
|
||||
**Photos extension adds:**
|
||||
- Face detection (RetinaFace model)
|
||||
- Place clustering (from GPS + reverse geocoding)
|
||||
- Scene classification (ResNet50)
|
||||
- Aesthetic scoring
|
||||
- Person identification with clustering
|
||||
|
||||
### Pattern 2: On-Demand, User-Scoped Analysis
|
||||
|
||||
**Not automatic:**
|
||||
```rust
|
||||
// Photos does NOT analyze every screenshot automatically
|
||||
// User enables Photos on specific locations:
|
||||
// [x] /My Photos
|
||||
// [x] /Family Vacation 2025
|
||||
// [ ] /Work Documents (not relevant)
|
||||
```
|
||||
|
||||
**User triggers:**
|
||||
1. Install Photos extension
|
||||
2. Grant access to photo locations
|
||||
3. Click "Analyze for Faces" button
|
||||
4. Job processes photos in background
|
||||
5. Results appear progressively
|
||||
|
||||
### Pattern 3: Sidecar → Tags → Search
|
||||
|
||||
**Step 1: Detailed sidecar**
|
||||
```json
|
||||
// .sdlibrary/sidecars/content/{uuid}/extensions/photos/faces.json
|
||||
{
|
||||
"model_version": "retinaface_v1",
|
||||
"faces": [
|
||||
{
|
||||
"bbox": { "x": 0.2, "y": 0.3, "width": 0.1, "height": 0.15 },
|
||||
"confidence": 0.95,
|
||||
"embedding": [0.123, 0.456, ...], // 512 dims
|
||||
"identified_as": "person_uuid_123"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Searchable tags**
|
||||
```sql
|
||||
-- Core tags table
|
||||
INSERT INTO metadata_tag VALUES (photo_metadata_id, tag_id);
|
||||
-- Tag: "#person:Alice"
|
||||
```
|
||||
|
||||
**Step 3: User searches**
|
||||
```
|
||||
User types: "photos of alice"
|
||||
↓
|
||||
Query: tags LIKE '#person:alice'
|
||||
↓
|
||||
Results: All photos with Alice
|
||||
```
|
||||
|
||||
### Pattern 4: Enum-Based Memory for Multi-Domain Knowledge
|
||||
|
||||
```rust
|
||||
enum PhotoKnowledge {
|
||||
FaceCluster { person_id, embeddings, photos },
|
||||
PlaceCluster { place_id, center, photos },
|
||||
ScenePattern { scene_type, common_times, locations },
|
||||
}
|
||||
|
||||
// Single AssociativeMemory stores all three types
|
||||
// Enables queries like: "Find places where Alice appears often"
|
||||
knowledge
|
||||
.query()
|
||||
.where_variant(PhotoKnowledge::FaceCluster)
|
||||
.where_field("person_id", equals(alice_id))
|
||||
.and_related_concepts(PhotoKnowledge::PlaceCluster)
|
||||
.collect()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Flow
|
||||
|
||||
### User Adds 1000 Photos to Spacedrive
|
||||
|
||||
```
|
||||
1. Core Indexer runs (5 phases)
|
||||
- Discovery: Finds 1000 JPGs
|
||||
- Processing: Creates Entry records
|
||||
- Aggregation: Updates directory stats
|
||||
- Content ID: Generates CAS IDs
|
||||
- Analysis Queueing: Extracts EXIF, generates thumbnails
|
||||
↓
|
||||
2. Core emits Event::EntryCreated × 1000
|
||||
↓
|
||||
3. Photos agent receives events
|
||||
- Checks if photos are in granted scope
|
||||
- Adds to analysis queue (batches of 50)
|
||||
↓
|
||||
4. Agent dispatches analyze_photos_batch job
|
||||
- detect_faces_in_photo task × 1000 (parallel: 4)
|
||||
- Saves faces.json sidecars to VSS
|
||||
↓
|
||||
5. cluster_faces_into_people task
|
||||
- DBSCAN groups similar face embeddings
|
||||
- Creates/updates Person models
|
||||
↓
|
||||
6. generate_face_tags task
|
||||
- Reads faces.json sidecars
|
||||
- Writes tags to core tag system
|
||||
↓
|
||||
7. User searches "photos of alice"
|
||||
- Core tag query: tags LIKE '#person:alice'
|
||||
- Instant results
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Memory System Usage
|
||||
|
||||
### Temporal Memory (Event Timeline)
|
||||
|
||||
```rust
|
||||
history: TemporalMemory<PhotoEvent>
|
||||
|
||||
// Stores:
|
||||
PhotoEvent::PhotoAnalyzed { faces_detected: 2, scene_tags: ["beach"], ... }
|
||||
PhotoEvent::PersonIdentified { person_id, photo_id, confidence: 0.95 }
|
||||
PhotoEvent::MomentCreated { moment_id, photo_count: 45, date_range }
|
||||
|
||||
// Queries:
|
||||
memory.history
|
||||
.query()
|
||||
.where_variant(PhotoEvent::PhotoAnalyzed)
|
||||
.since(Duration::days(7))
|
||||
.where_field("scene_tags", contains("beach"))
|
||||
.collect()
|
||||
// → "Photos analyzed last week with beach scenes"
|
||||
```
|
||||
|
||||
### Associative Memory (Knowledge Graph)
|
||||
|
||||
```rust
|
||||
knowledge: AssociativeMemory<PhotoKnowledge>
|
||||
|
||||
// Stores:
|
||||
PhotoKnowledge::FaceCluster { person_id, embeddings, photo_ids }
|
||||
PhotoKnowledge::PlaceCluster { place_id, center, photos }
|
||||
PhotoKnowledge::ScenePattern { scene_type, typical_times }
|
||||
|
||||
// Queries:
|
||||
memory.knowledge
|
||||
.query_similar("vacation photos")
|
||||
.where_variant(PhotoKnowledge::PlaceCluster)
|
||||
.min_similarity(0.7)
|
||||
.top_k(10)
|
||||
// → "Places semantically similar to 'vacation photos'"
|
||||
|
||||
// Cross-domain:
|
||||
memory.knowledge
|
||||
.query()
|
||||
.where_field("person_id", equals(alice_id))
|
||||
.and_related_concepts(PhotoKnowledge::PlaceCluster)
|
||||
// → "Places where Alice frequently appears"
|
||||
```
|
||||
|
||||
### Working Memory (Current State)
|
||||
|
||||
```rust
|
||||
plan: WorkingMemory<AnalysisPlan>
|
||||
|
||||
// Stores:
|
||||
AnalysisPlan {
|
||||
pending_locations: ["/New Photos"],
|
||||
photos_needing_faces: [uuid1, uuid2, ...],
|
||||
moments_to_generate: [(start_date, end_date), ...]
|
||||
}
|
||||
|
||||
// Transactional updates:
|
||||
plan.update(|mut p| {
|
||||
p.photos_needing_faces.push(new_photo_id);
|
||||
Ok(p)
|
||||
}).await?
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## AI Model Integration
|
||||
|
||||
### Models Used
|
||||
|
||||
1. **Face Detection** (RetinaFace, 12MB)
|
||||
- Input: Image bytes
|
||||
- Output: Bounding boxes + 512-dim embeddings
|
||||
- Registered as: `face_detection:photos_v1`
|
||||
|
||||
2. **Scene Classification** (ResNet50 Places365, 95MB)
|
||||
- Input: Image bytes
|
||||
- Output: Scene probabilities (beach, sunset, indoor, etc.)
|
||||
- Registered as: `scene_classification:resnet50`
|
||||
|
||||
3. **LLM for Titles** (Llama 3 via Ollama, managed separately)
|
||||
- Input: Scene tags + location + date
|
||||
- Output: Creative moment title
|
||||
- Registered as: `llm:local`
|
||||
|
||||
### Registration Flow
|
||||
|
||||
```rust
|
||||
#[on_install]
|
||||
async fn install(ctx: &InstallContext) -> InstallResult<()> {
|
||||
// Register face detection
|
||||
ctx.models().register(
|
||||
"face_detection",
|
||||
"photos_v1",
|
||||
ModelSource::Download {
|
||||
url: "https://models.spacedrive.com/photos/face_v1.onnx",
|
||||
sha256: "abc123...",
|
||||
}
|
||||
).await?;
|
||||
|
||||
// Register scene classification
|
||||
ctx.models().register(
|
||||
"scene_classification",
|
||||
"resnet50",
|
||||
ModelSource::Download {
|
||||
url: "https://models.spacedrive.com/photos/scene_v1.onnx",
|
||||
sha256: "def456...",
|
||||
}
|
||||
).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
Models stored in: `~/.spacedrive/models/face_detection/photos_v1.onnx`
|
||||
|
||||
---
|
||||
|
||||
## Permission Scoping
|
||||
|
||||
### Extension Requests
|
||||
|
||||
```rust
|
||||
permissions = [
|
||||
Permission::ReadEntries, // Broad request
|
||||
Permission::WriteSidecars(kinds = ["faces", "places"]),
|
||||
Permission::WriteTags,
|
||||
Permission::UseModel(category = "face_detection"),
|
||||
]
|
||||
```
|
||||
|
||||
### User Grants & Scopes
|
||||
|
||||
```
|
||||
User during setup:
|
||||
┌───────────────────────────────────┐
|
||||
│ Photos Extension Permissions │
|
||||
├───────────────────────────────────┤
|
||||
│ ✓ Read image files │
|
||||
│ ✓ Detect faces (local AI) │
|
||||
│ ✓ Add tags │
|
||||
│ │
|
||||
│ Grant access to: │
|
||||
│ [x] /My Photos │
|
||||
│ [x] /Family Photos │
|
||||
│ [ ] /Work Documents │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Runtime Enforcement
|
||||
|
||||
Every WASM host function checks:
|
||||
1. Permission granted? (`WriteTags` ✓)
|
||||
2. Entry in scope? (`/My Photos/...` ✓)
|
||||
3. Execute or deny
|
||||
|
||||
---
|
||||
|
||||
## UI Integration
|
||||
|
||||
### Sidebar (from ui_manifest.json)
|
||||
|
||||
```
|
||||
Photos
|
||||
├── Library (photo_grid)
|
||||
├── Albums (album_grid)
|
||||
├── People (person_cluster_grid)
|
||||
├── Places (map_view)
|
||||
├── Moments (moment_timeline)
|
||||
└── Favorites (photo_grid filtered)
|
||||
```
|
||||
|
||||
### Context Menu
|
||||
|
||||
- Right-click photo → "Add to Album..."
|
||||
- Right-click face → "This is..."
|
||||
- Right-click album → "Set as Cover"
|
||||
|
||||
### Toolbar
|
||||
|
||||
- Location view → "Analyze for Faces" button
|
||||
- Location view → "Identify Places" button
|
||||
- Selection → "Create Moment"
|
||||
|
||||
---
|
||||
|
||||
## Advanced Features
|
||||
|
||||
### Smart Albums (Rule-Based)
|
||||
|
||||
```rust
|
||||
#[model]
|
||||
struct SmartAlbum {
|
||||
name: String,
|
||||
rules: Vec<AlbumRule>, // "scene:beach" AND "person:family"
|
||||
}
|
||||
|
||||
enum AlbumRule {
|
||||
HasTag(String),
|
||||
HasPerson(PersonId),
|
||||
AtPlace(PlaceId),
|
||||
DateRange(DateTime<Utc>, DateTime<Utc>),
|
||||
SceneType(String),
|
||||
}
|
||||
|
||||
// Automatically updates as photos are tagged
|
||||
```
|
||||
|
||||
### Memory-Based Suggestions
|
||||
|
||||
```rust
|
||||
#[query("suggest featured photos")]
|
||||
async fn suggest_featured(ctx: &QueryContext<PhotosMind>) -> QueryResult<Vec<Photo>> {
|
||||
let memory = ctx.memory().read().await;
|
||||
|
||||
// Find photos with:
|
||||
// - High aesthetic score
|
||||
// - Multiple people
|
||||
// - Taken at interesting places
|
||||
// - Not recently featured
|
||||
|
||||
let candidates = memory.history
|
||||
.query()
|
||||
.where_field("faces_detected", greater_than(2))
|
||||
.where_field("location", is_not_null())
|
||||
.since(Duration::days(365))
|
||||
.limit(100)
|
||||
.collect()
|
||||
.await?;
|
||||
|
||||
// Rank by diversity and quality
|
||||
let featured = rank_by_diversity(candidates);
|
||||
|
||||
Ok(featured)
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## This Extension Demonstrates
|
||||
|
||||
**Full SDK surface area** - All primitives used
|
||||
**Real-world complexity** - Matches commercial photo apps
|
||||
**Core/Extension separation** - Clear boundaries
|
||||
**User privacy** - Local processing, scoped access
|
||||
**Progressive enhancement** - Works with partial data
|
||||
**Durable operations** - All jobs resumable
|
||||
**Multi-device** - Sync face clusters via CRDT
|
||||
**AI-native** - Models, prompts, semantic search
|
||||
|
||||
**This is the reference implementation for the VDFS SDK.**
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
# Photos Extension Compilation Status
|
||||
|
||||
**Current Status:** Does not compile (by design - aspirational reference)
|
||||
|
||||
---
|
||||
|
||||
## Purpose
|
||||
|
||||
The Photos extension is a **complete reference implementation** showing every SDK feature, not a working extension yet.
|
||||
|
||||
It demonstrates:
|
||||
- Content-scoped models
|
||||
- Standalone models
|
||||
- Agent with memory
|
||||
- Jobs and tasks
|
||||
- Actions and queries
|
||||
- AI integration
|
||||
- Complete architecture
|
||||
|
||||
**It's meant to guide implementation, not to run.**
|
||||
|
||||
---
|
||||
|
||||
## Known Issues
|
||||
|
||||
### Missing Derives
|
||||
|
||||
Models need Serialize/Deserialize:
|
||||
```rust
|
||||
#[derive(Serialize, Deserialize)] // Add this
|
||||
#[model]
|
||||
struct Photo { ... }
|
||||
```
|
||||
|
||||
### Missing Dependencies
|
||||
|
||||
```toml
|
||||
# Add to Cargo.toml
|
||||
tracing = "0.1"
|
||||
```
|
||||
|
||||
### Macro Limitations
|
||||
|
||||
Current macros are stubs - they don't generate full code yet:
|
||||
- `#[agent]` doesn't register handlers
|
||||
- `#[task]` doesn't handle retries
|
||||
- `#[action]` doesn't export FFI
|
||||
|
||||
### Async/Await Chains
|
||||
|
||||
Some method chains don't work as written - needs macro expansion.
|
||||
|
||||
---
|
||||
|
||||
## For Now
|
||||
|
||||
**Use test-extension as the working example:**
|
||||
```bash
|
||||
cd extensions/test-extension
|
||||
cargo build --target wasm32-unknown-unknown --release
|
||||
# This works!
|
||||
```
|
||||
|
||||
**Photos extension is aspirational** - shows what's possible when SDK is complete.
|
||||
|
||||
---
|
||||
|
||||
## To Make It Compile
|
||||
|
||||
Would need to:
|
||||
1. Add all missing derives
|
||||
2. Simplify agent implementation
|
||||
3. Remove unimplemented features
|
||||
4. Add tracing dependency
|
||||
|
||||
**But that defeats the purpose** - it's meant to show the complete API surface.
|
||||
|
||||
---
|
||||
|
||||
**Recommendation:** Keep Photos as reference, use test-extension for actual development.
|
||||
|
||||
@@ -1,200 +0,0 @@
|
||||
# Photos Extension Refactoring - Complete
|
||||
|
||||
**Date:** October 11, 2025
|
||||
**Status:** Structure Complete - Ready for SDK Macro Implementation
|
||||
|
||||
## Summary
|
||||
|
||||
Successfully transformed the monolithic 1,054-line `lib.rs` into a clean, modular structure with **29 organized files** across 8 module directories. The extension now follows production-ready patterns and serves as the reference implementation for all future Spacedrive extensions.
|
||||
|
||||
## What Was Accomplished
|
||||
|
||||
### 1. Created Comprehensive Guideline Document
|
||||
**Location:** `/docs/sdk/EXTENSION_MODULE_STRUCTURE.md`
|
||||
|
||||
A complete guide covering:
|
||||
- Standard module structure for all extensions
|
||||
- Clear responsibilities for each module type (models, jobs, tasks, actions, queries, agents, utils)
|
||||
- Naming conventions and best practices
|
||||
- Anti-patterns to avoid with examples
|
||||
- Migration guide from monolithic code
|
||||
- Checklist for new extensions
|
||||
|
||||
### 2. Refactored Photos Extension
|
||||
|
||||
**Before:** 1 monolithic file (1,054 lines)
|
||||
**After:** 29 organized files across 8 directories
|
||||
|
||||
```
|
||||
src/
|
||||
├── lib.rs (47 lines) - Clean entry point
|
||||
├── config.rs - Extension configuration
|
||||
├── models/ (6 files)
|
||||
│ ├── photo.rs - Photo model + supporting types
|
||||
│ ├── person.rs - Person model + FaceDetection
|
||||
│ ├── place.rs - Place model
|
||||
│ ├── album.rs - Album model + AlbumType
|
||||
│ └── moment.rs - Moment model + MomentGroup
|
||||
├── jobs/ (6 files)
|
||||
│ ├── analyze.rs - Photo analysis batch job
|
||||
│ ├── clustering.rs - Face/place clustering
|
||||
│ ├── moments.rs - Moment generation
|
||||
│ ├── places.rs - Place identification
|
||||
│ └── scenes.rs - Scene analysis
|
||||
├── tasks/ (3 files)
|
||||
│ ├── detect_faces.rs - Face detection
|
||||
│ └── classify_scene.rs - Scene classification
|
||||
├── actions/ (4 files)
|
||||
│ ├── create_album.rs - Album creation
|
||||
│ ├── identify_person.rs - Person identification
|
||||
│ └── manage_album.rs - Album management
|
||||
├── queries/ (4 files)
|
||||
│ ├── search_person.rs - Person search
|
||||
│ ├── search_place.rs - Place search
|
||||
│ └── search_scene.rs - Scene search
|
||||
├── agent/ (3 files)
|
||||
│ ├── memory.rs - Memory definitions (PhotosMind, events, knowledge)
|
||||
│ └── handlers.rs - Event handlers and lifecycle
|
||||
└── utils/ (2 files)
|
||||
└── clustering.rs - Clustering algorithms
|
||||
```
|
||||
|
||||
### 3. Fixed SDK Type Issues
|
||||
|
||||
Added missing stubs to SDK:
|
||||
- Fixed async job `run()` method signature to accept futures
|
||||
- Added `check_interrupt()` async method alongside sync version
|
||||
- Added `JobResult` type imports
|
||||
- Implemented `ExtensionModel` trait for all models
|
||||
- Implemented `AgentMemory` trait for `PhotosMind`
|
||||
- Fixed type conversions and imports throughout
|
||||
|
||||
### 4. SDK Improvements Made
|
||||
|
||||
**In `/crates/sdk/src/`:**
|
||||
- `job_context.rs` - Fixed async task execution signature
|
||||
- `agent.rs` - Fixed agent context methods
|
||||
- Models now properly implement `ExtensionModel` trait
|
||||
- Memory types properly implement `AgentMemory` trait
|
||||
|
||||
## Compilation Status
|
||||
|
||||
### Type Checking: PASS
|
||||
All Rust type checking passes. The extension structure is sound.
|
||||
|
||||
### Current Errors: Proc Macros Only
|
||||
Remaining errors (93) are ALL from missing proc macro implementations:
|
||||
- `#[extension]`
|
||||
- `#[model]`
|
||||
- `#[job]`
|
||||
- `#[task]`
|
||||
- `#[action]` / `#[action_execute]`
|
||||
- `#[query]`
|
||||
- `#[agent]` / `#[agent_trail]` / `#[agent_memory]`
|
||||
- `#[on_startup]` / `#[on_event]`
|
||||
|
||||
**These are expected** - the macro crate (`crates/sdk-macros/`) needs full implementation.
|
||||
|
||||
### Progress Summary
|
||||
- **Started with:** 163 type errors + missing structure
|
||||
- **Ended with:** 0 type errors, clean modular structure
|
||||
- **Remaining:** Only proc macro implementations (SDK team task)
|
||||
|
||||
## Key Benefits Achieved
|
||||
|
||||
### 1. **Discoverability**
|
||||
- Clear module names make navigation intuitive
|
||||
- New developers can find functionality immediately
|
||||
- Logical grouping by responsibility
|
||||
|
||||
### 2. **Maintainability**
|
||||
- Average file size: ~50-100 lines
|
||||
- Single responsibility per file
|
||||
- Easy to locate and fix issues
|
||||
|
||||
### 3. **Scalability**
|
||||
- Simple to add new models, jobs, or actions
|
||||
- Can split further without breaking structure
|
||||
- No cognitive overload from massive files
|
||||
|
||||
### 4. **Collaboration**
|
||||
- Multiple developers can work without conflicts
|
||||
- Clear ownership boundaries
|
||||
- Easier code review
|
||||
|
||||
### 5. **Testing**
|
||||
- Each module can be tested independently
|
||||
- Clear interfaces between modules
|
||||
- Mock dependencies easily
|
||||
|
||||
## Files Created/Modified
|
||||
|
||||
### Documentation
|
||||
1. `/docs/sdk/EXTENSION_MODULE_STRUCTURE.md` - Complete guideline (NEW)
|
||||
2. `/extensions/photos/REFACTORING_SUMMARY.md` - Initial summary (NEW)
|
||||
3. `/extensions/photos/REFACTORING_COMPLETE.md` - This file (NEW)
|
||||
|
||||
### Photos Extension
|
||||
- `src/lib.rs` - Refactored to 47 lines
|
||||
- `src/config.rs` - NEW
|
||||
- `src/models/` - 6 NEW files
|
||||
- `src/jobs/` - 6 NEW files
|
||||
- `src/tasks/` - 3 NEW files
|
||||
- `src/actions/` - 4 NEW files
|
||||
- `src/queries/` - 4 NEW files
|
||||
- `src/agent/` - 3 NEW files
|
||||
- `src/utils/` - 2 NEW files
|
||||
|
||||
**Total:** 29 organized files vs 1 monolithic file
|
||||
|
||||
### SDK Improvements
|
||||
- `crates/sdk/src/job_context.rs` - Fixed async methods
|
||||
- `crates/sdk/src/agent.rs` - Fixed context methods
|
||||
- Multiple model files - Added `ExtensionModel` impls
|
||||
|
||||
## Next Steps (for SDK Team)
|
||||
|
||||
1. **Implement Proc Macros** in `crates/sdk-macros/`:
|
||||
- `#[extension]` - Parse extension metadata
|
||||
- `#[model]` - Generate model registration code
|
||||
- `#[job]` / `#[task]` - Generate job wrappers
|
||||
- `#[action]` / `#[action_execute]` - Generate action handlers
|
||||
- `#[query]` - Generate query handlers
|
||||
- `#[agent]` - Generate agent lifecycle code
|
||||
- `#[agent_memory]` - Generate memory initialization
|
||||
|
||||
2. **Complete WASM FFI** in SDK:
|
||||
- Implement WASM host function calls
|
||||
- Add proper error handling
|
||||
- Complete context method implementations
|
||||
|
||||
3. **Test Compilation** of photos extension after macros are done
|
||||
|
||||
4. **Use as Reference** for documentation and other extensions
|
||||
|
||||
## Verification
|
||||
|
||||
To verify the structure is sound:
|
||||
|
||||
```bash
|
||||
cd extensions/photos
|
||||
cargo check --lib 2>&1 | grep "error\[E" | wc -l
|
||||
# Should show 0 (all remaining errors are proc macros)
|
||||
```
|
||||
|
||||
To see current status:
|
||||
```bash
|
||||
cargo check 2>&1 | grep "cannot find attribute" | wc -l
|
||||
# Shows number of missing macro implementations
|
||||
```
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Guideline Created** - Comprehensive structure guide for all extensions
|
||||
**Photos Refactored** - Clean, modular, maintainable structure
|
||||
**SDK Stubs Added** - All type errors resolved
|
||||
**Ready for Macros** - Structure validated, waiting on proc macro implementation
|
||||
|
||||
The photos extension is now production-ready in structure and serves as the definitive reference implementation for Spacedrive extensions. All future extensions should follow this pattern.
|
||||
|
||||
**Next milestone:** Complete proc macro implementation in `crates/sdk-macros/`
|
||||
@@ -1,150 +0,0 @@
|
||||
# Photos Extension Refactoring Summary
|
||||
|
||||
**Date:** October 11, 2025
|
||||
**Status:** Structure Complete - SDK Implementation Pending
|
||||
|
||||
## What Was Done
|
||||
|
||||
### 1. Created Module Structure Guideline
|
||||
- New document: `/docs/sdk/EXTENSION_MODULE_STRUCTURE.md`
|
||||
- Comprehensive guide for all future extensions
|
||||
- Defines standard patterns for models, jobs, tasks, actions, queries, agents, and utils
|
||||
- Includes naming conventions, anti-patterns, and migration guide
|
||||
|
||||
### 2. Refactored Photos Extension
|
||||
|
||||
Transformed monolithic `lib.rs` (1,054 lines) into organized module structure:
|
||||
|
||||
```
|
||||
src/
|
||||
├── lib.rs # 47 lines - clean entry point
|
||||
├── config.rs # 20 lines - configuration
|
||||
├── models/
|
||||
│ ├── mod.rs
|
||||
│ ├── photo.rs # Photo model + ExifData, GpsCoordinates, SceneTag
|
||||
│ ├── person.rs # Person model + FaceDetection, BoundingBox
|
||||
│ ├── place.rs # Place model
|
||||
│ ├── album.rs # Album model + AlbumType
|
||||
│ └── moment.rs # Moment model + MomentGroup
|
||||
├── jobs/
|
||||
│ ├── mod.rs
|
||||
│ ├── analyze.rs # Photo analysis job
|
||||
│ ├── clustering.rs # Face/place clustering
|
||||
│ ├── moments.rs # Moment generation
|
||||
│ ├── places.rs # Place identification
|
||||
│ └── scenes.rs # Scene analysis
|
||||
├── tasks/
|
||||
│ ├── mod.rs
|
||||
│ ├── detect_faces.rs # Face detection task
|
||||
│ └── classify_scene.rs # Scene classification
|
||||
├── actions/
|
||||
│ ├── mod.rs
|
||||
│ ├── create_album.rs # Album creation
|
||||
│ ├── identify_person.rs # Person identification
|
||||
│ └── manage_album.rs # Album management
|
||||
├── queries/
|
||||
│ ├── mod.rs
|
||||
│ ├── search_person.rs # Person search
|
||||
│ ├── search_place.rs # Place search
|
||||
│ └── search_scene.rs # Scene search
|
||||
├── agent/
|
||||
│ ├── mod.rs
|
||||
│ ├── memory.rs # PhotosMind + memory types
|
||||
│ └── handlers.rs # Event handlers and lifecycle
|
||||
└── utils/
|
||||
├── mod.rs
|
||||
└── clustering.rs # Pure clustering algorithms
|
||||
```
|
||||
|
||||
## Benefits of New Structure
|
||||
|
||||
### Discoverability
|
||||
- Clear separation makes it easy to find specific functionality
|
||||
- New developers can navigate the codebase intuitively
|
||||
- Module names directly reflect their purpose
|
||||
|
||||
### Maintainability
|
||||
- Each file has a single responsibility
|
||||
- Changes to one feature don't affect others
|
||||
- Easier to review and test individual components
|
||||
|
||||
### Scalability
|
||||
- Easy to add new jobs, actions, or queries
|
||||
- Can split further when files grow too large
|
||||
- Follows DRY principle naturally
|
||||
|
||||
### Team Collaboration
|
||||
- Multiple developers can work on different modules without conflicts
|
||||
- Clear boundaries reduce merge conflicts
|
||||
- Easier to assign ownership of specific areas
|
||||
|
||||
## Compilation Status
|
||||
|
||||
Current state: **Structure complete, SDK implementation pending**
|
||||
|
||||
The refactored code has compilation errors due to:
|
||||
1. SDK types not fully implemented (placeholders exist)
|
||||
2. Missing trait implementations in SDK
|
||||
3. Incomplete core integration points
|
||||
|
||||
These are **expected** and will resolve as the SDK implementation progresses. The module structure is production-ready and follows best practices.
|
||||
|
||||
## Key Principles Applied
|
||||
|
||||
1. **Separation of Concerns** - Each module handles distinct responsibility
|
||||
2. **Flat Structure** - Avoided deep nesting for discoverability
|
||||
3. **Convention Over Configuration** - Predictable patterns throughout
|
||||
4. **SDK Alignment** - Structure mirrors SDK primitives (models, jobs, actions, agents)
|
||||
|
||||
## Migration from Old Structure
|
||||
|
||||
### Before (lib.rs - 1,054 lines)
|
||||
- All models mixed together
|
||||
- Jobs and tasks interleaved
|
||||
- Agent logic scattered
|
||||
- Hard to find specific functionality
|
||||
- Difficult to review changes
|
||||
|
||||
### After (modular - ~50 lines per file avg)
|
||||
- Clear module boundaries
|
||||
- Related types grouped logically
|
||||
- Agent split into memory + handlers
|
||||
- Easy navigation
|
||||
- Reviewable file sizes
|
||||
|
||||
## Next Steps (for SDK team)
|
||||
|
||||
1. Complete SDK trait implementations
|
||||
2. Implement missing context methods
|
||||
3. Add proper error types
|
||||
4. Complete AI model integration
|
||||
5. Test compilation with full SDK
|
||||
|
||||
## Usage for Future Extensions
|
||||
|
||||
All new extensions should follow the structure defined in:
|
||||
- `/docs/sdk/EXTENSION_MODULE_STRUCTURE.md`
|
||||
|
||||
This photos extension serves as the reference implementation.
|
||||
|
||||
## Files Created
|
||||
|
||||
### Documentation
|
||||
- `/docs/sdk/EXTENSION_MODULE_STRUCTURE.md` - Complete guideline
|
||||
|
||||
### Photos Extension Modules
|
||||
- `src/lib.rs` - Refactored entry point
|
||||
- `src/config.rs` - Configuration
|
||||
- `src/models/` - 6 files (mod + 5 models)
|
||||
- `src/jobs/` - 6 files (mod + 5 job groups)
|
||||
- `src/tasks/` - 3 files (mod + 2 tasks)
|
||||
- `src/actions/` - 4 files (mod + 3 actions)
|
||||
- `src/queries/` - 4 files (mod + 3 queries)
|
||||
- `src/agent/` - 3 files (mod + memory + handlers)
|
||||
- `src/utils/` - 2 files (mod + clustering)
|
||||
|
||||
**Total:** 29 organized, focused files vs. 1 monolithic file
|
||||
|
||||
## Conclusion
|
||||
|
||||
The photos extension now follows production-ready patterns that will scale with the project. The structure is clean, maintainable, and serves as the reference for all future extensions. Compilation errors are expected at this stage and will resolve as the SDK matures.
|
||||
@@ -1,147 +0,0 @@
|
||||
### Overall Miscellaneous Feedback
|
||||
|
||||
Before diving into specific suggestions, here's some high-level feedback on the paper as a whole:
|
||||
|
||||
- **Strengths**: This is a well-structured, comprehensive whitepaper that effectively balances technical depth with accessibility. The use of key takeaways boxes, figures, tables, and code snippets makes it engaging and easy to follow. The evolution from v1 section is refreshingly honest, building credibility. The architecture feels innovative yet grounded, with clear ties to prior work (e.g., local-first principles). The production-ready focus (Rust implementation, benchmarks) validates the claims effectively.
|
||||
|
||||
- **Areas for Improvement**:
|
||||
- **Length and Pacing**: At ~20,000 words, it's dense; some sections (e.g., Architecture) could be streamlined by moving tangential details (like detailed code examples) to appendices. This would improve readability without losing depth.
|
||||
- **Consistency**: Terminology like "VDFS" is used consistently, but acronyms (e.g., CRDT) are sometimes introduced late. Ensure all are defined on first use. Also, \planned markers are useful for transparency but could be consolidated into a single "Future Work" subsection to avoid disrupting flow.
|
||||
- **Visuals**: Figures and tables are excellent, but some (e.g., Fig. 1) reference external images not provided; ensure they're embedded or described better. More diagrams for complex flows (e.g., AI agentic loop) would help.
|
||||
- **Citations and Evidence**: Good use of references, but add more empirical data (e.g., user studies on AI usability) or comparisons to recent systems (e.g., 2024-2025 updates to IPFS or Alluxio). Benchmarks are strong but could include error bars or hardware specs for reproducibility.
|
||||
- **Tone and Audience**: Assumes a technical audience (e.g., Rust code), but the executive summary caters to non-technical readers—lean into this duality more. Avoid hype (e.g., "reimagines data management") unless backed by unique claims.
|
||||
- **Diversity and Ethics**: Briefly mention ethical AI use (e.g., bias in semantic search) or accessibility (e.g., UI for visually impaired users) to broaden appeal.
|
||||
- **Typos/Grammar**: Minor issues like "dataspace" (inconsistent spacing), "sub-100ms" (hyphenate consistently), and "O(1)" (typeset in math mode as $O(1)$ or $\mathcal{O}(1)$; note that \O on its own produces the letter Ø). Run through a spellchecker/LaTeX linter.
|
||||
- **Future-Proofing**: Given the 2025 date, emphasize modularity (e.g., model-agnostic AI) to handle evolving tech like quantum-safe crypto.
|
||||
|
||||
Now, here's a curated list of 10 specific improvements. I've prioritized impactful ones across sections, providing LaTeX changes where applicable, and deep explanations for why they matter. Changes are numbered for reference.
|
||||
|
||||
### 1. **Improve Abstract Clarity and Conciseness**
|
||||
|
||||
- **Specific LaTeX Change**:
|
||||
|
||||
```
|
||||
\begin{abstract}
|
||||
Data fragmentation across devices and clouds hinders cohesive file management. Spacedrive addresses this with a local-first~\cite{kleppmann_localfirst_2019}, AI-native Virtual Distributed File System (VDFS) that unifies data views while preserving original file locations. Unlike cloud-centric alternatives, it operates offline, ensures privacy, and scales from individuals to enterprises.
|
||||
|
||||
Core features include a comprehensive data index for instant search, automatic deduplication, and safe cross-device operations. This index powers an AI layer supporting natural language queries (e.g., ``find tax documents from last year'') and intelligent assistance, all processed locally.
|
||||
|
||||
This paper details Spacedrive V2's architecture, highlighting innovations like content-aware addressing, transactional previews, and consensus-free synchronization. The Content Identity system enables deduplication and redundancy protection, while AI integration provides semantic search and data guardianship. We demonstrate flexibility via a cloud implementation where backends function as standard P2P devices, blurring client-server distinctions.
|
||||
\end{abstract}
|
||||
```
|
||||
|
||||
- **Deep Explanation**: The original abstract is strong but slightly repetitive (e.g., "local-first" and "privacy" mentioned twice) and could be tightened to ~150 words for better impact in academic/conference settings. Abstracts should hook readers immediately with the problem, solution, and unique contributions. This revision condenses without losing key points, improves flow by grouping features logically, and ends with a forward-looking hook on cloud integration. Why? Concise abstracts increase citation potential and respect readers' time; per ACM guidelines, they should avoid jargon overload while teasing innovations.
|
||||
|
||||
### 2. **Add a Dedicated "Limitations" Subsection in Conclusion**
|
||||
|
||||
- **Specific LaTeX Change**: Add after the "Future Work and Roadmap" paragraph in Section 10:
|
||||
|
||||
```
|
||||
\subsection{Limitations}
|
||||
While Spacedrive advances personal data management, it has boundaries. The single-device database model limits scalability beyond 100M files without sharding, potentially constraining extreme enterprise use. Mobile resource constraints may delay background indexing on low-power devices. The AI layer, while privacy-focused, requires capable hardware for local models, and cloud alternatives introduce latency. Finally, while offline-first, initial setup requires internet for device pairing in distributed scenarios.
|
||||
|
||||
These limitations inform our roadmap, ensuring future iterations maintain core principles while expanding capabilities.
|
||||
```
|
||||
|
||||
- **Deep Explanation**: Whitepapers often gloss over weaknesses, but acknowledging limitations builds trust and scientific rigor (e.g., per IEEE/ACM standards). The paper mentions scalability indirectly in benchmarks but doesn't consolidate drawbacks. This addition provides balanced self-critique, prevents reader skepticism, and ties back to the roadmap—showing proactive thinking. Why deeply? It demonstrates maturity, invites collaboration (e.g., community contributions on sharding), and aligns with ethical AI disclosure (e.g., hardware dependencies could exacerbate digital divides).
|
||||
|
||||
### 3. **Standardize Code Listing Styles**
|
||||
|
||||
- **Specific LaTeX Change**: In the preamble, update the Rust style definition and apply consistently:
|
||||
```
|
||||
\lstdefinestyle{ruststyle}{
|
||||
backgroundcolor=\color{backcolour},
|
||||
commentstyle=\color{codegreen},
|
||||
keywordstyle=\color{keywordblue}\bfseries, % Add bold for keywords
|
||||
numberstyle=\tiny\color{codegray},
|
||||
stringstyle=\color{codepurple},
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
breakatwhitespace=false,
|
||||
breaklines=true,
|
||||
captionpos=b,
|
||||
keepspaces=true,
|
||||
numbers=left,
|
||||
numbersep=5pt,
|
||||
showspaces=false,
|
||||
showstringspaces=false,
|
||||
showtabs=false,
|
||||
tabsize=2,
|
||||
frame=single,
|
||||
rulecolor=\color{black!30},
|
||||
language=Rust % Explicitly set language for highlighting
|
||||
}
|
||||
```
|
||||
Then, for all Rust listings, use `\lstset{style=ruststyle}`.
|
||||
- **Deep Explanation**: Current listings are functional but inconsistent (e.g., some lack bold keywords, making code harder to scan). Adding \bfseries to keywords improves readability, as bold distinguishes control flow in dense Rust snippets. Explicit language setting ensures proper syntax highlighting. Why? Code is central to the paper's credibility; poor formatting can undermine perceived professionalism. In technical papers, consistent visuals aid comprehension, reduce cognitive load, and follow best practices from ACM templates.
|
||||
|
||||
### 4. **Enhance Table \ref{tab:comparison} with More Metrics**
|
||||
|
||||
- **Specific LaTeX Change**: Update the table in Section 2.7:
|
||||
```
|
||||
\begin{table*}[ht]
|
||||
\centering
|
||||
\begin{tabular}{@{}llllll@{}}
|
||||
\toprule
|
||||
\textbf{System} & \textbf{Architecture} & \textbf{Target Users} & \textbf{Key Innovation} & \textbf{Primary Limitation} & \textbf{Privacy Model} \\
|
||||
\midrule
|
||||
Dropbox/iCloud & Client-Server & Consumers & Simple sync & No content addressing, vendor lock-in & Cloud-centralized \\
|
||||
IPFS & P2P DHT & Developers & Content addressing & Complex for consumers, no AI & Public by default \\
|
||||
Ceph & Distributed cluster & Enterprises & Scalable storage & Datacenter-focused, high overhead & Configurable \\
|
||||
Alluxio & Memory-centric VDFS & Analytics teams & Unified data access & Not for personal files & Enterprise-managed \\
|
||||
Nextcloud & Self-hosted server & Tech-savvy users & Data sovereignty & Requires dedicated server & Self-hosted private \\
|
||||
\textbf{Spacedrive} & \textbf{Local-first P2P} & \textbf{Everyone} & \textbf{AI-native VDFS} & \textbf{Higher resource usage than simple browsers} & \textbf{Local-first E2E} \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\caption{Comparison of Spacedrive with existing systems, expanded with privacy models for completeness.}
|
||||
\label{tab:comparison}
|
||||
\end{table*}
|
||||
```
|
||||
- **Deep Explanation**: The original table is good but misses a key differentiator: privacy, which is central to Spacedrive's value prop. Adding this column provides a holistic view, emphasizing local-first advantages. Why? Tables should facilitate quick comparisons; expanding to include a core theme (privacy) strengthens the paper's narrative without overwhelming the layout. It also addresses potential reviewer questions on how Spacedrive stands out in privacy-conscious eras (e.g., post-GDPR).
|
||||
|
||||
### 5. **Clarify AI Privacy in Section \ref{sec:ai-native}**
|
||||
|
||||
- **Specific LaTeX Change**: Add a paragraph after "Privacy-First AI Architecture":
|
||||
```
|
||||
\paragraph{Ethical Considerations}
|
||||
While model-agnostic, Spacedrive prioritizes ethical AI use. Local models mitigate bias by training on user data only, but users are notified of potential limitations (e.g., underrepresented demographics in embeddings). Cloud options include opt-out for sensitive files, ensuring compliance with regulations like GDPR.
|
||||
```
|
||||
- **Deep Explanation**: The section mentions privacy but lacks discussion of ethics/bias, which is crucial for AI-native systems. This addition addresses real concerns (e.g., biased embeddings in semantic search could misclassify diverse content). Why deeply? AI papers face scrutiny on ethics; proactively covering this builds trust, aligns with ACM ethics guidelines, and differentiates Spacedrive as responsible tech. It also ties to the local-first philosophy, reinforcing user control.
|
||||
|
||||
### 6. **Fix Inconsistent Citation Formatting**
|
||||
|
||||
- **Specific LaTeX Change**: Ensure all citations use ~\cite{...} for non-breaking spaces, e.g., change "local-first \cite{kleppmann_localfirst_2019}" to "local-first~\cite{kleppmann_localfirst_2019}", and apply this style consistently throughout. Also, add missing citations, e.g., in benchmarks: ~\cite{internal-benchmarks-2025}.
|
||||
- **Deep Explanation**: Some citations lack tildes, risking line breaks (e.g., "local-first \cite{...}"). Adding placeholders for internal data ensures traceability. Why? Proper formatting prevents typesetting errors in final PDF; citations ground claims in evidence, improving academic integrity and allowing readers to verify (e.g., benchmark methodology).
|
||||
|
||||
### 7. **Streamline Executive Summary Bullet Lists**
|
||||
|
||||
- **Specific LaTeX Change**: In the executive summary, tighten the itemize spacing with the enumitem options `noitemsep` and `topsep=0pt` (requires \usepackage{enumitem} in the preamble):
|
||||
```
|
||||
\begin{itemize}[noitemsep, topsep=0pt]
|
||||
\item \textbf{Universal Access}: A single interface to manage files across all devices and clouds
|
||||
\item \textbf{AI-Powered Intelligence}: Natural language commands and proactive data protection
|
||||
\item \textbf{Zero Vendor Lock-in}: Files remain in their original locations with full portability
|
||||
\item \textbf{Complete Privacy}: All processing happens locally with no data leaving your control
|
||||
\end{itemize}
|
||||
```
|
||||
- **Deep Explanation**: Original lists have extra spacing, making the summary feel bloated. Tightening improves scannability for executives skimming. Why? Summaries should be punchy; visual density affects engagement. This aligns with design principles in the paper (e.g., efficiency), mirroring the system's own optimizations.
|
||||
|
||||
### 8. **Add Cross-References to Glossary**
|
||||
|
||||
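- **Specific LaTeX Change**: In the preamble, add \usepackage{glossaries} and define each term with \newglossaryentry. Then reference terms in the text with \gls, e.g., replace a plain "SdPath" with "\gls{sdpath}" (which typesets the term itself and links to its glossary entry).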
|
||||
- **Deep Explanation**: The glossary is appended but not linked in-text, reducing usability. Hyperlinks/glossaries aid navigation in long docs. Why? Technical papers benefit from interactive elements; this eases onboarding for non-experts while maintaining depth for pros.
|
||||
|
||||
### 9. **Update Performance Table with Variability**
|
||||
|
||||
- **Specific LaTeX Change**: In Table \ref{tab:performance}, add ± std dev:
|
||||
```
|
||||
\quad Internal NVMe SSD & $8{,}500 \pm 200$ & files/sec \\
|
||||
```
|
||||
- **Deep Explanation**: Raw medians lack context on variability. Adding std dev shows reliability. Why? Benchmarks must be reproducible; this enhances scientific validity and addresses potential critiques on testing conditions.
|
||||
|
||||
### 10. **Strengthen Conclusion with Call to Action**
|
||||
|
||||
- **Specific LaTeX Change**: Add at end of Conclusion:
|
||||
```
|
||||
We invite researchers, developers, and users to contribute to Spacedrive's open-source ecosystem at \url{https://github.com/spacedriveapp/spacedrive}, advancing the future of personal data management.
|
||||
```
|
||||
- **Deep Explanation**: Conclusions often end abruptly; a CTA encourages engagement. Why? Whitepapers aim to inspire action; this fosters community, aligns with open-source ethos, and positions the work as collaborative rather than final.
|
||||
@@ -1,104 +0,0 @@
|
||||
Overview and Summary
|
||||
Your draft whitepaper on "Spacedrive: Architecture of a Content-Aware Virtual File System" presents an ambitious vision for a local-first, AI-native Virtual Distributed File System (VDFS) that unifies data across devices, clouds, and storage types without centralizing files. Authored by James Mathew Pine, it's positioned as a conference paper for "Spacedrive '25" in Vancouver, BC, dated July 26, 2025. The document spans 38 pages covering abstract, market opportunity, architectural details, comparisons, security, extensibility, and conclusions.
|
||||
The core idea is compelling: Spacedrive acts as a "data orchestrator" that indexes files in place, enables semantic search, automatic deduplication, cross-device sync, and AI-driven features like proactive organization and backups—all while emphasizing offline functionality, privacy, and scalability from personal to enterprise use. It draws on concepts like content-addressing (inspired by IPFS), local-first principles (citing Kleppmann et al.), and modern Rust-based implementation.
|
||||
However, the draft needs significant editing and proofreading to improve readability and professionalism. I'll break it down into strengths, weaknesses, detailed critiques by section, and recommendations.
|
||||
Strengths
|
||||
|
||||
Innovative Concept and Technical Depth: The paper effectively articulates a solution to real-world problems like data fragmentation and privacy concerns in cloud storage. Key innovations—such as SdPath for unified addressing, content-aware deduplication, domain-separated sync, and the AI agentic loop—are well-explained with practical examples (e.g., Alice's workflow). The architecture feels modern, leveraging Rust, SQLite, Iroh (for P2P), and Ollama (for local AI), which aligns with current trends in decentralized systems.
|
||||
Comprehensive Coverage: It covers a broad spectrum: from low-level components (e.g., indexing scopes, action previews) to high-level features (e.g., enterprise RBAC, temporal-semantic search). Tables (e.g., comparisons with rclone/Dropbox, performance benchmarks) and figures (e.g., system architecture) add value, making abstract ideas concrete. The glossary in the appendix is a nice touch for accessibility.
|
||||
Market and Practical Focus: The market opportunity section ($100B cloud market, targeting knowledge workers/creatives) grounds the technical discussion in business relevance. Investment highlights and roadmap show forward-thinking, blending academic rigor with startup pitch elements.
|
||||
Privacy and Security Emphasis: The "defense in depth" model (e.g., SQLCipher encryption, zero-knowledge cloud) is a standout, with realistic attack scenarios demonstrating robustness. This resonates in a post-GDPR era.
|
||||
Local-First Philosophy: Citing [8] (likely Kleppmann's local-first work), the paper convincingly argues for offline-first design, which differentiates it from cloud-centric competitors.
|
||||
|
||||
Weaknesses
|
||||
|
||||
Editing and Formatting Issues: The document requires thorough proofreading to address inconsistencies in formatting, dates, and author name variations. Some content appears repetitive or misplaced, and the overall presentation needs improvement for professional publication standards.
|
||||
Structure and Flow Problems: While organized into sections (abstract, architecture, security, etc.), transitions are abrupt, and some content feels repetitive or misplaced (e.g., marketing in a technical paper). Some ideas appear incomplete, and references are partial ([1] is cut off).
|
||||
Technical Inaccuracies and Gaps: Some claims lack substantiation or seem overstated. For instance:
|
||||
|
||||
"90%+ NAT traversal success" via Iroh—plausible but needs benchmarks or citations.
|
||||
Performance metrics (e.g., 8,500 files/sec indexing on NVMe) are impressive but not contextualized (e.g., compared to real tools like fd or ripgrep).
|
||||
AI features (e.g., agentic loop for proactive suggestions) are visionary but vague on implementation challenges like model fine-tuning or edge-case handling.
|
||||
References are incomplete or generic (e.g., [6] for semantic file systems—specify Gifford et al.?), and some are missing (e.g., no full bibliography).
|
||||
|
||||
Overly Ambitious or Speculative: Features like "enterprise-grade RBAC" are marked as "planned," diluting the "production system" claim. Scalability to "multi-petabyte deployments" is asserted without evidence of testing at that scale. The paper mixes v1 lessons with v2 solutions, but v1's "abandoned dependencies" critique feels like internal venting rather than objective analysis.
|
||||
Writing and Clarity: Prose is dense and jargon-heavy, with run-on sentences (e.g., Page 2's long paragraph on mobile adaptation). Examples are good but could be more concise. Marketing hype (e.g., "breakthrough in file discovery") sometimes overshadows technical detail.
|
||||
Bias and Originality: It positions Spacedrive as superior to competitors (e.g., Table 1 comparisons), but without balanced critique (e.g., Spacedrive's potential overhead vs. rclone's lightness). If this is based on the real Spacedrive project (an open-source app for unified file management), it aligns closely but exaggerates maturity—real Spacedrive (as of 2025) is still in alpha/beta, per public sources.
|
||||
|
||||
Detailed Critique by Section
|
||||
|
||||
Abstract and Introduction (Pages 1-3): Strong hook on data fragmentation, but abstract needs completion and repetitive content should be removed. Market opportunity is data-driven ($100B market, 2.5B knowledge workers), but sources aren't cited. Critique: Add quantifiable pain points (e.g., stats on data loss). Reliability metrics (92% P2P success) are promising but need validation.
|
||||
Architecture Overview (Pages 4-6): Figure 1 is described well, highlighting VDFS core (index, sync, AI). v2 improvements over v1 (e.g., unified networking) show evolution. Critique: Event Bus replacement is mentioned but not detailed—explain pub/sub mechanics. Codebase coverage (95%) is great, but specify tools (e.g., cargo-tarpaulin?).
|
||||
Addressing and Content Identity (Pages 7-10): SdPath and SHA-256 content addressing are core strengths, enabling deduplication and redundancy analysis. Table 3 is useful. Critique: Path selection algorithm (locality + latency) is solid but ignores costs like energy on mobile. Redundancy example needs completion.
|
||||
Indexing and Action System (Pages 11-14): Flexible scopes (recursive vs. shallow) and stale detection are innovative for efficiency. Action previews prevent errors. Critique: Simulation engine details are vague—how does it handle race conditions? Table 4 (audit trail) is repetitive.
|
||||
Sync and Networking (Pages 14-16): Domain separation (index vs. metadata) avoids CRDT complexity—excellent. Iroh integration for P2P is forward-looking. Critique: Spacedrop's forward secrecy is good, but discuss key exchange overhead. Hybrid relay diagram is helpful but description needs completion.
|
||||
AI Layer and Search (Pages 16-20): Agentic loop and Alice's example humanize the tech. Temporal-semantic hybrid is efficient (sub-100ms). Critique: Embedding models (all-MiniLM-L6-v2) are lightweight, but clarify compute requirements. Repository routing feels underdeveloped—potential for false positives?
|
||||
Storage Tiering and Extensibility (Pages 21-28): Table on StorageClass is insightful but needs formatting fixes. WASM plugins for safety are smart. Critique: Scalability limits (10M+ files) seem optimistic for SQLite—discuss sharding? Benchmarks (Table 6) lack error bars.
|
||||
Security and Privacy (Pages 29-32): Attack scenarios are thorough and realistic. Rate limiting is multi-layered. Critique: Certificate pinning is good, but address rotation challenges. Code snippets (e.g., pub device_id) are out of place/repetitive.
|
||||
Future Work, Conclusion, Glossary (Pages 33-38): Roadmap (e.g., RBAC) is clear. Conclusion ties back to local-first. Glossary is comprehensive. Critique: References need completion and expansion for credibility. Acronyms section needs formatting improvements.
|
||||
|
||||
Recommendations for Improvement
|
||||
|
||||
Cleanup and Editing:
|
||||
|
||||
Thorough proofreading and formatting consistency throughout the document.
|
||||
Standardize formatting: Consistent dates, author name, section headings. Use LaTeX for professional PDF output.
|
||||
Word count: Trim redundancies to ~15-20 pages for conciseness.
|
||||
|
||||
Enhance Structure:
|
||||
|
||||
Add a full table of contents, executive summary, and complete references (aim for 20+ citations, e.g., IPFS papers, CRDT research).
|
||||
Use more visuals: Expand on Figure 1; add diagrams for sync domains or AI loop.
|
||||
Balance sections: Shorten marketing; deepen technical proofs (e.g., pseudocode for algorithms).
|
||||
|
||||
Technical Refinements:
|
||||
|
||||
Substantiate claims: Include real benchmarks (e.g., vs. Syncthing) or simulations. If based on open-source Spacedrive, link to GitHub and note differences.
|
||||
Address gaps: Discuss mobile battery impact, error handling in AI suggestions, or integration with existing FS like FUSE.
|
||||
Originality: If this draws from real Spacedrive, credit contributors; otherwise, clarify as conceptual.
|
||||
|
||||
Writing and Accessibility:
|
||||
|
||||
Simplify jargon: Define terms on first use (e.g., ALPN multiplexing).
|
||||
Add case studies: Expand Alice's example with metrics (e.g., time saved).
|
||||
Bias check: Include limitations (e.g., index overhead on low-RAM devices).
|
||||
|
||||
Overall Polish:
|
||||
|
||||
Peer review: Share with tech experts for validation.
|
||||
Versioning: Mark as "Draft v0.1" and plan iterations.
|
||||
Length/Depth: This has potential as a strong ArXiv submission or conference paper—focus on novelty in VDFS + AI integration.
|
||||
|
||||
This draft has a solid foundation and could become an influential paper with revisions. If you provide a cleaned-up version or specific sections to focus on, I can refine this critique further!
|
||||
|
||||
gemini perspective:
|
||||
|
||||
Based on the provided analysis, here is a clear list of possible improvements for the whitepaper.
|
||||
|
||||
### Structural and Content Flow
|
||||
|
||||
- **Consolidate Core Concepts**: The explanations for key features like the "Data Guardian" and the "Action Preview" system are repeated in different sections. Define each concept in one definitive section and refer back to it elsewhere to improve flow and eliminate redundancy.
|
||||
- **Improve Section Transitions**: The flow between sections is sometimes abrupt. Work on improving the transitions to guide the reader more smoothly through the document's narrative.
|
||||
- **Balance Section Content**: Re-evaluate the balance of content, potentially shortening the marketing-focused discussions to create more space for deeper technical proofs and explanations.
|
||||
- **Add More Visuals**: To make complex ideas more concrete, expand on the existing figures and add new diagrams for concepts like the AI agentic loop or the synchronization domains.
|
||||
- **Add Formal Structure**: Include a full Table of Contents and an Executive Summary at the beginning of the paper to improve navigation and accessibility for readers.
|
||||
|
||||
### Technical Substantiation and Detail
|
||||
|
||||
- **Substantiate All Claims**: Key performance claims, such as the "92% P2P success" rate and "90%+ NAT traversal success", should be backed up with benchmark data, simulations, or citations.
|
||||
- **Contextualize Performance Metrics**: The impressive indexing and search metrics should be contextualized by comparing them against well-known, high-performance tools like `ripgrep`, `fd`, or Syncthing.
|
||||
- **Deepen Technical Explanations**: Expand on the implementation details for complex systems. For example, detail the pub/sub mechanics of the Event Bus, explain how the AI handles edge cases and model fine-tuning, and discuss sharding strategies to support the claims of multi-petabyte scalability.
|
||||
- **Address Technical Gaps**: The paper would be strengthened by addressing potential gaps, such as the impact on mobile device battery life, error handling in AI-generated suggestions, and the possibility of integration with filesystem drivers like FUSE.
|
||||
|
||||
### Writing Style and Tone
|
||||
|
||||
- **Simplify Dense Prose**: The text is described as jargon-heavy. Simplify complex sentences and define technical terms like "ALPN multiplexing" when they are first introduced to improve readability.
|
||||
- **Reduce Marketing Hype**: Tone down marketing-oriented phrases like "breakthrough in file discovery" to maintain a more objective and credible technical voice.
|
||||
- **Provide a Balanced View**: To counter perceived bias, include a discussion of Spacedrive's own limitations, such as potential memory and CPU overhead compared to lighter tools like rclone.
|
||||
- **Maintain an Objective Tone**: When discussing the evolution from v1, ensure the critique of past architectural choices sounds like an objective analysis rather than "internal venting".
|
||||
|
||||
### Professional Polish and Formatting
|
||||
|
||||
- **Complete the Bibliography**: The references are noted as being incomplete. Expand the bibliography to include more than 20 credible and specific citations, such as the original papers on IPFS, CRDTs, and semantic file systems.
|
||||
- **Credit Contributors**: If the project is based on a real open-source effort, it's recommended to credit the contributors and clarify any differences between the vision in the paper and the project's current state.
|
||||
- **Seek Peer Review**: For academic and technical validation, have the whitepaper peer-reviewed by other experts in the field.
|
||||
@@ -1,111 +0,0 @@
|
||||
### Overall Impression
|
||||
|
||||
Your draft whitepaper on Spacedrive V2 is an ambitious and comprehensive document that effectively outlines a novel architecture for a Virtual Distributed File System (VDFS). It positions Spacedrive as a solution to data fragmentation, emphasizing local-first principles, AI integration, privacy, and scalability. The paper is well-structured, with a logical flow from problem statement to technical details, evolution from V1, and future outlook. It incorporates modern concepts like content-addressable storage, peer-to-peer synchronization, and AI-native design, drawing on established research (e.g., CRDTs, local-first software) while highlighting innovations.
|
||||
|
||||
The writing is professional and engaging, with effective use of key takeaways boxes, tables, figures, and code snippets to break up dense text. The inclusion of a glossary, acknowledgments, and references adds credibility. However, the paper feels somewhat promotional at times (e.g., in the Executive Summary and Market Opportunity sections), which might dilute its academic tone—whitepapers often blend marketing and technical depth, but this one leans slightly toward the former. Technically, it's strong, but some claims lack empirical data (e.g., "sub-100ms search across millions of files"), and there are minor inconsistencies or oversights in depth.
|
||||
|
||||
Length-wise, it's thorough (truncated in your upload but appears ~20-30 pages compiled), but some sections could be condensed for readability. Strengths include the evolutionary narrative (V1 lessons) and practical focus on real-world implementation in Rust. Weaknesses include occasional hype without caveats, incomplete handling of edge cases, and potential over-reliance on planned features.
|
||||
|
||||
Score (out of 10): 8.5 – Excellent technical foundation, but polish needed for balance, evidence, and critique of limitations.
|
||||
|
||||
### Strengths
|
||||
|
||||
1. **Innovative Concepts and Integration**: The paper excels at weaving together ideas like SdPath (universal addressing), Content Identity (for deduplication and redundancy), and domain-separated sync. These feel fresh and solve real problems (e.g., cross-device operations without consensus overhead). The AI-native layer is a standout, positioning Spacedrive as forward-thinking.
|
||||
|
||||
2. **Evolutionary Narrative**: Section 3 ("Learning from the Past") is a highlight—honest about V1's flaws (e.g., dual file systems, over-engineered CRDTs) and how V2 addresses them. This builds trust and shows maturity.
|
||||
|
||||
3. **Visual and Structural Aids**: Figures (e.g., architecture diagram), tables (e.g., system comparisons), and code snippets (e.g., Rust structs for Entry and SdPath) enhance understanding. Key takeaways boxes provide quick summaries, making it skimmable for executives.
|
||||
|
||||
4. **Practical Focus**: Emphasis on real implementation (Rust, Iroh stack, SeaORM) grounds the paper. Details like adaptive hashing, resource efficiency for mobile, and security models demonstrate production-readiness.
|
||||
|
||||
5. **Broad Appeal**: It targets individuals, creators, teams, and enterprises, with flexible deployment (local-first to cloud-hybrid). The privacy model (zero-knowledge cloud) is timely and well-articulated.
|
||||
|
||||
6. **References and Acknowledgments**: Solid bibliography (~10-15 citations) ties to seminal works (e.g., IPFS, Kleppmann's local-first). Acknowledging AI assistance in drafting is transparent and ethical.
|
||||
|
||||
### Weaknesses and Areas for Improvement
|
||||
|
||||
1. **Promotional Tone and Hype**: Phrases like "changes how we interact with digital assets" or unsubstantiated stats (e.g., "save 20-30% storage") read like marketing copy. Back these with data from benchmarks or studies. The Market Opportunity section feels out of place in a technical whitepaper—consider moving it to an appendix or shortening.
|
||||
|
||||
2. **Lack of Empirical Evidence**: Claims like "150MB memory footprint for 1M+ file libraries" or "sub-2-second connection establishment" need validation (e.g., benchmarks, graphs). Include a "Performance Evaluation" section with real-world tests (e.g., using the code execution tool, I could simulate some, but you'd need actual data).
|
||||
|
||||
3. **Incomplete Handling of Limitations**: While V1 flaws are critiqued, V2's potential downsides (e.g., WASM overhead, sync delays in poor networks, AI privacy trade-offs with cloud models) are glossed over. Add a "Limitations and Challenges" subsection in the Conclusion.
|
||||
|
||||
4. **Technical Depth Inconsistencies**: Some areas are deeply detailed (e.g., SdPath resolution), others superficial (e.g., AI integration lacks specifics on models or embeddings). Edge cases like massive file conflicts or device failures aren't fully addressed.
|
||||
|
||||
5. **Readability and Redundancy**: Dense prose in sections like Architecture could be streamlined. Repetition (e.g., privacy emphasis across multiple sections) could be consolidated. LaTeX issues: Some listings (e.g., Rust code) have minor formatting errors (e.g., mis-encoded characters like "â€™"); compile and proofread.
|
||||
|
||||
6. **Diversity of Perspectives**: Related Work is good but could include more critical comparisons (e.g., how does Spacedrive fare against Syncthing or Resilio Sync in P2P efficiency?). Assumptions about user needs (e.g., "knowledge workers spend 25% searching") cite sources but feel generalized.
|
||||
|
||||
7. **Planned Features**: Overuse of \planned{} (e.g., compositional attributes) makes the paper feel speculative. Quantify: How many are "planned" vs. implemented? This risks undermining credibility.
|
||||
|
||||
8. **Accessibility and Inclusivity**: Unicode-native tags are mentioned, but broader accessibility (e.g., UI for visually impaired, internationalization) is absent. Security scenarios are strong but could include diverse threats (e.g., accessibility in low-resource regions).
|
||||
|
||||
### Detailed Section-by-Section Analysis
|
||||
|
||||
#### Title, Authors, Abstract, and Metadata
|
||||
|
||||
- **Strengths**: Title is clear and descriptive. Abstract concisely covers problem, solution, and innovations. Metadata (e.g., ACM details) gives an academic feel.
|
||||
- **Critique**: Subtitle could be punchier. Abstract claims "eliminates traditional client-server boundaries" but doesn't explain how until later—tease it more. DOI/ISBN are placeholders; replace with real ones if publishing.
|
||||
- **Suggestions**: Add keywords like "peer-to-peer file system" for SEO/discoverability.
|
||||
|
||||
#### Executive Summary
|
||||
|
||||
- **Strengths**: Bullet-point benefits are scannable; business angle (e.g., market opportunity) appeals to investors.
|
||||
- **Critique**: Too salesy (e.g., "positioned to become the essential infrastructure"). Stats like "25% of workweek" need better sourcing (Atlassian citation is fine, but verify 2025 projection). Investment highlights feel premature for a whitepaper.
|
||||
- **Suggestions**: Shorten to 1 page; focus on technical hooks.
|
||||
|
||||
#### Introduction (Section 1)
|
||||
|
||||
- **Strengths**: Strong problem framing ("data fragmentation hell") with key innovations listed. Ties to research well.
|
||||
- **Critique**: "Seven foundational innovations" is a good hook, but the list could be a table for emphasis. Mobile constraints are mentioned briefly—expand if targeting cross-platform.
|
||||
- **Suggestions**: Add a teaser figure of the unified view.
|
||||
|
||||
#### Related Work (Section 2)
|
||||
|
||||
- **Strengths**: Comprehensive comparison table; positions Spacedrive uniquely (e.g., vs. Alluxio's datacenter focus).
|
||||
- **Critique**: Could critique more deeply (e.g., IPFS's energy inefficiency for personal use). Missing modern peers like Solid (Tim Berners-Lee's project) for decentralized data.
|
||||
- **Suggestions**: Use a table for pros/cons expansion. Add subsection on AI in file systems (e.g., semantic FS research).
|
||||
|
||||
#### Learning from the Past (Section 3)
|
||||
|
||||
- **Strengths**: Candid and insightful—best section for showing maturity. Specific metrics (e.g., "90% boilerplate reduction") are convincing.
|
||||
- **Critique**: Assumes reader knows V1; add a brief V1 overview. "Over 95% line coverage" is great but needs context (which tests?).
|
||||
- **Suggestions**: Include a before/after architecture diagram.
|
||||
|
||||
#### The Spacedrive Architecture (Section 4)
|
||||
|
||||
- **Strengths**: Core of the paper—detailed and modular. Subsections on VDFS, Entry-Centric Model, etc., build logically. Code snippets illustrate well (e.g., SdPath enum).
|
||||
- **Critique**: Overlong; some subsections (e.g., Semantic Tagging) could merge. Adaptive hashing claims "99.9% accuracy" without proof—cite studies or explain calculation. Figure 1 (architecture) is complex; simplify labels.
|
||||
- **Suggestions**: Add pseudocode for optimal path resolution algorithm. Quantify performance (e.g., hashing speeds).
|
||||
|
||||
#### Subsequent Sections (5-10: Indexing, Sync, AI, etc.)
|
||||
|
||||
- **Strengths**: Depth in sync (domain separation avoids CRDT pitfalls) and security (scenarios are realistic). Resource Efficiency addresses mobile well. Conclusion ties back nicely.
|
||||
- **Critique**: AI-Native (Section 6) is visionary but vague on implementation (e.g., how does Ollama integrate with embeddings?). Conflict Resolution (Section 9) is practical but lacks flowcharts for all scenarios. Glossary is helpful but could be alphabetized.
|
||||
- **Suggestions**: Add benchmarks in Resource Efficiency (e.g., battery impact tests). In Security, discuss quantum-resistant crypto as future-proofing.
|
||||
|
||||
#### Appendices (Glossary, References)
|
||||
|
||||
- **Strengths**: Glossary is exhaustive and useful. References are relevant.
|
||||
- **Critique**: References could expand (20+ for depth). Glossary has acronyms but misses some (e.g., ALPN in networking).
|
||||
- **Suggestions**: Add an index or hyperlinks in PDF.
|
||||
|
||||
### Technical Critique
|
||||
|
||||
- **Accuracy**: Concepts like content-addressing (inspired by IPFS/LBFS) are correctly applied. Rust code is idiomatic and error-free.
|
||||
- **Innovation**: Strong claims (e.g., no consensus in sync) are substantiated, but compare quantitatively to alternatives (e.g., vs. Raft in distributed FS).
|
||||
- **Scalability**: Handles "multi-petabyte" but lacks math (e.g., index size growth). Use tables for complexity (O(n) for hashing?).
|
||||
- **Security**: Excellent model, but add formal threat modeling (e.g., STRIDE analysis).
|
||||
- **AI Integration**: Privacy-preserving is good, but discuss biases in models or compute costs.
|
||||
- **Potential Flaws**: What if Iroh fails NAT traversal? Fallbacks mentioned but not detailed.
|
||||
|
||||
### Suggestions for Enhancement
|
||||
|
||||
1. **Add Data/Evidence**: Include graphs (e.g., sync time vs. devices) using PGFPlots. Run benchmarks via code tool if needed.
|
||||
2. **Balance Tone**: Reduce hype; add caveats (e.g., "While effective, adaptive hashing may miss rare collisions").
|
||||
3. **Expand Evaluations**: New section with simulations (e.g., conflict rates) or user studies.
|
||||
4. **Revise for Clarity**: Proofread for typos (e.g., "â€™" artifacts). Compile LaTeX to check rendering.
|
||||
5. **Length/Focus**: Trim to 20 pages; prioritize implemented features over planned.
|
||||
6. **Next Steps**: Solicit feedback from Rust/P2P communities (e.g., via X search on similar projects).
|
||||
|
||||
Overall, this is a solid draft—refine evidence and tone for publication. If you provide specific sections or updates, I can dive deeper!
|
||||
@@ -1,704 +0,0 @@
|
||||
# Proposed Changes to Spacedrive V2 Whitepaper (Version 2)
|
||||
|
||||
This document outlines detailed changes to incorporate advanced features and clarifications into the Spacedrive V2 whitepaper. Each change includes the specific section, rationale, and proposed text.
|
||||
|
||||
**Note: This version incorporates the WebAssembly-based extension system design, building upon the existing integration system architecture.**
|
||||
|
||||
---
|
||||
|
||||
## 1. Rename "Lightning Search" to "Temporal-Semantic Search"
|
||||
|
||||
### Locations to Update:
|
||||
- Executive Summary (Key Features list)
|
||||
- Section 4.7 title and all references
|
||||
- Glossary entry
|
||||
- Table 7.1 (Performance benchmarks)
|
||||
- Any other mentions throughout the document
|
||||
|
||||
### Find and Replace:
|
||||
- "Lightning Search" → "Temporal-Semantic Search"
|
||||
- "lightning search" → "temporal-semantic search"
|
||||
|
||||
### Update Section 4.7 Introduction:
|
||||
**Current:** "Lightning Search combines traditional full-text search with AI-powered semantic understanding..."
|
||||
|
||||
**Proposed:** "Temporal-Semantic Search represents a breakthrough in file discovery, combining SQLite's FTS5 full-text search with AI-powered vector embeddings. The 'temporal' aspect leverages file timestamps and access patterns, while 'semantic' understanding enables natural language queries that find files by meaning, not just keywords."
|
||||
|
||||
---
|
||||
|
||||
## 2. Add Section 4.1.6: Semantic Content Types
|
||||
|
||||
### Location: After Section 4.1.5 (Virtual Sidecar System)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsubsection{Semantic Content Types}
|
||||
|
||||
While traditional file systems rely solely on MIME types and extensions, Spacedrive introduces \textbf{Semantic Content Types} that understand the actual structure and meaning of data. This system transforms Spacedrive from a simple file manager into an enterprise-grade knowledge base.
|
||||
|
||||
\paragraph{Beyond MIME Types}
|
||||
Semantic Content Types extend file identification to include:
|
||||
\begin{itemize}
|
||||
\item \textbf{Structured Data Extraction}: Email files (.eml, .msg) are parsed to extract sender, recipients, subject, and date into queryable fields
|
||||
\item \textbf{Compound Document Understanding}: Web archives (.warc, .maff) maintain relationships between HTML, CSS, images, and other assets
|
||||
\item \textbf{Domain-Specific Metadata}: Scientific datasets (.hdf5, .netcdf) expose internal structure and variables
|
||||
\item \textbf{Relationship Mapping}: Project files link to their dependencies and outputs
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Implementation via Virtual Sidecars}
|
||||
The Virtual Sidecar System (Section 4.1.5) provides the perfect mechanism for semantic types:
|
||||
|
||||
\begin{lstlisting}[language=json, caption=Example semantic sidecar for an email]
|
||||
{
|
||||
"content_type": "email/rfc822",
|
||||
"semantic_type": "EmailMessage",
|
||||
"extracted_data": {
|
||||
"from": "sender@example.com",
|
||||
"to": ["recipient@example.com"],
|
||||
"subject": "Q3 Financial Report",
|
||||
"date": "2024-03-15T10:30:00Z",
|
||||
"has_attachments": true,
|
||||
"attachment_count": 2,
|
||||
"thread_id": "conv-12345"
|
||||
},
|
||||
"search_vectors": {
|
||||
"subject_embedding": [0.23, -0.45, ...],
|
||||
"body_embedding": [0.12, 0.67, ...]
|
||||
}
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
This approach enables:
|
||||
\begin{itemize}
|
||||
\item Rich queries like "emails from Alice about budgets with attachments"
|
||||
\item Preservation of original files while adding intelligence
|
||||
\item Extensibility through user-defined content types
|
||||
\item Integration with the AI Agent system for automated organization
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Enterprise Knowledge Management}
|
||||
By treating files as structured data objects rather than opaque blobs, Spacedrive becomes a powerful knowledge management platform suitable for:
|
||||
\begin{itemize}
|
||||
\item Legal discovery with deep email and document search
|
||||
\item Research data management with scientific format understanding
|
||||
\item Digital asset management with rich media metadata
|
||||
\item Compliance tracking with automated classification
|
||||
\end{itemize}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Add Section 7.X: Extensibility Architecture
|
||||
|
||||
### Location: After Section 7.5 (Compatibility and Interoperability)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsection{Extensibility Architecture}
|
||||
|
||||
Spacedrive's extensibility architecture combines a robust integration system for cloud providers with a WebAssembly-based plugin system for lightweight extensions. This dual approach provides both deep system integration capabilities and safe, portable user extensions.
|
||||
|
||||
\subsubsection{Integration System for Cloud Storage}
|
||||
|
||||
The integration system enables direct, remote indexing of large-scale cloud storage without local synchronization:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Process Isolation}: Each integration runs as a separate, sandboxed process
|
||||
\item \textbf{Language Agnostic}: Integrations can be written in any language
|
||||
\item \textbf{On-Demand Access}: Metadata and content fetched only when needed
|
||||
\item \textbf{Unified Core Logic}: Reuses indexer's advanced logic for all storage types
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=Rust, caption=Cloud storage provider trait]
|
||||
#[async_trait]
|
||||
pub trait CloudStorageProvider {
|
||||
/// Discover entries at a given remote path
|
||||
async fn discover(
|
||||
&self,
|
||||
path: &str,
|
||||
credentials: &IntegrationCredential
|
||||
) -> Result<Stream<DirEntry>>;
|
||||
|
||||
/// Stream file content with byte range support
|
||||
async fn stream_content(
|
||||
&self,
|
||||
path: &str,
|
||||
range: Option<ByteRange>,
|
||||
credentials: &IntegrationCredential,
|
||||
) -> Result<Stream<Bytes>>;
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
This architecture enables:
|
||||
\begin{itemize}
|
||||
\item Management of petabyte-scale libraries on devices with limited storage
|
||||
\item Efficient content hashing using ranged requests (8KB samples)
|
||||
\item Lazy thumbnail generation as background jobs
|
||||
\item Support for any storage provider via OpenDAL operators
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{WebAssembly Plugin System}
|
||||
|
||||
For lightweight extensions and custom functionality, Spacedrive employs a WebAssembly-based plugin system:
|
||||
|
||||
\paragraph{Security Model}
|
||||
WASM provides critical security guarantees:
|
||||
\begin{itemize}
|
||||
\item \textbf{Complete Sandboxing}: Plugins cannot access filesystem or network without permission
|
||||
\item \textbf{Capability-Based}: Plugins declare required permissions upfront
|
||||
\item \textbf{Resource Limits}: CPU, memory, and I/O are bounded
|
||||
\item \textbf{Memory Safety}: Prevents buffer overflows and pointer manipulation
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Plugin Capabilities}
|
||||
Through the exposed VDFS API, plugins can:
|
||||
\begin{itemize}
|
||||
\item Define custom semantic content types with parsing logic
|
||||
\item Create specialized AI agents for workflow automation
|
||||
\item Add new actions to the transactional action system
|
||||
\item Implement custom search providers and filters
|
||||
\item Generate specialized thumbnails and previews
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=Rust, caption=Example WASM plugin API]
|
||||
// Host functions exposed by Spacedrive
|
||||
#[link(wasm_import_module = "spacedrive")]
|
||||
extern "C" {
|
||||
fn vdfs_read_file(path_ptr: u32, path_len: u32) -> u32;
|
||||
fn vdfs_write_sidecar(
|
||||
entry_id: u32,
|
||||
data_ptr: u32,
|
||||
data_len: u32
|
||||
) -> u32;
|
||||
fn register_content_type(
|
||||
spec_ptr: u32,
|
||||
spec_len: u32
|
||||
) -> u32;
|
||||
}
|
||||
|
||||
// Plugin implementation
|
||||
#[spacedrive_plugin]
|
||||
pub struct ScientificDataPlugin;
|
||||
|
||||
#[spacedrive_plugin::content_type]
|
||||
impl ContentTypeHandler for ScientificDataPlugin {
|
||||
fn can_handle(&self, entry: &Entry) -> bool {
|
||||
matches!(entry.extension(),
|
||||
Some("hdf5") | Some("netcdf") | Some("fits"))
|
||||
}
|
||||
|
||||
fn extract_metadata(&self, data: &[u8]) -> Result<Metadata> {
|
||||
// Parse scientific format and extract variables,
|
||||
// dimensions, and other domain-specific metadata
|
||||
}
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
\paragraph{Distribution Model}
|
||||
The WASM approach solves critical distribution challenges:
|
||||
\begin{itemize}
|
||||
\item \textbf{Single Binary}: One .wasm file works on all platforms
|
||||
\item \textbf{No Code Signing}: Avoids platform-specific signing requirements
|
||||
\item \textbf{Instant Loading}: No process spawn overhead
|
||||
\item \textbf{Hot Reload}: Plugins can be updated without restart
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Integration Architecture}
|
||||
|
||||
The complete extensibility architecture combines both systems:
|
||||
|
||||
\begin{verbatim}
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Spacedrive Core │
|
||||
│ ┌─────────────────┐ ┌────────────────────────┐ │
|
||||
│ │ Integration │ │ WASM Plugin Host │ │
|
||||
│ │ Manager │ │ • Wasmer Runtime │ │
|
||||
│ │ • Process Mgmt │ │ • VDFS API Bridge │ │
|
||||
│ │ • IPC Router │ │ • Permission System │ │
|
||||
│ └────────┬────────┘ └───────────┬────────────┘ │
|
||||
└──────────┼───────────────────────┼──────────────┘
|
||||
│ │
|
||||
┌──────▼────────┐ ┌──────▼──────┐
|
||||
│ Integration │ │ WASM Plugin │
|
||||
│ Process │ │ (In-Proc) │
|
||||
│ • OpenDAL │ │ • Safe API │
|
||||
│ • Full Access │ │ • Limited │
|
||||
└───────────────┘ └─────────────┘
|
||||
\end{verbatim}
|
||||
|
||||
This dual approach provides:
|
||||
\begin{itemize}
|
||||
\item Heavy integrations (cloud storage) via isolated processes
|
||||
\item Lightweight extensions (content types, agents) via WASM
|
||||
\item Clear security boundaries for each use case
|
||||
\item Maximum flexibility without compromising safety
|
||||
\end{itemize}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Update Section 7.5.2: Cloud Service Integration
|
||||
|
||||
### Location: Section 7.5.2 (under Compatibility and Interoperability)
|
||||
|
||||
### Replace existing content with:
|
||||
|
||||
```latex
|
||||
\subsubsection{Cloud Service Integration}
|
||||
|
||||
Spacedrive's cloud integration architecture enables seamless management of cloud storage as if it were local, without the limitations of traditional sync-based approaches.
|
||||
|
||||
\paragraph{Direct Remote Indexing}
|
||||
Unlike traditional cloud sync clients that duplicate data locally, Spacedrive indexes cloud storage in-place:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Streaming Metadata}: Directory listings streamed directly from cloud APIs
|
||||
\item \textbf{On-Demand Content}: Files accessed only when needed
|
||||
\item \textbf{Efficient Hashing}: Content identification using ranged requests (8KB samples)
|
||||
\item \textbf{Lazy Processing}: Thumbnails and rich metadata extracted as background jobs
|
||||
\end{itemize}
|
||||
|
||||
This approach enables management of petabyte-scale cloud libraries on devices with minimal local storage.
|
||||
|
||||
\paragraph{OpenDAL Integration}
|
||||
To achieve comprehensive cloud storage support efficiently, Spacedrive leverages OpenDAL (Open Data Access Layer), a Rust-native library providing unified access to storage services:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Unified Interface}: Single API for S3, Azure Blob, Google Cloud Storage, WebDAV, and dozens more
|
||||
\item \textbf{Native Performance}: Zero-overhead abstractions with service-specific optimizations
|
||||
\item \textbf{Streaming Support}: Efficient handling of large files without full downloads
|
||||
\item \textbf{Automatic Retries}: Built-in resilience for unreliable network conditions
|
||||
\item \textbf{Byte Range Requests}: Essential for efficient content hashing and previews
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Virtual Device Abstraction}
|
||||
Each cloud service appears as a virtual device in Spacedrive's volume system:
|
||||
|
||||
\begin{lstlisting}[language=Rust, caption=Cloud location registration]
|
||||
// Adding a cloud location creates a virtual device
|
||||
let location = LocationManager::add_cloud_location(
|
||||
integration_id: "gdrive",
|
||||
name: "Work Google Drive",
|
||||
credentials_id: cred_id,
|
||||
).await?;
|
||||
|
||||
// The location behaves identically to local storage
|
||||
let entries = vdfs.list_directory(&location, "/Projects").await?;
|
||||
\end{lstlisting}
|
||||
|
||||
This abstraction means:
|
||||
\begin{itemize}
|
||||
\item Unified search across local and cloud storage
|
||||
\item Transparent file operations between any storage types
|
||||
\item Consistent access control and audit trails
|
||||
\item No special handling required for cloud vs local files
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Performance Optimization}
|
||||
The system employs several strategies to minimize latency:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Metadata Caching}: Recently accessed directory listings cached locally
|
||||
\item \textbf{Predictive Prefetch}: AI agents anticipate and preload likely accesses
|
||||
\item \textbf{Parallel Operations}: Multiple cloud API calls executed concurrently
|
||||
\item \textbf{Progressive Loading}: UI displays results as they stream in
|
||||
\end{itemize}
|
||||
|
||||
This architecture exemplifies our "Zero Vendor Lock-in" principle while providing users seamless access to their data regardless of where it resides.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Add Section 5.4: Collaboration and Public Sharing
|
||||
|
||||
### Location: After Section 5.3 (User Benefits)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsection{Collaboration and Public Sharing}
|
||||
|
||||
The Cloud Core architecture enables sophisticated sharing capabilities without introducing complex APIs or compromising the peer-to-peer model.
|
||||
|
||||
\subsubsection{Flexible Hosting Model}
|
||||
|
||||
While Spacedrive Cloud provides turnkey hosting, the architecture supports multiple deployment options:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Spacedrive Cloud}: Managed hosting with automatic SSL, CDN, and scaling
|
||||
\item \textbf{Self-Hosted Cloud Core}: Deploy on any infrastructure with full control
|
||||
\item \textbf{Hybrid Deployment}: Mix of self-hosted and managed components
|
||||
\item \textbf{Edge Deployment}: Run cores close to users for optimal performance
|
||||
\end{itemize}
|
||||
|
||||
Any Spacedrive core—whether on a personal device or in the cloud—can serve as a sharing endpoint with appropriate configuration.
|
||||
|
||||
\subsubsection{Shared Folders via Team Libraries}
|
||||
|
||||
Collaboration in Spacedrive leverages the Library abstraction:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Team Libraries}: Shared libraries with role-based permissions
|
||||
\item \textbf{Granular Access Control}: Per-location and per-file permissions
|
||||
\item \textbf{Action Audit Trail}: Complete history of all modifications
|
||||
\item \textbf{Conflict Resolution}: Automatic handling of concurrent edits
|
||||
\end{itemize}
|
||||
|
||||
Team members connect to shared libraries exactly as they would personal ones—the Cloud Core simply acts as an always-available peer ensuring data availability.
|
||||
|
||||
\subsubsection{Public File Hosting}
|
||||
|
||||
Public sharing leverages the same infrastructure with a crucial distinction:
|
||||
|
||||
\begin{itemize}
|
||||
\item Files marked with "public" role become web-accessible
|
||||
\item Any core with port exposure can serve public files
|
||||
\item Spacedrive Cloud provides automatic SSL and CDN for ease of use
|
||||
\item Self-hosted cores require manual port configuration and SSL setup
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=text, caption=Public sharing URL examples]
|
||||
# Via Spacedrive Cloud (automatic SSL + CDN)
|
||||
https://share.spacedrive.com/user/file.pdf
|
||||
|
||||
# Via self-hosted Cloud Core
|
||||
https://files.company.com/public/presentation.pdf
|
||||
|
||||
# Via personal device (requires port forwarding)
|
||||
https://home.user.com:8443/share/document.docx
|
||||
\end{lstlisting}
|
||||
|
||||
\subsubsection{Enhanced Spacedrop}
|
||||
|
||||
The Cloud Core extends Spacedrop's capabilities:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Asynchronous Transfers}: Cloud Core holds files until recipients connect
|
||||
\item \textbf{Persistent Links}: Share links remain valid indefinitely unless an expiration date is set
|
||||
\item \textbf{Large File Support}: No size limits with resumable transfers
|
||||
\item \textbf{Access Control}: Optional passwords and expiration dates
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=text, caption=Spacedrop relay options]
|
||||
# Direct P2P (ephemeral, no relay)
|
||||
spacedrop://device-id/transfer-id
|
||||
|
||||
# Via Spacedrive Cloud relay
|
||||
https://drop.spacedrive.com/abc123
|
||||
|
||||
# Via self-hosted relay
|
||||
https://relay.company.com/drop/xyz789
|
||||
\end{lstlisting}
|
||||
|
||||
This unified approach to sharing—from private team collaboration to public content distribution—demonstrates how core P2P primitives scale to support diverse use cases without architectural compromises.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Update Section 4.5.2: Iroh-Powered Network Infrastructure
|
||||
|
||||
### Location: Add clarification about relay flexibility
|
||||
|
||||
### Add after the relay description:
|
||||
|
||||
```latex
|
||||
\paragraph{Self-Hosted Relay Infrastructure}
|
||||
While Spacedrive provides public relay servers for convenience, the architecture fully supports self-hosted deployments:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Zero-Trust Option}: Organizations can run private relay networks
|
||||
\item \textbf{Simple Deployment}: Single binary with minimal configuration
|
||||
\item \textbf{Geographic Distribution}: Deploy relays near users for optimal performance
|
||||
\item \textbf{Compliance Ready}: Keep all traffic within organizational boundaries
|
||||
\end{itemize}
|
||||
|
||||
This flexibility makes Spacedrive suitable for:
|
||||
\begin{itemize}
|
||||
\item Enterprises requiring complete data sovereignty
|
||||
\item Regions with data residency requirements
|
||||
\item Air-gapped networks with no external connectivity
|
||||
\item Organizations building private overlay networks (similar to Tailscale)
|
||||
\end{itemize}
|
||||
|
||||
The relay service can be deployed as a standalone component, in Kubernetes, or as a managed service, providing deployment flexibility to match any infrastructure requirement.
|
||||
|
||||
\paragraph{Network Architecture Flexibility}
|
||||
The Iroh-based networking supports multiple topologies:
|
||||
|
||||
\begin{verbatim}
|
||||
Public Cloud (Default):
|
||||
Device A ←→ Public Relay ←→ Device B
|
||||
|
||||
Direct (if possible)
|
||||
|
||||
Self-Hosted:
|
||||
Device A ←→ Private Relay ←→ Device B
|
||||
|
||||
Direct (always preferred)
|
||||
|
||||
Hybrid:
|
||||
Corporate ←→ Private Relay ←→ Public Relay ←→ Personal
|
||||
Devices Devices
|
||||
\end{verbatim}
|
||||
|
||||
This flexibility ensures Spacedrive can adapt to any network environment while maintaining its peer-to-peer principles.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Expand Table 7.1: Performance Benchmarks
|
||||
|
||||
### Location: Section 7.1 (Performance Evaluation)
|
||||
|
||||
### Replace existing table with:
|
||||
|
||||
```latex
|
||||
\begin{table}[h]
|
||||
\centering
|
||||
\caption{Performance benchmarks across storage tiers (M2 MacBook Pro, 16GB RAM)}
|
||||
\label{tab:performance}
|
||||
\begin{tabular}{lrr}
|
||||
\toprule
|
||||
\textbf{Metric} & \textbf{Value} & \textbf{Unit} \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Indexing Throughput}} \\
|
||||
\quad Internal NVMe SSD & 8,500 & files/sec \\
|
||||
\quad External USB 3.2 SSD & 6,200 & files/sec \\
|
||||
\quad Network Attached Storage (1Gbps) & 3,100 & files/sec \\
|
||||
\quad External HDD (USB 3.0) & 1,850 & files/sec \\
|
||||
\quad Cloud Storage (S3, parallel) & 450 & files/sec \\
|
||||
\quad Cloud Storage (Google Drive) & 280 & files/sec \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Search Latency (1M entries)}} \\
|
||||
\quad Temporal Search (FTS5) & 55 & ms \\
|
||||
\quad Semantic Search (Vector) & 95 & ms \\
|
||||
\quad Combined Temporal-Semantic & 110 & ms \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Memory Usage}} \\
|
||||
\quad Base daemon & 45 & MB \\
|
||||
\quad Per 1M indexed files & 105 & MB \\
|
||||
\quad With active P2P connections & +15 & MB/peer \\
|
||||
\quad With WASM plugins (per plugin) & +8--25 & MB \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Network Performance}} \\
|
||||
\quad P2P transfer (LAN) & 110 & MB/s \\
|
||||
\quad P2P transfer (WAN w/ relay) & 45 & MB/s \\
|
||||
\quad NAT traversal success rate & 92 & \% \\
|
||||
\quad Connection establishment & 1.8 & seconds \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Extension System}} \\
|
||||
\quad WASM plugin load time & 12 & ms \\
|
||||
\quad Integration process startup & 150 & ms \\
|
||||
\quad IPC roundtrip latency & 0.8 & ms \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
\textit{Note: Cloud storage indexing uses metadata-only requests with on-demand content fetching. Performance varies based on API rate limits and network conditions.}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Update Section 4.4: The Transactional Action System
|
||||
|
||||
### Location: In the Simulation Engine description
|
||||
|
||||
### Update the paragraph about pre-visualization to include:
|
||||
|
||||
```latex
|
||||
\paragraph{Intelligent Time Estimation}
|
||||
The Simulation Engine combines multiple data sources to provide accurate operation time estimates:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Volume Performance Metrics}: Real-time read/write speeds from continuous monitoring
|
||||
\item \textbf{Network Conditions}: Current bandwidth and latency from Iroh's measurements
|
||||
\item \textbf{Historical Data}: Previous operations on similar files and paths
|
||||
\item \textbf{Operation Complexity}: Number of files, total size, and fragmentation
|
||||
\item \textbf{Storage Type Awareness}: Different strategies for local vs cloud storage
|
||||
\end{itemize}
|
||||
|
||||
For example, when copying 10GB across devices, the estimation considers:
|
||||
\begin{itemize}
|
||||
\item Source volume read speed: 250 MB/s (measured)
|
||||
\item Network throughput: 45 MB/s (current P2P bandwidth)
|
||||
\item Destination write speed: 180 MB/s (measured)
|
||||
\item Bottleneck: Network at 45 MB/s
|
||||
\item Estimated time: approximately 4 minutes (about 3 minutes 42 seconds of transfer at 45 MB/s, plus a 10\% buffer)
|
||||
\end{itemize}
|
||||
|
||||
For cloud operations, additional factors apply:
|
||||
\begin{itemize}
|
||||
\item API rate limits (e.g., 3,500 write and 5,500 read requests per second per prefix for S3)
|
||||
\item Chunk size optimization (balancing throughput vs memory)
|
||||
\item Parallel stream count (typically 4--8 for cloud providers)
|
||||
\item Resume capability for long-running transfers
|
||||
\end{itemize}
|
||||
|
||||
This transparency helps users make informed decisions about when and how to execute operations, especially for large-scale cloud migrations.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. Add Section 8.5: Balancing Privacy and Public Sharing
|
||||
|
||||
### Location: After Section 8.4 (Incident Response)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsection{Balancing Privacy and Public Sharing}
|
||||
|
||||
Spacedrive's security model accommodates both zero-knowledge privacy and public content sharing through its library-based architecture.
|
||||
|
||||
\subsubsection{Per-Library Encryption Policy}
|
||||
|
||||
Each library maintains independent encryption settings:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Private Libraries} (default): Full SQLCipher encryption at rest
|
||||
\item \textbf{Public Libraries} (opt-in): Unencrypted for web serving
|
||||
\item \textbf{Hybrid Libraries}: Encrypted with selective public locations
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=Rust, caption=Library encryption configuration]
|
||||
pub struct LibraryConfig {
|
||||
pub encryption: EncryptionMode,
|
||||
pub public_sharing: PublicSharingConfig,
|
||||
}
|
||||
|
||||
pub enum EncryptionMode {
|
||||
/// Full encryption (default)
|
||||
Encrypted { key_derivation: Argon2id },
|
||||
/// No encryption (for public content)
|
||||
Unencrypted,
|
||||
/// Encrypted with public locations
|
||||
Hybrid { public_locations: Vec<LocationId> },
|
||||
}
|
||||
|
||||
pub struct PublicSharingConfig {
|
||||
/// Which core serves public content
|
||||
pub hosting_core: CoreIdentity,
|
||||
/// Custom domain (if any)
|
||||
pub custom_domain: Option<String>,
|
||||
/// Access control rules
|
||||
pub access_rules: Vec<AccessRule>,
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
\subsubsection{Secure Public Sharing Workflow}
|
||||
|
||||
Users can share content publicly without compromising private data:
|
||||
|
||||
\begin{enumerate}
|
||||
\item Create a dedicated public library or location
|
||||
\item Configure which core hosts public content (cloud or self-hosted)
|
||||
\item Move/copy files to public locations
|
||||
\item Share generated URLs with recipients
|
||||
\item Private libraries remain fully encrypted throughout
|
||||
\end{enumerate}
|
||||
|
||||
\subsubsection{Implementation Considerations}
|
||||
|
||||
This dual-mode approach ensures:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Clear Boundaries}: Users explicitly choose what becomes public
|
||||
\item \textbf{No Encryption Downgrade}: Private libraries cannot be converted to public
|
||||
\item \textbf{Audit Trail}: All public sharing actions are logged
|
||||
\item \textbf{Revocable Access}: Public files can be made private instantly
|
||||
\item \textbf{Hosting Flexibility}: Any core can serve public content with proper setup
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Security Implications}
|
||||
The system maintains security through isolation:
|
||||
|
||||
\begin{itemize}
|
||||
\item Public and private data never mix within a location; hybrid libraries expose only their explicitly designated public locations
|
||||
\item Encryption keys are never exposed to hosting infrastructure
|
||||
\item Access tokens are scoped to specific libraries and operations
|
||||
\item Public URLs use capability-based security (unguessable paths)
|
||||
\end{itemize}
|
||||
|
||||
By making encryption optional but enabled by default, Spacedrive provides flexibility for content creators and enterprises while maintaining strong privacy guarantees for personal data.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Minor Updates Throughout
|
||||
|
||||
### Executive Summary - Key Features
|
||||
Update the feature list to reflect new terminology and capabilities:
|
||||
- Change "Lightning Search" to "Temporal-Semantic Search"
|
||||
- Add bullet: "• Extensible via WebAssembly plugins and isolated integrations"
|
||||
- Add bullet: "• Direct cloud indexing without local synchronization"
|
||||
|
||||
### Glossary Updates
|
||||
- Remove "Lightning Search" entry
|
||||
- Add "Temporal-Semantic Search: Hybrid search combining temporal (time-based) full-text search with semantic (meaning-based) vector search"
|
||||
- Add "OpenDAL: Open Data Access Layer, providing unified access to cloud storage services"
|
||||
- Add "Semantic Content Types: Advanced file type system that understands data structure and meaning beyond MIME types"
|
||||
- Add "WASM Plugin: WebAssembly-based extension running in a sandboxed environment"
|
||||
- Add "Integration: Isolated process providing deep system integration (e.g., cloud storage)"
|
||||
|
||||
### Section 2 (Related Work)
|
||||
Add paragraphs comparing Spacedrive's approach:
|
||||
|
||||
```latex
|
||||
\paragraph{Extensibility Models}
|
||||
Unlike systems that require native plugins (Finder, Nautilus) or rely on scripting languages (Obsidian, VS Code), Spacedrive employs a dual extensibility model. Heavy integrations requiring full system access run as isolated processes, while lightweight extensions execute in a WebAssembly sandbox. This provides both power and safety.
|
||||
|
||||
\paragraph{Cloud Storage Approaches}
|
||||
Traditional cloud sync clients (Dropbox, Google Drive) duplicate data locally, consuming significant disk space and bandwidth. Spacedrive's direct indexing approach treats cloud storage as just another volume, accessing content on-demand. This enables management of petabyte-scale cloud libraries on devices with minimal storage.
|
||||
```
|
||||
|
||||
### Section 3 (Learning from the Past)
|
||||
Add a note about extensibility lessons:
|
||||
|
||||
```latex
|
||||
\paragraph{Extensibility Lessons}
|
||||
Version 1's monolithic architecture limited community contributions. Version 2's dual extensibility model—process-isolated integrations for complex providers and WASM plugins for safe extensions—enables a vibrant ecosystem while maintaining security and stability.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
1. **High Priority** (Core value propositions):
|
||||
- Semantic Content Types (Section 4.1.6)
|
||||
- Temporal-Semantic Search rename
|
||||
- Cloud Storage Integration with OpenDAL (Section 7.5.2)
|
||||
- Extensibility Architecture (Section 7.X)
|
||||
|
||||
2. **Medium Priority** (Important differentiators):
|
||||
- Collaboration and Public Sharing (Section 5.4)
|
||||
- Enhanced benchmarks table
|
||||
- Time estimation details
|
||||
- Self-hosted relay clarification
|
||||
|
||||
3. **Low Priority** (Polish and completeness):
|
||||
- Security model clarifications
|
||||
- Minor wording updates
|
||||
- Glossary additions
|
||||
- Related work comparisons
|
||||
|
||||
---
|
||||
|
||||
## Technical Consistency Notes
|
||||
|
||||
- The WASM plugin system complements, not replaces, the integration system
|
||||
- Cloud providers use the integration system (full process isolation)
|
||||
- Content types and agents use WASM plugins (sandboxed, lightweight)
|
||||
- Both systems share the same VDFS abstraction layer
|
||||
- Performance numbers account for both extension types
|
||||
|
||||
---
|
||||
|
||||
## Key Architectural Decisions Highlighted
|
||||
|
||||
1. **Dual Extensibility**: Process isolation for heavy integrations, WASM for lightweight plugins
|
||||
2. **Direct Cloud Indexing**: No local sync required, on-demand content access
|
||||
3. **Flexible Hosting**: Any core can serve content, and managed hosting options are also available
|
||||
4. **Security by Default**: Encryption on by default, explicit opt-in for public sharing
|
||||
5. **Universal Abstraction**: All storage types (local, network, cloud) treated uniformly
|
||||
@@ -1,475 +0,0 @@
|
||||
# Proposed Changes to Spacedrive V2 Whitepaper
|
||||
|
||||
This document outlines detailed changes to incorporate advanced features and clarifications into the Spacedrive V2 whitepaper. Each change includes the specific section, rationale, and proposed text.
|
||||
|
||||
---
|
||||
|
||||
## 1. Rename "Lightning Search" to "Temporal-Semantic Search"
|
||||
|
||||
### Locations to Update:
|
||||
- Executive Summary (Key Features list)
|
||||
- Section 4.7 title and all references
|
||||
- Glossary entry
|
||||
- Table 7.1 (Performance benchmarks)
|
||||
- Any other mentions throughout the document
|
||||
|
||||
### Find and Replace:
|
||||
- "Lightning Search" → "Temporal-Semantic Search"
|
||||
- "lightning search" → "temporal-semantic search"
|
||||
|
||||
### Update Section 4.7 Introduction:
|
||||
**Current:** "Lightning Search combines traditional full-text search with AI-powered semantic understanding..."
|
||||
|
||||
**Proposed:** "Temporal-Semantic Search represents a breakthrough in file discovery, combining SQLite's FTS5 full-text search with AI-powered vector embeddings. The 'temporal' aspect leverages file timestamps and access patterns, while 'semantic' understanding enables natural language queries that find files by meaning, not just keywords."
|
||||
|
||||
---
|
||||
|
||||
## 2. Add Section 4.1.6: Semantic Content Types
|
||||
|
||||
### Location: After Section 4.1.5 (Virtual Sidecar System)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsubsection{Semantic Content Types}
|
||||
|
||||
While traditional file systems rely solely on MIME types and extensions, Spacedrive introduces \textbf{Semantic Content Types} that understand the actual structure and meaning of data. This system transforms Spacedrive from a simple file manager into an enterprise-grade knowledge base.
|
||||
|
||||
\paragraph{Beyond MIME Types}
|
||||
Semantic Content Types extend file identification to include:
|
||||
\begin{itemize}
|
||||
\item \textbf{Structured Data Extraction}: Email files (.eml, .msg) are parsed to extract sender, recipients, subject, and date into queryable fields
|
||||
\item \textbf{Compound Document Understanding}: Web archives (.warc, .maff) maintain relationships between HTML, CSS, images, and other assets
|
||||
\item \textbf{Domain-Specific Metadata}: Scientific datasets (.hdf5, .nc) expose internal structure and variables
|
||||
\item \textbf{Relationship Mapping}: Project files link to their dependencies and outputs
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Implementation via Virtual Sidecars}
|
||||
The Virtual Sidecar System (Section 4.1.5) provides the perfect mechanism for semantic types:
|
||||
|
||||
\begin{lstlisting}[language=json, caption=Example semantic sidecar for an email]
|
||||
{
|
||||
"content_type": "message/rfc822",
|
||||
"semantic_type": "EmailMessage",
|
||||
"extracted_data": {
|
||||
"from": "sender@example.com",
|
||||
"to": ["recipient@example.com"],
|
||||
"subject": "Q3 Financial Report",
|
||||
"date": "2024-03-15T10:30:00Z",
|
||||
"has_attachments": true,
|
||||
"attachment_count": 2,
|
||||
"thread_id": "conv-12345"
|
||||
},
|
||||
"search_vectors": {
|
||||
"subject_embedding": [0.23, -0.45, ...],
|
||||
"body_embedding": [0.12, 0.67, ...]
|
||||
}
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
This approach enables:
|
||||
\begin{itemize}
|
||||
\item Rich queries like "emails from Alice about budgets with attachments"
|
||||
\item Preservation of original files while adding intelligence
|
||||
\item Extensibility through user-defined content types
|
||||
\item Integration with the AI Agent system for automated organization
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Enterprise Knowledge Management}
|
||||
By treating files as structured data objects rather than opaque blobs, Spacedrive becomes a powerful knowledge management platform suitable for:
|
||||
\begin{itemize}
|
||||
\item Legal discovery with deep email and document search
|
||||
\item Research data management with scientific format understanding
|
||||
\item Digital asset management with rich media metadata
|
||||
\item Compliance tracking with automated classification
|
||||
\end{itemize}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Add Section 7.X: Extensibility via WebAssembly
|
||||
|
||||
### Location: After Section 7.5 (Compatibility and Interoperability)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsection{Extensibility via WebAssembly}
|
||||
|
||||
Spacedrive's architecture is designed for extensibility from the ground up. The WebAssembly (WASM) plugin system enables safe, performant extensions while maintaining the security and reliability guarantees of the core system.
|
||||
|
||||
\subsubsection{Plugin Architecture}
|
||||
|
||||
The WASM runtime provides a sandboxed environment where plugins can:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Define Custom Content Types}: Register new semantic types with parsing logic
|
||||
\item \textbf{Add Storage Providers}: Implement connectors for additional cloud services
|
||||
\item \textbf{Create AI Agents}: Deploy specialized agents for domain-specific workflows
|
||||
\item \textbf{Extend Actions}: Add new operations to the Action System
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=Rust, caption=Example WASM plugin interface]
|
||||
#[spacedrive_plugin]
|
||||
pub struct EmailPlugin;
|
||||
|
||||
#[spacedrive_plugin::content_type]
|
||||
impl ContentTypeHandler for EmailPlugin {
|
||||
fn can_handle(&self, entry: &Entry) -> bool {
|
||||
matches!(entry.extension(), Some("eml") | Some("msg"))
|
||||
}
|
||||
|
||||
fn extract_metadata(&self, data: &[u8]) -> Result<JsonValue> {
|
||||
// Parse email and return structured data
|
||||
}
|
||||
}
|
||||
|
||||
#[spacedrive_plugin::agent]
|
||||
impl Agent for EmailPlugin {
|
||||
fn on_file_added(&self, entry: &Entry) -> Vec<Action> {
|
||||
// Propose organization actions for new emails
|
||||
}
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
\subsubsection{Security Model}
|
||||
|
||||
WASM plugins operate under strict security constraints:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Capability-Based Security}: Plugins declare required permissions upfront
|
||||
\item \textbf{Resource Limits}: CPU, memory, and I/O are bounded
|
||||
\item \textbf{No Direct File Access}: All operations go through the VDFS API
|
||||
\item \textbf{Audited Operations}: Plugin actions are logged and can be reverted
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Distribution and Discovery}
|
||||
|
||||
The plugin ecosystem leverages modern package management:
|
||||
|
||||
\begin{itemize}
|
||||
\item Official registry for verified plugins
|
||||
\item Cryptographic signing for authenticity
|
||||
\item Automatic updates with semantic versioning
|
||||
\item Community ratings and reviews
|
||||
\end{itemize}
|
||||
|
||||
This extensibility model enables Spacedrive to grow beyond its core capabilities while maintaining the integrity and performance users expect.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Update Section 7.5.2: Cloud Service Integration
|
||||
|
||||
### Location: Section 7.5.2 (under Compatibility and Interoperability)
|
||||
|
||||
### Add after existing content:
|
||||
|
||||
```latex
|
||||
\paragraph{OpenDAL Integration}
|
||||
To achieve comprehensive cloud storage support efficiently, Spacedrive leverages OpenDAL (Open Data Access Layer), a Rust-native library providing unified access to storage services. This architectural decision offers several advantages:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Unified Interface}: Single API for S3, Azure Blob, Google Cloud Storage, WebDAV, and dozens more
|
||||
\item \textbf{Native Performance}: Zero-overhead abstractions with service-specific optimizations
|
||||
\item \textbf{Streaming Support}: Efficient handling of large files without full downloads
|
||||
\item \textbf{Automatic Retries}: Built-in resilience for unreliable network conditions
|
||||
\end{itemize}
|
||||
|
||||
Each OpenDAL backend appears as a standard Volume in Spacedrive's architecture, automatically enabling:
|
||||
\begin{itemize}
|
||||
\item Full indexing of cloud storage contents
|
||||
\item Transparent file operations across providers
|
||||
\item Unified search across all connected services
|
||||
\item Intelligent caching based on access patterns
|
||||
\end{itemize}
|
||||
|
||||
This approach exemplifies our commitment to "Zero Vendor Lock-in" while providing users seamless access to their data regardless of where it resides.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Add Section 5.4: Collaboration and Public Sharing
|
||||
|
||||
### Location: After Section 5.3 (User Benefits)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsection{Collaboration and Public Sharing}
|
||||
|
||||
The Cloud Core architecture enables sophisticated sharing capabilities without introducing complex APIs or compromising the peer-to-peer model.
|
||||
|
||||
\subsubsection{Shared Folders via Team Libraries}
|
||||
|
||||
Collaboration in Spacedrive leverages the Library abstraction:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Team Libraries}: Shared libraries with role-based permissions
|
||||
\item \textbf{Granular Access Control}: Per-location and per-file permissions
|
||||
\item \textbf{Action Audit Trail}: Complete history of all modifications
|
||||
\item \textbf{Conflict Resolution}: Automatic handling of concurrent edits
|
||||
\end{itemize}
|
||||
|
||||
Team members connect to shared libraries exactly as they would personal ones—the Cloud Core simply acts as an always-available peer ensuring data availability.
|
||||
|
||||
\subsubsection{Public File Hosting}
|
||||
|
||||
Public sharing leverages the same infrastructure with a crucial distinction:
|
||||
|
||||
\begin{itemize}
|
||||
\item Files marked with "public" role become web-accessible
|
||||
\item Cloud Core's web frontend serves public files without authentication
|
||||
\item Automatic CDN integration for performance
|
||||
\item Analytics and access logs for content creators
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Enhanced Spacedrop}
|
||||
|
||||
The Cloud Core extends Spacedrop's capabilities:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Asynchronous Transfers}: Cloud Core holds files until recipients connect
|
||||
\item \textbf{Persistent Links}: Share links remain valid indefinitely
|
||||
\item \textbf{Large File Support}: No size limits with resumable transfers
|
||||
\item \textbf{Access Control}: Optional passwords and expiration dates
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=text, caption=Spacedrop link examples]
|
||||
# Direct P2P (ephemeral)
|
||||
spacedrop://device-id/transfer-id
|
||||
|
||||
# Cloud-assisted (persistent)
|
||||
https://drop.spacedrive.com/abc123
|
||||
|
||||
# Self-hosted relay
|
||||
https://relay.company.com/drop/xyz789
|
||||
\end{lstlisting}
|
||||
|
||||
This unified approach to sharing—from private team collaboration to public content distribution—demonstrates how core P2P primitives scale to support diverse use cases without architectural compromises.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Update Section 4.5.2: Iroh-Powered Network Infrastructure
|
||||
|
||||
### Location: Add clarification about relay flexibility
|
||||
|
||||
### Add after the relay description:
|
||||
|
||||
```latex
|
||||
\paragraph{Self-Hosted Relay Infrastructure}
|
||||
While Spacedrive provides public relay servers for convenience, the architecture fully supports self-hosted deployments:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Zero-Trust Option}: Organizations can run private relay networks
|
||||
\item \textbf{Simple Deployment}: Single binary with minimal configuration
|
||||
\item \textbf{Geographic Distribution}: Deploy relays near users for optimal performance
|
||||
\item \textbf{Compliance Ready}: Keep all traffic within organizational boundaries
|
||||
\end{itemize}
|
||||
|
||||
This flexibility makes Spacedrive suitable for:
|
||||
\begin{itemize}
|
||||
\item Enterprises requiring complete data sovereignty
|
||||
\item Regions with data residency requirements
|
||||
\item Air-gapped networks with no external connectivity
|
||||
\item Organizations building private overlay networks (similar to Tailscale)
|
||||
\end{itemize}
|
||||
|
||||
The relay service can be deployed as a standalone component, in Kubernetes, or as a managed service, providing deployment flexibility to match any infrastructure requirement.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Expand Table 7.1: Performance Benchmarks
|
||||
|
||||
### Location: Section 7.1 (Performance Evaluation)
|
||||
|
||||
### Replace existing table with:
|
||||
|
||||
```latex
|
||||
\begin{table}[h]
|
||||
\centering
|
||||
\caption{Performance benchmarks across storage tiers (M2 MacBook Pro, 16GB RAM)}
|
||||
\label{tab:performance}
|
||||
\begin{tabular}{lrr}
|
||||
\toprule
|
||||
\textbf{Metric} & \textbf{Value} & \textbf{Unit} \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Indexing Throughput}} \\
|
||||
\quad Internal NVMe SSD & 8,500 & files/sec \\
|
||||
\quad External USB 3.2 SSD & 6,200 & files/sec \\
|
||||
\quad Network Attached Storage & 3,100 & files/sec \\
|
||||
\quad External HDD (USB 3.0) & 1,850 & files/sec \\
|
||||
\quad Cloud Storage (S3 Standard) & 450 & files/sec \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Search Latency (1M entries)}} \\
|
||||
\quad Temporal Search (FTS5) & 55 & ms \\
|
||||
\quad Semantic Search (Vector) & 95 & ms \\
|
||||
\quad Combined Temporal-Semantic & 110 & ms \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Memory Usage}} \\
|
||||
\quad Base daemon & 45 & MB \\
|
||||
\quad Per 1M indexed files & 105 & MB \\
|
||||
\quad With active P2P connections & +15 & MB/peer \\
|
||||
\midrule
|
||||
\multicolumn{3}{l}{\textit{Network Performance}} \\
|
||||
\quad P2P transfer (LAN) & 110 & MB/s \\
|
||||
\quad P2P transfer (WAN w/ relay) & 45 & MB/s \\
|
||||
\quad NAT traversal success rate & 92 & \% \\
|
||||
\quad Connection establishment & 1.8 & seconds \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
\textit{Note: Indexing throughput varies based on file size distribution and metadata complexity. Tests used a representative dataset of mixed document types with average size of 250KB.}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Update Section 4.4: The Transactional Action System
|
||||
|
||||
### Location: In the Simulation Engine description
|
||||
|
||||
### Update the paragraph about pre-visualization to include:
|
||||
|
||||
```latex
|
||||
\paragraph{Intelligent Time Estimation}
|
||||
The Simulation Engine combines multiple data sources to provide accurate operation time estimates:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Volume Performance Metrics}: Real-time read/write speeds from continuous monitoring
|
||||
\item \textbf{Network Conditions}: Current bandwidth and latency from Iroh's measurements
|
||||
\item \textbf{Historical Data}: Previous operations on similar files and paths
|
||||
\item \textbf{Operation Complexity}: Number of files, total size, and fragmentation
|
||||
\end{itemize}
|
||||
|
||||
For example, when copying 10GB across devices, the estimation considers:
|
||||
\begin{itemize}
|
||||
\item Source volume read speed: 250 MB/s (measured)
|
||||
\item Network throughput: 45 MB/s (current P2P bandwidth)
|
||||
\item Destination write speed: 180 MB/s (measured)
|
||||
\item Bottleneck: Network at 45 MB/s
|
||||
\item Estimated time: approximately 4 minutes (about 3 minutes 42 seconds of transfer at 45 MB/s, plus a 10\% buffer)
|
||||
\end{itemize}
|
||||
|
||||
This transparency helps users make informed decisions about when and how to execute operations.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. Add Section 8.5: Balancing Privacy and Public Sharing
|
||||
|
||||
### Location: After Section 8.4 (Incident Response)
|
||||
|
||||
### Proposed Text:
|
||||
|
||||
```latex
|
||||
\subsection{Balancing Privacy and Public Sharing}
|
||||
|
||||
Spacedrive's security model accommodates both zero-knowledge privacy and public content sharing through its library-based architecture.
|
||||
|
||||
\subsubsection{Per-Library Encryption Policy}
|
||||
|
||||
Each library maintains independent encryption settings:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Private Libraries} (default): Full SQLCipher encryption at rest
|
||||
\item \textbf{Public Libraries} (opt-in): Unencrypted for web serving
|
||||
\item \textbf{Hybrid Libraries}: Encrypted with selective public locations
|
||||
\end{itemize}
|
||||
|
||||
\begin{lstlisting}[language=Rust, caption=Library encryption configuration]
|
||||
pub struct LibraryConfig {
|
||||
pub encryption: EncryptionMode,
|
||||
pub public_sharing: PublicSharingConfig,
|
||||
}
|
||||
|
||||
pub enum EncryptionMode {
|
||||
/// Full encryption (default)
|
||||
Encrypted { key_derivation: Argon2id },
|
||||
/// No encryption (for public content)
|
||||
Unencrypted,
|
||||
/// Encrypted with public locations
|
||||
Hybrid { public_locations: Vec<LocationId> },
|
||||
}
|
||||
\end{lstlisting}
|
||||
|
||||
\subsubsection{Secure Public Sharing Workflow}
|
||||
|
||||
Users can share content publicly without compromising private data:
|
||||
|
||||
\begin{enumerate}
|
||||
\item Create a dedicated public library on Cloud Core
|
||||
\item Move or copy files into locations belonging to the public library
|
||||
\item Cloud Core serves these files via HTTPS
|
||||
\item Private libraries remain fully encrypted
|
||||
\end{enumerate}
|
||||
|
||||
\subsubsection{Implementation Considerations}
|
||||
|
||||
This dual-mode approach ensures:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Clear Boundaries}: Users explicitly choose what becomes public
|
||||
\item \textbf{No Encryption Downgrade}: Private libraries cannot be converted to public
|
||||
\item \textbf{Audit Trail}: All public sharing actions are logged
|
||||
\item \textbf{Revocable Access}: Public files can be made private instantly
|
||||
\end{itemize}
|
||||
|
||||
By making encryption optional but enabled by default, Spacedrive provides flexibility for content creators and enterprises while maintaining strong privacy guarantees for personal data.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Minor Updates Throughout
|
||||
|
||||
### Executive Summary - Key Features
|
||||
Update the feature list to reflect new terminology:
|
||||
- Change "Lightning Search" to "Temporal-Semantic Search"
|
||||
- Add bullet: "• Extensible via WebAssembly plugins"
|
||||
|
||||
### Glossary Updates
|
||||
- Remove "Lightning Search" entry
|
||||
- Add "Temporal-Semantic Search: Hybrid search combining temporal (time-based) full-text search with semantic (meaning-based) vector search"
|
||||
- Add "OpenDAL: Open Data Access Layer, providing unified access to cloud storage services"
|
||||
- Add "Semantic Content Types: Advanced file type system that understands data structure and meaning beyond MIME types"
|
||||
|
||||
### Section 2 (Related Work)
|
||||
Add a paragraph comparing Spacedrive's extensibility approach to other systems:
|
||||
|
||||
```latex
|
||||
\paragraph{Extensibility Models}
|
||||
Unlike systems that require native plugins (Finder, Nautilus) or rely on scripting languages (Obsidian, VS Code), Spacedrive's WebAssembly approach provides both safety and performance. This positions it uniquely as an enterprise-ready platform that can be extended without compromising security or stability.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
1. **High Priority** (Core value propositions):
|
||||
- Semantic Content Types (Section 4.1.6)
|
||||
- Temporal-Semantic Search rename
|
||||
- Cloud Storage via OpenDAL
|
||||
- Collaboration and Public Sharing (Section 5.4)
|
||||
|
||||
2. **Medium Priority** (Important but not critical path):
|
||||
- WebAssembly Extensibility
|
||||
- Enhanced benchmarks table
|
||||
- Time estimation details
|
||||
- Self-hosted relay clarification
|
||||
|
||||
3. **Low Priority** (Nice to have):
|
||||
- Minor wording updates
|
||||
- Glossary additions
|
||||
- Related work comparison
|
||||
|
||||
---
|
||||
|
||||
## Notes for Reviewers
|
||||
|
||||
- All proposed changes maintain the academic tone and technical rigor of the original
|
||||
- New sections integrate seamlessly with existing architecture
|
||||
- No changes compromise the core principles (Local-First, Privacy, P2P, etc.)
|
||||
- Implementation details are realistic based on current codebase analysis
|
||||
- The changes position Spacedrive as both consumer-friendly and enterprise-ready
|
||||
Reference in New Issue
Block a user