From e648dd2e2b51a72f6f73e223871eb937336bb2cd Mon Sep 17 00:00:00 2001 From: Jamie Pine Date: Mon, 11 Apr 2022 21:42:32 -0700 Subject: [PATCH] docs and plans --- .gitignore | 1 + docs/architecture/distributed-data-sync.md | 45 +++++++--------------- docs/architecture/virtual-filesystem.md | 2 +- extensions/apple-photos/README.md | 39 +++++++++++++++++++ extensions/twitter-history/Cargo.toml | 0 5 files changed, 54 insertions(+), 33 deletions(-) create mode 100644 extensions/apple-photos/README.md create mode 100644 extensions/twitter-history/Cargo.toml diff --git a/.gitignore b/.gitignore index 4716a7d46..ca2f6b272 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ packages/*/data apps/*/data docs/public/*.st docs/public/*.toml +dev.db !cli/cmd/turbo cli/npm/turbo-android-arm64/bin diff --git a/docs/architecture/distributed-data-sync.md b/docs/architecture/distributed-data-sync.md index 21a572cc2..cfc5aad90 100644 --- a/docs/architecture/distributed-data-sync.md +++ b/docs/architecture/distributed-data-sync.md @@ -20,8 +20,8 @@ mod sync { // we can now impl specfic CRDT traits to given resources enum SyncResource { FilePath(dyn Replicate), - File(dyn OperationalTransform), - Tag(dyn OperationalTransform), + File(dyn PropertyOperation), + Tag(dyn PropertyOperation), TagOnFile(dyn LastWriteWin), Jobs(dyn Replicate + OperationalTransform) } @@ -31,29 +31,19 @@ mod sync { ## Data Types -Data is divided into several kinds, Shared, Relational and Owned. +Data is divided into several kinds, Shared and Owned. - **Shared data** - Can be created and modified by any client. Has a `uuid`. - *Sync Method:* `Operational transform*` + *Sync Method:* `Property operation*` > Shared resources could be,`files`, `tags`, `comments`, `albums` and `labels`. Since these can be created, updated or deleted by any client at any time. -- **Relational data** - Can be created and modified by any client. Links two UUIDs by local IDs. 
- - *Sync Method:* `Last write wins (LWW)` - - > Any many-to-many tables do not store UUIDs, we have to handle this data specifically. Querying for the resources local IDs before creating or deleting the relation. - - **Owned data** - Can only be modified by the client that created it. Has a `client_id` and `uuid`. *Sync Method:* `Replicate` > Owned resources would be `file_paths`, `jobs`, `locations` and `media_data`, since a client is the single source of truth for this data. This means we can perform conflict free synchronization. -- **Offline data** - Not synchronized at all. - - > For example `logs`, `pending_operations` and `_migrations`. These are static and not part of this system. - **Shared data doesn't always use this method, in some cases we can create shared resources in bulk, where conflicts are handled by simply merging. More on that in [Synchronization Strategy]()*. @@ -120,17 +110,20 @@ Owned data → Bulk shared data → Shared data → Relational data ### Types of CRDT: ```rust -trait OperationalTransform; - -trait LastWriteWin; +trait PropertyOperation; trait Replicate; ``` -- **Operational Transform** - Update Shared resources at a property level. Operations stored in `pending_operations` table. -- **Last Write Win** - The most recent event will always be applied, used for many-to-many datasets. +- **PropertyOperation** - Update Shared resources at a property level. Operations stored in `pending_operations` table. - **Replicate** - Used exclusively for Owned data, clients will replicate with no questions asked. +- ~~**Last Write Win** - The most recent event will always be applied, used for many-to-many datasets.~~ + + + + + ## Operations @@ -139,7 +132,7 @@ Operations perform a Shared data change, they are cached in the database as `pen Operations are removed once all online clients have received the payload. 
```rust -struct OperationalTransform { +struct PropertyOperation { method: OperationMethod, // the name of the database table resource_type: String, @@ -272,18 +265,6 @@ Files also impempent `OperationalMerge` would use -## Ingesting Sync Events - - - - - - - - - - - diff --git a/docs/architecture/virtual-filesystem.md b/docs/architecture/virtual-filesystem.md index 116bc038c..f3bd8e50d 100644 --- a/docs/architecture/virtual-filesystem.md +++ b/docs/architecture/virtual-filesystem.md @@ -36,7 +36,7 @@ struct File { } ``` -- `partial_checksum ` - A SHA256 checksum generated from 5 samples of 10,000 bytes throughout the file data, including the begining and end. This is used to identify a file as *likely* unique in under 100µs. +- `partial_checksum ` - A SHA256 checksum generated from 5 samples of 10,000 bytes throughout the file data, including the beginning and end + total byte count. This is used to identify a file as *likely* unique in under 100µs. > ~~It is impossible to have a unique constraint at a database level for the `partial_checksum` however we can asyncronously resolve conflicts by querying for duplicates and generating full checksums at a later date.~~ > diff --git a/extensions/apple-photos/README.md b/extensions/apple-photos/README.md new file mode 100644 index 000000000..10324e037 --- /dev/null +++ b/extensions/apple-photos/README.md @@ -0,0 +1,39 @@ +This extension must first register an indexer context to prevent the indexer from scanning the photo library + +```rust +struct IndexerContext { + key: String, + is_dir: bool, + extension: Option, + must_contain: Vec, + always_ignored: Option, + scan: bool, +} +``` + +```rust +core.register_context(IndexerContext { + key: "apple-photo-library", + is_dir: false, + extension: Some(".photoslibrary"), + must_contain: vec!["database", "originals"], + always_ignored: None, + scan: false, // apple-photos extension takes care of scan +}); + +core.register_context(IndexerContext { + key: "github-repository", + is_dir: 
true, + extension: None, + must_contain: vec![".git"], + always_ignored: Some(vec!["node_modules", "target"]), + scan: true, +}); +``` + +For Apple Photos we need: +- Hidden/Favorite items +- Live photo support +- Original creation date +- Edited photos +- Albums diff --git a/extensions/twitter-history/Cargo.toml b/extensions/twitter-history/Cargo.toml new file mode 100644 index 000000000..e69de29bb