From 40cc780ec1726e5ea21edd5ce72ab68faf5a6035 Mon Sep 17 00:00:00 2001 From: Jamie Pine Date: Sat, 23 Apr 2022 01:12:35 -0700 Subject: [PATCH] added unique file identifier --- Cargo.lock | Bin 217630 -> 215866 bytes apps/debug/Cargo.toml | 24 ++-- apps/debug/src/main.rs | 9 +- apps/landing/src/App.tsx | 2 +- apps/server/src/main.rs | 4 +- .../migrations/20220422102144_/migration.sql | 32 +++++ core/prisma/schema.prisma | 2 +- core/src/encode/metadata.rs | 2 +- core/src/file/cas/identifier.rs | 127 ++++++++++++++++++ core/src/file/cas/mod.rs | 1 + core/src/lib.rs | 6 + docs/product/faq.md | 2 - packages/interface/src/screens/Spaces.tsx | 60 ++------- .../src/screens/settings/GeneralSettings.tsx | 4 + 14 files changed, 207 insertions(+), 68 deletions(-) create mode 100644 core/prisma/migrations/20220422102144_/migration.sql diff --git a/Cargo.lock b/Cargo.lock index f10fca53e69b9288bf09b30b8c1da20acef226e2..d71f4e991b7598189493248d281f434782ee9b75 100644 GIT binary patch delta 69 zcmV-L0J{I4qz$^34X{@;vl}$pRF`sU0w0s0iWIZQi6*AE)Gz^uOSfKq0csw%i<|)< b*0+;$0s}?2ueky%1($`V0T;K#zXDp44SgLX delta 896 zcmYk5zl&W(6vw%DS<#mSP2p}}QJ>F3A=z-|_l)313rS()kAPqk&di+QJ$LWB;l8`( zhgGZ+u!&C`Eer;Ng_U88C!(FTkj74kg@1sJtzHt8w3Fl*B!`TXb-*ARVZ93h zm+a>=N0-F!m-09F>yw+)uAR5}(L43alYMM3;{GmOo2ALQ(Rk8e-?fWa<_D07cRvC>@O~I#475JS3m*-L1Dq&0&u{U2f+gKRl{- zR=3Yrx88c<`0@GIze^P&aDq4`5rGU^&^#iR3j|ZaVoc0=6HC1mrH?|Oiz&%a7D=d> zA6%}tcl@k>jA9UWWtzrto|Taw^wrtb=^NFD&yOCzh4MdLg|9OmlC})F@G=5Qk%Cab zWne~2VUmj>T4tjck|0TrUMh`1q%cZnd}g0iXI3A7S&e@?y>9mCBkC5jc0PV>JYmaw zPrkP&$x1XyIUT^VD3puE51`QsYYZ#kg9uXEvcZU9(aJP2X*jK? zXma`H=k;@|ujcjUo1_20F7N#0_jJ&TdjQSW6chlr60}I@6)!DR#G{YE)Y_sUf(^H|@2tshO01?Fk af=S`Pa~?v`LSVtT_3v0c`l0^dvws2Bf)juM diff --git a/apps/debug/Cargo.toml b/apps/debug/Cargo.toml index 1782be1c3..455bbeb18 100644 --- a/apps/debug/Cargo.toml +++ b/apps/debug/Cargo.toml @@ -6,16 +6,16 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = "1.0.56" -data-encoding = "2.3.2" -kamadak-exif = "0.5.4" -ring = "0.16.20" -thumbnailer = "0.4.0" -mime = "0.3.16" -sha256 = "1.0.3" -ffmpeg-next = "5.0.3" -sdcore = { path = "../../core" } -uhlc = "0.4.1" -matroska = "0.11.0" +# anyhow = "1.0.56" +# data-encoding = "2.3.2" +# kamadak-exif = "0.5.4" +# ring = "0.16.20" +# thumbnailer = "0.4.0" +# mime = "0.3.16" +# sha256 = "1.0.3" +# ffmpeg-next = "5.0.3" +# sdcore = { path = "../../core" } +# uhlc = "0.4.1" +# matroska = "0.11.0" tokio = { version = "1.17.0", features = ["sync", "rt"] } -chrono = "0.4.19" +# chrono = "0.4.19" diff --git a/apps/debug/src/main.rs b/apps/debug/src/main.rs index 580685540..7fe22041c 100644 --- a/apps/debug/src/main.rs +++ b/apps/debug/src/main.rs @@ -2,5 +2,10 @@ // use sdcore::{prisma, sync::engine::test, sync::FakeCoreContext}; -#[tokio::main] -async fn main() {} +use std::fs::File; + +fn main() { + let file = File::open("/Users/james/Desktop/Cloud/preview.mp4").unwrap(); + + println!("{:?}", file.metadata().unwrap()) +} diff --git a/apps/landing/src/App.tsx b/apps/landing/src/App.tsx index 7e90200b9..e127364b7 100644 --- a/apps/landing/src/App.tsx +++ b/apps/landing/src/App.tsx @@ -49,7 +49,7 @@ function App() { The file explorer from the future

- Spacedrive allows you to manage files across all devices, drives and clouds at once. + Manage files across all devices, drives and clouds from one place.
Designed for creators, hoarders and the painfully disorganized.

diff --git a/apps/server/src/main.rs b/apps/server/src/main.rs index 0b2a2f504..d189cc588 100644 --- a/apps/server/src/main.rs +++ b/apps/server/src/main.rs @@ -19,5 +19,7 @@ async fn main() { tokio::spawn(async move { core.start().await; - }); + }) + .await + .unwrap(); } diff --git a/core/prisma/migrations/20220422102144_/migration.sql b/core/prisma/migrations/20220422102144_/migration.sql new file mode 100644 index 000000000..fddd58288 --- /dev/null +++ b/core/prisma/migrations/20220422102144_/migration.sql @@ -0,0 +1,32 @@ +/* + Warnings: + + - You are about to drop the column `streams_json` on the `media_data` table. All the data in the column will be lost. + - A unique constraint covering the columns `[cas_id]` on the table `files` will be added. If there are existing duplicate values, this will fail. + +*/ +-- RedefineTables +PRAGMA foreign_keys=OFF; +CREATE TABLE "new_media_data" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "pixel_width" INTEGER, + "pixel_height" INTEGER, + "longitude" REAL, + "latitude" REAL, + "fps" INTEGER, + "capture_device_make" TEXT, + "capture_device_model" TEXT, + "capture_device_software" TEXT, + "duration_seconds" INTEGER, + "codecs" TEXT, + "streams" INTEGER, + CONSTRAINT "media_data_id_fkey" FOREIGN KEY ("id") REFERENCES "files" ("id") ON DELETE CASCADE ON UPDATE CASCADE +); +INSERT INTO "new_media_data" ("capture_device_make", "capture_device_model", "capture_device_software", "codecs", "duration_seconds", "fps", "id", "latitude", "longitude", "pixel_height", "pixel_width") SELECT "capture_device_make", "capture_device_model", "capture_device_software", "codecs", "duration_seconds", "fps", "id", "latitude", "longitude", "pixel_height", "pixel_width" FROM "media_data"; +DROP TABLE "media_data"; +ALTER TABLE "new_media_data" RENAME TO "media_data"; +PRAGMA foreign_key_check; +PRAGMA foreign_keys=ON; + +-- CreateIndex +CREATE UNIQUE INDEX "files_cas_id_key" ON "files"("cas_id"); diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma index 7ca968982..8a87994a6 100644 --- a/core/prisma/schema.prisma +++ b/core/prisma/schema.prisma @@ -93,7 +93,7 @@ model File { id Int @id @default(autoincrement()) // content addressable storage id - sha256 // this does not need to be unique, as incoming replicas will always ignore if at least one exists - cas_id String + cas_id String @unique // full byte contents digested into sha256 checksum integrity_checksum String? @unique // basic metadata diff --git a/core/src/encode/metadata.rs b/core/src/encode/metadata.rs index 4b94238df..93e782cdd 100644 --- a/core/src/encode/metadata.rs +++ b/core/src/encode/metadata.rs @@ -1,7 +1,7 @@ extern crate ffmpeg_next as ffmpeg; use chrono::NaiveDateTime; use ffmpeg::{dictionary::Iter, format}; -use std::{env, ffi::OsStr, fs, path::Path}; +use std::{ffi::OsStr, path::Path}; #[derive(Default, Debug)] pub struct MediaItem { diff --git a/core/src/file/cas/identifier.rs b/core/src/file/cas/identifier.rs index e69de29bb..34cbbb8c6 100644 --- a/core/src/file/cas/identifier.rs +++ b/core/src/file/cas/identifier.rs @@ -0,0 +1,127 @@ +use crate::job::jobs::JobReportUpdate; +use crate::{ + file::FileError, + job::{jobs::Job, worker::WorkerContext}, + prisma::{self, file_path}, + CoreContext, +}; +use anyhow::Result; +use futures::executor::block_on; +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize, Serialize, Debug)] +pub struct FileCreated { + pub id: i32, + pub cas_id: String, +} + +#[derive(Debug)] +pub struct FileIdentifierJob; + +#[async_trait::async_trait] +impl Job for FileIdentifierJob { + async fn run(&self, ctx: WorkerContext) -> Result<()> { + let total_count = count_orphan_file_paths(&ctx.core_ctx).await?; + println!("Found {} orphan file paths", total_count); + + let task_count = (total_count as f64 / 100f64).ceil() as usize; + + println!("Will process {} tasks", task_count); + + // update job with total task count based on orphan file_paths count + ctx.progress(vec![JobReportUpdate::TaskCount(task_count)]); + + let db = ctx.core_ctx.database.clone(); + + let ctx = tokio::task::spawn_blocking(move || { + let mut completed: usize = 0; + + while completed < task_count { + let file_paths = block_on(get_orphan_file_paths(&ctx.core_ctx, completed * 100)).unwrap(); + println!("Processing: {:?}", file_paths); + let mut rows: Vec = Vec::new(); + for file_path in file_paths.iter() { + if file_path.temp_cas_id.is_none() { + continue; + } + rows.push(prepare_file_values(file_path)); + } + if rows.len() == 0 { + break; + } + let insert_files = format!( + r#"INSERT INTO files (cas_id, size_in_bytes) VALUES {} ON CONFLICT (cas_id) DO NOTHING RETURNING id, cas_id"#, + rows.join(", ") + ); + println!("{}", insert_files); + let files: Vec = block_on(db._query_raw(&insert_files)).unwrap(); + + println!("FILES: {:?}", files); + + for file in files.iter() { + let update_file_path = format!( + r#"UPDATE file_paths SET file_id = "{}" WHERE temp_cas_id = "{}""#, + file.id, file.cas_id + ); + println!("UPDATING PATH: {}", update_file_path); + block_on(db._execute_raw(&update_file_path)).unwrap(); + } + + completed += 1; + println!("completed: {}", completed); + ctx.progress(vec![JobReportUpdate::CompletedTaskCount(completed)]); + } + ctx + }).await?; + + let remaining = count_orphan_file_paths(&ctx.core_ctx).await?; + + if remaining > 0 { + ctx.core_ctx.spawn_job(Box::new(FileIdentifierJob)); + } + + Ok(()) + } +} + +#[derive(Deserialize, Serialize, Debug)] +struct CountRes { + count: Option, +} + +pub async fn count_orphan_file_paths(ctx: &CoreContext) -> Result { + let db = &ctx.database; + let files_count = db + ._query_raw::( + r#"SELECT COUNT(*) AS count FROM file_paths WHERE file_id IS NULL AND is_dir IS FALSE"#, + ) + .await?; + Ok(files_count[0].count.unwrap_or(0)) +} + +pub async fn get_orphan_file_paths( + ctx: &CoreContext, + offset: usize, +) -> Result, FileError> { + let db = &ctx.database; + println!("offset: {}", offset); + let files = db + .file_path() + .find_many(vec![ + file_path::file_id::equals(None), + file_path::is_dir::equals(false), + ]) + .skip(offset) + .take(100) + .exec() + .await?; + Ok(files) +} + +pub fn prepare_file_values(file_path: &file_path::Data) -> String { + format!( + "(\"{}\",\"{}\")", + file_path.temp_cas_id.as_ref().unwrap(), + "0" + ) +} diff --git a/core/src/file/cas/mod.rs b/core/src/file/cas/mod.rs index 239eb62db..43cded2f2 100644 --- a/core/src/file/cas/mod.rs +++ b/core/src/file/cas/mod.rs @@ -1 +1,2 @@ pub mod checksum; +pub mod identifier; diff --git a/core/src/lib.rs b/core/src/lib.rs index 357f49944..03c741239 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -1,3 +1,4 @@ +use crate::file::cas::identifier::FileIdentifierJob; use job::jobs::{Job, JobReport, Jobs}; use log::{error, info}; use prisma::PrismaClient; @@ -269,6 +270,10 @@ impl Core { fs::remove_file(Path::new(&self.state.data_path).join("library.db")).unwrap(); CoreResponse::Success(()) } + ClientCommand::IdentifyUniqueFiles => { + ctx.spawn_job(Box::new(FileIdentifierJob)); + CoreResponse::Success(()) + } }) } @@ -328,6 +333,7 @@ pub enum ClientCommand { SysVolumeUnmount { id: i32 }, GenerateThumbsForLocation { id: i32, path: String }, PurgeDatabase, + IdentifyUniqueFiles, } // represents an event this library can emit diff --git a/docs/product/faq.md b/docs/product/faq.md index 44035c139..bbe08cf68 100644 --- a/docs/product/faq.md +++ b/docs/product/faq.md @@ -1,5 +1,3 @@ - - ## What is it? Spacedrive is a cross platform app that allows you to manage files across all devices, drives and clouds at once. Check out the [readme](https://github.com/spacedriveapp) for more detailed info. diff --git a/packages/interface/src/screens/Spaces.tsx b/packages/interface/src/screens/Spaces.tsx index a0962d1cb..1732e7782 100644 --- a/packages/interface/src/screens/Spaces.tsx +++ b/packages/interface/src/screens/Spaces.tsx @@ -2,60 +2,24 @@ import { useBridgeQuery } from '@sd/client'; import React from 'react'; import ReactJson from 'react-json-view'; import FileItem from '../components/file/FileItem'; +import CodeBlock from '../components/primitive/Codeblock'; import { Tag } from '../components/primitive/Tag'; export const SpacesScreen: React.FC<{}> = (props) => { const { data: client } = useBridgeQuery('ClientGetState'); + const { data: jobs } = useBridgeQuery('JobGetRunning'); + const { data: jobHistory } = useBridgeQuery('JobGetHistory'); return ( -
-
-

Rust level client state:

- +
+
+

Developer Debugger

+

Running Jobs

+ +

Job History

+ +

Client State

+
- {/*
*/} - {/* Videos*/} - {/* DSLR Photos*/} - {/* Camera Roll*/} - {/* NFTs*/} - {/* Screenshots*/} - {/* Documents*/} - {/* Repositories*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/*
*/} - {/* /!*
*/} - {/*
*/} - {/*
*/} - {/*
*!/*/} - {/*
*/}
); }; diff --git a/packages/interface/src/screens/settings/GeneralSettings.tsx b/packages/interface/src/screens/settings/GeneralSettings.tsx index 95743a60f..ae39bb288 100644 --- a/packages/interface/src/screens/settings/GeneralSettings.tsx +++ b/packages/interface/src/screens/settings/GeneralSettings.tsx @@ -18,6 +18,7 @@ export default function GeneralSettings() { alert('Database purged'); } }); + const { mutate: identifyFiles } = useBridgeCommand('IdentifyUniqueFiles'); return (
@@ -37,6 +38,9 @@ export default function GeneralSettings() { +
{/*