diff --git a/README.md b/README.md index f303acfa3..a5b7c7ea0 100644 --- a/README.md +++ b/README.md @@ -155,8 +155,8 @@ These options are the same regardless of whether you use them with the the file. An attempt to move the file to that branch will occur (keeping all metadata possible) and if successful the original is unlinked and the write retried. (default: false, true = mfs) -* **inodecalc=passthrough|path-hash|devino-hash|hybrid-hash**: Selects - the inode calculation algorithm. (default: hybrid-hash) +* **inodecalc=passthrough|path-hash|devino-hash|basepath-hash|hybrid-hash|basehybrid-hash**: + Selects the inode calculation algorithm. (default: hybrid-hash) * **dropcacheonclose=BOOL**: When a file is requested to be closed call `posix_fadvise` on it first to instruct the kernel that we no longer need the data and it can drop its cache. Recommended when @@ -444,12 +444,23 @@ covering different usecases. different file or files move out of band but will present the same inode for underlying files that do too. * devino-hash32: 32bit version of devino-hash. +* basepath-hash: Hashes the branch base path along with + the inode of the underlying entry. This has a similar purpose to + devino-hash, but by using the path instead of the device-id, the inodes + will be guaranteed to be stable across reboots. Useful for backup or + deduplication systems that rely on a static inode. Note that if the + root directory is below the mountpoint of the underlying storage, + duplicate inodes are possible. +* basepath-hash32: 32bit version of basepath-hash. * hybrid-hash: Performs `path-hash` on directories and `devino-hash` on other file types. Since directories can't have hard links the static value won't make a difference and the files will get values useful for finding duplicates. Probably the best to use if not using NFS. As such it is the default. * hybrid-hash32: 32bit version of hybrid-hash. 
+* basehybrid-hash: Serves the same purpose as `hybrid-hash` but using + the `basepath-hash` algorithm for files. +* basehybrid-hash32: 32bit version of basehybrid-hash. 32bit versions are provided as there is some software which does not handle 64bit inodes well. diff --git a/src/fileinfo.hpp b/src/fileinfo.hpp index 4c8beaba8..d9eb0a490 100644 --- a/src/fileinfo.hpp +++ b/src/fileinfo.hpp @@ -27,16 +27,19 @@ class FileInfo : public FH { public: FileInfo(int const fd_, + const std::string &basepath_, char const *fusepath_, bool const direct_io_) : FH(fusepath_), fd(fd_), + basepath(basepath_), direct_io(direct_io_) { } public: int fd; + const std::string basepath; uint32_t direct_io:1; std::mutex mutex; }; diff --git a/src/fs_inode.cpp b/src/fs_inode.cpp index 19098440c..96ad766f0 100644 --- a/src/fs_inode.cpp +++ b/src/fs_inode.cpp @@ -18,6 +18,7 @@ #include "ef.hpp" #include "errno.hpp" +#include "fmt/core.h" #include "fs_inode.hpp" #include "wyhash.h" @@ -28,9 +29,9 @@ #include #include -typedef uint64_t (*inodefunc_t)(const char*,const uint64_t,const mode_t,const dev_t,const ino_t); +typedef uint64_t (*inodefunc_t)(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t); -static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t,const ino_t); +static uint64_t hybrid_hash(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t); static inodefunc_t g_func = hybrid_hash; @@ -44,7 +45,8 @@ h64_to_h32(uint64_t h_) static uint64_t -passthrough(const char *fusepath_, +passthrough(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -55,7 +57,8 @@ passthrough(const char *fusepath_, static uint64_t -path_hash(const char *fusepath_, +path_hash(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -69,7 +72,8 @@ path_hash(const char *fusepath_, static
uint64_t -path_hash32(const char *fusepath_, +path_hash32(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -77,7 +81,8 @@ path_hash32(const char *fusepath_, { uint64_t h; - h = path_hash(fusepath_, + h = path_hash(basepath_, + fusepath_, fusepath_len_, mode_, dev_, @@ -88,7 +93,8 @@ path_hash32(const char *fusepath_, static uint64_t -devino_hash(const char *fusepath_, +devino_hash(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -107,7 +113,8 @@ devino_hash(const char *fusepath_, static uint64_t -devino_hash32(const char *fusepath_, +devino_hash32(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, @@ -115,7 +122,8 @@ devino_hash32(const char *fusepath_, { uint64_t h; - h = devino_hash(fusepath_, + h = devino_hash(basepath_, + fusepath_, fusepath_len_, mode_, dev_, @@ -126,28 +134,97 @@ devino_hash32(const char *fusepath_, static uint64_t -hybrid_hash(const char *fusepath_, +basepath_hash(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + + std::string buf = fmt::format("{}{}",ino_,basepath_); + + return wyhash(buf.c_str(), + buf.length(), + fs::inode::MAGIC, + _wyp); +} + +static +uint64_t +basepath_hash32(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + uint64_t h; + + h = basepath_hash(basepath_, + fusepath_, + fusepath_len_, + mode_, + dev_, + ino_); + + return h64_to_h32(h); +} + +static +uint64_t +hybrid_hash(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return (S_ISDIR(mode_) ? 
+ path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + devino_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); +} + +static +uint64_t +hybrid_hash32(const std::string &basepath_, + const char *fusepath_, + const uint64_t fusepath_len_, + const mode_t mode_, + const dev_t dev_, + const ino_t ino_) +{ + return (S_ISDIR(mode_) ? + path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + devino_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); +} + +static +uint64_t +basehybrid_hash(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { return (S_ISDIR(mode_) ? - path_hash(fusepath_,fusepath_len_,mode_,dev_,ino_) : - devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_)); + path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + basepath_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); } static uint64_t -hybrid_hash32(const char *fusepath_, +basehybrid_hash32(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { return (S_ISDIR(mode_) ? 
- path_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_) : - devino_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_)); + path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) : + basepath_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_)); } namespace fs @@ -171,6 +248,14 @@ namespace fs g_func = hybrid_hash; ef(algo_ == "hybrid-hash32") g_func = hybrid_hash32; + ef(algo_ == "basepath-hash") + g_func = basepath_hash; + ef(algo_ == "basepath-hash32") + g_func = basepath_hash32; + ef(algo_ == "basehybrid-hash") + g_func = basehybrid_hash; + ef(algo_ == "basehybrid-hash32") + g_func = basehybrid_hash32; else return -EINVAL; @@ -194,27 +279,38 @@ namespace fs return "hybrid-hash"; if(g_func == hybrid_hash32) return "hybrid-hash32"; + if(g_func == basepath_hash) + return "basepath-hash"; + if(g_func == basepath_hash32) + return "basepath-hash32"; + if(g_func == basehybrid_hash) + return "basehybrid-hash"; + if(g_func == basehybrid_hash32) + return "basehybrid-hash32"; return std::string(); } uint64_t - calc(const char *fusepath_, + calc(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { - return g_func(fusepath_,fusepath_len_,mode_,dev_,ino_); + return g_func(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_); } uint64_t - calc(std::string const &fusepath_, + calc(const std::string &basepath_, + std::string const &fusepath_, const mode_t mode_, const dev_t dev_, const ino_t ino_) { - return calc(fusepath_.c_str(), + return calc(basepath_, + fusepath_.c_str(), fusepath_.size(), mode_, dev_, @@ -222,11 +318,13 @@ namespace fs } void - calc(const char *fusepath_, + calc(const std::string &basepath_, + const char *fusepath_, const uint64_t fusepath_len_, struct stat *st_) { - st_->st_ino = calc(fusepath_, + st_->st_ino = calc(basepath_, + fusepath_, fusepath_len_, st_->st_mode, st_->st_dev, @@ -234,17 +332,19 @@ namespace fs } void - calc(const char *fusepath_, + calc(const
std::string &basepath_, + const char *fusepath_, struct stat *st_) { - calc(fusepath_,strlen(fusepath_),st_); + calc(basepath_,fusepath_,strlen(fusepath_),st_); } void - calc(const std::string &fusepath_, + calc(const std::string &basepath_, + const std::string &fusepath_, struct stat *st_) { - calc(fusepath_.c_str(),fusepath_.size(),st_); + calc(basepath_,fusepath_.c_str(),fusepath_.size(),st_); } } } diff --git a/src/fs_inode.hpp b/src/fs_inode.hpp index 11e772c83..87abb88e5 100644 --- a/src/fs_inode.hpp +++ b/src/fs_inode.hpp @@ -33,21 +33,26 @@ namespace fs int set_algo(const std::string &s); std::string get_algo(void); - uint64_t calc(const char *fusepath, + uint64_t calc(const std::string &basepath, + const char *fusepath, const uint64_t fusepath_len, const mode_t mode, const dev_t dev, const ino_t ino); - uint64_t calc(std::string const &fusepath, + uint64_t calc(const std::string &basepath, + std::string const &fusepath, mode_t const mode, dev_t const dev, ino_t ino); - void calc(const char *fusepath, + void calc(const std::string &basepath, + const char *fusepath, const uint64_t fusepath_len, struct stat *st); - void calc(const char *fusepath, + void calc(const std::string &basepath, + const char *fusepath, struct stat *st); - void calc(const std::string &fusepath, + void calc(const std::string &basepath, + const std::string &fusepath, struct stat *st); } diff --git a/src/fuse_create.cpp b/src/fuse_create.cpp index 45ee5c686..2ce27e289 100644 --- a/src/fuse_create.cpp +++ b/src/fuse_create.cpp @@ -163,7 +163,7 @@ namespace l if(rv == -1) return -errno; - fi = new FileInfo(rv,fusepath_,ffi_->direct_io); + fi = new FileInfo(rv,createpath_,fusepath_,ffi_->direct_io); ffi_->fh = reinterpret_cast(fi); diff --git a/src/fuse_fgetattr.cpp b/src/fuse_fgetattr.cpp index bc8b44d4a..485207c86 100644 --- a/src/fuse_fgetattr.cpp +++ b/src/fuse_fgetattr.cpp @@ -28,6 +28,7 @@ namespace l static int fgetattr(const int fd_, + const std::string &basepath_, const std::string 
&fusepath_, struct stat *st_) { @@ -37,7 +38,7 @@ namespace l if(rv == -1) return -errno; - fs::inode::calc(fusepath_,st_); + fs::inode::calc(basepath_,fusepath_,st_); return 0; } @@ -54,7 +55,7 @@ namespace FUSE Config::Read cfg; FileInfo *fi = reinterpret_cast(ffi_->fh); - rv = l::fgetattr(fi->fd,fi->fusepath,st_); + rv = l::fgetattr(fi->fd,fi->basepath,fi->fusepath,st_); timeout_->entry = ((rv >= 0) ? cfg->cache_entry : diff --git a/src/fuse_getattr.cpp b/src/fuse_getattr.cpp index 13b5ccbd6..5b9c51125 100644 --- a/src/fuse_getattr.cpp +++ b/src/fuse_getattr.cpp @@ -141,7 +141,7 @@ namespace l if(symlinkify_ && symlinkify::can_be_symlink(*st_,symlinkify_timeout_)) symlinkify::convert(fullpath,st_); - fs::inode::calc(fusepath_,st_); + fs::inode::calc(basepaths[0],fusepath_,st_); return 0; } diff --git a/src/fuse_open.cpp b/src/fuse_open.cpp index d49ed797c..db30c2c65 100644 --- a/src/fuse_open.cpp +++ b/src/fuse_open.cpp @@ -211,7 +211,7 @@ namespace l if(fd == -1) return -errno; - fi = new FileInfo(fd,fusepath_,ffi_->direct_io); + fi = new FileInfo(fd,basepath_,fusepath_,ffi_->direct_io); ffi_->fh = reinterpret_cast(fi); diff --git a/src/fuse_readdir_cor.cpp b/src/fuse_readdir_cor.cpp index 2b96c0d24..436d4a42f 100644 --- a/src/fuse_readdir_cor.cpp +++ b/src/fuse_readdir_cor.cpp @@ -77,7 +77,8 @@ namespace l static inline int - readdir(std::string basepath_, + readdir(const std::string &branchdir_, + std::string basepath_, HashSet &names_, fuse_dirents_t *buf_, std::mutex &mutex_) @@ -122,7 +123,8 @@ namespace l continue; filepath = fs::path::make(basepath_,d->name); - d->ino = fs::inode::calc(filepath, + d->ino = fs::inode::calc(branchdir_, + filepath, DTTOIF(d->type), dev, d->ino); @@ -161,7 +163,7 @@ namespace l basepath = fs::path::make(branch.path,dirname_); - return l::readdir(basepath,names,buf_,mutex); + return l::readdir(branch.path,basepath,names,buf_,mutex); }; auto rv = tp_.enqueue_task(func); diff --git a/src/fuse_readdir_cosr.cpp 
b/src/fuse_readdir_cosr.cpp index 6be0a97e3..83c2394c0 100644 --- a/src/fuse_readdir_cosr.cpp +++ b/src/fuse_readdir_cosr.cpp @@ -52,6 +52,7 @@ namespace l { DIR *dir; int err; + std::string basepath; }; struct Error @@ -119,6 +120,7 @@ namespace l errno = 0; rv.dir = fs::opendir(basepath); rv.err = errno; + rv.basepath = branch.path; return rv; }; @@ -169,7 +171,8 @@ namespace l continue; fullpath = fs::path::make(dirname_,de->d_name); - de->d_ino = fs::inode::calc(fullpath, + de->d_ino = fs::inode::calc(dirrv.basepath, + fullpath, DTTOIF(de->d_type), dev, de->d_ino); diff --git a/src/fuse_readdir_seq.cpp b/src/fuse_readdir_seq.cpp index 9aa0c6f0c..d6568bfe1 100644 --- a/src/fuse_readdir_seq.cpp +++ b/src/fuse_readdir_seq.cpp @@ -125,7 +125,8 @@ namespace l continue; fullpath = fs::path::make(dirname_,de->d_name); - de->d_ino = fs::inode::calc(fullpath, + de->d_ino = fs::inode::calc(branch.path, + fullpath, DTTOIF(de->d_type), dev, de->d_ino); diff --git a/src/fuse_symlink.cpp b/src/fuse_symlink.cpp index fcc9ef36f..a8ed41e29 100644 --- a/src/fuse_symlink.cpp +++ b/src/fuse_symlink.cpp @@ -74,7 +74,7 @@ namespace l { fs::lstat(fullnewpath,st_); if(st_->st_ino != 0) - fs::inode::calc(linkpath_,st_); + fs::inode::calc(newbasepath_,linkpath_,st_); } return error::calc(rv,error_,errno);