Skip to content

Commit

Permalink
Add new basepath-hash inode-generation algorithm
Browse files Browse the repository at this point in the history
A filesystem's device id (st_dev) may change on reboot (e.g. with ZFS). Instead, we can use the file's base path (plus the underlying inode) to generate an inode, which will remain constant across reboots. However, this may have unexpected effects if multiple unique devices appear under a base path.

Like hybrid_hash, basehybrid_hash (and its 32-bit variant) uses the relative-path hash for directories and basepath_hash for files.

Original patch by thrnz@github
  • Loading branch information
PhracturedBlue committed Apr 5, 2024
1 parent 42d0b57 commit d0d265a
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 43 deletions.
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ These options are the same regardless of whether you use them with the
the file. An attempt to move the file to that branch will occur
(keeping all metadata possible) and if successful the original is
unlinked and the write retried. (default: false, true = mfs)
* **inodecalc=passthrough|path-hash|devino-hash|hybrid-hash**: Selects
the inode calculation algorithm. (default: hybrid-hash)
* **inodecalc=passthrough|path-hash|devino-hash|basepath-hash|hybrid-hash|basehybrid-hash**:
Selects the inode calculation algorithm. (default: hybrid-hash)
* **dropcacheonclose=BOOL**: When a file is requested to be closed
call `posix_fadvise` on it first to instruct the kernel that we no
longer need the data and it can drop its cache. Recommended when
Expand Down Expand Up @@ -444,12 +444,23 @@ covering different usecases.
different file or files move out of band but will present the same
inode for underlying files that do too.
* devino-hash32: 32bit version of devino-hash.
* basepath-hash: Hashes the branch base path along with
the inode of the underlying entry. This has a similar purpose to
devino-hash, but by using the path instead of the device-id, the inodes
will be guaranteed to be stable across reboots. Useful for backup or
deduplication systems that rely on a static inode. Note that if the
root directory is below the mountpoint of the underlying storage,
duplicate inodes are possible.
* basepath-hash32: 32bit version of basepath-hash.
* hybrid-hash: Performs `path-hash` on directories and `devino-hash`
on other file types. Since directories can't have hard links the
static value won't make a difference and the files will get values
useful for finding duplicates. Probably the best to use if not using
NFS. As such it is the default.
* hybrid-hash32: 32bit version of hybrid-hash.
* basehybrid-hash: Serves the same purpose as `hybrid-hash` but using
the `basepath-hash` algorithm for files.
* basehybrid-hash32: 32bit version of basehybrid-hash.

32bit versions are provided as there is some software which does not
handle 64bit inodes well.
Expand Down
3 changes: 3 additions & 0 deletions src/fileinfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,19 @@ class FileInfo : public FH
{
public:
// Stores the descriptor, branch base path and direct-io flag for one
// open file; fusepath_ is handed to the FH base class.
//   fd_        - file descriptor to keep in `fd`
//   basepath_  - copied into `basepath`
//   fusepath_  - forwarded to FH's constructor
//   direct_io_ - stored in the one-bit `direct_io` field
FileInfo(int const fd_,
const std::string &basepath_,
char const *fusepath_,
bool const direct_io_)
: FH(fusepath_),
fd(fd_),
basepath(basepath_),
direct_io(direct_io_)
{
}

public:
// Descriptor supplied at construction.
int fd;
// Own copy of the base path given at construction; const, so it cannot
// be reassigned afterwards.
const std::string basepath;
// Single-bit flag initialised from direct_io_.
uint32_t direct_io:1;
// NOTE(review): presumably serialises access to this open file --
// confirm against the code that locks it.
std::mutex mutex;
};
150 changes: 125 additions & 25 deletions src/fs_inode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "ef.hpp"
#include "errno.hpp"
#include "fmt/core.h"
#include "fs_inode.hpp"
#include "wyhash.h"

Expand All @@ -28,9 +29,9 @@
#include <string.h>
#include <sys/stat.h>

// Function-pointer type shared by every inode calculation algorithm in
// this file; g_func below holds the one currently selected.
typedef uint64_t (*inodefunc_t)(const char*,const uint64_t,const mode_t,const dev_t,const ino_t);
typedef uint64_t (*inodefunc_t)(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t);

// Forward declaration so hybrid_hash can be used as g_func's initial value
// before its definition.
static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t,const ino_t);
static uint64_t hybrid_hash(const std::string&,const char*,const uint64_t,const mode_t,const dev_t,const ino_t);

// Currently selected algorithm; starts out as hybrid_hash.
static inodefunc_t g_func = hybrid_hash;

Expand All @@ -44,7 +45,8 @@ h64_to_h32(uint64_t h_)

static
uint64_t
passthrough(const char *fusepath_,
passthrough(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
Expand All @@ -55,7 +57,8 @@ passthrough(const char *fusepath_,

static
uint64_t
path_hash(const char *fusepath_,
path_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
Expand All @@ -69,15 +72,17 @@ path_hash(const char *fusepath_,

static
uint64_t
path_hash32(const char *fusepath_,
path_hash32(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{
uint64_t h;

h = path_hash(fusepath_,
h = path_hash(basepath_,
fusepath_,
fusepath_len_,
mode_,
dev_,
Expand All @@ -88,7 +93,8 @@ path_hash32(const char *fusepath_,

static
uint64_t
devino_hash(const char *fusepath_,
devino_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
Expand All @@ -107,15 +113,17 @@ devino_hash(const char *fusepath_,

static
uint64_t
devino_hash32(const char *fusepath_,
devino_hash32(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{
uint64_t h;

h = devino_hash(fusepath_,
h = devino_hash(basepath_,
fusepath_,
fusepath_len_,
mode_,
dev_,
Expand All @@ -126,28 +134,97 @@ devino_hash32(const char *fusepath_,

// Hashes the underlying inode number together with the branch base path.
// Only ino_ and basepath_ contribute to the result; fusepath_,
// fusepath_len_, mode_ and dev_ are accepted to match inodefunc_t but are
// not read.
// Returns the 64bit wyhash of the formatted key, seeded with
// fs::inode::MAGIC.
static
uint64_t
hybrid_hash(const char *fusepath_,
basepath_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{

// Key is the decimal inode immediately followed by the base path.
// NOTE(review): there is no separator between the two parts, so the key
// is only unambiguous if basepath_ never begins with a digit --
// presumably branch paths are always absolute; confirm.
std::string buf = fmt::format("{}{}",ino_,basepath_);

return wyhash(buf.c_str(),
buf.length(),
fs::inode::MAGIC,
_wyp);
}

// 32bit flavour of basepath_hash: computes the 64bit basepath hash for the
// same arguments and reduces it with h64_to_h32.
static
uint64_t
basepath_hash32(const std::string &basepath_,
                const char        *fusepath_,
                const uint64_t     fusepath_len_,
                const mode_t       mode_,
                const dev_t        dev_,
                const ino_t        ino_)
{
  const uint64_t hash64 =
    basepath_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);

  return h64_to_h32(hash64);
}

// Picks the algorithm by file type: entries whose mode_ satisfies S_ISDIR
// are handed to path_hash, everything else to devino_hash. All arguments
// are forwarded unchanged.
static
uint64_t
hybrid_hash(const std::string &basepath_,
            const char        *fusepath_,
            const uint64_t     fusepath_len_,
            const mode_t       mode_,
            const dev_t        dev_,
            const ino_t        ino_)
{
  if(S_ISDIR(mode_))
    return path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);

  return devino_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);
}

// 32bit counterpart of hybrid_hash: entries whose mode_ satisfies S_ISDIR
// are handed to path_hash32, everything else to devino_hash32. All
// arguments are forwarded unchanged.
static
uint64_t
hybrid_hash32(const std::string &basepath_,
              const char        *fusepath_,
              const uint64_t     fusepath_len_,
              const mode_t       mode_,
              const dev_t        dev_,
              const ino_t        ino_)
{
  if(S_ISDIR(mode_))
    return path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);

  return devino_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);
}

// Hybrid variant built on basepath_hash: entries whose mode_ satisfies
// S_ISDIR go to path_hash, all other file types go to basepath_hash.
// Arguments are forwarded unchanged to whichever function is chosen.
static
uint64_t
basehybrid_hash(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{
return (S_ISDIR(mode_) ?
path_hash(fusepath_,fusepath_len_,mode_,dev_,ino_) :
devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_));
path_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) :
basepath_hash(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_));
}

// 32bit counterpart of basehybrid_hash: entries whose mode_ satisfies
// S_ISDIR go to path_hash32, all other file types go to basepath_hash32.
// Arguments are forwarded unchanged to whichever function is chosen.
static
uint64_t
hybrid_hash32(const char *fusepath_,
basehybrid_hash32(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{
return (S_ISDIR(mode_) ?
path_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_) :
devino_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_));
path_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_) :
basepath_hash32(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_));
}

namespace fs
Expand All @@ -171,6 +248,14 @@ namespace fs
g_func = hybrid_hash;
ef(algo_ == "hybrid-hash32")
g_func = hybrid_hash32;
ef(algo_ == "basepath-hash")
g_func = basepath_hash;
ef(algo_ == "basepath-hash32")
g_func = basepath_hash32;
ef(algo_ == "basehybrid-hash")
g_func = basehybrid_hash;
ef(algo_ == "basehybrid-hash32")
g_func = basehybrid_hash32;
else
return -EINVAL;

Expand All @@ -194,57 +279,72 @@ namespace fs
return "hybrid-hash";
if(g_func == hybrid_hash32)
return "hybrid-hash32";
// Map the basepath-based algorithms back to their option names. Each 32bit
// variant is compared against its own function pointer; comparing against
// the 64bit function a second time would leave the "...32" names
// unreachable and report an empty string for those algorithms.
if(g_func == basepath_hash)
  return "basepath-hash";
if(g_func == basepath_hash32)
  return "basepath-hash32";
if(g_func == basehybrid_hash)
  return "basehybrid-hash";
if(g_func == basehybrid_hash32)
  return "basehybrid-hash32";

return std::string();
}

// Computes an inode value by forwarding every argument, unchanged, to the
// currently selected algorithm (g_func) and returning its result.
uint64_t
calc(const char *fusepath_,
calc(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{
return g_func(fusepath_,fusepath_len_,mode_,dev_,ino_);
return g_func(basepath_,fusepath_,fusepath_len_,mode_,dev_,ino_);
}

// std::string convenience overload: passes fusepath_'s character data and
// size on to the pointer+length overload and returns its result.
uint64_t
calc(std::string const &fusepath_,
calc(const std::string &basepath_,
std::string const &fusepath_,
const mode_t mode_,
const dev_t dev_,
const ino_t ino_)
{
return calc(fusepath_.c_str(),
return calc(basepath_,
fusepath_.c_str(),
fusepath_.size(),
mode_,
dev_,
ino_);
}

// In-place overload: reads st_mode, st_dev and st_ino from *st_, computes
// the inode value, and overwrites st_->st_ino with it.
void
calc(const char *fusepath_,
calc(const std::string &basepath_,
const char *fusepath_,
const uint64_t fusepath_len_,
struct stat *st_)
{
st_->st_ino = calc(fusepath_,
st_->st_ino = calc(basepath_,
fusepath_,
fusepath_len_,
st_->st_mode,
st_->st_dev,
st_->st_ino);
}

// In-place overload for a NUL-terminated fuse path: measures it with
// strlen and delegates to the pointer+length overload.
void
calc(const char *fusepath_,
calc(const std::string &basepath_,
const char *fusepath_,
struct stat *st_)
{
calc(fusepath_,strlen(fusepath_),st_);
calc(basepath_,fusepath_,strlen(fusepath_),st_);
}

// In-place overload for a std::string fuse path: delegates to the
// pointer+length overload using the string's data and size.
void
calc(const std::string &fusepath_,
calc(const std::string &basepath_,
const std::string &fusepath_,
struct stat *st_)
{
calc(fusepath_.c_str(),fusepath_.size(),st_);
calc(basepath_,fusepath_.c_str(),fusepath_.size(),st_);
}
}
}
15 changes: 10 additions & 5 deletions src/fs_inode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,26 @@ namespace fs
int set_algo(const std::string &s);
std::string get_algo(void);

// Value-returning overloads: compute an inode value from the given base
// path, fuse path, mode, device id and underlying inode.
uint64_t calc(const char *fusepath,
uint64_t calc(const std::string &basepath,
const char *fusepath,
const uint64_t fusepath_len,
const mode_t mode,
const dev_t dev,
const ino_t ino);
uint64_t calc(std::string const &fusepath,
uint64_t calc(const std::string &basepath,
std::string const &fusepath,
mode_t const mode,
dev_t const dev,
ino_t ino);
// In-place overloads: take a struct stat pointer instead of separate
// mode/dev/ino values and return nothing.
void calc(const char *fusepath,
void calc(const std::string &basepath,
const char *fusepath,
const uint64_t fusepath_len,
struct stat *st);
void calc(const char *fusepath,
void calc(const std::string &basepath,
const char *fusepath,
struct stat *st);
void calc(const std::string &fusepath,
void calc(const std::string &basepath,
const std::string &fusepath,
struct stat *st);

}
Expand Down
2 changes: 1 addition & 1 deletion src/fuse_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ namespace l
if(rv == -1)
return -errno;

fi = new FileInfo(rv,fusepath_,ffi_->direct_io);
fi = new FileInfo(rv,createpath_,fusepath_,ffi_->direct_io);

ffi_->fh = reinterpret_cast<uint64_t>(fi);

Expand Down
Loading

0 comments on commit d0d265a

Please sign in to comment.