forked from triblerteam/libswift
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hashtree.h
288 lines (229 loc) · 10.3 KB
/
hashtree.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/*
* hashtree.h
* hashing, Merkle hash trees and data integrity
*
* Created by Victor Grishchenko on 3/6/09.
* Copyright 2009-2012 TECHNISCHE UNIVERSITEIT DELFT. All rights reserved.
*
*/
#ifndef SWIFT_SHA1_HASH_TREE_H
#define SWIFT_SHA1_HASH_TREE_H
#include <string.h>
#include <string>
#include "bin.h"
#include "binmap.h"
#include "operational.h"
namespace swift {
#define HASHSZ 20
/** SHA-1 hash, 20 bytes of data */
struct Sha1Hash {
uint8_t bits[HASHSZ];
Sha1Hash() { memset(bits,0,HASHSZ); }
/** Make a hash of two hashes (for building Merkle hash trees). */
Sha1Hash(const Sha1Hash& left, const Sha1Hash& right);
/** Hash an old plain string. */
Sha1Hash(const char* str, size_t length=-1);
Sha1Hash(const uint8_t* data, size_t length);
/** Either parse hash from hex representation of read in raw format. */
Sha1Hash(bool hex, const char* hash);
std::string hex() const;
bool operator == (const Sha1Hash& b) const
{ return 0==memcmp(bits,b.bits,SIZE); }
bool operator != (const Sha1Hash& b) const { return !(*this==b); }
const char* operator * () const { return (char*) bits; }
const static Sha1Hash ZERO;
const static size_t SIZE = HASHSZ;
};
// Arno: The chunk size parameter can now be configured via the constructor,
// for values up to 8192. Above that you'll have to edit the
// SWIFT_MAX_SEND_DGRAM_SIZE in swift.h
//
#define SWIFT_DEFAULT_CHUNK_SIZE 1024
class Storage;
/** This class controls data integrity of some file; hash tree is put to
an auxilliary file next to it. The hash tree file is mmap'd for
performance reasons. Actually, I'd like the data file itself to be
mmap'd, but 32-bit platforms do not allow that for bigger files.
There are two variants of the general workflow: either a MmapHashTree
is initialized with a root hash and the rest of hashes and data is
spoon-fed later, OR a MmapHashTree is initialized with a data file, so
the hash tree is derived, including the root hash.
*/
class HashTree : public Operational {
public:
HashTree() : Operational() {}
/** Offer a hash; returns true if it verified; false otherwise.
Once it cannot be verified (no sibling or parent), the hash
is remembered, while returning false. */
virtual bool OfferHash (bin_t pos, const Sha1Hash& hash) = 0;
/** Offer data; the behavior is the same as with a hash:
accept or remember or drop. Returns true => ACK is sent. */
virtual bool OfferData (bin_t bin, const char* data, size_t length) = 0;
/** Returns the number of peaks (read on peak hashes). */
virtual int peak_count () const = 0;
/** Returns the i-th peak's bin number. */
virtual bin_t peak (int i) const = 0;
/** Returns peak hash #i. */
virtual const Sha1Hash& peak_hash (int i) const = 0;
/** Return the peak bin the given bin belongs to. */
virtual bin_t peak_for (bin_t pos) const = 0;;
/** Return a (Merkle) hash for the given bin. */
virtual const Sha1Hash& hash (bin_t pos) const = 0;
/** Give the root hash, which is effectively an identifier of this file. */
virtual const Sha1Hash& root_hash () const = 0;
/** Get file size, in bytes. */
virtual uint64_t size () const = 0;
/** Get file size in chunks (in kilobytes, rounded up). */
virtual uint64_t size_in_chunks () const = 0;
/** Number of bytes retrieved and checked. */
virtual uint64_t complete () const = 0;
/** Number of chunks retrieved and checked. */
virtual uint64_t chunks_complete () const = 0;
/** The number of bytes completed sequentially, i.e. from the beginning of
the file (or offset), uninterrupted. */
virtual uint64_t seq_complete(int64_t offset) = 0;// SEEK
/** Whether the file is complete. */
virtual bool is_complete () = 0;
/** The binmap of complete chunks. */
virtual binmap_t * ack_out() = 0;
virtual uint32_t chunk_size() = 0; // CHUNKSIZE
//NETWVSHASH
virtual bool get_check_netwvshash() = 0;
// for transfertest.cpp
virtual Storage * get_storage() = 0;
virtual void set_size(uint64_t size) = 0;
virtual int TESTGetFD() = 0;
virtual ~HashTree() {};
};
/** This class implements the HashTree interface via a memory mapped file. */
class MmapHashTree : public HashTree, Serializable {
/** Merkle hash tree: root */
Sha1Hash root_hash_;
Sha1Hash *hashes_;
/** Merkle hash tree: peak hashes */
Sha1Hash peak_hashes_[64];
bin_t peaks_[64];
int peak_count_;
/** File descriptor to put hashes to */
int hash_fd_;
std::string hash_filename_;
std::string filename_; // for easy serialization
/** Base size, as derived from the hashes. */
uint64_t size_;
uint64_t sizec_;
/** Part of the tree currently checked. */
uint64_t complete_;
uint64_t completec_;
/** Binmap of own chunk availability */
binmap_t ack_out_;
// CHUNKSIZE
/** Arno: configurable fixed chunk size in bytes */
uint32_t chunk_size_;
// LESSHASH
binmap_t is_hash_verified_; // binmap being abused as bitmap, only layer 0 used
// FAXME: make is_hash_verified_ part of persistent state?
//MULTIFILE
Storage * storage_;
int internal_deserialize(FILE *fp,bool contentavail=true);
//NETWVSHASH
bool check_netwvshash_;
protected:
int OpenHashFile();
void Submit();
void RecoverProgress();
bool RecoverPeakHashes();
Sha1Hash DeriveRoot();
bool OfferPeakHash (bin_t pos, const Sha1Hash& hash);
public:
MmapHashTree (Storage *storage, const Sha1Hash& root=Sha1Hash::ZERO, uint32_t chunk_size=SWIFT_DEFAULT_CHUNK_SIZE,
std::string hash_filename=NULL, bool force_check_diskvshash=true, bool check_netwvshash=true, std::string binmap_filename=NULL);
// Arno, 2012-01-03: Hack to quickly learn root hash from a checkpoint
MmapHashTree (bool dummy, std::string binmap_filename);
bool OfferHash (bin_t pos, const Sha1Hash& hash);
bool OfferData (bin_t bin, const char* data, size_t length);
/** For live streaming. Not implemented yet. */
int AppendData (char* data, int length) ;
int peak_count () const { return peak_count_; }
bin_t peak (int i) const { return peaks_[i]; }
const Sha1Hash& peak_hash (int i) const { return peak_hashes_[i]; }
bin_t peak_for (bin_t pos) const;
const Sha1Hash& hash (bin_t pos) const {return hashes_[pos.toUInt()];}
const Sha1Hash& root_hash () const { return root_hash_; }
uint64_t size () const { return size_; }
uint64_t size_in_chunks () const { return sizec_; }
uint64_t complete () const { return complete_; }
uint64_t chunks_complete () const { return completec_; }
uint64_t seq_complete(int64_t offset); // SEEK
bool is_complete () { return size_ && complete_==size_; }
binmap_t * ack_out () { return &ack_out_; }
uint32_t chunk_size() { return chunk_size_; } // CHUNKSIZE
~MmapHashTree ();
// for transfertest.cpp
Storage * get_storage() { return storage_; }
void set_size(uint64_t size) { size_ = size; }
// Arno: persistent storage for state other than hashes (which are in .mhash)
int serialize(FILE *fp);
int deserialize(FILE *fp);
int partial_deserialize(FILE *fp);
//NETWVSHASH
bool get_check_netwvshash() { return check_netwvshash_; }
int TESTGetFD() { return hash_fd_; }
};
/** This class implements the HashTree interface by reading directly from disk */
class ZeroHashTree : public HashTree {
/** Merkle hash tree: root */
Sha1Hash root_hash_;
/** Merkle hash tree: peak hashes */
//Sha1Hash peak_hashes_[64]; // now read from disk live too
bin_t peaks_[64];
int peak_count_;
/** File descriptor to put hashes to */
int hash_fd_;
/** Base size, as derived from the hashes. */
uint64_t size_;
uint64_t sizec_;
/** Part of the tree currently checked. */
uint64_t complete_;
uint64_t completec_;
// CHUNKSIZE
/** Arno: configurable fixed chunk size in bytes */
uint32_t chunk_size_;
//MULTIFILE
Storage * storage_;
protected:
bool RecoverPeakHashes();
Sha1Hash DeriveRoot();
bool OfferPeakHash (bin_t pos, const Sha1Hash& hash);
public:
ZeroHashTree (Storage *storage, const Sha1Hash& root=Sha1Hash::ZERO, uint32_t chunk_size=SWIFT_DEFAULT_CHUNK_SIZE,
std::string hash_filename=NULL, std::string binmap_filename=NULL);
// Arno, 2012-01-03: Hack to quickly learn root hash from a checkpoint
ZeroHashTree (bool dummy, std::string binmap_filename);
bool OfferHash (bin_t pos, const Sha1Hash& hash);
bool OfferData (bin_t bin, const char* data, size_t length);
/** For live streaming. Not implemented yet. */
int AppendData (char* data, int length) ;
int peak_count () const { return peak_count_; }
bin_t peak (int i) const { return peaks_[i]; }
const Sha1Hash& peak_hash (int i) const;
bin_t peak_for (bin_t pos) const;
const Sha1Hash& hash (bin_t pos) const;
const Sha1Hash& root_hash () const { return root_hash_; }
uint64_t size () const { return size_; }
uint64_t size_in_chunks () const { return sizec_; }
uint64_t complete () const { return complete_; }
uint64_t chunks_complete () const { return completec_; }
uint64_t seq_complete(int64_t offset); // SEEK
bool is_complete () { return size_ && complete_==size_; }
binmap_t * ack_out () { return NULL; }
uint32_t chunk_size() { return chunk_size_; } // CHUNKSIZE
~ZeroHashTree ();
// for transfertest.cpp
Storage * get_storage() { return storage_; }
void set_size(uint64_t size) { size_ = size; }
//NETWVSHASH
bool get_check_netwvshash() { return true; }
int TESTGetFD() { return hash_fd_; }
};
}
#endif