wq

thustorage · Feb 16, 2022 · c5ee9d8 · c5ee9d8
commit c5ee9d8
Show file tree

Hide file tree

Showing 59 changed files with 7,903 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+build/
+build/*
+.vscode/
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,39 @@
+cmake_minimum_required(VERSION 2.8.5 FATAL_ERROR)
+project(Sherman)
+
+# set (CMAKE_C_COMPILER "/usr/local/openmpi/bin/mpicxx")
+# set (CMAKE_CXX_COMPILER ${CMAKE_C_COMPILER})
+
+#Compiler options
+# set(CMAKE_C_FLAGS "-Wall -Wno-deprecated-declarations -Wsign-compare -DNDEBUG -g")
+set(CMAKE_C_FLAGS "-Wall -Wno-deprecated-declarations -Wsign-compare -O3 -g") # active flags: warnings, -O3 and debug info (NDEBUG is not defined, so assert() stays enabled)
+set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -std=c++11 ") # C++ reuses the C flags and adds -std=c++11
+
+# Link Options
+set(LINKS_FLAGS "-lcityhash -lboost_system -lboost_coroutine -lpthread -libverbs -lmemcached") # passed to target_link_libraries for every test executable below
+
+# option (SANITIZE "Turn on sanitization" ON)
+# if (SANITIZE)
+    # set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+# endif()
+
+set(INCLUDE_BASE ${PROJECT_SOURCE_DIR}/include)
+include_directories(${INCLUDE_BASE}) # headers are resolved relative to include/
+
+#Source file define
+set(COMMON_SRC ${PROJECT_SOURCE_DIR}/src)
+
+#Used by both server and clients
+file(GLOB_RECURSE COMMON_FILE ${COMMON_SRC}/*.cpp) # every .cpp under src/, recursively
+add_library(sherman STATIC ${COMMON_FILE}) # built into the static library `sherman`
+link_libraries(sherman) # links `sherman` into every target created after this point
+
+# test
+# One executable per test/*.cpp, named after its source file.
+file(GLOB TEST_SRC ${PROJECT_SOURCE_DIR}/test/*.cpp)
+foreach (TEST ${TEST_SRC})
+    get_filename_component(TEST_NAME ${TEST} NAME_WE) # NAME_WE: file name without directory or extension
+    add_executable(${TEST_NAME} ${TEST})
+    target_link_libraries(${TEST_NAME} ${LINKS_FLAGS})
+endforeach()
+
diff --git a/README.md b/README.md
@@ -0,0 +1,44 @@
+# Sherman: A Write-Optimized Distributed B+Tree Index on Disaggregated Memory 
+
+Sherman is a B+Tree on disaggregated memory; it uses one-sided RDMA verbs to perform all index operations.
+Sherman includes three techniques to boost write performance:
+
+- A hierarchical lock design that leverages the on-chip memory of RDMA NICs.
+- Coalescing dependent RDMA commands 
+- Two-level version layout in leaf nodes
+
+For more details, please refer to our [paper](https://arxiv.org/abs/2112.07320):
+
+[**SIGMOD'22**] Sherman: A Write-Optimized Distributed B+Tree Index on Disaggregated Memory. Qing Wang, Youyou Lu, and Jiwu Shu.
+
+
+## System Requirements
+
+1. Mellanox ConnectX-5 NICs and above
+2. RDMA Driver: MLNX_OFED_LINUX-4.7-3.2.9.0 (if you use MLNX_OFED_LINUX-5**, you will need to modify the code to resolve interface incompatibilities)
+3. NIC Firmware: version 16.26.4012 and above (to support on-chip memory, you can use `ibstat` to obtain the version)
+4. memcached (to exchange QP information)
+5. cityhash
+6. boost 1.53 (to support `boost::coroutines::symmetric_coroutine`)
+
+
+## Getting Started
+
+- `cd Sherman`
+- `./script/hugepage.sh` to request huge pages from the OS (use `./script/clear_hugepage.sh` to return huge pages)
+- `mkdir build; cd build; cmake ..; make -j`
+- `cp ../script/restartMemc.sh .`
+- configure `../memcached.conf`, where the 1st line is memcached IP, the 2nd is memcached port
+
+For each run:
+- `./restartMemc.sh` (to initialize memcached server)
+- In each server, execute `./benchmark kNodeCount kReadRatio kThreadCount`
+
+>  We emulate each server as one compute node and one memory node: in each server, as the compute node, 
we launch `kThreadCount` client threads; as the memory node, we launch one memory thread.
+
+> In `./test/benchmark.cpp`, you can modify `kKeySpace` and `zipfan` to generate different workloads.
+> In addition, you can enable the macro `USE_CORO` to bind `kCoroCnt` coroutines to each client thread.
+
+## TODO
+- Re-write `delete` operations
diff --git a/include/AbstractMessageConnection.h b/include/AbstractMessageConnection.h
@@ -0,0 +1,53 @@
+#ifndef __ABSTRACTMESSAGECONNECTION_H__
+#define __ABSTRACTMESSAGECONNECTION_H__
+
+#include "Common.h"
+
+#define SIGNAL_BATCH 31
+
+class Message;
+
+// #messageNR send pool and #messageNR message pool
+class AbstractMessageConnection { // State for a connection that exchanges messages over one queue pair; only getQPN() is defined inline, the other members are declared here and defined elsewhere.
+  // Length of the recvs / recv_sgl arrays declared below.
+  const static int kBatchCount = 4;
+
+protected:
+  ibv_qp *message; // ud or raw packet
+  uint16_t messageNR; // NOTE(review): 16-bit here, but the constructor parameter messageNR is uint32_t — confirm the value cannot be truncated
+
+  ibv_mr *messageMR; // presumably the memory region registered for messagePool — TODO confirm in the implementation
+  void *messagePool;
+  uint32_t messageLkey; // presumably the lkey of messageMR — TODO confirm
+
+  uint16_t curMessage;
+
+  void *sendPool;
+  uint16_t curSend;
+
+  ibv_recv_wr *recvs[kBatchCount];
+  ibv_sge *recv_sgl[kBatchCount];
+  uint32_t subNR;
+
+  ibv_cq *send_cq;
+  uint64_t sendCounter;
+
+  uint16_t sendPadding; // ud: 0
+                        // rp: ?
+  uint16_t recvPadding; // ud: 40
+                        // rp: ?
+
+public:
+  AbstractMessageConnection(ibv_qp_type type, uint16_t sendPadding,
+                            uint16_t recvPadding, RdmaContext &ctx, ibv_cq *cq,
+                            uint32_t messageNR);
+
+  void initRecv();
+
+  char *getMessage();
+  char *getSendPool();
+
+  uint32_t getQPN() { return message->qp_num; } // queue pair number of `message`
+};
+
+#endif /* __ABSTRACTMESSAGECONNECTION_H__ */
diff --git a/include/Bitmap.h b/include/Bitmap.h
@@ -0,0 +1,54 @@
+#ifndef __BITMAP_H__
+#define __BITMAP_H__
+
+#include <cstdint>
+#include <cassert>
+#include <cstring>
+
+#include <cstdio>
+
+class BitMap { // Heap-backed bitmap of n bits stored as 64-bit words; n must be a multiple of 64.
+   private:
+    int n; // number of bits
+    uint64_t *bits; // n / 64 words, allocated in the constructor and freed in the destructor
+
+   public:
+    BitMap(int n) : n(n) { // Allocates n / 64 words and zeroes them, so every bit starts clear.
+        // NOTE(review): single-argument constructor is not explicit, so an int converts implicitly to BitMap.
+        assert(n % 64 == 0);
+        bits = new uint64_t[n / 64];
+        memset(bits, 0, n / 8);
+    }
+
+    bool get(int pos) { return (bits[pos / 64] >> (pos % 64)) & 0x1; } // true if bit pos is set; pos is not range-checked
+
+    void set(int pos) { // sets bit pos; pos is not range-checked
+        uint64_t &v = bits[pos / 64];
+        v = v | (0x1ull << (pos % 64));
+    }
+
+    void clear(int pos) { // clears bit pos; pos is not range-checked
+        uint64_t &v = bits[pos / 64];
+        v = v & ~(0x1ull << (pos % 64));
+    }
+
+    int setZeroPos() { // Finds the lowest-index clear bit, sets it, and returns its index.
+        for (int i = 0; i < n / 64; ++i) {
+            uint64_t &v = bits[i];
+            uint64_t b = ~v; // set bits of b mark the clear bits of this word
+            if (b) {
+                uint64_t pos = __builtin_ctzll(b); // trailing-zero count of b == index of the lowest clear bit in v
+
+                v = v | (0x1ull << pos);
+
+                return i * 64 + pos;
+            }
+        }
+        // NOTE(review): reached only when every bit is already set. With NDEBUG the assert is compiled out and control falls off the end of a non-void function (undefined behavior).
+        assert(false);
+    }
+
+    ~BitMap() { delete[] bits; } // NOTE(review): copy constructor/assignment are implicitly generated, so copying a BitMap would make two objects delete[] the same array
+};
+
+#endif /* __BITMAP_H__ */
diff --git a/include/Cache.h b/include/Cache.h
@@ -0,0 +1,18 @@
+#if !defined(_CACHE_H_)
+#define _CACHE_H_
+
+#include "Config.h"
+#include "HugePageAlloc.h"
+
+class Cache { // Exposes a `data` / `size` pair; the constructor is declared here and defined elsewhere.
+  // NOTE(review): both fields are public, and the trailing `private:` section is empty.
+public:
+  Cache(const CacheConfig &cache_config);
+
+  uint64_t data; // presumably the address of the cache buffer stored as an integer — TODO confirm in the constructor
+  uint64_t size; // presumably the size of that buffer; units are not visible here — TODO confirm
+
+private:
+};
+
+#endif // _CACHE_H_
diff --git a/include/CacheEntry.h b/include/CacheEntry.h
@@ -0,0 +1,64 @@
+#if !defined(_CACHE_ENTRY_H_)
+#define _CACHE_ENTRY_H_
+
+#include "Common.h"
+#include "Tree.h"
+
+struct CacheEntry { // A key range [from, to] together with an InternalPage pointer; declared packed below.
+  Key from;
+  Key to; // [from, to]
+  mutable InternalPage *ptr; // mutable: can be reassigned even through a const CacheEntry
+  // mutable std::atomic<InternalPage *> ptr;
+
+  // CacheEntry() = default;
+
+  // CacheEntry(const CacheEntry &p) {
+  //   from = p.from;
+  //   to = p.to;
+  //   ptr = p.ptr.load(std::memory_order_relaxed);
+  // }
+}
+ __attribute__((packed)); // no padding is inserted between the members
+
+static_assert(sizeof(CacheEntry) == 2 * sizeof(Key) + sizeof(uint64_t), "XXX"); // layout check: two keys plus a pointer assumed to be 8 bytes
+
+inline std::ostream &operator<<(std::ostream &os, const CacheEntry &obj) { // Streams the entry as the half-open range "[from, to + 1)"; ptr is not printed.
+  os << "[" << (int)obj.from << ", " << obj.to + 1 << ")"; // NOTE(review): only `from` is cast to int; `to + 1` is streamed with whatever type that expression has — confirm the asymmetry is intended
+  return os;
+}
+
+inline static CacheEntry Decode(const char *val) { return *(CacheEntry *)val; } // Returns a copy of the CacheEntry read from the bytes at val; the C-style cast also drops const.
+
+struct CacheEntryComparator { // Three-way comparator over CacheEntry values, given either decoded or as raw bytes.
+  typedef CacheEntry DecodedType;
+  // Decodes a raw byte pointer into a CacheEntry copy (forwards to Decode).
+  static DecodedType decode_key(const char *b) { return Decode(b); }
+  // Returns -1, 0 or +1. Primary key: `to` ascending. Tie-break: `from` descending. `ptr` is not compared.
+  int cmp(const DecodedType a_v, const DecodedType b_v) const {
+    if (a_v.to < b_v.to) {
+      return -1;
+    }
+
+    if (a_v.to > b_v.to) {
+      return +1;
+    }
+    // Equal `to`: the sign is inverted relative to the comparison on `to`, so a larger `from` orders first.
+    if (a_v.from < b_v.from) {
+      return +1;
+    } else if (a_v.from > b_v.from) {
+      return -1;
+    } else {
+      return 0;
+    }
+  }
+  // Compares two entries that are both given as raw bytes.
+  int operator()(const char *a, const char *b) const {
+    return cmp(Decode(a), Decode(b));
+  }
+  // Compares a raw-byte entry against an already decoded one.
+  int operator()(const char *a, const DecodedType b) const {
+    return cmp(Decode(a), b);
+  }
+};
+
+#endif // _CACHE_ENTRY_H_