Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

genericize DataIdentity for container types #4591

Draft
wants to merge 15 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 112 additions & 54 deletions library/include/DataIdentity.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ distribution.
#include <variant>
#include <vector>
#include <variant>
#include <ranges>

#include "DataDefs.h"

Expand Down Expand Up @@ -113,10 +114,10 @@ namespace DFHack

class DFHACK_EXPORT container_identity : public constructed_identity {
type_identity *item;
enum_identity *ienum;
type_identity *ienum;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious why this changed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

specifically because of associative containers, the identity key type of an associative container goes there

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I wonder if there is a way to enforce that this is an enum_identity for non-associative containers. Probably complicated.


public:
container_identity(size_t size, TAllocateFn alloc, type_identity *item, enum_identity *ienum = NULL)
container_identity(size_t size, TAllocateFn alloc, type_identity *item, type_identity *ienum = NULL)
: constructed_identity(size, alloc), item(item), ienum(ienum) {};

virtual identity_type type() { return IDTYPE_CONTAINER; }
Expand Down Expand Up @@ -190,6 +191,83 @@ namespace DFHack
virtual bool get_item(void *ptr, int idx) = 0;
virtual void set_item(void *ptr, int idx, bool val) = 0;
};

template <typename C>
concept isIndexedContainer = requires (C c, C::iterator i, C::size_type sz, C::value_type v) {
{ c[sz] } -> std::same_as<typename C::reference>;
{ c.size() } ->std::same_as<typename C::size_type>;
{ c.begin() } -> std::same_as<typename C::iterator>;
{ c.end() } -> std::same_as<typename C::iterator>;
{ c.erase(i) };
{ c.resize(sz) };
{ c.insert(i, v) };
lethosor marked this conversation as resolved.
Show resolved Hide resolved
};

template<isIndexedContainer C>
class generic_container_identity : public container_identity {
private:
using T = C::value_type;
public:
generic_container_identity() : container_identity(sizeof(C), &df::allocator_fn<C>, df::identity_traits<T>::get()) {};
// this is just going to be generic "container", doing something more interesting requires parsing typeid which is nonportable
std::string getFullName(type_identity* item) { return "container" + container_identity::getFullName(item); }
lethosor marked this conversation as resolved.
Show resolved Hide resolved
virtual bool resize(void* ptr, int size) { ((C*)ptr)->resize(size); return true; }
virtual bool erase(void* ptr, int size) { auto& ct = *(C*)ptr; ct.erase(ct.begin() + size); return true; }
virtual bool insert(void* ptr, int idx, void* item) { auto& ct = *(C*)ptr; ct.insert(ct.begin() + idx, *(T*)item); return true; }
protected:
virtual int item_count(void* ptr, CountMode cnt) { return (int)((C*)ptr)->size(); }
virtual void* item_pointer(type_identity* item, void* ptr, int idx) { return &(*(C*)ptr)[idx]; }
};

#undef realname

template <typename C>
concept isAssocContainer = requires (C c, C::iterator i, C::key_type k) {
{ c[k] } -> std::same_as<typename C::mapped_type &>;
{ c.size() } -> std::same_as<typename C::size_type>;
{ c.begin() } -> std::same_as<typename C::iterator>;
{ c.end() } -> std::same_as<typename C::iterator>;
};

template<isAssocContainer C>
class generic_assoc_container_identity : public container_identity {
private:
using KT = C::key_type;
using T = C::mapped_type;
public:
generic_assoc_container_identity() : container_identity(sizeof(C), &df::allocator_fn<C>, df::identity_traits<T>::get(), df::identity_traits<KT>::get()) {};
std::string getFullName(type_identity* item) { return "map" + container_identity::getFullName(item); }
virtual bool resize(void* ptr, int size) { return false; }
virtual bool erase(void* ptr, int size) { return false; }
virtual bool is_readonly() { return true; }
protected:
virtual int item_count(void* ptr, CountMode cnt) { return (int)((C*)ptr)->size(); }
virtual void* item_pointer(type_identity* item, void* ptr, int idx) { auto iter = (((C*)ptr)->begin()); while(idx--) iter++; return (void*)&(iter->second); }
};

template <typename C>
concept isSetContainer = requires (C c, C::iterator i, C::key_type k) {
requires std::same_as<typename C::key_type, typename C::value_type>;
{ c.contains(k) } -> std::same_as<bool>;
{ c.size() } -> std::same_as<typename C::size_type>;
{ c.begin() } -> std::same_as<typename C::iterator>;
{ c.end() } -> std::same_as<typename C::iterator>;
};

template<isSetContainer C>
class generic_set_container_identity : public container_identity {
private:
using KT = C::key_type;
public:
generic_set_container_identity() : container_identity(sizeof(C), &df::allocator_fn<C>, df::identity_traits<KT>::get()) {};
std::string getFullName(type_identity* item) { return "set" + container_identity::getFullName(item); }
virtual bool resize(void* ptr, int size) { return false; }
virtual bool erase(void* ptr, int size) { return false; }
virtual bool is_readonly() { return true; }
protected:
virtual int item_count(void* ptr, CountMode cnt) { return (int)((C*)ptr)->size(); }
virtual void* item_pointer(type_identity* item, void* ptr, int idx) { auto iter = (((C*)ptr)->begin()); while (idx--) iter++; return (void*)&(*iter); }
};
}

namespace df
Expand All @@ -201,6 +279,13 @@ namespace df
using DFHack::container_identity;
using DFHack::ptr_container_identity;
using DFHack::bit_container_identity;
using DFHack::generic_container_identity;
using DFHack::generic_assoc_container_identity;
using DFHack::generic_set_container_identity;

using DFHack::isIndexedContainer;
using DFHack::isAssocContainer;
using DFHack::isSetContainer;

class DFHACK_EXPORT number_identity_base : public primitive_identity {
const char *name;
Expand Down Expand Up @@ -577,6 +662,7 @@ namespace df
INTEGER_IDENTITY_TRAITS(unsigned long long);
FLOAT_IDENTITY_TRAITS(float);
FLOAT_IDENTITY_TRAITS(double);

OPAQUE_IDENTITY_TRAITS(std::condition_variable);
OPAQUE_IDENTITY_TRAITS(std::fstream);
OPAQUE_IDENTITY_TRAITS(std::mutex);
Expand Down Expand Up @@ -658,31 +744,13 @@ namespace df
static container_identity *get();
};

template<class T> struct identity_traits<std::vector<T> > {
static container_identity *get();
};
#endif

template<class T> struct identity_traits<std::vector<T*> > {
static stl_ptr_vector_identity *get();
};

#ifdef BUILD_DFHACK_LIB
template<class T> struct identity_traits<std::deque<T> > {
static container_identity *get();
};

template<class T> struct identity_traits<std::set<T> > {
static container_identity *get();
};

template<class KT, class T> struct identity_traits<std::map<KT, T>> {
static container_identity *get();
};

template<class KT, class T> struct identity_traits<std::unordered_map<KT, T>> {
static container_identity *get();
};

template<> struct identity_traits<BitArray<int> > {
static bit_array_identity identity;
Expand Down Expand Up @@ -712,6 +780,30 @@ namespace df
}
#endif

template<isIndexedContainer C>
struct DFHACK_EXPORT identity_traits<C> {
static type_identity* get() {
static generic_container_identity<C> identity{};
return &identity;
}
};

template<isAssocContainer C>
struct DFHACK_EXPORT identity_traits<C> {
static type_identity* get() {
static generic_assoc_container_identity<C> identity{};
return &identity;
}
};

template<isSetContainer C>
struct DFHACK_EXPORT identity_traits<C> {
static type_identity* get() {
static generic_set_container_identity<C> identity{};
return &identity;
}
};

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure these no longer need to be behind BUILD_DFHACK_LIB? My understanding is that had something to do with linking issues with plugins attempting to use these specializations, and it was deemed better/easier to not allow that at all, but I'm afraid I don't remember much more than that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm still exploring this - i think it has to do inconsistent typing of the get static method. there appears to be no reason why all of the get methods cannot return a type_identity* as the type is polymorphic anyhow

however, i'm still investigating this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the issue of duplicate implementations still exists, but in this case the static object being created is read-only so if there are two copies there should be no risk of harm. the problem that arises when there are multiple instances of the same specialization is when there is mutable data, but there's no mutable data in a type_identity

also, both gcc and msvc have improved their loading models since 2012 to reduce problems with duplicated implementations

there is a ton of stuff in our code that is there to work around compiler bugs and shortcomings, or even C++ language shortcomings, that were fixed, in some cases, over a decade ago

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the issue of duplicate implementations still exists, but in this case the static object being created is read-only so if there are two copies there should be no risk of harm.

Are we sure the address of that object isn't used as an identifier anywhere? I'm pretty sure at least something in the Lua layer passes around pointers to type_identity objects and compares them for equality.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least some places where the pointer to type_identity is used:

  • Several registry tables:
    • The table at &DFHACK_TYPEID_TABLE_TOKEN maps type_identity* pointers (as raw lightuserdata) to the corresponding DF type object (e.g. identity_traits<df::item>::get() maps to the lua df.item).
    • The table at &DFHACK_TYPETABLE_TOKEN is a bidirectional map of type_identity* to metatable
  • In DF object metatables:
    • The value at &DFHACK_IDENTITY_FIELD_TOKEN is the object's type_identity. This is the value with a "userdata" key visible in printall(debug.getmetatable(df.item)), for example.
      • This is used in is_type_compatible(), but that already returns false for all IDTYPE_CONTAINER identities anyway, so your change has no effect there.
      • It's also returned by get_object_identity(), but that doesn't seem to be used for identification anywhere (although the result is passed on to several calls that I didn't trace...). Mainly, the identity is used by calling methods that should return the same thing for duplicate copies of the identity.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, unless I'm missing something else, I think duplicates might be ok. Especially for templated container types, for which several operations that use the type_identity aren't implemented to begin with.

template<class T>
inline pointer_identity *identity_traits<T *>::get() {
static pointer_identity identity(identity_traits<T>::get());
Expand All @@ -725,12 +817,6 @@ namespace df
return &identity;
}

template<class T>
inline container_identity *identity_traits<std::vector<T> >::get() {
typedef std::vector<T> container;
static stl_container_identity<container> identity("vector", identity_traits<T>::get());
return &identity;
}
#endif

template<class T>
Expand All @@ -740,34 +826,6 @@ namespace df
}

#ifdef BUILD_DFHACK_LIB
template<class T>
inline container_identity *identity_traits<std::deque<T> >::get() {
typedef std::deque<T> container;
static stl_container_identity<container> identity("deque", identity_traits<T>::get());
return &identity;
}

template<class T>
inline container_identity *identity_traits<std::set<T> >::get() {
typedef std::set<T> container;
static ro_stl_container_identity<container> identity("set", identity_traits<T>::get());
return &identity;
}

template<class KT, class T>
inline container_identity *identity_traits<std::map<KT, T>>::get() {
typedef std::map<KT, T> container;
static ro_stl_assoc_container_identity<container> identity("map", identity_traits<KT>::get(), identity_traits<T>::get());
return &identity;
}

template<class KT, class T>
inline container_identity *identity_traits<std::unordered_map<KT, T>>::get() {
typedef std::unordered_map<KT, T> container;
static ro_stl_assoc_container_identity<container> identity("unordered_map", identity_traits<KT>::get(), identity_traits<T>::get());
return &identity;
}

template<class T>
inline bit_container_identity *identity_traits<BitArray<T> >::get() {
static bit_array_identity identity(identity_traits<T>::get());
Expand Down
2 changes: 1 addition & 1 deletion test/library/print.lua
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ function test.printall_recurse()
idx = validate_patterns(idx, {'^fn$', EQ, '^function: '})
elseif str:startswith('udatatable') then
idx = validate_patterns(idx,
{'^udatatable$', EQ, '^<vector<int32_t>%[6%]: ',
{'^udatatable$', EQ, '^<container<int32_t>%[6%]: ',
'%s+', '^0$', EQ, '^10$',
'%s+', '^1$', EQ, '^20$',
'Repeated 2 times',
Expand Down