MFEM  v4.6.0
Finite element discretization library
hash.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_HASH
13 #define MFEM_HASH
14 
15 #include "../config/config.hpp"
16 #include "array.hpp"
17 #include "globals.hpp"
18 #include <type_traits>
19 #include <cstdint>
20 
21 namespace mfem
22 {
23 
24 /** A concept for items that should be used in HashTable and be accessible by
25  * hashing two IDs.
26  */
27 struct Hashed2
28 {
29  int p1, p2;
30  int next;
31 };
32 
33 /** A concept for items that should be used in HashTable and be accessible by
34  * hashing four IDs.
35  */
36 struct Hashed4
37 {
38  int p1, p2, p3; // NOTE: p4 is neither hashed nor stored
39  int next;
40 };
41 
42 
43 /** HashTable is a container for items that require associative access through
44  * pairs (or quadruples) of indices:
45  *
46  * (p1, p2) -> item
47  * (p1, p2, p3, p4) -> item
48  *
49  * An example of this are edges and faces in a mesh. Each edge is uniquely
50  * identified by two parent vertices and so can be easily accessed from
51  * different elements using this class. Similarly for faces.
52  *
53  * The order of the p1, p2, ... indices is not relevant as they are sorted
54  * each time this class is invoked.
55  *
56  * There are two main methods this class provides. The Get(...) methods always
57  * return an item given the two or four indices. If the item didn't previously
58  * exist, the methods creates a new one. The Find(...) methods, on the other
59  * hand, just return NULL or -1 if the item doesn't exist.
60  *
61  * Each new item is automatically assigned a unique ID - the index of the item
62  * inside the BlockArray. The IDs may (but need not) be used as p1, p2, ... of
63  * other items.
64  *
65  * The item type (T) needs to follow either the Hashed2 or the Hashed4
66  * concept. It is easiest to just inherit from these structs.
67  *
68  * All items in the container can also be accessed sequentially using the
69  * provided iterator.
70  *
71  * Notes:
72  * The data structure and implementation is based on a BlockArray<T> which
73  * provides an efficient item storage that avoids heap fragmentation, and
74  * index-based item access. The hash table implemented on top of the
75  * BlockArray provides fast associative (key -> value) access by grouping
76  * items into bins (buckets) of O(1) size.
77  * - "id" denotes the index of an item in the underlying BlockArray<T>,
78  * - "idx" denotes the index of a bin, determined by hashing a key with
79  * the function `Hash`.
80  */
81 template<typename T>
82 class HashTable : public BlockArray<T>
83 {
84 protected:
86 
87 public:
88  /** @brief Main constructor of the HashTable class.
89 
90  @param[in] block_size The size of the storage blocks of the underlying
91  BlockArray<T>.
92  @param[in] init_hash_size The initial size of the hash table. Must be
93  a power of 2. */
94  HashTable(int block_size = 16*1024, int init_hash_size = 32*1024);
95  /// @brief Deep copy
96  HashTable(const HashTable& other);
97  /// @brief Copy assignment not supported
98  HashTable& operator=(const HashTable&) = delete;
99  ~HashTable();
100 
101  /** @brief Item accessor with key (or parents) the pair 'p1', 'p2'. Default
102  construct an item of type T if no value correspond to the requested key.
103 
104  @param[in] p1 First part of the key.
105  @param[in] p2 Second part of the key.
106  @return The index "id" of the key in the BlockArray<T>.
107 
108  @warning This method should only be called if T inherits from Hashed2. */
109  T* Get(int p1, int p2);
110 
111  /** @brief Item accessor with key (or parents) the quadruplet 'p1', 'p2',
112  'p3', 'p4'. The key 'p4' is optional. Default construct an item of type T
113  if no value corresponds to the requested key.
114 
115  @param[in] p1 First part of the key.
116  @param[in] p2 Second part of the key.
117  @param[in] p3 Third part of the key.
118  @param[in] p4 Fourth part of the key (optional).
119  @return The index "id" of the key in the BlockArray<T>.
120 
121  @warning This method should only be called if T inherits from Hashed4. */
122  T* Get(int p1, int p2, int p3, int p4 = -1 /* p4 optional */);
123 
124  /// Get id of item whose parents are p1, p2... Create it if it doesn't exist.
125  /** @brief Get the "id" of an item, this "id" corresponding to the index of the
126  item in the underlying BlockArray<T> object. Default construct an item
127  and id if no value corresponds to the requested key.
128 
129  @param[in] p1 First part of the key.
130  @param[in] p2 Second part of the key.
131  @return The index "id" of the key in the BlockArray<T>.
132 
133  @warning This method should only be called if T inherits from Hashed2. */
134  int GetId(int p1, int p2);
135 
136  /** @brief Get the "id" of an item, this "id" corresponding to the index of the
137  item in the underlying BlockArray<T> object. Default construct an item
138  and id if no value correspond to the requested key.
139 
140  @param[in] p1 First part of the key.
141  @param[in] p2 Second part of the key.
142  @param[in] p3 Third part of the key.
143  @param[in] p4 Fourth part of the key (optional).
144  @return The index "id" of the key in the BlockArray<T>.
145 
146  @warning This method should only be called if T inherits from Hashed4. */
147  int GetId(int p1, int p2, int p3, int p4 = -1);
148 
149  /// Find item whose parents are p1, p2... Return NULL if it doesn't exist.
150  /** @brief Item accessor with key (or parents) the pair 'p1', 'p2'. Return
151  nullptr if no value correspond to the requested key.
152 
153  @param[in] p1 First part of the key.
154  @param[in] p2 Second part of the key.
155  @return The item associated to the key (p1,p2).
156 
157  @warning This method should only be called if T inherits from Hashed2. */
158  T* Find(int p1, int p2);
159 
160  /** @brief Item accessor with key (or parents) the quadruplet 'p1', 'p2',
161  'p3', 'p4'. The key 'p4' is optional. Return nullptr if no value
162  correspond to the requested key.
163 
164  @param[in] p1 First part of the key.
165  @param[in] p2 Second part of the key.
166  @param[in] p3 Third part of the key.
167  @param[in] p4 Fourth part of the key (optional).
168  @return The item associated to the key (p1,p2,p3,p4).
169 
170  @warning This method should only be called if T inherits from Hashed4. */
171  T* Find(int p1, int p2, int p3, int p4 = -1);
172 
173  /** @brief Item const accessor with key (or parents) the pair 'p1', 'p2'.
174  Return nullptr if no value correspond to the requested key.
175 
176  @param[in] p1 First part of the key.
177  @param[in] p2 Second part of the key.
178  @return The item associated to the key (p1,p2).
179 
180  @warning This method should only be called if T inherits from Hashed2. */
181  const T* Find(int p1, int p2) const;
182 
183  /** @brief Item const accessor with key (or parents) the quadruplet 'p1',
184  'p2', 'p3', 'p4'. The key 'p4' is optional. Return nullptr if no value
185  correspond to the requested key.
186 
187  @param[in] p1 First part of the key.
188  @param[in] p2 Second part of the key.
189  @param[in] p3 Third part of the key.
190  @param[in] p4 Fourth part of the key (optional).
191  @return The item associated to the key (p1,p2,p3,p4).
192 
193  @warning This method should only be called if T inherits from Hashed4. */
194  const T* Find(int p1, int p2, int p3, int p4 = -1) const;
195 
196  /// Find id of item whose parents are p1, p2... Return -1 if it doesn't exist.
197  /** @brief Find the "id" of an item, this "id" corresponding to the index of
198  the item in the underlying BlockArray<T> object. Default construct an
199  item and id if no value correspond to the requested key.
200 
201  @param[in] p1 First part of the key.
202  @param[in] p2 Second part of the key.
203  @return The index "id" of the key in the BlockArray<T>.
204 
205  @warning This method should only be called if T inherits from Hashed2. */
206  int FindId(int p1, int p2) const;
207 
208  /** @brief Find the "id" of an item, this "id" corresponding to the index of
209  the item in the underlying BlockArray<T> object. Default construct an
210  item and id if no value correspond to the requested key.
211 
212  @param[in] p1 First part of the key.
213  @param[in] p2 Second part of the key.
214  @param[in] p3 Third part of the key.
215  @param[in] p4 Fourth part of the key (optional).
216  @return The index "id" of the key in the BlockArray<T>.
217 
218  @warning This method should only be called if T inherits from Hashed4. */
219  int FindId(int p1, int p2, int p3, int p4 = -1) const;
220 
221  /// @brief Return the number of elements currently stored in the HashTable.
222  int Size() const { return Base::Size() - unused.Size(); }
223 
224  /// @brief Return the total number of ids (used and unused) in the HashTable.
225  int NumIds() const { return Base::Size(); }
226 
227  /// @brief Return the number of free/unused ids in the HashTable.
228  int NumFreeIds() const { return unused.Size(); }
229 
230  /** @brief Return true if item 'id' exists in (is used by) the container.
231 
232  @param[in] id Index of the item in the underlying BlockArray<T>.
233 
234  @warning It is assumed that 0 <= id < NumIds(). */
235  bool IdExists(int id) const { return (Base::At(id).next != -2); }
236 
237  /** @brief Remove an item from the hash table.
238 
239  @param[in] id Index of the item in the underlying BlockArray<T>.
240 
241  @warning Its id will be reused by newly added items. */
242  void Delete(int id);
243 
244  /// @brief Remove all items.
245  void DeleteAll();
246 
247  /** @brief Allocate an item at 'id'. Enlarge the underlying BlockArray if
248  necessary.
249 
250  @param[in] id Index of the item in the underlying BlockArray<T>.
251  @param[in] p1 First part of the key.
252  @param[in] p2 Second part of the key.
253 
254  @warning This is a special purpose method used when loading data from a
255  file. Does nothing if the slot 'id' has already been allocated. */
256  void Alloc(int id, int p1, int p2);
257 
258  /** @brief Reinitialize the internal list of unallocated items.
259 
260  @warning This is a special purpose method used when loading data from a file. */
261  void UpdateUnused();
262 
263  /** @brief Change the key associated with an item.
264 
265  In other words, makes an item hashed under different parent IDs.
266 
267  @param[in] id Index of the item in the underlying BlockArray<T>.
268  @param[in] new_p1 First part of the new key.
269  @param[in] new_p2 Second part of the new key.
270 
271  @warning This method should only be called if T inherits from Hashed2. */
272  void Reparent(int id, int new_p1, int new_p2);
273 
274  /** @brief Change the key associated with an item.
275 
276  In other words, makes an item hashed under different parent IDs.
277 
278  @param[in] id Index of the item in the underlying BlockArray<T>.
279  @param[in] new_p1 First part of the new key.
280  @param[in] new_p2 Second part of the new key.
281  @param[in] new_p3 Third part of the new key.
282  @param[in] new_p4 Fourth part of the new key (optional).
283 
284  @warning This method should only be called if T inherits from Hashed4. */
285  void Reparent(int id, int new_p1, int new_p2, int new_p3, int new_p4 = -1);
286 
287  /// @brief Return total size of allocated memory (tables plus items), in bytes.
288  std::size_t MemoryUsage() const;
289 
290  /// @brief Write details of the memory usage to the mfem output stream.
291  void PrintMemoryDetail() const;
292 
293  /// @brief Print a histogram of bin sizes for debugging purposes.
294  void PrintStats() const;
295 
296  class iterator : public Base::iterator
297  {
298  protected:
299  friend class HashTable;
300  typedef typename Base::iterator base;
301 
302  iterator() { }
303  iterator(const base &it) : base(it)
304  {
305  while (base::good() && (*this)->next == -2) { base::next(); }
306  }
307 
308  public:
310  {
311  while (base::next(), base::good() && (*this)->next == -2) { }
312  return *this;
313  }
314  };
315 
317  {
318  protected:
319  friend class HashTable;
320  typedef typename Base::const_iterator base;
321 
323  const_iterator(const base &it) : base(it)
324  {
325  while (base::good() && (*this)->next == -2) { base::next(); }
326  }
327 
328  public:
330  {
331  while (base::next(), base::good() && (*this)->next == -2) { }
332  return *this;
333  }
334  };
335 
336  iterator begin() { return iterator(Base::begin()); }
337  iterator end() { return iterator(); }
338 
339  const_iterator cbegin() const { return const_iterator(Base::cbegin()); }
340  const_iterator cend() const { return const_iterator(); }
341 
342 protected:
343  /** The hash table: each bin is a linked list of items. For each non-empty
344  bin, this arrays stores the 'id' of the first item in the list, or -1
345  if the bin is empty. */
346  int* table;
347 
348  /** mask = table_size-1. Used for fast modulo operation in Hash(), to wrap
349  the raw hashed index around the current table size (which must be a power
350  of two). */
351  int mask;
352 
353  /** List of deleted items in the BlockArray<T>. New items are created with
354  these ids first, before they are appended to the block array. */
356 
357  /** @brief hash function for Hashed2 items.
358 
359  @param[in] p1 First part of the key.
360  @param[in] p2 Second part of the key.
361  @return The hash key "idx" identifying a bin/bucket.
362 
363  NOTE: the constants are arbitrary
364  @warning This method should only be called if T inherits from Hashed2. */
365  inline int Hash(size_t p1, size_t p2) const
366  { return (984120265ul*p1 + 125965121ul*p2) & mask; }
367 
368  /** @brief hash function for Hashed4 items.
369 
370  @param[in] p1 First part of the key.
371  @param[in] p2 Second part of the key.
372  @param[in] p3 Third part of the key.
373  @return The hash key "idx" identifying a bin/bucket.
374 
375  NOTE: The constants are arbitrary.
376  NOTE: p4 is not hashed nor stored as p1, p2, p3 identify a face uniquely.
377  @warning This method should only be called if T inherits from Hashed4. */
378  inline int Hash(size_t p1, size_t p2, size_t p3) const
379  { return (984120265ul*p1 + 125965121ul*p2 + 495698413ul*p3) & mask; }
380 
381  // Delete() and Reparent() use one of these:
382  /// @brief Hash function for items of type T that inherit from Hashed2.
383  inline int Hash(const Hashed2& item) const
384  { return Hash(item.p1, item.p2); }
385 
386  /// @brief Hash function for items of type T that inherit from Hashed4.
387  inline int Hash(const Hashed4& item) const
388  { return Hash(item.p1, item.p2, item.p3); }
389 
390  /** @brief Search the index of the item associated to the key (p1,p2)
391  starting from the item with index @a id.
392 
393  @param[in] id Index of the item in the underlying BlockArray<T>.
394  @param[in] p1 First part of the key.
395  @param[in] p2 Second part of the key.
396  @return The index "id" of the key in the BlockArray<T>.
397 
398  @warning This method should only be called if T inherits from Hashed2. */
399  int SearchList(int id, int p1, int p2) const;
400 
401  /** @brief Search the index of the item associated to the key (p1,p2,p3,(p4))
402  starting from the item with index @a id.
403 
404  @param[in] id Index of the item in the underlying BlockArray<T>.
405  @param[in] p1 First part of the key.
406  @param[in] p2 Second part of the key.
407  @param[in] p3 Third part of the key.
408  @return The index "id" of the key in the BlockArray<T>.
409 
410  @warning This method should only be called if T inherits from Hashed4. */
411  int SearchList(int id, int p1, int p2, int p3) const;
412 
413  /** @brief Insert the item 'id' into bin 'idx'.
414 
415  @param[in] idx The bin/bucket index.
416  @param[in] id The index of the item in the BlockArray<T>.
417  @param[in] item The item to insert at the beginning of the linked list.
418 
419  @warning The method only works with bin 'idx' and does not check the
420  overall fill factor of the hash table. If appropriate,
421  use CheckRehash() for that. */
422  inline void Insert(int idx, int id, T &item);
423 
424  /** @brief Unlink an item @a id from the linked list of bin @a idx.
425 
426  @param[in] idx The bin/bucket index.
427  @param[in] id The index of the item in the BlockArray<T>.
428 
429  @warning The method aborts if the item is not found. */
430  void Unlink(int idx, int id);
431 
432  /** @brief Check table fill factor and resize if necessary.
433 
434  The method checks the average size of the bins (i.e., the fill factor).
435  If the fill factor is > 2, the table is enlarged (see DoRehash()). */
436  inline void CheckRehash();
437 
438  /** @brief Double the size of the hash table (i.e., double the number of bins)
439  and reinsert all items into the new bins.
440 
441  NOTE: Rehashing is computationally expensive (O(N) in the number of items),
442  but since it is only done rarely (when the number of items doubles),
443  the amortized complexity of inserting an item is still O(1). */
444  void DoRehash();
445 
446  /** @brief Return the size of the bin "idx".
447 
448  @param[in] idx The index of the bin.
449  @return The size of the bin. */
450  int BinSize(int idx) const;
451 };
452 
453 
454 /// Hash function for data sequences.
455 /** Depends on GnuTLS for SHA-256 hashing. */
457 {
458 protected:
459  void *hash_data;
460 
461  /// Add a sequence of bytes for hashing
462  void HashBuffer(const void *buffer, size_t num_bytes);
463 
464  /// Integer encoding method; result is independent of endianness and type
465  template <typename int_type_const_iter>
466  HashFunction &EncodeAndHashInts(int_type_const_iter begin,
467  int_type_const_iter end);
468 
469  /// Double encoding method: encode in little-endian byte-order
470  template <typename double_const_iter>
471  HashFunction &EncodeAndHashDoubles(double_const_iter begin,
472  double_const_iter end);
473 
474 public:
475  /// Default constructor: initialize the hash function
476  HashFunction();
477 
478  /// Destructor
479  ~HashFunction();
480 
481  /// Add a sequence of bytes for hashing
482  HashFunction &AppendBytes(const void *seq, size_t num_bytes)
483  { HashBuffer(seq, num_bytes); return *this; }
484 
485  /// Add a sequence of integers for hashing, given as a c-array.
486  /** Before hashing the sequence is encoded so that the result is independent
487  of endianness and type: int, long, unsigned, etc. */
488  template <typename int_type>
489  HashFunction &AppendInts(const int_type *ints, size_t num_ints)
490  { return EncodeAndHashInts(ints, ints + num_ints); }
491 
492  /// Add a sequence of integers for hashing, given as a fixed-size c-array.
493  /** Before hashing the sequence is encoded so that the result is independent
494  of endianness and type: int, long, unsigned, etc. */
495  template <typename int_type, size_t num_ints>
496  HashFunction &AppendInts(const int_type (&ints)[num_ints])
497  { return EncodeAndHashInts(ints, ints + num_ints); }
498 
499  /// Add a sequence of integers for hashing, given as a container.
500  /** Before hashing the sequence is encoded so that the result is independent
501  of endianness and type: int, long, unsigned, etc. */
502  template <typename int_type_container>
503  HashFunction &AppendInts(const int_type_container &ints)
504  { return EncodeAndHashInts(ints.begin(), ints.end()); }
505 
506  /// Add a sequence of doubles for hashing, given as a c-array.
507  /** Before hashing the sequence is encoded so that the result is independent
508  of endianness. */
509  HashFunction &AppendDoubles(const double *doubles, size_t num_doubles)
510  { return EncodeAndHashDoubles(doubles, doubles + num_doubles); }
511 
512  /// Add a sequence of doubles for hashing, given as a fixed-size c-array.
513  /** Before hashing the sequence is encoded so that the result is independent
514  of endianness. */
515  template <size_t num_doubles>
516  HashFunction &AppendDoubles(const double (&doubles)[num_doubles])
517  { return EncodeAndHashDoubles(doubles, doubles + num_doubles); }
518 
519  /// Add a sequence of doubles for hashing, given as a container.
520  /** Before hashing the sequence is encoded so that the result is independent
521  of endianness. */
522  template <typename double_container>
523  HashFunction &AppendDoubles(const double_container &doubles)
524  { return EncodeAndHashDoubles(doubles.begin(), doubles.end()); }
525 
526  /** @brief Return the hash string for the current sequence and reset (clear)
527  the sequence. */
528  std::string GetHash() const;
529 };
530 
531 
532 // implementation
533 
534 template<typename T>
535 HashTable<T>::HashTable(int block_size, int init_hash_size)
536  : Base(block_size)
537 {
538  mask = init_hash_size-1;
539  MFEM_VERIFY(!(init_hash_size & mask), "init_size must be a power of two.");
540 
541  table = new int[init_hash_size];
542  for (int i = 0; i < init_hash_size; i++)
543  {
544  table[i] = -1;
545  }
546 }
547 
548 template<typename T>
550  : Base(other), mask(other.mask)
551 {
552  int size = mask+1;
553  table = new int[size];
554  memcpy(table, other.table, size*sizeof(int));
555  other.unused.Copy(unused);
556 }
557 
558 template<typename T>
560 {
561  delete [] table;
562 }
563 
564 namespace internal
565 {
566 
567 inline void sort3(int &a, int &b, int &c)
568 {
569  if (a > b) { std::swap(a, b); }
570  if (a > c) { std::swap(a, c); }
571  if (b > c) { std::swap(b, c); }
572 }
573 
574 inline void sort4(int &a, int &b, int &c, int &d)
575 {
576  if (a > b) { std::swap(a, b); }
577  if (a > c) { std::swap(a, c); }
578  if (a > d) { std::swap(a, d); }
579  sort3(b, c, d);
580 }
581 
582 inline void sort4_ext(int &a, int &b, int &c, int &d)
583 {
584  if (d < 0) // support optional last index
585  {
586  sort3(a, b, c);
587  }
588  else
589  {
590  sort4(a, b, c, d);
591  }
592 }
593 
594 } // internal
595 
596 template<typename T>
597 inline T* HashTable<T>::Get(int p1, int p2)
598 {
599  return &(Base::At(GetId(p1, p2)));
600 }
601 
602 template<typename T>
603 inline T* HashTable<T>::Get(int p1, int p2, int p3, int p4)
604 {
605  return &(Base::At(GetId(p1, p2, p3, p4)));
606 }
607 
608 template<typename T>
609 int HashTable<T>::GetId(int p1, int p2)
610 {
611  // search for the item in the hashtable
612  if (p1 > p2) { std::swap(p1, p2); }
613  int idx = Hash(p1, p2);
614  int id = SearchList(table[idx], p1, p2);
615  if (id >= 0) { return id; }
616 
617  // not found - use an unused item or create a new one
618  int new_id;
619  if (unused.Size())
620  {
621  new_id = unused.Last();
622  unused.DeleteLast();
623  }
624  else
625  {
626  new_id = Base::Append();
627  }
628  T& item = Base::At(new_id);
629  item.p1 = p1;
630  item.p2 = p2;
631 
632  // insert into hashtable
633  Insert(idx, new_id, item);
634  CheckRehash();
635 
636  return new_id;
637 }
638 
639 template<typename T>
640 int HashTable<T>::GetId(int p1, int p2, int p3, int p4)
641 {
642  // search for the item in the hashtable
643  internal::sort4_ext(p1, p2, p3, p4);
644  int idx = Hash(p1, p2, p3);
645  int id = SearchList(table[idx], p1, p2, p3);
646  if (id >= 0) { return id; }
647 
648  // not found - use an unused item or create a new one
649  int new_id;
650  if (unused.Size())
651  {
652  new_id = unused.Last();
653  unused.DeleteLast();
654  }
655  else
656  {
657  new_id = Base::Append();
658  }
659  T& item = Base::At(new_id);
660  item.p1 = p1;
661  item.p2 = p2;
662  item.p3 = p3;
663 
664  // insert into hashtable
665  Insert(idx, new_id, item);
666  CheckRehash();
667 
668  return new_id;
669 }
670 
671 template<typename T>
672 inline T* HashTable<T>::Find(int p1, int p2)
673 {
674  int id = FindId(p1, p2);
675  return (id >= 0) ? &(Base::At(id)) : NULL;
676 }
677 
678 template<typename T>
679 inline T* HashTable<T>::Find(int p1, int p2, int p3, int p4)
680 {
681  int id = FindId(p1, p2, p3, p4);
682  return (id >= 0) ? &(Base::At(id)) : NULL;
683 }
684 
685 template<typename T>
686 inline const T* HashTable<T>::Find(int p1, int p2) const
687 {
688  int id = FindId(p1, p2);
689  return (id >= 0) ? &(Base::At(id)) : NULL;
690 }
691 
692 template<typename T>
693 inline const T* HashTable<T>::Find(int p1, int p2, int p3, int p4) const
694 {
695  int id = FindId(p1, p2, p3, p4);
696  return (id >= 0) ? &(Base::At(id)) : NULL;
697 }
698 
699 template<typename T>
700 int HashTable<T>::FindId(int p1, int p2) const
701 {
702  if (p1 > p2) { std::swap(p1, p2); }
703  return SearchList(table[Hash(p1, p2)], p1, p2);
704 }
705 
706 template<typename T>
707 int HashTable<T>::FindId(int p1, int p2, int p3, int p4) const
708 {
709  internal::sort4_ext(p1, p2, p3, p4);
710  return SearchList(table[Hash(p1, p2, p3)], p1, p2, p3);
711 }
712 
713 template<typename T>
714 int HashTable<T>::SearchList(int id, int p1, int p2) const
715 {
716  while (id >= 0)
717  {
718  const T& item = Base::At(id);
719  if (item.p1 == p1 && item.p2 == p2) { return id; }
720  id = item.next;
721  }
722  return -1;
723 }
724 
725 template<typename T>
726 int HashTable<T>::SearchList(int id, int p1, int p2, int p3) const
727 {
728  while (id >= 0)
729  {
730  const T& item = Base::At(id);
731  if (item.p1 == p1 && item.p2 == p2 && item.p3 == p3) { return id; }
732  id = item.next;
733  }
734  return -1;
735 }
736 
737 template<typename T>
739 {
740  const int fill_factor = 2;
741 
742  // is the table overfull?
743  if (Base::Size() > (mask+1) * fill_factor)
744  {
745  DoRehash();
746  }
747 }
748 
749 template<typename T>
751 {
752  delete [] table;
753 
754  // double the table size
755  int new_table_size = 2*(mask+1);
756  table = new int[new_table_size];
757  for (int i = 0; i < new_table_size; i++) { table[i] = -1; }
758  mask = new_table_size-1;
759 
760 #if defined(MFEM_DEBUG) && !defined(MFEM_USE_MPI)
761  mfem::out << _MFEM_FUNC_NAME << ": rehashing to size " << new_table_size
762  << std::endl;
763 #endif
764 
765  // reinsert all items
766  for (iterator it = begin(); it != end(); ++it)
767  {
768  Insert(Hash(*it), it.index(), *it);
769  }
770 }
771 
772 template<typename T>
773 inline void HashTable<T>::Insert(int idx, int id, T &item)
774 {
775  // add item at the beginning of the linked list
776  item.next = table[idx];
777  table[idx] = id;
778 }
779 
780 template<typename T>
781 void HashTable<T>::Unlink(int idx, int id)
782 {
783  // remove item from the linked list
784  int* p_id = table + idx;
785  while (*p_id >= 0)
786  {
787  T& item = Base::At(*p_id);
788  if (*p_id == id)
789  {
790  *p_id = item.next;
791  return;
792  }
793  p_id = &(item.next);
794  }
795  MFEM_ABORT("HashTable<>::Unlink: item not found!");
796 }
797 
798 template<typename T>
800 {
801  T& item = Base::At(id);
802  Unlink(Hash(item), id);
803  item.next = -2; // mark item as unused
804  unused.Append(id); // add its id to the unused ids
805 }
806 
807 template<typename T>
809 {
810  Base::DeleteAll();
811  for (int i = 0; i <= mask; i++) { table[i] = -1; }
812  unused.DeleteAll();
813 }
814 
815 template<typename T>
816 void HashTable<T>::Alloc(int id, int p1, int p2)
817 {
818  // enlarge the BlockArray to hold 'id'
819  while (id >= Base::Size())
820  {
821  Base::At(Base::Append()).next = -2; // append "unused" items
822  }
823 
824  T& item = Base::At(id);
825  if (item.next == -2)
826  {
827  item.next = -1;
828  item.p1 = p1;
829  item.p2 = p2;
830 
831  Insert(Hash(p1, p2), id, item);
832  CheckRehash();
833  }
834 }
835 
836 template<typename T>
838 {
839  unused.DeleteAll();
840  for (int i = 0; i < Base::Size(); i++)
841  {
842  if (Base::At(i).next == -2) { unused.Append(i); }
843  }
844 }
845 
846 template<typename T>
847 void HashTable<T>::Reparent(int id, int new_p1, int new_p2)
848 {
849  T& item = Base::At(id);
850  Unlink(Hash(item), id);
851 
852  if (new_p1 > new_p2) { std::swap(new_p1, new_p2); }
853  item.p1 = new_p1;
854  item.p2 = new_p2;
855 
856  // reinsert under new parent IDs
857  int new_idx = Hash(new_p1, new_p2);
858  Insert(new_idx, id, item);
859 }
860 
861 template<typename T>
863  int new_p1, int new_p2, int new_p3, int new_p4)
864 {
865  T& item = Base::At(id);
866  Unlink(Hash(item), id);
867 
868  internal::sort4_ext(new_p1, new_p2, new_p3, new_p4);
869  item.p1 = new_p1;
870  item.p2 = new_p2;
871  item.p3 = new_p3;
872 
873  // reinsert under new parent IDs
874  int new_idx = Hash(new_p1, new_p2, new_p3);
875  Insert(new_idx, id, item);
876 }
877 
878 template<typename T>
879 std::size_t HashTable<T>::MemoryUsage() const
880 {
881  return (mask+1) * sizeof(int) + Base::MemoryUsage() + unused.MemoryUsage();
882 }
883 
884 template<typename T>
886 {
887  mfem::out << Base::MemoryUsage() << " + " << (mask+1) * sizeof(int)
888  << " + " << unused.MemoryUsage();
889 }
890 
891 template<typename T>
892 int HashTable<T>::BinSize(int idx) const
893 {
894  int count = 0;
895  int id = table[idx];
896  while (id >= 0)
897  {
898  const T& item = Base::At(id);
899  id = item.next;
900  count++;
901  }
902  return count;
903 }
904 
905 template<typename T>
907 {
908  int table_size = mask+1;
909  mfem::out << "Hash table size: " << table_size << "\n";
910  mfem::out << "Item count: " << Size() << "\n";
911  mfem::out << "BlockArray size: " << Base::Size() << "\n";
912 
913  const int H = 16;
914  int hist[H];
915 
916  for (int i = 0; i < H; i++) { hist[i] = 0; }
917 
918  for (int i = 0; i < table_size; i++)
919  {
920  int bs = BinSize(i);
921  if (bs >= H) { bs = H-1; }
922  hist[bs]++;
923  }
924 
925  mfem::out << "Bin size histogram:\n";
926  for (int i = 0; i < H; i++)
927  {
928  mfem::out << " size " << i << ": "
929  << hist[i] << " bins" << std::endl;
930  }
931 }
932 
933 
934 template <typename int_type_const_iter>
935 HashFunction &HashFunction::EncodeAndHashInts(int_type_const_iter begin,
936  int_type_const_iter end)
937 {
938  // For hashing, an integer k is encoded as follows:
939  // * 1 byte = sign_bit(k) + num_bytes(k), where
940  // - sign_bit(k) = (k >= 0) ? 0 : 128
941  // - num_bytes(k) = minimum number of bytes needed to represent abs(k)
942  // with the convention that num_bytes(0) = 0.
943  // * num_bytes(k) bytes = the bytes of abs(k), starting with the least
944  // significant byte.
945 
946  static_assert(
947  std::is_integral<
948  /**/ typename std::remove_reference<decltype(*begin)>::type
949  /**/ >::value,
950  "invalid iterator type");
951 
952  // Skip encoding if hashing is not available:
953  if (hash_data == nullptr) { return *this; }
954 
955  constexpr int max_buffer_bytes = 64*1024;
956  unsigned char buffer[max_buffer_bytes];
957  int buffer_counter = 0;
958  while (begin != end)
959  {
960  int byte_counter = 0;
961  auto k = *begin;
962  buffer[buffer_counter] = (k >= 0) ? 0 : (k = -k, 128);
963  while (k != 0)
964  {
965  byte_counter++;
966  buffer[buffer_counter + byte_counter] = (unsigned char)(k % 256);
967  k /= 256; // (k >>= 8) results in error, e.g. for 'char'
968  }
969  buffer[buffer_counter] |= byte_counter;
970  buffer_counter += (byte_counter + 1);
971 
972  ++begin;
973 
974  if (begin == end ||
975  buffer_counter + (1 + sizeof(*begin)) > max_buffer_bytes)
976  {
977  HashBuffer(buffer, buffer_counter);
978  buffer_counter = 0;
979  }
980  }
981  return *this;
982 }
983 
984 template <typename double_const_iter>
985 HashFunction &HashFunction::EncodeAndHashDoubles(double_const_iter begin,
986  double_const_iter end)
987 {
988  // For hashing, a double is encoded in little endian byte-order.
989 
990  static_assert(
991  std::is_same<decltype(*begin), const double &>::value,
992  "invalid iterator type");
993 
994  // Skip encoding if hashing is not available:
995  if (hash_data == nullptr) { return *this; }
996 
997  constexpr int max_buffer_bytes = 64*1024;
998  unsigned char buffer[max_buffer_bytes];
999  int buffer_counter = 0;
1000  while (begin != end)
1001  {
1002  auto k = reinterpret_cast<const uint64_t &>(*begin);
1003  for (int i = 0; i != 7; i++)
1004  {
1005  buffer[buffer_counter++] = (unsigned char)(k & 255); k >>= 8;
1006  }
1007  buffer[buffer_counter++] = (unsigned char)k;
1008 
1009  ++begin;
1010 
1011  if (begin == end || buffer_counter + 8 > max_buffer_bytes)
1012  {
1013  HashBuffer(buffer, buffer_counter);
1014  buffer_counter = 0;
1015  }
1016  }
1017  return *this;
1018 }
1019 
1020 } // namespace mfem
1021 
1022 #endif
Hash function for data sequences.
Definition: hash.hpp:456
int Hash(const Hashed2 &item) const
Hash function for items of type T that inherit from Hashed2.
Definition: hash.hpp:383
HashFunction & AppendInts(const int_type(&ints)[num_ints])
Add a sequence of integers for hashing, given as a fixed-size c-array.
Definition: hash.hpp:496
Array< int > unused
Definition: hash.hpp:355
void * hash_data
Definition: hash.hpp:459
int Size() const
Return the number of items actually stored.
Definition: array.hpp:510
void UpdateUnused()
Reinitialize the internal list of unallocated items.
Definition: hash.hpp:837
std::size_t MemoryUsage() const
Return total size of allocated memory (tables plus items), in bytes.
Definition: hash.hpp:879
HashFunction & EncodeAndHashDoubles(double_const_iter begin, double_const_iter end)
Double encoding method: encode in little-endian byte-order.
Definition: hash.hpp:985
int Hash(size_t p1, size_t p2, size_t p3) const
hash function for Hashed4 items.
Definition: hash.hpp:378
void Unlink(int idx, int id)
Unlink an item id from the linked list of bin idx.
Definition: hash.hpp:781
void Delete(int id)
Remove an item from the hash table.
Definition: hash.hpp:799
const_iterator cbegin() const
Definition: hash.hpp:339
const_iterator cend() const
Definition: hash.hpp:340
HashTable(int block_size=16 *1024, int init_hash_size=32 *1024)
Main constructor of the HashTable class.
Definition: hash.hpp:535
void HashBuffer(const void *buffer, size_t num_bytes)
Add a sequence of bytes for hashing.
Definition: hash.cpp:45
void PrintStats() const
Print a histogram of bin sizes for debugging purposes.
Definition: hash.hpp:906
HashFunction & EncodeAndHashInts(int_type_const_iter begin, int_type_const_iter end)
Integer encoding method; result is independent of endianness and type.
Definition: hash.hpp:935
void PrintMemoryDetail() const
Write details of the memory usage to the mfem output stream.
Definition: hash.hpp:885
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
Definition: hash.cpp:60
iterator & operator++()
Definition: hash.hpp:309
T * Get(int p1, int p2)
Item accessor with key (or parents) the pair &#39;p1&#39;, &#39;p2&#39;. Default construct an item of type T if no va...
Definition: hash.hpp:597
HashFunction & AppendBytes(const void *seq, size_t num_bytes)
Add a sequence of bytes for hashing.
Definition: hash.hpp:482
const_iterator(const base &it)
Definition: hash.hpp:323
void DeleteAll()
Remove all items.
Definition: hash.hpp:808
int SearchList(int id, int p1, int p2) const
Search the index of the item associated to the key (p1,p2) starting from the item with index id...
Definition: hash.hpp:714
HashFunction & AppendDoubles(const double *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
Definition: hash.hpp:509
int Hash(size_t p1, size_t p2) const
hash function for Hashed2 items.
Definition: hash.hpp:365
void CheckRehash()
Check table fill factor and resize if necessary.
Definition: hash.hpp:738
iterator begin()
Definition: array.hpp:604
int BinSize(int idx) const
Return the size of the bin "idx".
Definition: hash.hpp:892
HashFunction()
Default constructor: initialize the hash function.
Definition: hash.cpp:29
const_iterator & operator++()
Definition: hash.hpp:329
int NumFreeIds() const
Return the number of free/unused ids in the HashTable.
Definition: hash.hpp:228
double b
Definition: lissajous.cpp:42
void Reparent(int id, int new_p1, int new_p2)
Change the key associated with an item.
Definition: hash.hpp:847
T * Find(int p1, int p2)
Find item whose parents are p1, p2... Return NULL if it doesn&#39;t exist.
Definition: hash.hpp:672
iterator(const base &it)
Definition: hash.hpp:303
HashFunction & AppendDoubles(const double(&doubles)[num_doubles])
Add a sequence of doubles for hashing, given as a fixed-size c-array.
Definition: hash.hpp:516
iterator end()
Definition: hash.hpp:337
int * table
Definition: hash.hpp:346
int Hash(const Hashed4 &item) const
Hash function for items of type T that inherit from Hashed4.
Definition: hash.hpp:387
BlockArray< T > Base
Definition: hash.hpp:85
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
int next
Definition: hash.hpp:39
HashFunction & AppendInts(const int_type *ints, size_t num_ints)
Add a sequence of integers for hashing, given as a c-array.
Definition: hash.hpp:489
iterator begin()
Definition: hash.hpp:336
const_iterator cbegin() const
Definition: array.hpp:607
double a
Definition: lissajous.cpp:41
int FindId(int p1, int p2) const
Find id of item whose parents are p1, p2... Return -1 if it doesn&#39;t exist.
Definition: hash.hpp:700
bool IdExists(int id) const
Return true if item &#39;id&#39; exists in (is used by) the container.
Definition: hash.hpp:235
void Copy(Array &copy) const
Create a copy of the internal array to the provided copy.
Definition: array.hpp:864
int NumIds() const
Return the total number of ids (used and unused) in the HashTable.
Definition: hash.hpp:225
int Size() const
Return the logical size of the array.
Definition: array.hpp:141
HashFunction & AppendInts(const int_type_container &ints)
Add a sequence of integers for hashing, given as a container.
Definition: hash.hpp:503
~HashFunction()
Destructor.
Definition: hash.cpp:38
int GetId(int p1, int p2)
Get id of item whose parents are p1, p2... Create it if it doesn&#39;t exist.
Definition: hash.hpp:609
HashTable & operator=(const HashTable &)=delete
Copy assignment not supported.
int Size() const
Return the number of elements currently stored in the HashTable.
Definition: hash.hpp:222
HashFunction & AppendDoubles(const double_container &doubles)
Add a sequence of doubles for hashing, given as a container.
Definition: hash.hpp:523
Base::iterator base
Definition: hash.hpp:300
Base::const_iterator base
Definition: hash.hpp:320
T & At(int index)
Access item of the array.
Definition: array.hpp:494
int next
Definition: hash.hpp:30
void DoRehash()
Double the size of the hash table (i.e., double the number of bins) and reinsert all items into the n...
Definition: hash.hpp:750
void Insert(int idx, int id, T &item)
Insert the item &#39;id&#39; into bin &#39;idx&#39;.
Definition: hash.hpp:773