MFEM  v4.5.2
Finite element discretization library
hash.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_HASH
13 #define MFEM_HASH
14 
15 #include "../config/config.hpp"
16 #include "array.hpp"
17 #include "globals.hpp"
18 #include <type_traits>
19 
20 namespace mfem
21 {
22 
23 /** A concept for items that should be used in HashTable and be accessible by
24  * hashing two IDs.
25  */
26 struct Hashed2
27 {
28  int p1, p2;
29  int next;
30 };
31 
32 /** A concept for items that should be used in HashTable and be accessible by
33  * hashing four IDs.
34  */
35 struct Hashed4
36 {
37  int p1, p2, p3; // NOTE: p4 is neither hashed nor stored
38  int next;
39 };
40 
41 
42 /** HashTable is a container for items that require associative access through
43  * pairs (or quadruples) of indices:
44  *
45  * (p1, p2) -> item
46  * (p1, p2, p3, p4) -> item
47  *
48  * An example of this are edges and faces in a mesh. Each edge is uniquely
49  * identified by two parent vertices and so can be easily accessed from
50  * different elements using this class. Similarly for faces.
51  *
52  * The order of the p1, p2, ... indices is not relevant as they are sorted
53  * each time this class is invoked.
54  *
55  * There are two main methods this class provides. The Get(...) methods always
56  * return an item given the two or four indices. If the item didn't previously
57  * exist, the methods creates a new one. The Find(...) methods, on the other
58  * hand, just return NULL or -1 if the item doesn't exist.
59  *
60  * Each new item is automatically assigned a unique ID - the index of the item
61  * inside the BlockArray. The IDs may (but need not) be used as p1, p2, ... of
62  * other items.
63  *
64  * The item type (T) needs to follow either the Hashed2 or the Hashed4
65  * concept. It is easiest to just inherit from these structs.
66  *
67  * All items in the container can also be accessed sequentially using the
68  * provided iterator.
69  *
70  * Notes:
71  * The data structure and implementation is based on a BlockArray<T> which
72  * provides an efficient item storage that avoids heap fragmentation, and
73  * index-based item access. The hash table implemented on top of the
74  * BlockArray provides fast associative (key -> value) access by grouping
75  * items into bins (buckets) of O(1) size.
76  * - "id" denotes the index of an item in the underlying BlockArray<T>,
77  * - "idx" denotes the index of a bin, determined by hashing a key with
78  * the function `Hash`.
79  */
80 template<typename T>
81 class HashTable : public BlockArray<T>
82 {
83 protected:
85 
86 public:
87  /** @brief Main constructor of the HashTable class.
88 
89  @param[in] block_size The size of the storage blocks of the underlying
90  BlockArray<T>.
91  @param[in] init_hash_size The initial size of the hash table. Must be
92  a power of 2. */
93  HashTable(int block_size = 16*1024, int init_hash_size = 32*1024);
94  /// @brief Deep copy
95  HashTable(const HashTable& other);
96  /// @brief Copy assignment not supported
97  HashTable& operator=(const HashTable&) = delete;
98  ~HashTable();
99 
100  /** @brief Item accessor with key (or parents) the pair 'p1', 'p2'. Default
101  construct an item of type T if no value correspond to the requested key.
102 
103  @param[in] p1 First part of the key.
104  @param[in] p2 Second part of the key.
105  @return The index "id" of the key in the BlockArray<T>.
106 
107  @warning This method should only be called if T inherits from Hashed2. */
108  T* Get(int p1, int p2);
109 
110  /** @brief Item accessor with key (or parents) the quadruplet 'p1', 'p2',
111  'p3', 'p4'. The key 'p4' is optional. Default construct an item of type T
112  if no value corresponds to the requested key.
113 
114  @param[in] p1 First part of the key.
115  @param[in] p2 Second part of the key.
116  @param[in] p3 Third part of the key.
117  @param[in] p4 Fourth part of the key (optional).
118  @return The index "id" of the key in the BlockArray<T>.
119 
120  @warning This method should only be called if T inherits from Hashed4. */
121  T* Get(int p1, int p2, int p3, int p4 = -1 /* p4 optional */);
122 
123  /// Get id of item whose parents are p1, p2... Create it if it doesn't exist.
124  /** @brief Get the "id" of an item, this "id" corresponding to the index of the
125  item in the underlying BlockArray<T> object. Default construct an item
126  and id if no value corresponds to the requested key.
127 
128  @param[in] p1 First part of the key.
129  @param[in] p2 Second part of the key.
130  @return The index "id" of the key in the BlockArray<T>.
131 
132  @warning This method should only be called if T inherits from Hashed2. */
133  int GetId(int p1, int p2);
134 
135  /** @brief Get the "id" of an item, this "id" corresponding to the index of the
136  item in the underlying BlockArray<T> object. Default construct an item
137  and id if no value correspond to the requested key.
138 
139  @param[in] p1 First part of the key.
140  @param[in] p2 Second part of the key.
141  @param[in] p3 Third part of the key.
142  @param[in] p4 Fourth part of the key (optional).
143  @return The index "id" of the key in the BlockArray<T>.
144 
145  @warning This method should only be called if T inherits from Hashed4. */
146  int GetId(int p1, int p2, int p3, int p4 = -1);
147 
148  /// Find item whose parents are p1, p2... Return NULL if it doesn't exist.
149  /** @brief Item accessor with key (or parents) the pair 'p1', 'p2'. Return
150  nullptr if no value correspond to the requested key.
151 
152  @param[in] p1 First part of the key.
153  @param[in] p2 Second part of the key.
154  @return The item associated to the key (p1,p2).
155 
156  @warning This method should only be called if T inherits from Hashed2. */
157  T* Find(int p1, int p2);
158 
159  /** @brief Item accessor with key (or parents) the quadruplet 'p1', 'p2',
160  'p3', 'p4'. The key 'p4' is optional. Return nullptr if no value
161  correspond to the requested key.
162 
163  @param[in] p1 First part of the key.
164  @param[in] p2 Second part of the key.
165  @param[in] p3 Third part of the key.
166  @param[in] p4 Fourth part of the key (optional).
167  @return The item associated to the key (p1,p2,p3,p4).
168 
169  @warning This method should only be called if T inherits from Hashed4. */
170  T* Find(int p1, int p2, int p3, int p4 = -1);
171 
172  /** @brief Item const accessor with key (or parents) the pair 'p1', 'p2'.
173  Return nullptr if no value correspond to the requested key.
174 
175  @param[in] p1 First part of the key.
176  @param[in] p2 Second part of the key.
177  @return The item associated to the key (p1,p2).
178 
179  @warning This method should only be called if T inherits from Hashed2. */
180  const T* Find(int p1, int p2) const;
181 
182  /** @brief Item const accessor with key (or parents) the quadruplet 'p1',
183  'p2', 'p3', 'p4'. The key 'p4' is optional. Return nullptr if no value
184  correspond to the requested key.
185 
186  @param[in] p1 First part of the key.
187  @param[in] p2 Second part of the key.
188  @param[in] p3 Third part of the key.
189  @param[in] p4 Fourth part of the key (optional).
190  @return The item associated to the key (p1,p2,p3,p4).
191 
192  @warning This method should only be called if T inherits from Hashed4. */
193  const T* Find(int p1, int p2, int p3, int p4 = -1) const;
194 
195  /// Find id of item whose parents are p1, p2... Return -1 if it doesn't exist.
196  /** @brief Find the "id" of an item, this "id" corresponding to the index of
197  the item in the underlying BlockArray<T> object. Default construct an
198  item and id if no value correspond to the requested key.
199 
200  @param[in] p1 First part of the key.
201  @param[in] p2 Second part of the key.
202  @return The index "id" of the key in the BlockArray<T>.
203 
204  @warning This method should only be called if T inherits from Hashed2. */
205  int FindId(int p1, int p2) const;
206 
207  /** @brief Find the "id" of an item, this "id" corresponding to the index of
208  the item in the underlying BlockArray<T> object. Default construct an
209  item and id if no value correspond to the requested key.
210 
211  @param[in] p1 First part of the key.
212  @param[in] p2 Second part of the key.
213  @param[in] p3 Third part of the key.
214  @param[in] p4 Fourth part of the key (optional).
215  @return The index "id" of the key in the BlockArray<T>.
216 
217  @warning This method should only be called if T inherits from Hashed4. */
218  int FindId(int p1, int p2, int p3, int p4 = -1) const;
219 
220  /// @brief Return the number of elements currently stored in the HashTable.
221  int Size() const { return Base::Size() - unused.Size(); }
222 
223  /// @brief Return the total number of ids (used and unused) in the HashTable.
224  int NumIds() const { return Base::Size(); }
225 
226  /// @brief Return the number of free/unused ids in the HashTable.
227  int NumFreeIds() const { return unused.Size(); }
228 
229  /** @brief Return true if item 'id' exists in (is used by) the container.
230 
231  @param[in] id Index of the item in the underlying BlockArray<T>.
232 
233  @warning It is assumed that 0 <= id < NumIds(). */
234  bool IdExists(int id) const { return (Base::At(id).next != -2); }
235 
236  /** @brief Remove an item from the hash table.
237 
238  @param[in] id Index of the item in the underlying BlockArray<T>.
239 
240  @warning Its id will be reused by newly added items. */
241  void Delete(int id);
242 
243  /// @brief Remove all items.
244  void DeleteAll();
245 
246  /** @brief Allocate an item at 'id'. Enlarge the underlying BlockArray if
247  necessary.
248 
249  @param[in] id Index of the item in the underlying BlockArray<T>.
250  @param[in] p1 First part of the key.
251  @param[in] p2 Second part of the key.
252 
253  @warning This is a special purpose method used when loading data from a
254  file. Does nothing if the slot 'id' has already been allocated. */
255  void Alloc(int id, int p1, int p2);
256 
257  /** @brief Reinitialize the internal list of unallocated items.
258 
259  @warning This is a special purpose method used when loading data from a file. */
260  void UpdateUnused();
261 
262  /** @brief Change the key associated with an item.
263 
264  In other words, makes an item hashed under different parent IDs.
265 
266  @param[in] id Index of the item in the underlying BlockArray<T>.
267  @param[in] new_p1 First part of the new key.
268  @param[in] new_p2 Second part of the new key.
269 
270  @warning This method should only be called if T inherits from Hashed2. */
271  void Reparent(int id, int new_p1, int new_p2);
272 
273  /** @brief Change the key associated with an item.
274 
275  In other words, makes an item hashed under different parent IDs.
276 
277  @param[in] id Index of the item in the underlying BlockArray<T>.
278  @param[in] new_p1 First part of the new key.
279  @param[in] new_p2 Second part of the new key.
280  @param[in] new_p3 Third part of the new key.
281  @param[in] new_p4 Fourth part of the new key (optional).
282 
283  @warning This method should only be called if T inherits from Hashed4. */
284  void Reparent(int id, int new_p1, int new_p2, int new_p3, int new_p4 = -1);
285 
286  /// @brief Return total size of allocated memory (tables plus items), in bytes.
287  std::size_t MemoryUsage() const;
288 
289  /// @brief Write details of the memory usage to the mfem output stream.
290  void PrintMemoryDetail() const;
291 
292  /// @brief Print a histogram of bin sizes for debugging purposes.
293  void PrintStats() const;
294 
295  class iterator : public Base::iterator
296  {
297  protected:
298  friend class HashTable;
299  typedef typename Base::iterator base;
300 
301  iterator() { }
302  iterator(const base &it) : base(it)
303  {
304  while (base::good() && (*this)->next == -2) { base::next(); }
305  }
306 
307  public:
309  {
310  while (base::next(), base::good() && (*this)->next == -2) { }
311  return *this;
312  }
313  };
314 
316  {
317  protected:
318  friend class HashTable;
319  typedef typename Base::const_iterator base;
320 
322  const_iterator(const base &it) : base(it)
323  {
324  while (base::good() && (*this)->next == -2) { base::next(); }
325  }
326 
327  public:
329  {
330  while (base::next(), base::good() && (*this)->next == -2) { }
331  return *this;
332  }
333  };
334 
335  iterator begin() { return iterator(Base::begin()); }
336  iterator end() { return iterator(); }
337 
338  const_iterator cbegin() const { return const_iterator(Base::cbegin()); }
339  const_iterator cend() const { return const_iterator(); }
340 
341 protected:
342  /** The hash table: each bin is a linked list of items. For each non-empty
343  bin, this arrays stores the 'id' of the first item in the list, or -1
344  if the bin is empty. */
345  int* table;
346 
347  /** mask = table_size-1. Used for fast modulo operation in Hash(), to wrap
348  the raw hashed index around the current table size (which must be a power
349  of two). */
350  int mask;
351 
352  /** List of deleted items in the BlockArray<T>. New items are created with
353  these ids first, before they are appended to the block array. */
355 
356  /** @brief hash function for Hashed2 items.
357 
358  @param[in] p1 First part of the key.
359  @param[in] p2 Second part of the key.
360  @return The hash key "idx" identifying a bin/bucket.
361 
362  NOTE: the constants are arbitrary
363  @warning This method should only be called if T inherits from Hashed2. */
364  inline int Hash(size_t p1, size_t p2) const
365  { return (984120265ul*p1 + 125965121ul*p2) & mask; }
366 
367  /** @brief hash function for Hashed4 items.
368 
369  @param[in] p1 First part of the key.
370  @param[in] p2 Second part of the key.
371  @param[in] p3 Third part of the key.
372  @return The hash key "idx" identifying a bin/bucket.
373 
374  NOTE: The constants are arbitrary.
375  NOTE: p4 is not hashed nor stored as p1, p2, p3 identify a face uniquely.
376  @warning This method should only be called if T inherits from Hashed4. */
377  inline int Hash(size_t p1, size_t p2, size_t p3) const
378  { return (984120265ul*p1 + 125965121ul*p2 + 495698413ul*p3) & mask; }
379 
380  // Delete() and Reparent() use one of these:
381  /// @brief Hash function for items of type T that inherit from Hashed2.
382  inline int Hash(const Hashed2& item) const
383  { return Hash(item.p1, item.p2); }
384 
385  /// @brief Hash function for items of type T that inherit from Hashed4.
386  inline int Hash(const Hashed4& item) const
387  { return Hash(item.p1, item.p2, item.p3); }
388 
389  /** @brief Search the index of the item associated to the key (p1,p2)
390  starting from the item with index @a id.
391 
392  @param[in] id Index of the item in the underlying BlockArray<T>.
393  @param[in] p1 First part of the key.
394  @param[in] p2 Second part of the key.
395  @return The index "id" of the key in the BlockArray<T>.
396 
397  @warning This method should only be called if T inherits from Hashed2. */
398  int SearchList(int id, int p1, int p2) const;
399 
400  /** @brief Search the index of the item associated to the key (p1,p2,p3,(p4))
401  starting from the item with index @a id.
402 
403  @param[in] id Index of the item in the underlying BlockArray<T>.
404  @param[in] p1 First part of the key.
405  @param[in] p2 Second part of the key.
406  @param[in] p3 Third part of the key.
407  @return The index "id" of the key in the BlockArray<T>.
408 
409  @warning This method should only be called if T inherits from Hashed4. */
410  int SearchList(int id, int p1, int p2, int p3) const;
411 
412  /** @brief Insert the item 'id' into bin 'idx'.
413 
414  @param[in] idx The bin/bucket index.
415  @param[in] id The index of the item in the BlockArray<T>.
416  @param[in] item The item to insert at the beginning of the linked list.
417 
418  @warning The method only works with bin 'idx' and does not check the
419  overall fill factor of the hash table. If appropriate,
420  use CheckRehash() for that. */
421  inline void Insert(int idx, int id, T &item);
422 
423  /** @brief Unlink an item @a id from the linked list of bin @a idx.
424 
425  @param[in] idx The bin/bucket index.
426  @param[in] id The index of the item in the BlockArray<T>.
427 
428  @warning The method aborts if the item is not found. */
429  void Unlink(int idx, int id);
430 
431  /** @brief Check table fill factor and resize if necessary.
432 
433  The method checks the average size of the bins (i.e., the fill factor).
434  If the fill factor is > 2, the table is enlarged (see DoRehash()). */
435  inline void CheckRehash();
436 
437  /** @brief Double the size of the hash table (i.e., double the number of bins)
438  and reinsert all items into the new bins.
439 
440  NOTE: Rehashing is computationally expensive (O(N) in the number of items),
441  but since it is only done rarely (when the number of items doubles),
442  the amortized complexity of inserting an item is still O(1). */
443  void DoRehash();
444 
445  /** @brief Return the size of the bin "idx".
446 
447  @param[in] idx The index of the bin.
448  @return The size of the bin. */
449  int BinSize(int idx) const;
450 };
451 
452 
453 /// Hash function for data sequences.
454 /** Depends on GnuTLS for SHA-256 hashing. */
456 {
457 protected:
458  void *hash_data;
459 
460  /// Add a sequence of bytes for hashing
461  void HashBuffer(const void *buffer, size_t num_bytes);
462 
463  /// Integer encoding method; result is independent of endianness and type
464  template <typename int_type_const_iter>
465  HashFunction &EncodeAndHashInts(int_type_const_iter begin,
466  int_type_const_iter end);
467 
468  /// Double encoding method: encode in little-endian byte-order
469  template <typename double_const_iter>
470  HashFunction &EncodeAndHashDoubles(double_const_iter begin,
471  double_const_iter end);
472 
473 public:
474  /// Default constructor: initialize the hash function
475  HashFunction();
476 
477  /// Destructor
478  ~HashFunction();
479 
480  /// Add a sequence of bytes for hashing
481  HashFunction &AppendBytes(const void *seq, size_t num_bytes)
482  { HashBuffer(seq, num_bytes); return *this; }
483 
484  /// Add a sequence of integers for hashing, given as a c-array.
485  /** Before hashing the sequence is encoded so that the result is independent
486  of endianness and type: int, long, unsigned, etc. */
487  template <typename int_type>
488  HashFunction &AppendInts(const int_type *ints, size_t num_ints)
489  { return EncodeAndHashInts(ints, ints + num_ints); }
490 
491  /// Add a sequence of integers for hashing, given as a fixed-size c-array.
492  /** Before hashing the sequence is encoded so that the result is independent
493  of endianness and type: int, long, unsigned, etc. */
494  template <typename int_type, size_t num_ints>
495  HashFunction &AppendInts(const int_type (&ints)[num_ints])
496  { return EncodeAndHashInts(ints, ints + num_ints); }
497 
498  /// Add a sequence of integers for hashing, given as a container.
499  /** Before hashing the sequence is encoded so that the result is independent
500  of endianness and type: int, long, unsigned, etc. */
501  template <typename int_type_container>
502  HashFunction &AppendInts(const int_type_container &ints)
503  { return EncodeAndHashInts(ints.begin(), ints.end()); }
504 
505  /// Add a sequence of doubles for hashing, given as a c-array.
506  /** Before hashing the sequence is encoded so that the result is independent
507  of endianness. */
508  HashFunction &AppendDoubles(const double *doubles, size_t num_doubles)
509  { return EncodeAndHashDoubles(doubles, doubles + num_doubles); }
510 
511  /// Add a sequence of doubles for hashing, given as a fixed-size c-array.
512  /** Before hashing the sequence is encoded so that the result is independent
513  of endianness. */
514  template <size_t num_doubles>
515  HashFunction &AppendDoubles(const double (&doubles)[num_doubles])
516  { return EncodeAndHashDoubles(doubles, doubles + num_doubles); }
517 
518  /// Add a sequence of doubles for hashing, given as a container.
519  /** Before hashing the sequence is encoded so that the result is independent
520  of endianness. */
521  template <typename double_container>
522  HashFunction &AppendDoubles(const double_container &doubles)
523  { return EncodeAndHashDoubles(doubles.begin(), doubles.end()); }
524 
525  /** @brief Return the hash string for the current sequence and reset (clear)
526  the sequence. */
527  std::string GetHash() const;
528 };
529 
530 
531 // implementation
532 
533 template<typename T>
534 HashTable<T>::HashTable(int block_size, int init_hash_size)
535  : Base(block_size)
536 {
537  mask = init_hash_size-1;
538  MFEM_VERIFY(!(init_hash_size & mask), "init_size must be a power of two.");
539 
540  table = new int[init_hash_size];
541  for (int i = 0; i < init_hash_size; i++)
542  {
543  table[i] = -1;
544  }
545 }
546 
547 template<typename T>
549  : Base(other), mask(other.mask)
550 {
551  int size = mask+1;
552  table = new int[size];
553  memcpy(table, other.table, size*sizeof(int));
554  other.unused.Copy(unused);
555 }
556 
557 template<typename T>
559 {
560  delete [] table;
561 }
562 
563 namespace internal
564 {
565 
566 inline void sort3(int &a, int &b, int &c)
567 {
568  if (a > b) { std::swap(a, b); }
569  if (a > c) { std::swap(a, c); }
570  if (b > c) { std::swap(b, c); }
571 }
572 
573 inline void sort4(int &a, int &b, int &c, int &d)
574 {
575  if (a > b) { std::swap(a, b); }
576  if (a > c) { std::swap(a, c); }
577  if (a > d) { std::swap(a, d); }
578  sort3(b, c, d);
579 }
580 
581 inline void sort4_ext(int &a, int &b, int &c, int &d)
582 {
583  if (d < 0) // support optional last index
584  {
585  sort3(a, b, c);
586  }
587  else
588  {
589  sort4(a, b, c, d);
590  }
591 }
592 
593 } // internal
594 
595 template<typename T>
596 inline T* HashTable<T>::Get(int p1, int p2)
597 {
598  return &(Base::At(GetId(p1, p2)));
599 }
600 
601 template<typename T>
602 inline T* HashTable<T>::Get(int p1, int p2, int p3, int p4)
603 {
604  return &(Base::At(GetId(p1, p2, p3, p4)));
605 }
606 
607 template<typename T>
608 int HashTable<T>::GetId(int p1, int p2)
609 {
610  // search for the item in the hashtable
611  if (p1 > p2) { std::swap(p1, p2); }
612  int idx = Hash(p1, p2);
613  int id = SearchList(table[idx], p1, p2);
614  if (id >= 0) { return id; }
615 
616  // not found - use an unused item or create a new one
617  int new_id;
618  if (unused.Size())
619  {
620  new_id = unused.Last();
621  unused.DeleteLast();
622  }
623  else
624  {
625  new_id = Base::Append();
626  }
627  T& item = Base::At(new_id);
628  item.p1 = p1;
629  item.p2 = p2;
630 
631  // insert into hashtable
632  Insert(idx, new_id, item);
633  CheckRehash();
634 
635  return new_id;
636 }
637 
638 template<typename T>
639 int HashTable<T>::GetId(int p1, int p2, int p3, int p4)
640 {
641  // search for the item in the hashtable
642  internal::sort4_ext(p1, p2, p3, p4);
643  int idx = Hash(p1, p2, p3);
644  int id = SearchList(table[idx], p1, p2, p3);
645  if (id >= 0) { return id; }
646 
647  // not found - use an unused item or create a new one
648  int new_id;
649  if (unused.Size())
650  {
651  new_id = unused.Last();
652  unused.DeleteLast();
653  }
654  else
655  {
656  new_id = Base::Append();
657  }
658  T& item = Base::At(new_id);
659  item.p1 = p1;
660  item.p2 = p2;
661  item.p3 = p3;
662 
663  // insert into hashtable
664  Insert(idx, new_id, item);
665  CheckRehash();
666 
667  return new_id;
668 }
669 
670 template<typename T>
671 inline T* HashTable<T>::Find(int p1, int p2)
672 {
673  int id = FindId(p1, p2);
674  return (id >= 0) ? &(Base::At(id)) : NULL;
675 }
676 
677 template<typename T>
678 inline T* HashTable<T>::Find(int p1, int p2, int p3, int p4)
679 {
680  int id = FindId(p1, p2, p3, p4);
681  return (id >= 0) ? &(Base::At(id)) : NULL;
682 }
683 
684 template<typename T>
685 inline const T* HashTable<T>::Find(int p1, int p2) const
686 {
687  int id = FindId(p1, p2);
688  return (id >= 0) ? &(Base::At(id)) : NULL;
689 }
690 
691 template<typename T>
692 inline const T* HashTable<T>::Find(int p1, int p2, int p3, int p4) const
693 {
694  int id = FindId(p1, p2, p3, p4);
695  return (id >= 0) ? &(Base::At(id)) : NULL;
696 }
697 
698 template<typename T>
699 int HashTable<T>::FindId(int p1, int p2) const
700 {
701  if (p1 > p2) { std::swap(p1, p2); }
702  return SearchList(table[Hash(p1, p2)], p1, p2);
703 }
704 
705 template<typename T>
706 int HashTable<T>::FindId(int p1, int p2, int p3, int p4) const
707 {
708  internal::sort4_ext(p1, p2, p3, p4);
709  return SearchList(table[Hash(p1, p2, p3)], p1, p2, p3);
710 }
711 
712 template<typename T>
713 int HashTable<T>::SearchList(int id, int p1, int p2) const
714 {
715  while (id >= 0)
716  {
717  const T& item = Base::At(id);
718  if (item.p1 == p1 && item.p2 == p2) { return id; }
719  id = item.next;
720  }
721  return -1;
722 }
723 
724 template<typename T>
725 int HashTable<T>::SearchList(int id, int p1, int p2, int p3) const
726 {
727  while (id >= 0)
728  {
729  const T& item = Base::At(id);
730  if (item.p1 == p1 && item.p2 == p2 && item.p3 == p3) { return id; }
731  id = item.next;
732  }
733  return -1;
734 }
735 
736 template<typename T>
738 {
739  const int fill_factor = 2;
740 
741  // is the table overfull?
742  if (Base::Size() > (mask+1) * fill_factor)
743  {
744  DoRehash();
745  }
746 }
747 
748 template<typename T>
750 {
751  delete [] table;
752 
753  // double the table size
754  int new_table_size = 2*(mask+1);
755  table = new int[new_table_size];
756  for (int i = 0; i < new_table_size; i++) { table[i] = -1; }
757  mask = new_table_size-1;
758 
759 #if defined(MFEM_DEBUG) && !defined(MFEM_USE_MPI)
760  mfem::out << _MFEM_FUNC_NAME << ": rehashing to size " << new_table_size
761  << std::endl;
762 #endif
763 
764  // reinsert all items
765  for (iterator it = begin(); it != end(); ++it)
766  {
767  Insert(Hash(*it), it.index(), *it);
768  }
769 }
770 
771 template<typename T>
772 inline void HashTable<T>::Insert(int idx, int id, T &item)
773 {
774  // add item at the beginning of the linked list
775  item.next = table[idx];
776  table[idx] = id;
777 }
778 
779 template<typename T>
780 void HashTable<T>::Unlink(int idx, int id)
781 {
782  // remove item from the linked list
783  int* p_id = table + idx;
784  while (*p_id >= 0)
785  {
786  T& item = Base::At(*p_id);
787  if (*p_id == id)
788  {
789  *p_id = item.next;
790  return;
791  }
792  p_id = &(item.next);
793  }
794  MFEM_ABORT("HashTable<>::Unlink: item not found!");
795 }
796 
797 template<typename T>
799 {
800  T& item = Base::At(id);
801  Unlink(Hash(item), id);
802  item.next = -2; // mark item as unused
803  unused.Append(id); // add its id to the unused ids
804 }
805 
806 template<typename T>
808 {
809  Base::DeleteAll();
810  for (int i = 0; i <= mask; i++) { table[i] = -1; }
811  unused.DeleteAll();
812 }
813 
814 template<typename T>
815 void HashTable<T>::Alloc(int id, int p1, int p2)
816 {
817  // enlarge the BlockArray to hold 'id'
818  while (id >= Base::Size())
819  {
820  Base::At(Base::Append()).next = -2; // append "unused" items
821  }
822 
823  T& item = Base::At(id);
824  if (item.next == -2)
825  {
826  item.next = -1;
827  item.p1 = p1;
828  item.p2 = p2;
829 
830  Insert(Hash(p1, p2), id, item);
831  CheckRehash();
832  }
833 }
834 
835 template<typename T>
837 {
838  unused.DeleteAll();
839  for (int i = 0; i < Base::Size(); i++)
840  {
841  if (Base::At(i).next == -2) { unused.Append(i); }
842  }
843 }
844 
845 template<typename T>
846 void HashTable<T>::Reparent(int id, int new_p1, int new_p2)
847 {
848  T& item = Base::At(id);
849  Unlink(Hash(item), id);
850 
851  if (new_p1 > new_p2) { std::swap(new_p1, new_p2); }
852  item.p1 = new_p1;
853  item.p2 = new_p2;
854 
855  // reinsert under new parent IDs
856  int new_idx = Hash(new_p1, new_p2);
857  Insert(new_idx, id, item);
858 }
859 
860 template<typename T>
862  int new_p1, int new_p2, int new_p3, int new_p4)
863 {
864  T& item = Base::At(id);
865  Unlink(Hash(item), id);
866 
867  internal::sort4_ext(new_p1, new_p2, new_p3, new_p4);
868  item.p1 = new_p1;
869  item.p2 = new_p2;
870  item.p3 = new_p3;
871 
872  // reinsert under new parent IDs
873  int new_idx = Hash(new_p1, new_p2, new_p3);
874  Insert(new_idx, id, item);
875 }
876 
877 template<typename T>
878 std::size_t HashTable<T>::MemoryUsage() const
879 {
880  return (mask+1) * sizeof(int) + Base::MemoryUsage() + unused.MemoryUsage();
881 }
882 
883 template<typename T>
885 {
886  mfem::out << Base::MemoryUsage() << " + " << (mask+1) * sizeof(int)
887  << " + " << unused.MemoryUsage();
888 }
889 
890 template<typename T>
891 int HashTable<T>::BinSize(int idx) const
892 {
893  int count = 0;
894  int id = table[idx];
895  while (id >= 0)
896  {
897  const T& item = Base::At(id);
898  id = item.next;
899  count++;
900  }
901  return count;
902 }
903 
904 template<typename T>
906 {
907  int table_size = mask+1;
908  mfem::out << "Hash table size: " << table_size << "\n";
909  mfem::out << "Item count: " << Size() << "\n";
910  mfem::out << "BlockArray size: " << Base::Size() << "\n";
911 
912  const int H = 16;
913  int hist[H];
914 
915  for (int i = 0; i < H; i++) { hist[i] = 0; }
916 
917  for (int i = 0; i < table_size; i++)
918  {
919  int bs = BinSize(i);
920  if (bs >= H) { bs = H-1; }
921  hist[bs]++;
922  }
923 
924  mfem::out << "Bin size histogram:\n";
925  for (int i = 0; i < H; i++)
926  {
927  mfem::out << " size " << i << ": "
928  << hist[i] << " bins" << std::endl;
929  }
930 }
931 
932 
933 template <typename int_type_const_iter>
934 HashFunction &HashFunction::EncodeAndHashInts(int_type_const_iter begin,
935  int_type_const_iter end)
936 {
937  // For hashing, an integer k is encoded as follows:
938  // * 1 byte = sign_bit(k) + num_bytes(k), where
939  // - sign_bit(k) = (k >= 0) ? 0 : 128
940  // - num_bytes(k) = minimum number of bytes needed to represent abs(k)
941  // with the convention that num_bytes(0) = 0.
942  // * num_bytes(k) bytes = the bytes of abs(k), starting with the least
943  // significant byte.
944 
945  static_assert(
946  std::is_integral<
947  /**/ typename std::remove_reference<decltype(*begin)>::type
948  /**/ >::value,
949  "invalid iterator type");
950 
951  // Skip encoding if hashing is not available:
952  if (hash_data == nullptr) { return *this; }
953 
954  constexpr int max_buffer_bytes = 64*1024;
955  unsigned char buffer[max_buffer_bytes];
956  int buffer_counter = 0;
957  while (begin != end)
958  {
959  int byte_counter = 0;
960  auto k = *begin;
961  buffer[buffer_counter] = (k >= 0) ? 0 : (k = -k, 128);
962  while (k != 0)
963  {
964  byte_counter++;
965  buffer[buffer_counter + byte_counter] = (unsigned char)(k % 256);
966  k /= 256; // (k >>= 8) results in error, e.g. for 'char'
967  }
968  buffer[buffer_counter] |= byte_counter;
969  buffer_counter += (byte_counter + 1);
970 
971  ++begin;
972 
973  if (begin == end ||
974  buffer_counter + (1 + sizeof(*begin)) > max_buffer_bytes)
975  {
976  HashBuffer(buffer, buffer_counter);
977  buffer_counter = 0;
978  }
979  }
980  return *this;
981 }
982 
983 template <typename double_const_iter>
984 HashFunction &HashFunction::EncodeAndHashDoubles(double_const_iter begin,
985  double_const_iter end)
986 {
987  // For hashing, a double is encoded in little endian byte-order.
988 
989  static_assert(
990  std::is_same<decltype(*begin), const double &>::value,
991  "invalid iterator type");
992 
993  // Skip encoding if hashing is not available:
994  if (hash_data == nullptr) { return *this; }
995 
996  constexpr int max_buffer_bytes = 64*1024;
997  unsigned char buffer[max_buffer_bytes];
998  int buffer_counter = 0;
999  while (begin != end)
1000  {
1001  auto k = reinterpret_cast<const uint64_t &>(*begin);
1002  for (int i = 0; i != 7; i++)
1003  {
1004  buffer[buffer_counter++] = (unsigned char)(k & 255); k >>= 8;
1005  }
1006  buffer[buffer_counter++] = (unsigned char)k;
1007 
1008  ++begin;
1009 
1010  if (begin == end || buffer_counter + 8 > max_buffer_bytes)
1011  {
1012  HashBuffer(buffer, buffer_counter);
1013  buffer_counter = 0;
1014  }
1015  }
1016  return *this;
1017 }
1018 
1019 } // namespace mfem
1020 
1021 #endif
Hash function for data sequences.
Definition: hash.hpp:455
int Hash(const Hashed2 &item) const
Hash function for items of type T that inherit from Hashed2.
Definition: hash.hpp:382
HashFunction & AppendInts(const int_type(&ints)[num_ints])
Add a sequence of integers for hashing, given as a fixed-size c-array.
Definition: hash.hpp:495
Array< int > unused
Definition: hash.hpp:354
void * hash_data
Definition: hash.hpp:458
int Size() const
Return the number of items actually stored.
Definition: array.hpp:507
void UpdateUnused()
Reinitialize the internal list of unallocated items.
Definition: hash.hpp:836
std::size_t MemoryUsage() const
Return total size of allocated memory (tables plus items), in bytes.
Definition: hash.hpp:878
HashFunction & EncodeAndHashDoubles(double_const_iter begin, double_const_iter end)
Double encoding method: encode in little-endian byte-order.
Definition: hash.hpp:984
int Hash(size_t p1, size_t p2, size_t p3) const
hash function for Hashed4 items.
Definition: hash.hpp:377
void Unlink(int idx, int id)
Unlink an item id from the linked list of bin idx.
Definition: hash.hpp:780
void Delete(int id)
Remove an item from the hash table.
Definition: hash.hpp:798
const_iterator cbegin() const
Definition: hash.hpp:338
const_iterator cend() const
Definition: hash.hpp:339
HashTable(int block_size=16 *1024, int init_hash_size=32 *1024)
Main constructor of the HashTable class.
Definition: hash.hpp:534
void HashBuffer(const void *buffer, size_t num_bytes)
Add a sequence of bytes for hashing.
Definition: hash.cpp:45
void PrintStats() const
Print a histogram of bin sizes for debugging purposes.
Definition: hash.hpp:905
HashFunction & EncodeAndHashInts(int_type_const_iter begin, int_type_const_iter end)
Integer encoding method; result is independent of endianness and type.
Definition: hash.hpp:934
void PrintMemoryDetail() const
Write details of the memory usage to the mfem output stream.
Definition: hash.hpp:884
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
Definition: hash.cpp:60
iterator & operator++()
Definition: hash.hpp:308
T * Get(int p1, int p2)
Item accessor with key (or parents) the pair &#39;p1&#39;, &#39;p2&#39;. Default construct an item of type T if no va...
Definition: hash.hpp:596
HashFunction & AppendBytes(const void *seq, size_t num_bytes)
Add a sequence of bytes for hashing.
Definition: hash.hpp:481
const_iterator(const base &it)
Definition: hash.hpp:322
void DeleteAll()
Remove all items.
Definition: hash.hpp:807
int SearchList(int id, int p1, int p2) const
Search the index of the item associated to the key (p1,p2) starting from the item with index id...
Definition: hash.hpp:713
HashFunction & AppendDoubles(const double *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
Definition: hash.hpp:508
int Hash(size_t p1, size_t p2) const
hash function for Hashed2 items.
Definition: hash.hpp:364
void CheckRehash()
Check table fill factor and resize if necessary.
Definition: hash.hpp:737
iterator begin()
Definition: array.hpp:601
int BinSize(int idx) const
Return the size of the bin "idx".
Definition: hash.hpp:891
HashFunction()
Default constructor: initialize the hash function.
Definition: hash.cpp:29
const_iterator & operator++()
Definition: hash.hpp:328
int NumFreeIds() const
Return the number of free/unused ids in the HashTable.
Definition: hash.hpp:227
double b
Definition: lissajous.cpp:42
void Reparent(int id, int new_p1, int new_p2)
Change the key associated with an item.
Definition: hash.hpp:846
T * Find(int p1, int p2)
Find item whose parents are p1, p2... Return NULL if it doesn&#39;t exist.
Definition: hash.hpp:671
iterator(const base &it)
Definition: hash.hpp:302
HashFunction & AppendDoubles(const double(&doubles)[num_doubles])
Add a sequence of doubles for hashing, given as a fixed-size c-array.
Definition: hash.hpp:515
iterator end()
Definition: hash.hpp:336
int * table
Definition: hash.hpp:345
int Hash(const Hashed4 &item) const
Hash function for items of type T that inherit from Hashed4.
Definition: hash.hpp:386
BlockArray< T > Base
Definition: hash.hpp:84
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
int next
Definition: hash.hpp:38
HashFunction & AppendInts(const int_type *ints, size_t num_ints)
Add a sequence of integers for hashing, given as a c-array.
Definition: hash.hpp:488
iterator begin()
Definition: hash.hpp:335
const_iterator cbegin() const
Definition: array.hpp:604
double a
Definition: lissajous.cpp:41
int FindId(int p1, int p2) const
Find id of item whose parents are p1, p2... Return -1 if it doesn&#39;t exist.
Definition: hash.hpp:699
bool IdExists(int id) const
Return true if item &#39;id&#39; exists in (is used by) the container.
Definition: hash.hpp:234
void Copy(Array &copy) const
Create a copy of the internal array to the provided copy.
Definition: array.hpp:861
int NumIds() const
Return the total number of ids (used and unused) in the HashTable.
Definition: hash.hpp:224
int Size() const
Return the logical size of the array.
Definition: array.hpp:141
HashFunction & AppendInts(const int_type_container &ints)
Add a sequence of integers for hashing, given as a container.
Definition: hash.hpp:502
~HashFunction()
Destructor.
Definition: hash.cpp:38
int GetId(int p1, int p2)
Get id of item whose parents are p1, p2... Create it if it doesn&#39;t exist.
Definition: hash.hpp:608
HashTable & operator=(const HashTable &)=delete
Copy assignment not supported.
int Size() const
Return the number of elements currently stored in the HashTable.
Definition: hash.hpp:221
HashFunction & AppendDoubles(const double_container &doubles)
Add a sequence of doubles for hashing, given as a container.
Definition: hash.hpp:522
Base::iterator base
Definition: hash.hpp:299
Base::const_iterator base
Definition: hash.hpp:319
T & At(int index)
Access item of the array.
Definition: array.hpp:491
int next
Definition: hash.hpp:29
void DoRehash()
Double the size of the hash table (i.e., double the number of bins) and reinsert all items into the n...
Definition: hash.hpp:749
void Insert(int idx, int id, T &item)
Insert the item &#39;id&#39; into bin &#39;idx&#39;.
Definition: hash.hpp:772