Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief generates a unique id number for a string (str2id) that can be used
10 : !> two compare two strings. I.e.
11 : !> if (str1==str2) => str2id(str1)==str2id(str2)
12 : !> if (str1.NE.str2) => str2id(str1).NE.str2id(str2)
13 : !> and the other way around. Given an id, the string can be retrieved.
14 : !> \note
15 : !> the purpose of this routine is to speed up string handling,
16 : !> string searching, ... as an operation on an int is much faster than an
17 : !> operation on a long string.
18 : !> \par History
19 : !> 9.2006 [Joost VandeVondele]
20 : !> \author Joost VandeVondele
21 : ! **************************************************************************************************
22 : MODULE string_table
23 :
24 : USE kinds, ONLY: default_string_length,&
25 : int_8
26 : #include "../base/base_uses.f90"
27 :
28 : IMPLICIT NONE
29 :
30 : ! user functions
31 : PUBLIC :: str2id, id2str, s2s
32 :
33 : ! setup function
34 : PUBLIC :: string_table_allocate, string_table_deallocate
35 :
36 : PRIVATE
37 : ! For good performance, the hash table should be larger than the largest number
38 : ! of strings that will be saved, but the memory for an empty table is 16*hash_table_size
39 : ! the string_table should remain functional for up to ~ 2**32 strings
40 : INTEGER, PARAMETER :: Nbit = 16
41 : INTEGER, PARAMETER :: hash_table_size = 2**Nbit
42 :
43 : ! actual elements in the hash table
44 : INTEGER, SAVE :: actual_strings
45 : INTEGER, SAVE :: inserted_strings
46 :
47 : ! an element of the linked list of hashed strings
48 : ! **************************************************************************************************
49 : TYPE hash_element_type
50 : CHARACTER(LEN=default_string_length), POINTER :: str => NULL()
51 : TYPE(hash_element_type), POINTER :: next => NULL()
52 : END TYPE
53 :
54 : ! the array of linked lists of hashed strings
55 : TYPE(hash_element_type), SAVE, ALLOCATABLE, TARGET, DIMENSION(:) :: hash_table
56 :
57 : CONTAINS
58 :
59 : ! **************************************************************************************************
60 : !> \brief returns a unique id for a given string, and stores the string for
61 : !> later retrieval using the id.
62 : !> \param str the string to be stored (default_string_length)
63 : !> \return ...
64 : !> \par History
65 : !> 09.2006 created [Joost VandeVondele]
66 : !> \note
67 : !> pass literal strings using the s2s function,
68 : !> which converts strings of any length to default_string_length
69 : !> id=str2id(s2s("my short string"))
70 : ! **************************************************************************************************
71 4597856 : FUNCTION str2id(str) RESULT(id)
72 : CHARACTER(LEN=*) :: str
73 : INTEGER :: id
74 :
75 : INTEGER :: index, ipos
76 : TYPE(hash_element_type), POINTER :: this
77 :
78 4597856 : inserted_strings = inserted_strings + 1
79 : ! index is the index in the array, ipos is the Nth element of the linked list
80 4597856 : index = joaat_hash(str)
81 4597856 : ipos = 0
82 4597856 : this => hash_table(index)
83 5190 : DO ! walk the list
84 9005752 : IF (.NOT. ASSOCIATED(this%str)) THEN
85 : ! str was not in the linked list, add it now
86 195150 : ALLOCATE (this%str)
87 195150 : this%str = str
88 195150 : actual_strings = actual_strings + 1
89 195150 : EXIT
90 : ELSE
91 4407896 : IF (this%str == str) THEN
92 : ! str is in the list already
93 : EXIT
94 : ELSE
95 5190 : IF (.NOT. ASSOCIATED(this%next)) ALLOCATE (this%next)
96 5190 : ipos = ipos + 1
97 5190 : this => this%next
98 : END IF
99 : END IF
100 : END DO
101 4597856 : id = IOR(index, ISHFT(ipos, Nbit))
102 4597856 : END FUNCTION str2id
103 :
104 : ! **************************************************************************************************
105 : !> \brief returns the string associated with a given id
106 : !> \param id the id to be converted into a string
107 : !> \return ...
108 : !> \par History
109 : !> 09.2006 created [Joost VandeVondele]
110 : !> \note
111 : !> only id's of previously 'registered' strings (str2id) should be passed,
112 : !> otherwise things crash
113 : ! **************************************************************************************************
114 45919238 : FUNCTION id2str(id) RESULT(str)
115 : INTEGER :: id
116 : CHARACTER(LEN=default_string_length) :: str
117 :
118 : INTEGER :: i, index, ipos
119 : TYPE(hash_element_type), POINTER :: this
120 :
121 45919238 : index = IAND(id, 2**Nbit - 1)
122 45919238 : ipos = ISHFT(id, -Nbit)
123 45919238 : this => hash_table(index)
124 45941610 : DO i = 1, ipos
125 45941610 : this => this%next
126 : END DO
127 45919238 : str = this%str
128 45919238 : END FUNCTION id2str
129 :
130 : ! **************************************************************************************************
131 : !> \brief converts a string in a string of default_string_length
132 : !> \param str ...
133 : !> \return ...
134 : !> \par History
135 : !> 09.2006 created [Joost VandeVondele]
136 : !> \note
137 : !> useful to pass a literal string to str2id
138 : !> i.e. id=str2id(s2s("X"))
139 : ! **************************************************************************************************
140 4269238 : FUNCTION s2s(str) RESULT(res)
141 : CHARACTER(LEN=*) :: str
142 : CHARACTER(LEN=default_string_length) :: res
143 :
144 4269238 : res = str
145 4269238 : END FUNCTION s2s
146 :
147 : ! **************************************************************************************************
148 : !> \brief allocates the string table
149 : !> \par History
150 : !> 09.2006 created [Joost VandeVondele]
151 : !> \note
152 : !> this needs to be done only once at program startup, before any use
153 : !> of other procedures of this module. The scope of this table is global
154 : ! **************************************************************************************************
155 9174 : SUBROUTINE string_table_allocate()
156 601236438 : ALLOCATE (hash_table(0:hash_table_size - 1))
157 9174 : actual_strings = 0
158 9174 : inserted_strings = 0
159 9174 : END SUBROUTINE string_table_allocate
160 :
161 : ! **************************************************************************************************
162 : !> \brief deallocates the string table
163 : !> \param iw a unit to which some info about the table usage can be printed
164 : !> \par History
165 : !> 09.2006 created [Joost VandeVondele]
166 : !> \note
167 : !> This should be done before program termination, all associated ids become meaningless
168 : ! **************************************************************************************************
169 9174 : SUBROUTINE string_table_deallocate(iw)
170 : INTEGER, INTENT(IN) :: iw
171 :
172 : INTEGER :: i, ilist, ipos, ipos_max
173 : TYPE(hash_element_type), POINTER :: next, this
174 :
175 : ! clean up all the linked lists of entries
176 :
177 9174 : ipos_max = 0
178 9174 : ilist = 0
179 601236438 : DO i = 0, hash_table_size - 1
180 601227264 : ipos = 1
181 601227264 : IF (ASSOCIATED(hash_table(i)%str)) THEN
182 194046 : DEALLOCATE (hash_table(i)%str)
183 194046 : ilist = ilist + 1
184 : END IF
185 601227264 : this => hash_table(i)%next
186 601228368 : DO WHILE (ASSOCIATED(this))
187 1104 : ipos = ipos + 1
188 1104 : next => this%next
189 1104 : IF (ASSOCIATED(this%str)) DEALLOCATE (this%str)
190 1104 : DEALLOCATE (this)
191 1104 : this => next
192 : END DO
193 601236438 : ipos_max = MAX(ipos_max, ipos)
194 : END DO
195 9174 : DEALLOCATE (hash_table)
196 9174 : IF (iw > 0) THEN
197 0 : WRITE (iw, *) "string table: # inserted str = ", inserted_strings
198 0 : WRITE (iw, *) " # actual = ", actual_strings
199 0 : WRITE (iw, *) " # lists = ", ilist, " / ", hash_table_size
200 0 : WRITE (iw, *) " longest list = ", ipos_max
201 : END IF
202 9174 : actual_strings = 0
203 9174 : inserted_strings = 0
204 9174 : END SUBROUTINE string_table_deallocate
205 :
206 : ! **************************************************************************************************
207 : !> \brief generates the hash of a string and the index in the table
208 : !> \param key a string of any length
209 : !> \return ...
210 : !> \par History
211 : !> 09.2006 created [Joost VandeVondele]
212 : !> \note
213 : !> http://en.wikipedia.org/wiki/Hash_table
214 : !> http://www.burtleburtle.net/bob/hash/doobs.html
215 : !> However, since fortran doesn't have an unsigned 4 byte int
216 : !> we compute it using an integer with the appropriate range
217 : !> we return already the index in the table as a final result
218 : ! **************************************************************************************************
219 4597856 : FUNCTION joaat_hash(key) RESULT(hash_index)
220 : CHARACTER(LEN=*), INTENT(IN) :: key
221 : INTEGER :: hash_index
222 :
223 : INTEGER(KIND=int_8), PARAMETER :: b32 = 2_int_8**32 - 1_int_8
224 :
225 : INTEGER :: i
226 : INTEGER(KIND=int_8) :: hash
227 :
228 4597856 : hash = 0_int_8
229 375453868 : DO i = 1, LEN(key)
230 370856012 : hash = IAND(hash + ICHAR(key(i:i)), b32)
231 370856012 : hash = IAND(hash + IAND(ISHFT(hash, 10), b32), b32)
232 375453868 : hash = IAND(IEOR(hash, IAND(ISHFT(hash, -6), b32)), b32)
233 : END DO
234 4597856 : hash = IAND(hash + IAND(ISHFT(hash, 3), b32), b32)
235 4597856 : hash = IAND(IEOR(hash, IAND(ISHFT(hash, -11), b32)), b32)
236 4597856 : hash = IAND(hash + IAND(ISHFT(hash, 15), b32), b32)
237 : ! hash is the real 32bit hash value of the string,
238 : ! hash_index is an index in the hash_table
239 4597856 : hash_index = INT(MOD(hash, INT(hash_table_size, KIND=int_8)))
240 4597856 : END FUNCTION joaat_hash
241 0 : END MODULE string_table
|