better Hash function

This commit is contained in:
2020-08-01 22:00:15 +02:00
parent 0ea49d42c9
commit b5082e571b
2 changed files with 44 additions and 31 deletions

View File

@ -8,7 +8,7 @@ if(CCACHE_PROGRAM)
set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}") set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
endif() endif()
set( CMAKE_EXPORT_COMPILE_COMMANDS ON ) set( CMAKE_EXPORT_COMPILE_COMMANDS ON )
set_property(TARGET acl PROPERTY C_STANDARD 99) set_property(TARGET acl PROPERTY C_STANDARD 11)
INCLUDE(TestBigEndian) INCLUDE(TestBigEndian)
TEST_BIG_ENDIAN(IS_BIG_ENDIAN) TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
if(IS_BIG_ENDIAN) if(IS_BIG_ENDIAN)

View File

@ -1,3 +1,12 @@
#include <math.h>
#include <acl/alignof.h>
#include <stddef.h>
#include <stdlib.h>
#include <acl/hashmap.h>
#include <acl/array.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#ifdef ACL_LITTLE_ENDIAN #ifdef ACL_LITTLE_ENDIAN
#define LEFT_OR_RIGHT_SHIFT >> #define LEFT_OR_RIGHT_SHIFT >>
#else #else
@ -7,13 +16,21 @@
#error "endianiss not specified. make sure to properly add the cmake subdirectory." #error "endianiss not specified. make sure to properly add the cmake subdirectory."
#endif #endif
#endif #endif
/*
 * acl_hashVarLen(type): one arm of the alignment dispatch used by acl_hash.
 *
 * Expands to a bare `if` statement with no `else`, so arms can be chained:
 *     acl_hashVarLen(uint64_t) else acl_hashVarLen(uint32_t) ...
 * The arm is taken when `data` is aligned for `type`.
 *
 * Names read from the enclosing scope:
 *     data      - pointer to the first key byte
 *     dest      - char* to the LAST key byte (inclusive bound)
 *     hashValue - uint64_t accumulator, XOR-updated in place
 *
 * The key is XOR-folded into hashValue in 8-byte groups, each group read as
 * sizeof(uint64_t) / sizeof(type) lanes of `type`. A group is only folded
 * lane-wise while more than 8 bytes remain, so the last 1..8 bytes are always
 * folded byte by byte into successive bytes of hashValue.
 *
 * The lanes live in a local array that is copied from/to hashValue with
 * memcpy, so hashValue itself is never accessed through a `type` pointer.
 * Remaining lengths are compared as ptrdiff_t, so no pointer beyond the
 * buffer is ever formed.
 */
#define acl_hashVarLen(type)\
	if(((uintptr_t)data & (alignof(type) - 1)) == 0) {\
		const unsigned char *acl_cursor = (const unsigned char*)data;\
		const unsigned char *acl_last = (const unsigned char*)dest;\
		type acl_lanes[sizeof(uint64_t) / sizeof(type)];\
		memcpy(acl_lanes, &hashValue, sizeof acl_lanes);\
		/* same bound as `cursor + 8 <= last`, written without overshooting */\
		while(acl_last - acl_cursor >= (ptrdiff_t)sizeof(uint64_t)) {\
			const type *acl_src = (const type*)(const void*)acl_cursor;\
			for(size_t acl_i = 0; acl_i < sizeof acl_lanes / sizeof acl_lanes[0]; ++acl_i) {\
				acl_lanes[acl_i] ^= acl_src[acl_i];\
			}\
			acl_cursor += sizeof(uint64_t);\
		}\
		memcpy(&hashValue, acl_lanes, sizeof acl_lanes);\
		/* at most 8 bytes are left here, so the byte cursor stays inside hashValue */\
		unsigned char *acl_hash_byte = (unsigned char*)&hashValue;\
		for(; acl_cursor <= acl_last; ++acl_cursor, ++acl_hash_byte) {\
			*acl_hash_byte ^= *acl_cursor;\
		}\
	}
size_t acl_hash(void *data, size_t dataSize, size_t bucketBits) { size_t acl_hash(void *data, size_t dataSize, size_t bucketBits) {
switch(dataSize) { switch(dataSize) {
@ -36,26 +53,16 @@ size_t acl_hash(void *data, size_t dataSize, size_t bucketBits) {
memcpy(&cache, data, dataSize); memcpy(&cache, data, dataSize);
return (uint64_t)(10223372036854775833u * cache >> (64 - bucketBits)); return (uint64_t)(10223372036854775833u * cache >> (64 - bucketBits));
} }
uint8_t rest = (uintptr_t)data % sizeof(uint64_t); uint64_t hashValue = 0;
size_t data_len = dataSize / sizeof(uint64_t); char * dest = (char*)data + dataSize - 1;
uint8_t restEnd = dataSize % sizeof(uint64_t); acl_hashVarLen(uint64_t)
if(restEnd) ++data_len; else acl_hashVarLen(uint32_t)
uint64_t *cache; else acl_hashVarLen(uint16_t)
if(rest) { else acl_hashVarLen(uint8_t)
cache = malloc(data_len * sizeof *cache); return hashValue * 10223372036854775833u >> (64 - bucketBits);
memcpy(cache, data, dataSize);
cache[data_len - 1] = 0;
}
else cache = data;
uint64_t hash = 0;
for(size_t i = 0; i < data_len - 1; ++i) {
hash ^= (uint64_t)(10223372036854775833u * cache[i]);
}
hash ^= (uint64_t)(10223372036854775833u * cache[data_len - 1] & (18446744073709551615u LEFT_OR_RIGHT_SHIFT (sizeof *cache - restEnd)));
free(cache);
return hash >> (64 - bucketBits);
} }
} }
union acl_hashmap_meta* acl_hashmap_create(size_t bucketCount, size_t sizeOneElement, size_t keySize) { union acl_hashmap_meta* acl_hashmap_create(size_t bucketCount, size_t sizeOneElement, size_t keySize) {
union acl_hashmap_meta *hashmap_meta = malloc(sizeof *hashmap_meta + bucketCount * sizeof(void*)); union acl_hashmap_meta *hashmap_meta = malloc(sizeof *hashmap_meta + bucketCount * sizeof(void*));
hashmap_meta->bucketCount = bucketCount; hashmap_meta->bucketCount = bucketCount;
@ -74,6 +81,12 @@ void acl_hashmap_put(union acl_hashmap_meta *hashmap_meta, void *key, void *elem
} }
#include <stdio.h> #include <stdio.h>
/*
 * Ad-hoc smoke test for acl_hash.
 *
 * Hashes the first 9 bytes of four identically initialised 10-byte buffers
 * with 5 bucket bits and prints each result. The buffers are separate stack
 * objects, so their addresses (and possibly their alignment) differ while
 * their contents are the same.
 */
int main(void) {
	char baum[10] = {1};
	char blau[10] = {1};
	char green[10] = {1};
	char w[10] = {1};
	/* acl_hash returns size_t, so the matching conversion is %zu, not %lu */
	printf("Hash: %zu\n", acl_hash(baum, 9 * sizeof *baum, 5));
	printf("Hash: %zu\n", acl_hash(blau, 9 * sizeof *blau, 5));
	printf("Hash: %zu\n", acl_hash(green, 9 * sizeof *green, 5));
	printf("Hash: %zu\n", acl_hash(w, 9 * sizeof *w, 5));
}