ํฐ์คํ ๋ฆฌ ๋ทฐ
์๋ฃ๊ตฌ์กฐ: Hash
dirmathfl 2021. 1. 10. 15:28ํด์์ ๋ํ ๊ฐ๋ ๋ณด๋ค๋ C๋ก ํด์๋ฅผ ๊ตฌํํ๊ณ , data type์ ์์ ๋กญ๊ฒ ์ฌ์ฉํ ์ ์๋๋ก ๊ตฌํ์ ์ธก๋ฉด์์ ํด์๋ฅผ ๋ค๋ฃจ๊ณ ์ ํ๋ค. key-value pair๋ฅผ ๊ตฌ์กฐ์ฒด์์ ๋ณ๋๋ก ๋ช ์ํ์ง ์๊ณ , entry์ ๋ค๋ฅธ ๊ตฌ์กฐ์ฒด๋ฅผ ํฌ์ธํ ํจ์ผ๋ก์จ ๋ ๋ง์ ์๋ฃ๋ฅผ ๋ค๋ฃฐ ์ ์๋๋ก ๊ตฌํํ์๋ค.
Hash?
ํด์๋ ์์ ๊ทธ๋ฆผ๊ณผ ๊ฐ์ด, key ๊ฐ์ ์์ฑํ๋ `ํด์ ํจ์(hash function)`๋ฅผ ํตํด ๋ฐฐ์ด์ ์ด๋ค ๊ฐ์ด ์๋์ง ์ฆ๊ฐ์ ์ ์ฐพ์ ์ ์๋ ์๋ฃ๊ตฌ์กฐ์ด๋ค. ํธ๋ฆฌ์ ๊ฒฝ์ฐ ํ์ชฝ์ผ๋ก ํ์์ ํ์ฌ ๋ฒ์๋ฅผ ์ขํ๊ฐ๋ฉด์ ํ์์ ํ๋ค. ํ์ง๋ง ํด์์ ๊ฒฝ์ฐ ํด์ ํจ์๋ฅผ ํตํด, ์ฐพ๊ณ ์ ํ๋ ๊ฐ์ key๋ฅผ ์ฆ๊ฐ์ ์ผ๋ก ์ ์ ์์ผ๋ฏ๋ก ํ์์ ๋งค์ฐ ํจ์จ์ ์ธ ์๋ฃ๊ตฌ์กฐ์ด๋ค.
ํ์ง๋ง ํด์์์๋ ๋ฌธ์ ์ ์ ์๋ค. `์ถฉ๋(Collision)`์ด ๋ฐ์ํ๋ ๊ฒฝ์ฐ, ์ด๋ค ์์ผ๋ก ์ฒ๋ฆฌํ๋๋์ ๋ฐ๋ผ, ํจ์จ์ฑ์ด ๊ธ๊ฒฉํ ๋ฌ๋ผ์ง๋ค. ๋ค์ ๋งํ๋ฉด, ์ถฉ๋์ ์ ๊ฒ ์ผ์ผํค๋ ํด์ ํจ์์ ์ฑ๋ฅ์ ๋ฐ๋ผ ํด๋น ์๋ฃ๊ตฌ์กฐ์ ์ฑ๋ฅ์ด ์ข์ฐ๋๋ค. ๊ตฌํ์์๋ `Jenkins hash function`์ ์ฌ์ฉํ์๋ค.
์ฝ๋
header
#ifndef HASH_H
#define HASH_H
typedef struct entry {
struct entry *next;
char data[0];
} entry_t;
typedef struct {
int size;
entry_t **table;
} hash_t;
hash_t *init_hash(unsigned int table_size, int data_size);
void free_hash(hash_t *hash);
void ins_data_in_hash(hash_t *hash, const void *data, const void *key,
int data_size, int (*compare)(const void *, const void *));
void *get_data_in_hash(hash_t *hash, const void *key, int (*compare)(const void *, const void *));
void del_data_in_hash(hash_t *hash, const void *key, int (*compare)(const void *, const void *));
void print_hash(hash_t *hash, void (*print_item)(const void *));
#endif
entry๊ฐ ํฌ์ธํ ํ๋ ๊ฐ์ ํ๋๋ก ์ง์ ํ์ง ์๊ณ , `char data[0]`๋ก ์ ์ธํ์๋ค. ๋ํ, ์ฃผ์ ํจ์์ ์ธ์๋ฅผ `const void *` ํ์ ์ผ๋ก ์ ์ธํ์ฌ `data type`์ ์ข ์์ ์ด์ง ์๋๋ก ๊ตฌํํ์๋ค.
init
hash_t *init_hash(unsigned int table_size, int data_size)
{
unsigned int i;
hash_t *hash = NULL;
/* hash ๊ตฌ์กฐ์ฒด ํ ๋น */
if ((hash = (hash_t *)malloc(sizeof(hash_t))) == NULL) {
//error logging
return NULL;
}
/* hash์ ํ
์ด๋ธ ๊ตฌ์กฐ์ฒด ํ ๋น */
if ((hash->table = malloc((sizeof(entry_t) + data_size) * table_size)) == NULL) {
//error logging
return NULL;
}
for (i = 0; i < table_size; i++)
hash->table[i] = NULL;
hash->size = table_size;
return hash;
}
`init`๋ฅผ ํ๊ธฐ ์ํด์๋ ๋ค๋ฅธ ๋ถ๋ถ์ ํฌ๊ฒ ์ ๊ฒฝ ์ธ ํ์๊ฐ ์๊ณ , `entry`์ ํ ๋นํ๋ ๊ตฌ์กฐ์ฒด ๋๋ ๋ฐ์ดํฐ ํ์ ์ ํฌ๊ธฐ๋ฅผ ๋ช ์ํ์ฌ ํ์ ์ ์๊ด์์ด ๊ณต๊ฐ์ ์ฌ์ฉํ ์ ์๋๋ก ํ์ฌ์ผ ํ๋ค.
free
static void free_chaining_entry(entry_t *entry)
{
if (entry->next)
free_chaning(entry->next);
free(entry);
}
static void free_all_entry(hash_t *hash)
{
unsigned int i;
for (i = 0; i < hash->size; i++) {
if (hash->table[i] == NULL)
continue;
free_chaining_entry(hash->table[i]);
}
}
void free_hash(hash_t *hash)
{
free_all_entry(hash);
free(hash->table);
free(hash);
}
`entry`์ data๊ฐ ํฌ์ธํ ํ ๊ฐ๋ ๋์ ํ ๋น์ผ๋ก ๊ตฌํํ์๊ธฐ์, ๊ฐ `entry`์ ๋ํด `free`๋ฅผ ์งํํ์ฌ์ผ ํ๋ค. ๋จ์ํ ํด์ ํ ์ด๋ธ์ ๋ํด์๋ง `free`๋ฅผ ํ๊ฒ ๋๋ฉด ๋ฉ๋ชจ๋ฆฌ ๋์๊ฐ ๋ฐ์ํ๋ค. ๋ฐ๋ผ์ `free_chaining_entry`์ ๊ฐ์ด ์ฌ๊ท ํธ์ถ์ ํตํด ์ฐ๊ฒฐ๋ `entry`๋ค์ด ์๋์ง ํ์ธํ์ฌ, ๋ชจ๋ ํด์ ํ์ฌ์ผ ํ๋ค.
insert
static int get_hash_key(hash_t *hash, const void *key);
static entry_t *init_entry(const void *data, int data_size);
void ins_data_in_hash(hash_t *hash, const void *data, const void *key,
int data_size, int (*compare_key)(const void *, const void *))
{
unsigned int hash_key = 0;
entry_t *new = NULL;
entry_t *next = NULL;
entry_t *last = NULL;
hash_key = get_hash_key(hash, key);
next = hash->table[hash_key];
while (next && next->data && compare_key(next->data, key) < 0) {
last = next;
next = next->next;
}
/* ์ด๋ฏธ, ์ฝ์
ํ๊ณ ์ ํ๋ ๋ฐ์ดํฐ๊ฐ ์๋ ๊ฒฝ์ฐ */
if (next && next->data && compare_key(next->data, key) == 0)
memcpy(next->data, data, data_size);
/* ์ฝ์
ํ๊ณ ์ ํ๋ ๋ฐ์ดํฐ๊ฐ ์๋ ๊ฒฝ์ฐ */
else {
new = init_entry(data, data_size);
if (next == hash->table[hash_key]) {
new->next = next;
hash->table[hash_key] = new;
} else if (next == NULL)
last->next = new;
else {
new->next = next;
last->next = new;
}
}
}
ํ์ฌ `table`์ ์ฝ์ ํ๊ณ ์ ํ๋ ๊ฐ์ด ์กด์ฌํ๋์ง ์ฌ๋ถ์ ๋ฐ๋ผ, ์ฒ๋ฆฌํ๋ ๋ก์ง์ด ๋ฌ๋ผ์ง๊ฒ ๋๋ค. `compare_key`๋ ๋ค๋ฅธ ํจ์๋ฅผ ํฌ์ธํ ํ๋ฏ๋ก ์ฒ๋ฆฌํ๊ณ ์ ํ๋ data type์ ๋ฐ๋ผ ํจ์๋ฅผ ๋ณ๋๋ก ์ ์ธํ์ฌ์ผ ํ๋ค.
delete
void del_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void *, const void *))
{
unsigned int hash_key;
entry_t *entry = NULL;
hash_key = get_hash_key(hash, key);
entry = hash->table[hash_key];
if (!entry)
return;
// ์ธ๋ฑ์ค ์ฒซ ๋ฒ์งธ ๊ฐ์ด ์ง์ฐ๊ณ ์ ํ๋ ๊ฐ์ธ ๊ฒฝ์ฐ
if (entry->data && compare_key(entry->data, key) == 0) {
hash->table[hash_key] = NULL;
free(entry);
return;
// ๋๋จธ์ง ๊ฒฝ์ฐ
} else {
entry_t *next = entry->next;
entry_t *last = NULL;
while (next && next->data && compare_key(next->data, key) != 0) {
last = next;
next = next->next;
}
if (next == NULL)
return;
last->next = next->next;
free(next);
}
}
ํน์ ๊ฐ์ ํด์ ํ ์ด๋ธ์์ ์ญ์ ํ๊ธฐ ์ํด์๋ ํ ์ด๋ธ์ ์ฒซ ๋ฒ์งธ ๊ฐ๊ณผ ์ผ์นํ๋ ๊ฒฝ์ฐ๋ ๋จ์ํ ์ญ์ ํ๋ฉด ๋๋ค. ํ์ง๋ง `chainig`์ด ๋ ๊ฒฝ์ฐ๋ผ๋ฉด, ์ญ์ ๋ ๊ฐ์ ์ ์ธํ๊ณ ๋๋จธ์ง ํฌ์ธํ ๋๋ ๋ถ๋ถ๋ค์ ์ฐ๊ฒฐํด ์ฃผ์ด์ผ ํ๋ค.
get_data
void *get_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void*, const void *))
{
unsigned int hash_key = 0;
entry_t *entry = NULL;
hash_key = get_hash_key(hash, key);
entry = hash->table[hash_key];
while (entry && entry->data && compare_key(entry->data, key) < 0) {
entry = entry->next;
}
if (entry == NULL || entry->data == NULL || compare_key(entry->data, key) != 0) {
return NULL;
}
return entry->data;
}
`key`๋ฅผ ํตํด ํด์ ํ ์ด๋ธ ๋ด์ ๋ด๊ฐ ์ฐพ๊ณ ์ ํ๋ ๊ฐ์ ์ฐพ๋๋ค. ์์ ๊ตฌ์กฐ์ฒด ์ ์ธ์์ `entry`์ `data`๋ ๋ค๋ฅธ ๊ตฌ์กฐ์ฒด ๋๋ ๋ณ์๋ฅผ ํฌ์ธํ ํ๋ฏ๋ก ํจ์์ ๋ฐํ ๊ฐ์ ํตํด `data`์ ๋ฐ๋ก ์ ๊ทผํ ์ ์๋ค.
hash function
static unsigned int get_hash_key(hash_t *hash, const void *key)
{
/* Jeckins hash function.
* http://en.wikipedia.org/wiki/Jeckins_hash_function
*/
unsigned int i;
unsigned int hash_key;
char *data_key = (char *)key;
for (hash_key = i = 0; i < strlen(data_key); i++) {
hash_key += data_key[i], hash_key += (hash_key << 10), hash_key ^= (hash_key >> 6);
}
hash_key += (hash_key << 3), hash_key ^= (hash_key >> 11), hash_key += (hash_key << 15);
return hash_key % hash->size;
}
`Jeckins hash function`์ ๊ทธ๋๋ก ๊ตฌํํ์ฌ, ํด์ ํจ์๋ก ์ฌ์ฉํ์๋ค.
์ ์ฒด ์ฝ๋
#include "common.h"
#include "hash.h"
hash_t *init_hash(unsigned int table_size, int data_size)
{
unsigned int i;
hash_t *hash = NULL;
/* Alloc the hash */
if ((hash = (hash_t *)malloc(sizeof(hash_t))) == NULL) {
//error logging
return NULL;
}
/* Alloc pointer to the head nodes. */
if ((hash->table = malloc((sizeof(entry_t) + data_size) * table_size)) == NULL) {
//error logging
return NULL;
}
for (i = 0; i < table_size; i++)
hash->table[i] = NULL;
hash->size = table_size;
return hash;
}
static void free_all_entry(hash_t *hash);
void free_hash(hash_t *hash)
{
free_all_entry(hash);
free(hash->table);
free(hash);
}
static unsigned int get_hash_key(hash_t *hash, const void *key);
static entry_t *init_entry(const void *data, int data_size);
void ins_data_in_hash(hash_t *hash, const void *data, const void *key,
int data_size, int (*compare_key)(const void *, const void *))
{
unsigned int hash_key = 0;
entry_t *new = NULL;
entry_t *next = NULL;
entry_t *last = NULL;
hash_key = get_hash_key(hash, key);
next = hash->table[hash_key];
while (next && next->data && compare_key(next->data, key) < 0) {
last = next;
next = next->next;
}
if (next && next->data && compare_key(next->data, key) == 0)
memcpy(next->data, data, data_size);
else {
new = init_entry(data, data_size);
if (next == hash->table[hash_key]) {
new->next = next;
hash->table[hash_key] = new;
} else if (next == NULL)
last->next = new;
else {
new->next = next;
last->next = new;
}
}
}
void *get_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void*, const void *))
{
unsigned int hash_key = 0;
entry_t *entry = NULL;
hash_key = get_hash_key(hash, key);
entry = hash->table[hash_key];
while (entry && entry->data && compare_key(entry->data, key) < 0) {
entry = entry->next;
}
if (entry == NULL || entry->data == NULL || compare_key(entry->data, key) != 0) {
return NULL;
}
return entry->data;
}
void del_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void *, const void *))
{
unsigned int hash_key;
entry_t *entry = NULL;
hash_key = get_hash_key(hash, key);
entry = hash->table[hash_key];
if (!entry)
return;
if (entry->data && compare_key(entry->data, key) == 0) {
hash->table[hash_key] = NULL;
free(entry);
return;
} else {
entry_t *next = entry->next;
entry_t *last = NULL;
while (next && next->data && compare_key(next->data, key) != 0) {
last = next;
next = next->next;
}
if (next == NULL)
return;
last->next = next->next;
free(next);
}
}
static void print_chaining_entry(entry_t *entry, void (*print_data)(const void *));
void print_hash(hash_t *hash, void (*print_data)(const void *))
{
unsigned int i;
for (i = 0; i < hash->size; i++) {
if (hash->table[i] == NULL)
continue;
print_chaining_entry(hash->table[i], print_data);
}
}
/* Private functions */
static unsigned int get_hash_key(hash_t *hash, const void *key)
{
/* Jeckins hash function.
* http://en.wikipedia.org/wiki/Jeckins_hash_function
*/
unsigned int i;
unsigned int hash_key;
char *data_key = (char *)key;
for (hash_key = i = 0; i < strlen(data_key); i++) {
hash_key += data_key[i], hash_key += (hash_key << 10), hash_key ^= (hash_key >> 6);
}
hash_key += (hash_key << 3), hash_key ^= (hash_key >> 11), hash_key += (hash_key << 15);
return hash_key % hash->size;
}
static void print_chaining_entry(entry_t *entry, void (*print_data)(const void *))
{
if (entry->next)
print_chaining_entry(entry->next, print_data);
print_data(entry->data);
}
static entry_t *init_entry(const void *data, int data_size)
{
entry_t *entry = (entry_t *)malloc(sizeof(entry_t) + data_size);
if (entry == NULL) {
// error logging
return NULL;
}
entry->next = NULL;
memcpy(entry->data, data, data_size);
return entry;
}
static void free_chaining_entry(entry_t *entry)
{
if (entry->next)
free_chaining_entry(entry->next);
free(entry);
}
static void free_all_entry(hash_t *hash)
{
int i;
for (i = 0; i < hash->size; i++) {
if (hash->table[i] == NULL)
continue;
free_chaining_entry(hash->table[i]);
}
}
ํ ์คํธ
#include "common.h"
#include "hash.h"
#define MAX_TABLE_SIZE 10
#define MAX_NAME_LEN 30
typedef struct info {
char name[MAX_NAME_LEN];
int age;
} info_t;
void init_info(info_t *temp, const char *name, int age)
{
memset(temp, 0, sizeof(info_t));
memcpy(temp->name, name, MAX_NAME_LEN);
temp->age = age;
}
int compare_name(const void *entry, const void *name)
{
return strcmp(((info_t *)entry)->name, ((char *)name));
}
void print_info_data(const void *entry)
{
info_t *info = (info_t *)entry;
printf("%s, %d\n", info->name, info->age);
}
void ins_info_data(hash_t *hash, info_t *temp)
{
info_t *info = (info_t *)get_data_in_hash(hash, temp->name, compare_name);
if (!info)
ins_data_in_hash(hash, temp, temp->name, sizeof(info_t), compare_name);
}
int main()
{
hash_t *hash = NULL;
hash = init_hash(MAX_TABLE_SIZE, sizeof(info_t));
info_t temp;
init_info(&temp, "L", 15);
ins_info_data(hash, &temp);
init_info(&temp, "B", 18);
ins_info_data(hash, &temp);
print_hash(hash, print_info_data);
free_hash(hash);
}
์์ ๋ก๋ ๊ฐ๋จํ๊ฒ ํ ์คํธ ํด๋ณด์๋ค. ์์๋ก ํด์ ํ ์ด๋ธ ํ๋์ ์ธ๋ฑ์ค์ ์ฒด์ด๋๋ ํด๋ณด๊ณ , ์ฌ๋ฌ ๊ฒฝ์ฐ์ ์๋ฅผ ๋ชจ๋ ํ ์คํธํ์๋๋ฐ ์ด์ ์์ด ๋์ํ์๋ค.
'๐๏ธโโ๏ธ ๊ธฐ๋ฐ ๋ค์ง๊ธฐ > ์๋ฃ๊ตฌ์กฐ์ ์๊ณ ๋ฆฌ์ฆ' ์นดํ ๊ณ ๋ฆฌ์ ๋ค๋ฅธ ๊ธ
์๋ฃ๊ตฌ์กฐ : Binary Search Tree (0) | 2021.01.17 |
---|---|
์๊ณ ๋ฆฌ์ฆ: ์ ๋ ฌ (0) | 2020.06.17 |
์๊ณ ๋ฆฌ์ฆ: DFS์ BFS (0) | 2020.06.16 |
์๊ณ ๋ฆฌ์ฆ: Recursion (0) | 2020.06.16 |
์๋ฃ๊ตฌ์กฐ: Linked List (0) | 2020.06.15 |