ํ‹ฐ์Šคํ† ๋ฆฌ ๋ทฐ

728x90
๋ฐ˜์‘ํ˜•
ํ•ด์‹œ์— ๋Œ€ํ•œ ๊ฐœ๋…๋ณด๋‹ค๋Š” C๋กœ ํ•ด์‹œ๋ฅผ ๊ตฌํ˜„ํ•˜๊ณ , data type์— ์ž์œ ๋กญ๊ฒŒ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋„๋ก ๊ตฌํ˜„์  ์ธก๋ฉด์—์„œ ํ•ด์‹œ๋ฅผ ๋‹ค๋ฃจ๊ณ ์ž ํ•œ๋‹ค. key-value pair๋ฅผ ๊ตฌ์กฐ์ฒด์—์„œ ๋ณ„๋„๋กœ ๋ช…์‹œํ•˜์ง€ ์•Š๊ณ , entry์— ๋‹ค๋ฅธ ๊ตฌ์กฐ์ฒด๋ฅผ ํฌ์ธํŒ…ํ•จ์œผ๋กœ์จ ๋” ๋งŽ์€ ์ž๋ฃŒ๋ฅผ ๋‹ค๋ฃฐ ์ˆ˜ ์žˆ๋„๋ก ๊ตฌํ˜„ํ•˜์˜€๋‹ค.

 

Hash?

https://yourbasic.org/algorithms/hash-tables-explained/

 ํ•ด์‹œ๋Š” ์œ„์˜ ๊ทธ๋ฆผ๊ณผ ๊ฐ™์ด, key ๊ฐ’์„ ์ƒ์„ฑํ•˜๋Š” `ํ•ด์‹œ ํ•จ์ˆ˜(hash function)`๋ฅผ ํ†ตํ•ด ๋ฐฐ์—ด์— ์–ด๋–ค ๊ฐ’์ด ์žˆ๋Š”์ง€ ์ฆ‰๊ฐ์ ์„ ์ฐพ์„ ์ˆ˜ ์žˆ๋Š” ์ž๋ฃŒ๊ตฌ์กฐ์ด๋‹ค. ํŠธ๋ฆฌ์˜ ๊ฒฝ์šฐ ํ•œ์ชฝ์œผ๋กœ ํƒ์ƒ‰์„ ํ•˜์—ฌ ๋ฒ”์œ„๋ฅผ ์ขํ˜€๊ฐ€๋ฉด์„œ ํƒ์ƒ‰์„ ํ•œ๋‹ค. ํ•˜์ง€๋งŒ ํ•ด์‹œ์˜ ๊ฒฝ์šฐ ํ•ด์‹œ ํ•จ์ˆ˜๋ฅผ ํ†ตํ•ด, ์ฐพ๊ณ ์ž ํ•˜๋Š” ๊ฐ’์˜ key๋ฅผ ์ฆ‰๊ฐ์ ์œผ๋กœ ์•Œ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ํƒ์ƒ‰์— ๋งค์šฐ ํšจ์œจ์ ์ธ ์ž๋ฃŒ๊ตฌ์กฐ์ด๋‹ค.

 

 ํ•˜์ง€๋งŒ ํ•ด์‹œ์—์„œ๋„ ๋ฌธ์ œ์ ์€ ์žˆ๋‹ค. `์ถฉ๋Œ(Collision)`์ด ๋ฐœ์ƒํ•˜๋Š” ๊ฒฝ์šฐ, ์–ด๋–ค ์‹์œผ๋กœ ์ฒ˜๋ฆฌํ•˜๋Š๋ƒ์— ๋”ฐ๋ผ, ํšจ์œจ์„ฑ์ด ๊ธ‰๊ฒฉํžˆ ๋‹ฌ๋ผ์ง„๋‹ค. ๋‹ค์‹œ ๋งํ•˜๋ฉด, ์ถฉ๋Œ์„ ์ ๊ฒŒ ์ผ์œผํ‚ค๋Š” ํ•ด์‹œ ํ•จ์ˆ˜์˜ ์„ฑ๋Šฅ์— ๋”ฐ๋ผ ํ•ด๋‹น ์ž๋ฃŒ๊ตฌ์กฐ์˜ ์„ฑ๋Šฅ์ด ์ขŒ์šฐ๋œ๋‹ค. ๊ตฌํ˜„์—์„œ๋Š” `Jenkins hash function`์„ ์‚ฌ์šฉํ•˜์˜€๋‹ค.

 

์ฝ”๋“œ

header

#ifndef HASH_H
#define HASH_H

typedef struct entry {
    struct entry *next;
    char data[0];
} entry_t;

typedef struct {
    int size;
    entry_t **table;
} hash_t;

hash_t *init_hash(unsigned int table_size, int data_size);
void free_hash(hash_t *hash);
void ins_data_in_hash(hash_t *hash, const void *data, const void *key,
            int data_size, int (*compare)(const void *, const void *));
void *get_data_in_hash(hash_t *hash, const void *key, int (*compare)(const void *, const void *));
void del_data_in_hash(hash_t *hash, const void *key, int (*compare)(const void *, const void *));
void print_hash(hash_t *hash, void (*print_item)(const void *));
#endif

 entry๊ฐ€ ํฌ์ธํŒ… ํ•˜๋Š” ๊ฐ’์„ ํ•˜๋‚˜๋กœ ์ง€์ •ํ•˜์ง€ ์•Š๊ณ , `char data[0]`๋กœ ์„ ์–ธํ•˜์˜€๋‹ค. ๋˜ํ•œ, ์ฃผ์š” ํ•จ์ˆ˜์˜ ์ธ์ž๋ฅผ `const void *` ํƒ€์ž…์œผ๋กœ ์„ ์–ธํ•˜์—ฌ `data type`์— ์ข…์†์ ์ด์ง€ ์•Š๋„๋ก ๊ตฌํ˜„ํ•˜์˜€๋‹ค.

 

init

hash_t *init_hash(unsigned int table_size, int data_size)
{
    unsigned int i;
    hash_t *hash = NULL;

    /* hash ๊ตฌ์กฐ์ฒด ํ• ๋‹น */
    if ((hash = (hash_t *)malloc(sizeof(hash_t))) == NULL) {
        //error logging
        return NULL;
    }

    /* hash์˜ ํ…Œ์ด๋ธ” ๊ตฌ์กฐ์ฒด ํ• ๋‹น */
    if ((hash->table = malloc((sizeof(entry_t) + data_size) * table_size)) == NULL) {
        //error logging
        return NULL;
    }

    for (i = 0; i < table_size; i++) 
         hash->table[i] = NULL;

    hash->size = table_size;

    return hash;
}

 `init`๋ฅผ ํ•˜๊ธฐ ์œ„ํ•ด์„œ๋Š” ๋‹ค๋ฅธ ๋ถ€๋ถ„์€ ํฌ๊ฒŒ ์‹ ๊ฒฝ ์“ธ ํ•„์š”๊ฐ€ ์—†๊ณ , `entry`์— ํ• ๋‹นํ•˜๋Š” ๊ตฌ์กฐ์ฒด ๋˜๋Š” ๋ฐ์ดํ„ฐ ํƒ€์ž…์˜ ํฌ๊ธฐ๋ฅผ ๋ช…์‹œํ•˜์—ฌ ํƒ€์ž…์— ์ƒ๊ด€์—†์ด ๊ณต๊ฐ„์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋„๋ก ํ•˜์—ฌ์•ผ ํ•œ๋‹ค.

 

free

static void free_chaining_entry(entry_t *entry)
{
    if (entry->next)
        free_chaning(entry->next);
    free(entry);
}

static void free_all_entry(hash_t *hash)
{
    unsigned int i;

    for (i = 0; i < hash->size; i++) {
        if (hash->table[i] == NULL)
            continue;
		
        free_chaining_entry(hash->table[i]);
    }
}

void free_hash(hash_t *hash)
{
    free_all_entry(hash);
    free(hash->table);
    free(hash);
}

 `entry`์˜ data๊ฐ€ ํฌ์ธํŒ…ํ•  ๊ฐ’๋„ ๋™์  ํ• ๋‹น์œผ๋กœ ๊ตฌํ˜„ํ•˜์˜€๊ธฐ์—, ๊ฐ `entry`์— ๋Œ€ํ•ด `free`๋ฅผ ์ง„ํ–‰ํ•˜์—ฌ์•ผ ํ•œ๋‹ค. ๋‹จ์ˆœํžˆ ํ•ด์‹œ ํ…Œ์ด๋ธ”์— ๋Œ€ํ•ด์„œ๋งŒ `free`๋ฅผ ํ•˜๊ฒŒ ๋˜๋ฉด ๋ฉ”๋ชจ๋ฆฌ ๋ˆ„์ˆ˜๊ฐ€ ๋ฐœ์ƒํ•œ๋‹ค. ๋”ฐ๋ผ์„œ `free_chaining_entry`์™€ ๊ฐ™์ด ์žฌ๊ท€ ํ˜ธ์ถœ์„ ํ†ตํ•ด ์—ฐ๊ฒฐ๋œ `entry`๋“ค์ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์—ฌ, ๋ชจ๋‘ ํ•ด์ œํ•˜์—ฌ์•ผ ํ•œ๋‹ค.

 

 insert

static int get_hash_key(hash_t *hash, const void *key);
static entry_t *init_entry(const void *data, int data_size);
void ins_data_in_hash(hash_t *hash, const void *data, const void *key,
        int data_size, int (*compare_key)(const void *, const void *))
{
    unsigned int hash_key = 0;
    entry_t *new = NULL;
    entry_t *next = NULL;
    entry_t *last = NULL;

    hash_key = get_hash_key(hash, key);
    next = hash->table[hash_key];

    while (next && next->data && compare_key(next->data, key) < 0) {
        last = next;
        next = next->next;
    }

    /* ์ด๋ฏธ, ์‚ฝ์ž…ํ•˜๊ณ ์ž ํ•˜๋Š” ๋ฐ์ดํ„ฐ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ */
    if (next && next->data && compare_key(next->data, key) == 0)
        memcpy(next->data, data, data_size);
    /* ์‚ฝ์ž…ํ•˜๊ณ ์ž ํ•˜๋Š” ๋ฐ์ดํ„ฐ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ */
    else {
        new = init_entry(data, data_size);

        if (next == hash->table[hash_key]) {
            new->next = next;
            hash->table[hash_key] = new;
        } else if (next == NULL)
            last->next = new;
        else {
            new->next = next;
            last->next = new;
        }
    }
}

 ํ˜„์žฌ `table`์— ์‚ฝ์ž…ํ•˜๊ณ ์ž ํ•˜๋Š” ๊ฐ’์ด ์กด์žฌํ•˜๋Š”์ง€ ์—ฌ๋ถ€์— ๋”ฐ๋ผ, ์ฒ˜๋ฆฌํ•˜๋Š” ๋กœ์ง์ด ๋‹ฌ๋ผ์ง€๊ฒŒ ๋œ๋‹ค. `compare_key`๋Š” ๋‹ค๋ฅธ ํ•จ์ˆ˜๋ฅผ ํฌ์ธํŒ… ํ•˜๋ฏ€๋กœ ์ฒ˜๋ฆฌํ•˜๊ณ ์ž ํ•˜๋Š” data type์— ๋”ฐ๋ผ ํ•จ์ˆ˜๋ฅผ ๋ณ„๋„๋กœ ์„ ์–ธํ•˜์—ฌ์•ผ ํ•œ๋‹ค.

 

delete

void del_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void *, const void *))
{
    unsigned int hash_key;
    entry_t *entry = NULL;

    hash_key = get_hash_key(hash, key);

    entry = hash->table[hash_key];

    if (!entry)
        return;

    // ์ธ๋ฑ์Šค ์ฒซ ๋ฒˆ์งธ ๊ฐ’์ด ์ง€์šฐ๊ณ ์ž ํ•˜๋Š” ๊ฐ’์ธ ๊ฒฝ์šฐ
    if (entry->data && compare_key(entry->data, key) == 0) {
        hash->table[hash_key] = NULL;
        free(entry);
        return;
    // ๋‚˜๋จธ์ง€ ๊ฒฝ์šฐ
    } else {
        entry_t *next = entry->next;
        entry_t *last = NULL;

        while (next && next->data && compare_key(next->data, key) != 0) {
            last = next;
            next = next->next;
        }

        if (next == NULL)
            return;

        last->next = next->next;
        free(next);
    }
}

 ํŠน์ • ๊ฐ’์„ ํ•ด์‹œ ํ…Œ์ด๋ธ”์—์„œ ์‚ญ์ œํ•˜๊ธฐ ์œ„ํ•ด์„œ๋Š” ํ…Œ์ด๋ธ”์˜ ์ฒซ ๋ฒˆ์งธ ๊ฐ’๊ณผ ์ผ์น˜ํ•˜๋Š” ๊ฒฝ์šฐ๋Š” ๋‹จ์ˆœํžˆ ์‚ญ์ œํ•˜๋ฉด ๋œ๋‹ค. ํ•˜์ง€๋งŒ `chainig`์ด ๋œ ๊ฒฝ์šฐ๋ผ๋ฉด, ์‚ญ์ œ๋œ ๊ฐ’์„ ์ œ์™ธํ•˜๊ณ  ๋‚˜๋จธ์ง€ ํฌ์ธํŒ… ๋˜๋Š” ๋ถ€๋ถ„๋“ค์„ ์—ฐ๊ฒฐํ•ด ์ฃผ์–ด์•ผ ํ•œ๋‹ค.

 

get_data

void *get_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void*, const void *))
{
    unsigned int hash_key = 0;
    entry_t *entry = NULL;

    hash_key = get_hash_key(hash, key);

    entry = hash->table[hash_key];

    while (entry && entry->data && compare_key(entry->data, key) < 0) {
        entry = entry->next;
    }

    if (entry == NULL || entry->data == NULL || compare_key(entry->data, key) != 0) {
        return NULL;
    }

    return entry->data;
}

 `key`๋ฅผ ํ†ตํ•ด ํ•ด์‹œ ํ…Œ์ด๋ธ” ๋‚ด์— ๋‚ด๊ฐ€ ์ฐพ๊ณ ์ž ํ•˜๋Š” ๊ฐ’์„ ์ฐพ๋Š”๋‹ค. ์œ„์˜ ๊ตฌ์กฐ์ฒด ์„ ์–ธ์—์„œ `entry`์— `data`๋Š” ๋‹ค๋ฅธ ๊ตฌ์กฐ์ฒด ๋˜๋Š” ๋ณ€์ˆ˜๋ฅผ ํฌ์ธํŒ… ํ•˜๋ฏ€๋กœ ํ•จ์ˆ˜์˜ ๋ฐ˜ํ™˜ ๊ฐ’์„ ํ†ตํ•ด `data`์— ๋ฐ”๋กœ ์ ‘๊ทผํ•  ์ˆ˜ ์žˆ๋‹ค.

 

hash function

static unsigned int get_hash_key(hash_t *hash, const void *key)
{
    /* Jeckins hash function. 
     * http://en.wikipedia.org/wiki/Jeckins_hash_function
     */
    unsigned int i;
    unsigned int hash_key;
    char *data_key = (char *)key;

    for (hash_key = i = 0; i < strlen(data_key); i++) {
        hash_key += data_key[i], hash_key += (hash_key << 10), hash_key ^= (hash_key >> 6);
    }

    hash_key += (hash_key << 3), hash_key ^= (hash_key >> 11), hash_key += (hash_key << 15);	

    return hash_key % hash->size;
}

 `Jeckins hash function`์„ ๊ทธ๋Œ€๋กœ ๊ตฌํ˜„ํ•˜์—ฌ, ํ•ด์‹œ ํ•จ์ˆ˜๋กœ ์‚ฌ์šฉํ•˜์˜€๋‹ค.

 

์ „์ฒด ์ฝ”๋“œ

#include "common.h"
#include "hash.h"

hash_t *init_hash(unsigned int table_size, int data_size)
{
    unsigned int i;
    hash_t *hash = NULL;

    /* Alloc the hash */
    if ((hash = (hash_t *)malloc(sizeof(hash_t))) == NULL) {
        //error logging
        return NULL;
    }

    /* Alloc pointer to the head nodes. */
    if ((hash->table = malloc((sizeof(entry_t) + data_size) * table_size)) == NULL) {
        //error logging
        return NULL;
    }

    for (i = 0; i < table_size; i++)
        hash->table[i] = NULL;

    hash->size = table_size;

    return hash;
}

static void free_all_entry(hash_t *hash);
void free_hash(hash_t *hash)
{
    free_all_entry(hash);
    free(hash->table);
    free(hash);
}

static unsigned int get_hash_key(hash_t *hash, const void *key);
static entry_t *init_entry(const void *data, int data_size);
void ins_data_in_hash(hash_t *hash, const void *data, const void *key,
        int data_size, int (*compare_key)(const void *, const void *))
{
    unsigned int hash_key = 0;
    entry_t *new = NULL;
    entry_t *next = NULL;
    entry_t *last = NULL;

    hash_key = get_hash_key(hash, key);
    next = hash->table[hash_key];


    while (next && next->data && compare_key(next->data, key) < 0) {
        last = next;
        next = next->next;
    }

    if (next && next->data && compare_key(next->data, key) == 0)
        memcpy(next->data, data, data_size);
    else {
        new = init_entry(data, data_size);

        if (next == hash->table[hash_key]) {
            new->next = next;
            hash->table[hash_key] = new;
        } else if (next == NULL)
            last->next = new;
        else {
           new->next = next;
            last->next = new;
        }
    }
}

void *get_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void*, const void *))
{
    unsigned int hash_key = 0;
    entry_t *entry = NULL;

    hash_key = get_hash_key(hash, key);
    entry = hash->table[hash_key];

    while (entry && entry->data && compare_key(entry->data, key) < 0) {
        entry = entry->next;
    }

    if (entry == NULL || entry->data == NULL || compare_key(entry->data, key) != 0) {
        return NULL;
    }

    return entry->data;
}

void del_data_in_hash(hash_t *hash, const void *key, int (*compare_key)(const void *, const void *))
{
    unsigned int  hash_key;
    entry_t *entry = NULL;

    hash_key = get_hash_key(hash, key);

    entry = hash->table[hash_key];

    if (!entry)
        return;

    if (entry->data && compare_key(entry->data, key) == 0) {
        hash->table[hash_key] = NULL;
        free(entry);
        return;
    } else {
        entry_t *next = entry->next;
        entry_t *last = NULL;

        while (next && next->data && compare_key(next->data, key) != 0) {
            last = next;
            next = next->next;
        }

        if (next == NULL)
            return;

        last->next = next->next;
        free(next);
    }
}

static void print_chaining_entry(entry_t *entry, void (*print_data)(const void *));
void print_hash(hash_t *hash, void (*print_data)(const void *))
{
    unsigned int i;
    for (i = 0; i < hash->size; i++) {
        if (hash->table[i] == NULL)
            continue;

        print_chaining_entry(hash->table[i], print_data);
    }
}

/* Private functions */
static unsigned int get_hash_key(hash_t *hash, const void *key)
{
    /* Jeckins hash function. 
     * http://en.wikipedia.org/wiki/Jeckins_hash_function
     */
    unsigned int i;
    unsigned int hash_key;
    char *data_key = (char *)key;

    for (hash_key = i = 0; i < strlen(data_key); i++) {
        hash_key += data_key[i], hash_key += (hash_key << 10), hash_key ^= (hash_key >> 6);
    }

    hash_key += (hash_key << 3), hash_key ^= (hash_key >> 11), hash_key += (hash_key << 15);

    return hash_key % hash->size;
}

static void print_chaining_entry(entry_t *entry, void (*print_data)(const void *))
{
    if (entry->next)
        print_chaining_entry(entry->next, print_data);

    print_data(entry->data);
}

static entry_t *init_entry(const void *data, int data_size)
{
    entry_t *entry = (entry_t *)malloc(sizeof(entry_t) + data_size);

    if (entry == NULL) {
        // error logging
        return NULL;
    }

    entry->next = NULL;
    memcpy(entry->data, data, data_size);

    return entry;
}


static void free_chaining_entry(entry_t *entry)
{
    if (entry->next)
        free_chaining_entry(entry->next);
    free(entry);
}

static void free_all_entry(hash_t *hash)
{
    int i;

    for (i = 0; i < hash->size; i++) {
        if (hash->table[i] == NULL)
            continue;

        free_chaining_entry(hash->table[i]);
    }
}

 

ํ…Œ์ŠคํŠธ

#include "common.h"
#include "hash.h"

#define MAX_TABLE_SIZE 10
#define MAX_NAME_LEN 30

typedef struct info {
    char name[MAX_NAME_LEN];
    int age;
} info_t;

void init_info(info_t *temp, const char *name, int age)
{
    memset(temp, 0, sizeof(info_t));
    memcpy(temp->name, name, MAX_NAME_LEN);
    temp->age = age;
}

int compare_name(const void *entry, const void *name)
{
    return strcmp(((info_t *)entry)->name, ((char *)name));
}

void print_info_data(const void *entry)
{
    info_t *info = (info_t *)entry;
    printf("%s, %d\n", info->name, info->age);
}

void ins_info_data(hash_t *hash, info_t *temp)
{
    info_t *info = (info_t *)get_data_in_hash(hash, temp->name, compare_name);
    
    if (!info)
        ins_data_in_hash(hash, temp, temp->name, sizeof(info_t), compare_name);
}

int main()
{
    hash_t *hash = NULL;
    hash = init_hash(MAX_TABLE_SIZE, sizeof(info_t));
    
    info_t temp;
    init_info(&temp, "L", 15);
    ins_info_data(hash, &temp);
    
    init_info(&temp, "B", 18);
    ins_info_data(hash, &temp);

    print_hash(hash, print_info_data);

    free_hash(hash);
}

 

valgrind ํ…Œ์ŠคํŠธ ๋ฐ ์‹คํ–‰ ๊ฒฐ๊ณผ

 

 ์˜ˆ์ œ๋กœ๋Š” ๊ฐ„๋‹จํ•˜๊ฒŒ ํ…Œ์ŠคํŠธ ํ•ด๋ณด์•˜๋‹ค. ์ž„์˜๋กœ ํ•ด์‹œ ํ…Œ์ด๋ธ” ํ•˜๋‚˜์˜ ์ธ๋ฑ์Šค์— ์ฒด์ด๋‹๋„ ํ•ด๋ณด๊ณ , ์—ฌ๋Ÿฌ ๊ฒฝ์šฐ์˜ ์ˆ˜๋ฅผ ๋ชจ๋‘ ํ…Œ์ŠคํŠธํ•˜์˜€๋Š”๋ฐ ์ด์ƒ ์—†์ด ๋™์ž‘ํ•˜์˜€๋‹ค.

 

728x90
๋ฐ˜์‘ํ˜•
๋Œ“๊ธ€
๊ธ€ ๋ณด๊ด€ํ•จ
์ตœ๊ทผ์— ์˜ฌ๋ผ์˜จ ๊ธ€
์ตœ๊ทผ์— ๋‹ฌ๋ฆฐ ๋Œ“๊ธ€