This is a small on-disk graph library, which I find very convenient whenever I am dealing with huge graphs.
Back a year or so ago, I could not find any disk-based graph library, which was the motivation for the present code.
It uses gdbm for persistent data storage, meaning that the graph size is only limited by your storage space, but beware, as operations on huge graphs will take a long time.
You should either compile this code with a Linux variant or compatible.
Adjacency lists are kept sorted and the look-up uses a binary search.
I needed to create a Graph that could scale and run a DFS on it, that's the reason why DFS is the only algorithm available to walk the graph.
There is this function
bpresearch taken from Minix.
Everything else is
GPLv3.
I've used it on several projects successfully, but it is alpha code so use carefully if at all, and you should probably rewrite
bpresearch if you want to work with a GPLv3 licence.
graph.h:
#define GRAPH_EDGES_FILE "graph_edges.dbf"
#define GRAPH_VISITED_FILE "graph_visited.dbf"
typedef struct graph{
unsigned long int nnodes;
unsigned long int nedges;
GDBM_FILE edges;
GDBM_FILE visited;
size_t size; // size of one data element
int (*compar)(const void* n1, const void* n2);
} graph_t;
/* Call this function if you want to create a new graph */
int graph_init (graph_t* g_ptr, const char* edges_file, size_t size, int (*compar)(const void* n1, const void* n2));
int graph_restore (graph_t* g_ptr, const char* edges_file, size_t size, int (*compar)(const void* n1, const void* n2));
void graph_close (graph_t* g_ptr);
int graph_node_exists (graph_t* g_ptr, void* n);
int graph_edge_exists (graph_t* g_ptr, void* n1, void* n2);
int graph_add_node (graph_t* g_ptr, void* n);
int graph_add_edge (graph_t* g_ptr, void* n1, void* n2);
int graph_del_node (graph_t* g_ptr, void* n);
int graph_del_edge (graph_t* g_ptr, void* n1, void* n2);
void* graph_first_node (graph_t* g_ptr);
void* graph_next_node (graph_t* g_ptr, void* n);
/* calls @process on every node of the search */
void graph_dfs (graph_t* g_ptr, const void* n, void (*process)(const void* n));
/* calls @process_first on the first node of every component and calls @process_all on every node. The first node is passed to both functions */
void graph_cc (graph_t* g_ptr, void (*process_first)(const void* n), void (*process_all)(const void* n));
graph.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <gdbm.h>
#include "graph.h"
int graph_init (graph_t* g_ptr, const char* edges_file, size_t size, int (*compar)(const void* n1, const void* n2)){
g_ptr->nnodes = 0L;
g_ptr->nedges = 0L;
g_ptr->edges = gdbm_open ((char*)edges_file, 0/* use default */, GDBM_NEWDB|GDBM_SYNC, 0600, 0);
g_ptr->visited = gdbm_open (GRAPH_VISITED_FILE, 0/* use default */, GDBM_NEWDB|GDBM_SYNC, 0600, 0);
if (!g_ptr->edges || !g_ptr->visited){
fprintf (stderr, "(EE) Failed creating graph file(s)\n");
return (-1);
}
g_ptr->size = size;
g_ptr->compar = compar;
return 0;
}
int graph_restore (graph_t* g_ptr, const char* edges_file, size_t size, int (*compar)(const void* n1, const void* n2)){
g_ptr->edges = gdbm_open ((char*)edges_file, 0/* use default */, GDBM_WRITER|GDBM_SYNC, 0600, 0);
g_ptr->visited = gdbm_open (GRAPH_VISITED_FILE, 0/* use default */, GDBM_NEWDB|GDBM_SYNC, 0600, 0);
g_ptr->size = size;
if (!g_ptr->edges || !g_ptr->visited){
fprintf (stderr, "(EE) Failed opening graph file(s)\n");
return (-1);
}
unsigned long int nnodes = 0L;
unsigned long int nedges = 0L;
datum key = gdbm_firstkey (g_ptr->edges);
datum data;
while (key.dptr){
data = gdbm_fetch (g_ptr->edges, key);
datum nextkey = gdbm_nextkey (g_ptr->edges, key);
nnodes += 1;
nedges += data.dsize / g_ptr->size;
free (key.dptr);
free (data.dptr);
key = nextkey;
}
g_ptr->nnodes = nnodes;
g_ptr->nedges = nedges;
g_ptr->size = size;
g_ptr->compar = compar;
return 0;
}
void graph_close (graph_t* g_ptr){
gdbm_close (g_ptr->edges);
gdbm_close (g_ptr->visited);
unlink (GRAPH_VISITED_FILE);
}
int graph_node_exists (graph_t* g_ptr, void* n){
datum key = {(char*)n, g_ptr->size};
return gdbm_exists (g_ptr->edges, key);
}
int graph_edge_exists (graph_t* g_ptr, void* n1, void* n2){
if (!graph_node_exists (g_ptr, n1) || !graph_node_exists (g_ptr, n2))
return 0;
int result;
datum key = {(char*)n1, g_ptr->size};
datum data = gdbm_fetch (g_ptr->edges, key);
void* edges = (void*)data.dptr;
size_t len = data.dsize / g_ptr->size;
if (bsearch (n2, edges, len, g_ptr->size, g_ptr->compar))
result = 1;
else
result = 0;
if (data.dsize)
free (edges);
return result;
}
int graph_add_node (graph_t* g_ptr, void* n){
datum key = {(char*)n, g_ptr->size};
datum data = {"", 1};
int result = gdbm_store (g_ptr->edges, key, data, GDBM_INSERT);
if (0 == result)
g_ptr->nnodes += 1;
return result;
}
/*
* Searches an array of nmemb objects, the initial
* member of which is pointed to by base, for a member that would follow the
* object pointed to by key.
* Modified from Minix bsearch
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
* $Header: /opt/proj/minix/cvsroot/src/lib/ansi/bsearch.c,v 1.1.1.1 2005/04/21 14:56:04 beng Exp $
*/
static void* bpresearch (const void* key, const void* base, size_t nmemb, size_t size, int (*compar)(const void*, const void*)){
register const void* mid_point = base;
int moreflag = 0;
while (nmemb > 0){
mid_point = (char*)base + size * (nmemb >> 1);
if (compar (key, mid_point) > 0){
base = (char*)mid_point + size;
nmemb = (nmemb - 1) >> 1;
moreflag = 1;
}else{
nmemb >>= 1;
moreflag = 0;
}
}
return (void*)((char*)mid_point + moreflag * size);
}
static void insert_sorted (void* dest, const void* key, const void* base, size_t nmemb, size_t size, int (*compar)(const void*, const void*)){
void* follow = bpresearch (key, base, nmemb, size, compar);
const char* bse = base;
char* dst = dest;
size_t first_half = (char*)follow - bse;
size_t second_half = (bse + nmemb * size) - (char*)follow;
memcpy (dst, bse, first_half);
memcpy (dst + first_half, key, size);
memcpy (dst + first_half + size, bse + first_half, second_half);
}
int graph_add_edge (graph_t* g_ptr, void* n1, void* n2){
if (0 == memcmp (n1, n2, g_ptr->size)
|| !graph_node_exists (g_ptr, n1)
|| !graph_node_exists (g_ptr, n2)
|| graph_edge_exists (g_ptr, n1, n2))
return -1;
datum key = {(char*)n1, g_ptr->size};
datum data = gdbm_fetch (g_ptr->edges, key);
void* edges = (void*)data.dptr;
size_t new_size = data.dsize + g_ptr->size;
char new_edges[new_size];
insert_sorted (new_edges, n2, edges, data.dsize/g_ptr->size, g_ptr->size, g_ptr->compar);
datum new_data = {new_edges, new_size};
if (data.dsize)
free (edges);
int result = gdbm_store (g_ptr->edges, key, new_data, GDBM_REPLACE);
if (0 == result)
g_ptr->nedges += 1;
return result;
}
int graph_del_node (graph_t* g_ptr, void* n){
if (!graph_node_exists (g_ptr, n))
return -1;
datum key = {(char*)n, g_ptr->size};
int result = gdbm_delete (g_ptr->edges, key);
if (0 == result)
g_ptr->nnodes -= 1;
return result;
}
static void remove_sorted (void* dest, const void* key, const void* base, size_t nmemb, size_t size, int (*compar)(const void*, const void*)){
void* ptr = bsearch (key, base, nmemb, size, compar);
if (!ptr) return;
const char* bse = base;
char* dst = dest;
size_t first_half = (char*)ptr - bse;
size_t second_half = (bse + nmemb * size) - ((char*)ptr + size);
memcpy (dst, bse, first_half);
memcpy (dst + first_half, bse + first_half + size, second_half);
}
int graph_del_edge (graph_t* g_ptr, void* n1, void* n2){
if (!graph_edge_exists (g_ptr, n1, n2))
return -1;
datum key = {(char*)n1, g_ptr->size};
datum data = gdbm_fetch (g_ptr->edges, key);
void* edges = (void*)data.dptr;
size_t new_size = data.dsize - g_ptr->size;
datum new_data;
if (0 == new_size){
new_data.dptr = "";
new_data.dsize = 1;
}else{
char new_edges[new_size];
remove_sorted (new_edges, n2, edges, data.dsize/g_ptr->size, g_ptr->size, g_ptr->compar);
new_data.dptr = new_edges;
new_data.dsize = new_size;
}
if (data.dsize)
free (edges);
int result = gdbm_store (g_ptr->edges, key, new_data, GDBM_REPLACE);
if (0 == result)
g_ptr->nedges -= 1;
return result;
}
void* graph_first_node (graph_t* g_ptr){
datum key = gdbm_firstkey (g_ptr->edges);
return (void*)key.dptr;
}
void* graph_next_node (graph_t* g_ptr, void* n){
datum prevkey = {(char*)n, g_ptr->size};
datum key = gdbm_nextkey (g_ptr->edges, prevkey);
return (void*)key.dptr;
}
static int is_visited (graph_t* g_ptr, const void* n){
datum key = {(char*)n, g_ptr->size};
return gdbm_exists (g_ptr->visited, key);
}
static void mark_visited (graph_t* g_ptr, const void* n){
datum key = {(char*)n, g_ptr->size};
datum data = {"", 1};
gdbm_store (g_ptr->visited, key, data, GDBM_REPLACE);
}
static void reset_visited (graph_t* g_ptr){
gdbm_close (g_ptr->visited);
g_ptr->visited = gdbm_open (GRAPH_VISITED_FILE, 0/* use default */, GDBM_NEWDB|GDBM_SYNC, 0600, 0);
}
static void dfs (graph_t* g_ptr, const void* n, void (*process)(const void* n)){
mark_visited (g_ptr, n);
process (n);
datum key = {(char*)n, g_ptr->size};
datum data = gdbm_fetch (g_ptr->edges, key);
void* edges = (void*)data.dptr;
size_t len = data.dsize / g_ptr->size;
for (int i=0; i<len; i+=1){
if (!is_visited (g_ptr, edges)){
dfs (g_ptr, edges, process);
edges = (char*)edges + g_ptr->size;
}
}
if (data.dptr)
free (data.dptr);
}
void graph_dfs (graph_t* g_ptr, const void* n, void (*process)(const void* n)){
reset_visited (g_ptr);
dfs (g_ptr, n, process);
}
/* Iterate the graph and find connected components
The function @process_first is called on the first node of a connected component - this node can be any node of the component.
@process_all is called on every other node
*/
void graph_cc (graph_t* g_ptr, void (*process_first)(const void* n), void (*process_all)(const void* n)){
reset_visited (g_ptr);
void* node = graph_first_node (g_ptr);
while (node){
void* nextnode = graph_next_node (g_ptr, node);
if (!is_visited (g_ptr, node)){
process_first (node);
dfs (g_ptr, node, process_all);
}
free (node);
node = nextnode;
}
}