I released the initial stable version of Kyoto Cabinet on 25th May 2010. This entry is an introduction to Kyoto Cabinet.
Features
Now that all of my plans have been achieved, Kyoto Cabinet has the following features. Windows support is especially notable.
- time efficiency: Throughput of updating is more than 100 million queries per second.
- space efficiency: Footprint for each record is 8-16 bytes in the hash DB, 2-4 bytes in the tree DB.
- concurrency: The hash DB uses read-write lock for each record. The tree DB uses read-write lock for each page.
- usability: Generic database operations are provided through an interface modeled on the "Visitor" pattern.
- robustness: Manual transaction, auto transaction, and auto recovery are provided.
- portability: UNIX-like systems (Linux, FreeBSD, Solaris, Mac OS X) and Windows (VC++) are supported.
- language bindings: C++, C, Java, Python, Ruby, and Perl are supported.
Compared with Tokyo Cabinet, KC is superior in concurrency, usability, and portability. Although single-threaded time efficiency is better in TC, I recommend KC from now on because multi-core/many-core CPUs have become popular. However, I will keep on maintaining TC and will fix bugs if they are found.
Language Bindings
I recommend Java first. The typical use cases I expect for KC are mainly in the backends of large Web services. I think that Java is the most popular language, apart from C++, in such use cases. Because concurrency is a major selling point of KC, concurrent runtime environments are preferred.
Python and Ruby come second. Although each has a GIL (global interpreter lock) to guard its API from race conditions, KC provides a "concurrent mode" which uses the API to unlock the GIL temporarily while native functions are called.
Perl is also supported. However, the Perl binding is not thread-safe for ithread (the thread mechanism of Perl). Because ithread is a "green thread" mechanism, which is implemented in user land, native threading primitives such as locking and thread-local storage do not work.
Getting Started
Download the latest version from the homepage. Then read the installation section and the tutorial section. Choose your favorite language and write some sample code.
Basic Example
Although all of the following code samples are described in the documentation for each language, I place them here so that they can be compared with each other. All bindings conform to the common interface defined in IDL.
C++
#include <kcpolydb.h>
using namespace std;
using namespace kyotocabinet;
// main routine
int main(int argc, char** argv) {
// create the database object
PolyDB db;
// open the database
if (!db.open("casket.kch", PolyDB::OWRITER | PolyDB::OCREATE)) {
cerr << "open error: " << db.error().name() << endl;
}
// store records
if (!db.set("foo", "hop") ||
!db.set("bar", "step") ||
!db.set("baz", "jump")) {
cerr << "set error: " << db.error().name() << endl;
}
// retrieve a record
string* value = db.get("foo");
if (value) {
cout << *value << endl;
delete value;
} else {
cerr << "get error: " << db.error().name() << endl;
}
// traverse records
DB::Cursor* cur = db.cursor();
cur->jump();
pair<string, string>* rec;
while ((rec = cur->get_pair(true)) != NULL) {
cout << rec->first << ":" << rec->second << endl;
delete rec;
}
delete cur;
// close the database
if (!db.close()) {
cerr << "close error: " << db.error().name() << endl;
}
return 0;
}
Java
import kyotocabinet.*;
/**
 * Basic Kyoto Cabinet sample: stores three records in casket.kch, reads one
 * of them back, prints every record through a cursor, and closes the
 * database. Each failure is reported on standard error and the program
 * keeps going.
 */
public class KCDBEX1 {
  public static void main(String[] args) {
    // create the object
    DB db = new DB();

    // open the database as a writer, passing the create flag as well
    if (!db.open("casket.kch", DB.OWRITER | DB.OCREATE)) {
      System.err.println("open error: " + db.error());
    }

    // store records; evaluation stops at the first failing call
    if (!db.set("foo", "hop") ||
        !db.set("bar", "step") ||
        !db.set("baz", "jump")) {
      System.err.println("set error: " + db.error());
    }

    // retrieve a record
    String value = db.get("foo");
    if (value != null) {
      System.out.println(value);
    } else {
      // a failed lookup is reported as a "get" error, matching the
      // samples for the other languages
      System.err.println("get error: " + db.error());
    }

    // traverse records; each element read is a {key, value} pair and the
    // true argument asks the cursor to step after each read
    Cursor cur = db.cursor();
    cur.jump();
    String[] rec;
    while ((rec = cur.get_str(true)) != null) {
      System.out.println(rec[0] + ":" + rec[1]);
    }
    cur.disable();

    // close the database
    if (!db.close()) {
      System.err.println("close error: " + db.error());
    }
  }
}
Python
from kyotocabinet import *
import sys
# Basic sample: stores three records in casket.kch, reads one of them back,
# prints every record through a cursor, and closes the database. Each
# failure is reported on standard error and the script keeps going.

# create the database object
db = DB()

# open the database as a writer, passing the create flag as well
if not db.open("casket.kch", DB.OWRITER | DB.OCREATE):
    print("open error: " + str(db.error()), file=sys.stderr)

# store records; evaluation stops at the first failing call
if not db.set("foo", "hop") or \
        not db.set("bar", "step") or \
        not db.set("baz", "jump"):
    print("set error: " + str(db.error()), file=sys.stderr)

# retrieve a record; compare against None explicitly so that a record whose
# value is the empty string is printed instead of being reported as an error
value = db.get_str("foo")
if value is not None:
    print(value)
else:
    print("get error: " + str(db.error()), file=sys.stderr)

# traverse records; the True argument asks the cursor to step after each read
cur = db.cursor()
cur.jump()
while True:
    rec = cur.get_str(True)
    if rec is None:
        break
    print(rec[0] + ":" + rec[1])
cur.disable()

# close the database
if not db.close():
    print("close error: " + str(db.error()), file=sys.stderr)
Ruby
require 'kyotocabinet'
include KyotoCabinet
# Basic sample: stores three records in casket.kch, reads one of them back,
# prints every record through a cursor, and closes the database. Each
# failure is reported on standard error and the script keeps going.
store = DB.new

# open the database as a writer, passing the create flag as well
if !store.open('casket.kch', DB::OWRITER | DB::OCREATE)
  STDERR.printf("open error: %s\n", store.error)
end

# store three records; evaluation stops at the first failing call
stored = store.set('foo', 'hop') && store.set('bar', 'step') && store.set('baz', 'jump')
if !stored
  STDERR.printf("set error: %s\n", store.error)
end

# look up one record
found = store.get('foo')
if !found
  STDERR.printf("get error: %s\n", store.error)
else
  printf("%s\n", found)
end

# walk the records; the true argument asks the cursor to step after each read
cursor = store.cursor
cursor.jump
while true
  pair = cursor.get(true)
  break unless pair
  printf("%s:%s\n", pair[0], pair[1])
end
cursor.disable

# close the database
if !store.close
  STDERR.printf("close error: %s\n", store.error)
end
Perl
use KyotoCabinet;
# Basic sample: stores three records in casket.kch, reads one of them back,
# prints every record through a cursor, and closes the database. Each
# failure is reported on standard error and the script keeps going.
my $db = KyotoCabinet::DB->new;

# open the database as a writer, passing the create flag as well
unless ($db->open('casket.kch', $db->OWRITER | $db->OCREATE)) {
    printf STDERR ("open error: %s\n", $db->error);
}

# store three records; evaluation stops at the first failing call
unless ($db->set('foo', 'hop') &&
        $db->set('bar', 'step') &&
        $db->set('baz', 'jump')) {
    printf STDERR ("set error: %s\n", $db->error);
}

# look up one record
my $found = $db->get('foo');
if (!defined($found)) {
    printf STDERR ("get error: %s\n", $db->error);
} else {
    printf("%s\n", $found);
}

# walk the records; the argument 1 asks the cursor to step after each read,
# and the loop ends when the cursor returns an empty list
my $cursor = $db->cursor;
$cursor->jump;
while (my ($k, $v) = $cursor->get(1)) {
    printf("%s:%s\n", $k, $v);
}
$cursor->disable;

# close the database
unless ($db->close) {
    printf STDERR ("close error: %s\n", $db->error);
}
Visitor Pattern
All database classes of KC have methods to operate on records like an associative array. "set", "remove", and "get" are typical. More complex methods such as "increment" and "cas" are also provided by default. However, you may want to use your own operations. The "visitor" pattern is preferable for that purpose. Define a visitor object and pass it to the "accept" method so that the callback method defined by the visitor is executed atomically with the record data.
Atomicity is a key feature in a multi-threaded environment. While one thread is incrementing a record value through several method calls such as "get" and "set", another thread may update the same record at the same time. In that case, the former operation gets overwritten and is lost. KC solves the problem with the "accept" method, which is guarded by record locking.
The following are examples in Java and Ruby. Other language bindings also support the visitor pattern.
Java
import kyotocabinet.*;
/**
 * Visitor sample: opens casket.kch as a reader, hands one visitor to
 * "accept" for two keys and to "iterate" for the whole database, then
 * closes the database. Failures are reported on standard error.
 */
public class KCDBEX2 {
  public static void main(String[] args) {
    // create the object
    DB db = new DB();

    // open the database as a reader
    boolean opened = db.open("casket.kch", DB.OREADER);
    if (!opened) {
      System.err.println("open error: " + db.error());
    }

    // the visitor: prints existing records, reports missing keys, and
    // returns NOP from both callbacks
    Visitor visitor = new Visitor() {
      public byte[] visit_full(byte[] key, byte[] value) {
        String keyText = new String(key);
        String valueText = new String(value);
        System.out.println(keyText + ":" + valueText);
        return NOP;
      }

      public byte[] visit_empty(byte[] key) {
        String keyText = new String(key);
        System.err.println(keyText + " is missing");
        return NOP;
      }
    };

    // visit two individual keys; the second call is skipped when the
    // first one fails
    boolean accepted = db.accept("foo".getBytes(), visitor, false)
        && db.accept("dummy".getBytes(), visitor, false);
    if (!accepted) {
      System.err.println("accept error: " + db.error());
    }

    // visit every record
    boolean iterated = db.iterate(visitor, false);
    if (!iterated) {
      System.err.println("iterate error: " + db.error());
    }

    // close the database
    boolean closed = db.close();
    if (!closed) {
      System.err.println("close error: " + db.error());
    }
  }
}
Ruby
require 'kyotocabinet'
include KyotoCabinet
# Visitor sample: opens casket.kch as a reader, hands one visitor to
# "accept" for two keys and to "iterate" for the whole database, then
# closes the database. Failures are reported on standard error.
store = DB.new

# open the database as a reader
if !store.open('casket.kch', DB::OREADER)
  STDERR.printf("open error: %s\n", store.error)
end

# the visitor: prints existing records, reports missing keys, and
# answers NOP from both callbacks
class VisitorImpl < Visitor
  # invoked with the key and value of an existing record
  def visit_full(key, value)
    printf("%s:%s\n", key, value)
    NOP
  end

  # invoked with the key when no record exists for it
  def visit_empty(key)
    STDERR.printf("%s is missing\n", key)
    NOP
  end
end
handler = VisitorImpl.new

# visit two individual keys; the second call is skipped when the first fails
accepted = store.accept("foo", handler, false) && store.accept("dummy", handler, false)
if !accepted
  STDERR.printf("accept error: %s\n", store.error)
end

# visit every record
if !store.iterate(handler, false)
  STDERR.printf("iterate error: %s\n", store.error)
end

# close the database
if !store.close
  STDERR.printf("close error: %s\n", store.error)
end
Popular scripting languages provide "closure" mechanisms, and you can use a closure as a visitor instead of an object derived through class inheritance. The following is an example in Python.
from kyotocabinet import *
import sys
def dbproc(db):
    """Exercise the database handle passed in by DB.process.

    Stores three records, prints one of them, rewrites 'foo' inside a
    transaction callback, doubles it through accept, prints and upper-cases
    every record, and finally prints every record through a cursor.
    """
    # store records under a bytes key, a string key and a numeric key
    db[b'foo'] = b'step'
    db['bar'] = 'hop'
    db[3] = 'jump'

    # print the value stored under 'foo'
    print("{}".format(db['foo'].decode()))

    # rewrite 'foo' from inside a transaction callback that returns True
    def store_number():
        db['foo'] = 2.71828
        return True
    db.transaction(store_number)

    # replace the value of 'foo' with twice its numeric value
    db.accept('foo', lambda key, value: float(value) * 2)

    # print every record by iterating over the keys
    for key in db:
        key_text = key.decode()
        value_text = db[key].decode()
        print("{}:{}".format(key_text, value_text))

    # replace every value with its upper-cased form
    db.iterate(lambda key, value: value.upper())

    # print every record through a cursor; the inner visitor returns
    # Visitor.NOP, and the loop steps until accept reports failure
    def walk(cur):
        cur.jump()

        def show(key, value):
            print("{}:{}".format(key.decode(), value.decode()))
            return Visitor.NOP

        while cur.accept(show):
            cur.step()
    db.cursor_process(walk)


# run dbproc against casket.kch
DB.process(dbproc, 'casket.kch')
Conclusion
Kyoto Cabinet is a powerful tool for operating on persistent associative arrays or key-value storage. Its time and space efficiency are great. KC is easy to use in most popular languages and provides extreme flexibility through the visitor pattern. Please try it; you will probably take to it.