MeCabはC/C++のライブラリを提供しています。また、SWIGを通して Perl/Ruby/Python から利用することも可能です。
シングルスレッド環境で単純な形態素解析を行う場合は、MeCab::Tagger クラスのみでほとんどのことが行えます。マルチスレッド環境で1つの辞書を共有しながら形態素解析を行いたい場合や、MeCabの辞書を解析中にアップデートするなど、高度な応用には MeCab::Taggerに加えて、 MeCab::Model, MeCab::Lattice クラスを使用します。
C++のAPIセットのドキュメントはこちらを参照ください。 CのAPIセットはこちらを参照ください。
シングルスレッド環境 (MeCab::Tagger)
#include <iostream> #include <mecab.h> #define CHECK(eval) if (! eval) { \ const char *e = tagger ? tagger->what() : MeCab::getTaggerError(); \ std::cerr << "Exception:" << e << std::endl; \ delete tagger; \ return -1; } // Sample of MeCab::Tagger class. int main (int argc, char **argv) { char input[1024] = "太郎は次郎が持っている本を花子に渡した。"; MeCab::Tagger *tagger = MeCab::createTagger(""); CHECK(tagger); // Gets tagged result in string format. const char *result = tagger->parse(input); CHECK(result); std::cout << "INPUT: " << input << std::endl; std::cout << "RESULT: " << result << std::endl; // Gets N best results in string format. result = tagger->parseNBest(3, input); CHECK(result); std::cout << "NBEST: " << std::endl << result; // Gets N best results in sequence. CHECK(tagger->parseNBestInit(input)); for (int i = 0; i < 3; ++i) { std::cout << i << ":" << std::endl << tagger->next(); } // Gets Node object. const MeCab::Node* node = tagger->parseToNode(input); CHECK(node); for (; node; node = node->next) { std::cout << node->id << ' '; if (node->stat == MECAB_BOS_NODE) std::cout << "BOS"; else if (node->stat == MECAB_EOS_NODE) std::cout << "EOS"; else std::cout.write (node->surface, node->length); std::cout << ' ' << node->feature << ' ' << (int)(node->surface - input) << ' ' << (int)(node->surface - input + node->length) << ' ' << node->rcAttr << ' ' << node->lcAttr << ' ' << node->posid << ' ' << (int)node->char_type << ' ' << (int)node->stat << ' ' << (int)node->isbest << ' ' << node->alpha << ' ' << node->beta << ' ' << node->prob << ' ' << node->cost << std::endl; } // Dictionary info. 
const MeCab::DictionaryInfo *d = tagger->dictionary_info(); for (; d; d = d->next) { std::cout << "filename: " << d->filename << std::endl; std::cout << "charset: " << d->charset << std::endl; std::cout << "size: " << d->size << std::endl; std::cout << "type: " << d->type << std::endl; std::cout << "lsize: " << d->lsize << std::endl; std::cout << "rsize: " << d->rsize << std::endl; std::cout << "version: " << d->version << std::endl; } delete tagger; return 0; }
マルチスレッド環境 (MeCab::Tagger, MeCab::Model, MeCab::Lattice)
#include <iostream> #include <mecab.h> #define CHECK(eval) if (! eval) { \ const char *e = tagger ? tagger->what() : MeCab::getTaggerError(); \ std::cerr << "Exception:" << e << std::endl; \ delete tagger; \ return -1; } int main (int argc, char **argv) { char input[1024] = "太郎は次郎が持っている本を花子に渡した。"; // Create model object. MeCab::Model *model = MeCab::createModel(argc, argv); // Create Tagger // All taggers generated by Model::createTagger() method share // the same model/dictoinary. MeCab::Tagger *tagger = model->createTagger(); CHECK(tagger); // Create lattice object per thread. MeCab::Lattice *lattice = model->createLattice(); // Gets tagged result in string lattice->set_sentence(input); // this method is thread safe, as long as |lattice| is thread local. CHECK(tagger->parse(lattice)); std::cout << lattice->toString() << std::endl; // Gets node object. const MeCab::Node* node = lattice->bos_node(); CHECK(node); for (; node; node = node->next) { std::cout << node->id << ' '; if (node->stat == MECAB_BOS_NODE) std::cout << "BOS"; else if (node->stat == MECAB_EOS_NODE) std::cout << "EOS"; else std::cout.write (node->surface, node->length); std::cout << ' ' << node->feature << ' ' << (int)(node->surface - input) << ' ' << (int)(node->surface - input + node->length) << ' ' << node->rcAttr << ' ' << node->lcAttr << ' ' << node->posid << ' ' << (int)node->char_type << ' ' << (int)node->stat << ' ' << (int)node->isbest << ' ' << node->alpha << ' ' << node->beta << ' ' << node->prob << ' ' << node->cost << std::endl; } // begin_nodes/end_nodes const size_t len = lattice->size(); for (int i = 0; i <= len; ++i) { MeCab::Node *b = lattice->begin_nodes(i); MeCab::Node *e = lattice->end_nodes(i); for (; b; b = b->bnext) { printf("B[%d] %s\t%s\n", i, b->surface, b->feature); } for (; e; e = e->enext) { printf("E[%d] %s\t%s\n", i, e->surface, e->feature); } } // N best results lattice->set_request_type(MECAB_NBEST); lattice->set_sentence(input); CHECK(tagger->parse(lattice)); for 
(int i = 0; i < 10; ++i) { std::cout << "NBEST: " << i << std::endl; std::cout << lattice->toString(); if (!lattice->next()) { // No more results break; } } // Marginal probabilities lattice->remove_request_type(MECAB_NBEST); lattice->set_request_type(MECAB_MARGINAL_PROB); lattice->set_sentence(input); CHECK(tagger->parse(lattice)); std::cout << lattice->theta() << std::endl; for (const MeCab::Node *node = lattice->bos_node(); node; node = node->next) { std::cout.write(node->surface, node->length); std::cout << "\t" << node->feature; std::cout << "\t" << node->prob << std::endl; } // Dictionary info const MeCab::DictionaryInfo *d = model->dictionary_info(); for (; d; d = d->next) { std::cout << "filename: " << d->filename << std::endl; std::cout << "charset: " << d->charset << std::endl; std::cout << "size: " << d->size << std::endl; std::cout << "type: " << d->type << std::endl; std::cout << "lsize: " << d->lsize << std::endl; std::cout << "rsize: " << d->rsize << std::endl; std::cout << "version: " << d->version << std::endl; } // Swap model atomically. MeCab::Model *another_model = MeCab::createModel(""); model->swap(another_model); delete lattice; delete tagger; delete model; return 0; }
#include <mecab.h>
#include <stdio.h>

/*
 * Aborts main() when `eval` is false/NULL: prints mecab_strerror(mecab),
 * destroys the tagger if one exists and returns -1.
 * Relies on a local `mecab` pointer being in scope. The argument is
 * parenthesised and the body wrapped in do/while(0) so the macro expands to
 * exactly one statement.
 * NOTE(review): CHECK(mecab) runs with mecab == NULL when mecab_new() fails,
 * so mecab_strerror() is assumed to accept NULL - confirm against mecab.h.
 */
#define CHECK(eval) do { \
    if (!(eval)) { \
      fprintf (stderr, "Exception:%s\n", mecab_strerror (mecab)); \
      if (mecab) mecab_destroy(mecab); \
      return -1; \
    } \
  } while (0)

/*
 * Sample of the C tagger API: parses one fixed sentence and prints the tagged
 * string, the 3-best result, the N-best results one by one, the surface and
 * feature of every normal/unknown node, and the dictionary information.
 */
int main (int argc, char **argv)  {
  char input[] = "太郎は次郎が持っている本を花子に渡した。";
  mecab_t *mecab;
  const mecab_node_t *node;
  const mecab_dictionary_info_t *d;
  const char *result;
  int i;

  // Create tagger object
  mecab = mecab_new(argc, argv);
  CHECK(mecab);

  // Gets tagged result in string.
  result = mecab_sparse_tostr(mecab, input);
  CHECK(result);
  printf ("INPUT: %s\n", input);
  printf ("RESULT:\n%s", result);

  // Gets N best results
  result = mecab_nbest_sparse_tostr (mecab, 3, input);
  CHECK(result);
  fprintf (stdout, "NBEST:\n%s", result);

  CHECK(mecab_nbest_init(mecab, input));
  for (i = 0; i < 3; ++i) {
    // Passing NULL to "%s" is undefined behaviour, so stop once the tagger
    // has no further candidate to return.
    result = mecab_nbest_next_tostr (mecab);
    if (!result) {
      break;
    }
    printf ("%d:\n%s", i, result);
  }

  // Gets node object
  node = mecab_sparse_tonode(mecab, input);
  CHECK(node);
  for (; node; node = node->next) {
    // Only normal and unknown-word nodes are printed.
    if (node->stat == MECAB_NOR_NODE || node->stat == MECAB_UNK_NODE) {
      fwrite (node->surface, sizeof(char), node->length, stdout);
      printf("\t%s\n", node->feature);
    }
  }

  // Dictionary info
  d = mecab_dictionary_info(mecab);
  for (; d; d = d->next) {
    printf("filename: %s\n", d->filename);
    printf("charset: %s\n", d->charset);
    printf("size: %d\n", d->size);
    printf("type: %d\n", d->type);
    printf("lsize: %d\n", d->lsize);
    printf("rsize: %d\n", d->rsize);
    printf("version: %d\n", d->version);
  }

  mecab_destroy(mecab);

  return 0;
}
#include <mecab.h>
#include <stdio.h>

/*
 * Aborts main() when `eval` is false/NULL: prints mecab_strerror(mecab),
 * destroys whichever of the tagger, lattice and model already exist and
 * returns -1.
 * Relies on local `mecab`, `lattice` and `model` pointers being in scope and
 * set to NULL until created; without that, CHECK(model) would read and
 * destroy an uninitialised tagger pointer.
 * NOTE(review): mecab_strerror() is assumed to accept NULL (used that way
 * while the tagger does not exist yet) - confirm against mecab.h.
 */
#define CHECK(eval) do { \
    if (!(eval)) { \
      fprintf (stderr, "Exception:%s\n", mecab_strerror (mecab)); \
      if (mecab) mecab_destroy(mecab); \
      if (lattice) mecab_lattice_destroy(lattice); \
      if (model) mecab_model_destroy(model); \
      return -1; \
    } \
  } while (0)

/*
 * Sample of the C model/tagger/lattice API: parses one fixed sentence through
 * a lattice and prints the best path, the per-position begin/end node lists,
 * up to 10 N-best results, marginal probabilities, an all-morphs parse and
 * the dictionary information.
 */
int main (int argc, char **argv)  {
  char input[] = "太郎は次郎が持っている本を花子に渡した。";
  mecab_model_t *model = NULL;
  mecab_t *mecab = NULL;
  mecab_lattice_t *lattice = NULL;
  const mecab_node_t *node;
  const mecab_dictionary_info_t *d;
  int i;
  size_t pos;
  size_t len;

  model = mecab_model_new(argc, argv);
  CHECK(model);

  mecab = mecab_model_new_tagger(model);
  CHECK(mecab);

  lattice = mecab_model_new_lattice(model);
  CHECK(lattice);

  // Best path as a string.
  mecab_lattice_set_sentence(lattice, input);
  CHECK(mecab_parse_lattice(mecab, lattice));
  printf("RESULT: %s\n", mecab_lattice_tostr(lattice));

  // Best path node by node.
  node = mecab_lattice_get_bos_node(lattice);
  for (; node; node = node->next) {
    printf("%d ", node->id);

    if (node->stat == MECAB_BOS_NODE) {
      printf("BOS");
    } else if (node->stat == MECAB_EOS_NODE) {
      printf("EOS");
    } else {
      fwrite (node->surface, sizeof(char), node->length, stdout);
    }

    // The two offsets are byte positions relative to the start of `input`.
    printf(" %s %d %d %d %d %d %d %d %d %f %f %f %ld\n",
           node->feature,
           (int)(node->surface - input),
           (int)(node->surface - input + node->length),
           node->rcAttr,
           node->lcAttr,
           node->posid,
           (int)node->char_type,
           (int)node->stat,
           (int)node->isbest,
           node->alpha,
           node->beta,
           node->prob,
           node->cost);
  }

  // begin_nodes/end_nodes
  // The index is a size_t so it compares against `len` without a
  // signed/unsigned mismatch; it is narrowed only for the "%d" conversion.
  len = mecab_lattice_get_size(lattice);
  for (pos = 0; pos <= len; ++pos) {
    mecab_node_t *b, *e;
    b = mecab_lattice_get_begin_nodes(lattice, pos);
    e = mecab_lattice_get_end_nodes(lattice, pos);
    // NOTE(review): "%s" on surface prints from the token start up to the
    // next NUL, i.e. past node->length; kept as in the original sample.
    for (; b; b = b->bnext) {
      printf("B[%d] %s\t%s\n", (int)pos, b->surface, b->feature);
    }
    for (; e; e = e->enext) {
      printf("E[%d] %s\t%s\n", (int)pos, e->surface, e->feature);
    }
  }

  // N best results
  mecab_lattice_set_sentence(lattice, input);
  mecab_lattice_set_request_type(lattice, MECAB_NBEST);
  CHECK(mecab_parse_lattice(mecab, lattice));
  for (i = 0; i < 10; ++i) {
    fprintf(stdout, "%s", mecab_lattice_tostr(lattice));
    if (!mecab_lattice_next(lattice)) {
      break;
    }
  }

  // Marginal probabilities
  mecab_lattice_set_sentence(lattice, input);
  mecab_lattice_set_request_type(lattice, MECAB_MARGINAL_PROB);
  mecab_lattice_set_theta(lattice, 0.001);
  CHECK(mecab_parse_lattice(mecab, lattice));
  node = mecab_lattice_get_bos_node(lattice);
  for (; node; node = node->next) {
    fwrite(node->surface, sizeof(char), node->length, stdout);
    fprintf(stdout, "\t%s\t%f\n", node->feature, node->prob);
  }

  // All morphs through the tagger's own parse call.
  mecab_set_lattice_level(mecab, 0);
  mecab_set_all_morphs(mecab, 1);
  node = mecab_sparse_tonode(mecab, input);
  CHECK(node);
  for (; node; node = node->next) {
    fwrite (node->surface, sizeof(char), node->length, stdout);
    printf("\t%s\n", node->feature);
  }

  // Dictionary info
  d = mecab_dictionary_info(mecab);
  for (; d; d = d->next) {
    printf("filename: %s\n", d->filename);
    printf("charset: %s\n", d->charset);
    printf("size: %d\n", d->size);
    printf("type: %d\n", d->type);
    printf("lsize: %d\n", d->lsize);
    printf("rsize: %d\n", d->rsize);
    printf("version: %d\n", d->version);
  }

  mecab_destroy(mecab);
  mecab_lattice_destroy(lattice);
  mecab_model_destroy(model);

  return 0;
}
% cc -O2 `mecab-config --cflags` example.c -o example \
         `mecab-config --libs`
まず、コンパイル作業を行うディレクトリに include\mecab.h、bin\libmecab.dll、lib\libmecab.lib をコピーします。この後の作業は、使用するコンパイラによって微妙に変わります。
% gcc -DDLL_IMPORT -I. example.c -o example.exe libmecab.dll
% cl -DDLL_IMPORT -I. example.c libmecab.lib