MeCab
|
/*
  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer

  Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
*/
#ifndef MECAB_MECAB_H_
#define MECAB_MECAB_H_

/*
 * Public header: plain data structures shared by C and C++, a C function
 * interface, and (when compiled as C++) the abstract classes in namespace
 * MeCab.
 *
 * NOTE(review): the per-member descriptions below are inferred from the
 * identifiers only; confirm them against the implementation before relying
 * on them.
 */

/* C/C++ common data structures */

/**
 * Description of one dictionary; instances are chained through `next`.
 */
struct mecab_dictionary_info_t {
  const char *filename;                 /* dictionary file name */
  const char *charset;                  /* character set name */
  unsigned int size;                    /* presumably the number of entries -- TODO confirm */
  int type;                             /* presumably MECAB_SYS_DIC / MECAB_USR_DIC / MECAB_UNK_DIC */
  unsigned int lsize;                   /* presumably the left-attribute count -- TODO confirm */
  unsigned int rsize;                   /* presumably the right-attribute count -- TODO confirm */
  unsigned short version;               /* dictionary version number */
  struct mecab_dictionary_info_t *next; /* next element of the chain */
};

/**
 * Link between two nodes (`lnode` and `rnode`), itself chained through
 * `lnext` / `rnext`.
 */
struct mecab_path_t {
  struct mecab_node_t *rnode;
  struct mecab_path_t *rnext;
  struct mecab_node_t *lnode;
  struct mecab_path_t *lnext;
  int cost;
  float prob;
};

/**
 * One node of the analysis result.
 *
 * Member order and types are part of the binary layout shared with the
 * library; do not reorder.
 */
struct mecab_node_t {
  struct mecab_node_t *prev;
  struct mecab_node_t *next;
  struct mecab_node_t *enext;
  struct mecab_node_t *bnext;
  struct mecab_path_t *rpath;
  struct mecab_path_t *lpath;
  const char *surface;      /* NOTE(review): may not be NUL-terminated; see `length` -- confirm */
  const char *feature;
  unsigned int id;
  unsigned short length;
  unsigned short rlength;
  unsigned short rcAttr;
  unsigned short lcAttr;
  unsigned short posid;
  unsigned char char_type;
  unsigned char stat;       /* presumably one of the MECAB_*_NODE values */
  unsigned char isbest;
  float alpha;
  float beta;
  float prob;
  short wcost;
  long cost;
};

/** Node kinds (presumably the values stored in mecab_node_t::stat). */
enum {
  MECAB_NOR_NODE = 0,
  MECAB_UNK_NODE = 1,
  MECAB_BOS_NODE = 2,
  MECAB_EOS_NODE = 3,
  MECAB_EON_NODE = 4
};

/** Dictionary kinds (presumably the values stored in mecab_dictionary_info_t::type). */
enum {
  MECAB_SYS_DIC = 0,
  MECAB_USR_DIC = 1,
  MECAB_UNK_DIC = 2
};

/**
 * Request-type flags. Every value is a distinct power of two, so they can be
 * combined in the single `int request_type` taken by the *_request_type
 * functions.
 */
enum {
  MECAB_ONE_BEST          = 1,
  MECAB_NBEST             = 2,
  MECAB_PARTIAL           = 4,
  MECAB_MARGINAL_PROB     = 8,
  MECAB_ALTERNATIVE       = 16,
  MECAB_ALL_MORPHS        = 32,
  MECAB_ALLOCATE_SENTENCE = 64
};

/** Boundary constraint kinds used by the *_boundary_constraint functions. */
enum {
  MECAB_ANY_BOUNDARY   = 0,
  MECAB_TOKEN_BOUNDARY = 1,
  MECAB_INSIDE_TOKEN   = 2
};

/* C interface */
#ifdef __cplusplus
#include <cstdio>
#else
#include <stdio.h>
#endif
/* Guarantees the global-namespace size_t used throughout this header. */
#include <stddef.h>

/*
 * Symbol visibility macros. System headers are pulled in before the
 * extern "C" region below is opened.
 */
#ifdef _WIN32
#include <windows.h>
#  ifdef DLL_EXPORT
#    define MECAB_DLL_EXTERN        __declspec(dllexport)
#    define MECAB_DLL_CLASS_EXTERN  __declspec(dllexport)
#  else
#    define MECAB_DLL_EXTERN        __declspec(dllimport)
#  endif
#endif

#ifndef MECAB_DLL_EXTERN
#  define MECAB_DLL_EXTERN extern
#endif

#ifndef MECAB_DLL_CLASS_EXTERN
#  define MECAB_DLL_CLASS_EXTERN
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* mecab_t, mecab_model_t and mecab_lattice_t are opaque handles. */
typedef struct mecab_t                 mecab_t;
typedef struct mecab_model_t           mecab_model_t;
typedef struct mecab_lattice_t         mecab_lattice_t;
typedef struct mecab_dictionary_info_t mecab_dictionary_info_t;
typedef struct mecab_node_t            mecab_node_t;
typedef struct mecab_path_t            mecab_path_t;

#ifndef SWIG
/* C interface */

/* old mecab interface */

/* Creation, version, error reporting and destruction of a mecab_t. */
MECAB_DLL_EXTERN mecab_t *mecab_new(int argc, char **argv);
MECAB_DLL_EXTERN mecab_t *mecab_new2(const char *arg);
/* (void) gives a real prototype in C; an empty list would not before C23. */
MECAB_DLL_EXTERN const char *mecab_version(void);
MECAB_DLL_EXTERN const char *mecab_strerror(mecab_t *mecab);
MECAB_DLL_EXTERN void mecab_destroy(mecab_t *mecab);

/* Getters and setters for per-handle settings. */
MECAB_DLL_EXTERN int mecab_get_partial(mecab_t *mecab);
MECAB_DLL_EXTERN void mecab_set_partial(mecab_t *mecab, int partial);
MECAB_DLL_EXTERN float mecab_get_theta(mecab_t *mecab);
MECAB_DLL_EXTERN void mecab_set_theta(mecab_t *mecab, float theta);
MECAB_DLL_EXTERN int mecab_get_lattice_level(mecab_t *mecab);
MECAB_DLL_EXTERN void mecab_set_lattice_level(mecab_t *mecab, int level);
MECAB_DLL_EXTERN int mecab_get_all_morphs(mecab_t *mecab);
MECAB_DLL_EXTERN void mecab_set_all_morphs(mecab_t *mecab, int all_morphs);

/* Parsing. Variants with a `len` argument take an explicit input length;
   variants with `ostr`/`olen` take a caller-supplied output buffer. */
MECAB_DLL_EXTERN int mecab_parse_lattice(mecab_t *mecab,
                                         mecab_lattice_t *lattice);
MECAB_DLL_EXTERN const char *mecab_sparse_tostr(mecab_t *mecab,
                                                const char *str);
MECAB_DLL_EXTERN const char *mecab_sparse_tostr2(mecab_t *mecab,
                                                 const char *str, size_t len);
MECAB_DLL_EXTERN char *mecab_sparse_tostr3(mecab_t *mecab,
                                           const char *str, size_t len,
                                           char *ostr, size_t olen);
MECAB_DLL_EXTERN const mecab_node_t *mecab_sparse_tonode(mecab_t *mecab,
                                                         const char *str);
MECAB_DLL_EXTERN const mecab_node_t *mecab_sparse_tonode2(mecab_t *mecab,
                                                          const char *str,
                                                          size_t len);

/* N-best parsing. */
MECAB_DLL_EXTERN const char *mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N,
                                                      const char *str);
MECAB_DLL_EXTERN const char *mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N,
                                                       const char *str,
                                                       size_t len);
MECAB_DLL_EXTERN char *mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
                                                 const char *str, size_t len,
                                                 char *ostr, size_t olen);
MECAB_DLL_EXTERN int mecab_nbest_init(mecab_t *mecab, const char *str);
MECAB_DLL_EXTERN int mecab_nbest_init2(mecab_t *mecab, const char *str,
                                       size_t len);
MECAB_DLL_EXTERN const char *mecab_nbest_next_tostr(mecab_t *mecab);
MECAB_DLL_EXTERN char *mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr,
                                               size_t olen);
MECAB_DLL_EXTERN const mecab_node_t *mecab_nbest_next_tonode(mecab_t *mecab);

/* Node formatting and dictionary information. */
MECAB_DLL_EXTERN const char *mecab_format_node(mecab_t *mecab,
                                               const mecab_node_t *node);
MECAB_DLL_EXTERN const mecab_dictionary_info_t *mecab_dictionary_info(
    mecab_t *mecab);

/* lattice interface */

/* Lifetime of a mecab_lattice_t. */
MECAB_DLL_EXTERN mecab_lattice_t *mecab_lattice_new(void);
MECAB_DLL_EXTERN void mecab_lattice_destroy(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN void mecab_lattice_clear(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN int mecab_lattice_is_available(mecab_lattice_t *lattice);

/* Node access. */
MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_bos_node(
    mecab_lattice_t *lattice);
MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_eos_node(
    mecab_lattice_t *lattice);
MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_begin_nodes(
    mecab_lattice_t *lattice);
MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_end_nodes(
    mecab_lattice_t *lattice);
MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_begin_nodes(
    mecab_lattice_t *lattice, size_t pos);
MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_end_nodes(
    mecab_lattice_t *lattice, size_t pos);

/* Input sentence. */
MECAB_DLL_EXTERN const char *mecab_lattice_get_sentence(
    mecab_lattice_t *lattice);
MECAB_DLL_EXTERN void mecab_lattice_set_sentence(mecab_lattice_t *lattice,
                                                 const char *sentence);
MECAB_DLL_EXTERN void mecab_lattice_set_sentence2(mecab_lattice_t *lattice,
                                                  const char *sentence,
                                                  size_t len);
MECAB_DLL_EXTERN size_t mecab_lattice_get_size(mecab_lattice_t *lattice);

/* Numeric parameters. */
MECAB_DLL_EXTERN double mecab_lattice_get_z(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN void mecab_lattice_set_z(mecab_lattice_t *lattice, double Z);
MECAB_DLL_EXTERN double mecab_lattice_get_theta(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN void mecab_lattice_set_theta(mecab_lattice_t *lattice,
                                              double theta);

MECAB_DLL_EXTERN int mecab_lattice_next(mecab_lattice_t *lattice);

/* Request type; `request_type` takes the MECAB_ONE_BEST ... flag values. */
MECAB_DLL_EXTERN int mecab_lattice_get_request_type(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN int mecab_lattice_has_request_type(mecab_lattice_t *lattice,
                                                    int request_type);
MECAB_DLL_EXTERN void mecab_lattice_set_request_type(mecab_lattice_t *lattice,
                                                     int request_type);
MECAB_DLL_EXTERN void mecab_lattice_add_request_type(mecab_lattice_t *lattice,
                                                     int request_type);
MECAB_DLL_EXTERN void mecab_lattice_remove_request_type(
    mecab_lattice_t *lattice, int request_type);

MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_new_node(mecab_lattice_t *lattice);

/* String output; the *2 variants take a caller-supplied buffer. */
MECAB_DLL_EXTERN const char *mecab_lattice_tostr(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN const char *mecab_lattice_tostr2(mecab_lattice_t *lattice,
                                                  char *buf, size_t size);
MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr(mecab_lattice_t *lattice,
                                                       size_t N);
MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr2(
    mecab_lattice_t *lattice, size_t N, char *buf, size_t size);

/* Constraints; `boundary_type` takes the MECAB_*_BOUNDARY / MECAB_INSIDE_TOKEN
   values. */
MECAB_DLL_EXTERN int mecab_lattice_has_constraint(mecab_lattice_t *lattice);
MECAB_DLL_EXTERN int mecab_lattice_get_boundary_constraint(
    mecab_lattice_t *lattice, size_t pos);
MECAB_DLL_EXTERN const char *mecab_lattice_get_feature_constraint(
    mecab_lattice_t *lattice, size_t pos);
MECAB_DLL_EXTERN void mecab_lattice_set_boundary_constraint(
    mecab_lattice_t *lattice, size_t pos, int boundary_type);
MECAB_DLL_EXTERN void mecab_lattice_set_feature_constraint(
    mecab_lattice_t *lattice, size_t begin_pos, size_t end_pos,
    const char *feature);

MECAB_DLL_EXTERN void mecab_lattice_set_result(mecab_lattice_t *lattice,
                                               const char *result);
MECAB_DLL_EXTERN const char *mecab_lattice_strerror(mecab_lattice_t *lattice);

/* model interface */
MECAB_DLL_EXTERN mecab_model_t *mecab_model_new(int argc, char **argv);
MECAB_DLL_EXTERN mecab_model_t *mecab_model_new2(const char *arg);
MECAB_DLL_EXTERN void mecab_model_destroy(mecab_model_t *model);
MECAB_DLL_EXTERN mecab_t *mecab_model_new_tagger(mecab_model_t *model);
MECAB_DLL_EXTERN mecab_lattice_t *mecab_model_new_lattice(mecab_model_t *model);
MECAB_DLL_EXTERN int mecab_model_swap(mecab_model_t *model,
                                      mecab_model_t *new_model);
MECAB_DLL_EXTERN const mecab_dictionary_info_t *mecab_model_dictionary_info(
    mecab_model_t *model);
MECAB_DLL_EXTERN int mecab_model_transition_cost(mecab_model_t *model,
                                                 unsigned short rcAttr,
                                                 unsigned short lcAttr);
MECAB_DLL_EXTERN mecab_node_t *mecab_model_lookup(mecab_model_t *model,
                                                  const char *begin,
                                                  const char *end,
                                                  mecab_lattice_t *lattice);

/* static functions */
MECAB_DLL_EXTERN int mecab_do(int argc, char **argv);
MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv);
MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv);
MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv);
MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv);
MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv);
#endif  /* SWIG */

#ifdef __cplusplus
}
#endif

/* C++ interface */
#ifdef __cplusplus

namespace MeCab {
typedef struct mecab_dictionary_info_t DictionaryInfo;
typedef struct mecab_path_t            Path;
typedef struct mecab_node_t            Node;

template <typename N, typename P> class Allocator;
class Tagger;

/**
 * Abstract lattice interface.
 *
 * The order of the virtual members is part of the class layout; keep it.
 */
class MECAB_DLL_CLASS_EXTERN Lattice {
 public:
  virtual void clear() = 0;
  virtual bool is_available() const = 0;

  virtual Node *bos_node() const = 0;
  virtual Node *eos_node() const = 0;

#ifndef SWIG
  virtual Node **begin_nodes() const = 0;
  virtual Node **end_nodes() const = 0;
#endif

  virtual Node *end_nodes(size_t pos) const = 0;
  virtual Node *begin_nodes(size_t pos) const = 0;

  virtual const char *sentence() const = 0;
  virtual void set_sentence(const char *sentence) = 0;

#ifndef SWIG
  virtual void set_sentence(const char *sentence, size_t len) = 0;
#endif

  virtual size_t size() const = 0;

  virtual void set_Z(double Z) = 0;
  virtual double Z() const = 0;

  virtual void set_theta(float theta) = 0;
  virtual float theta() const = 0;

  virtual bool next() = 0;

  /* `request_type` takes the MECAB_ONE_BEST ... flag values. */
  virtual int request_type() const = 0;
  virtual bool has_request_type(int request_type) const = 0;
  virtual void set_request_type(int request_type) = 0;
  virtual void add_request_type(int request_type) = 0;
  virtual void remove_request_type(int request_type) = 0;

#ifndef SWIG
  virtual Allocator<Node, Path> *allocator() const = 0;
#endif

  virtual Node *newNode() = 0;

  virtual const char *toString() = 0;
  virtual const char *toString(const Node *node) = 0;
  virtual const char *enumNBestAsString(size_t N) = 0;

#ifndef SWIG
  /* Overloads writing into a caller-supplied buffer. */
  virtual const char *toString(char *buf, size_t size) = 0;
  virtual const char *toString(const Node *node,
                               char *buf, size_t size) = 0;
  virtual const char *enumNBestAsString(size_t N, char *buf, size_t size) = 0;
#endif

  virtual bool has_constraint() const = 0;
  virtual int boundary_constraint(size_t pos) const = 0;
  virtual const char *feature_constraint(size_t pos) const = 0;
  virtual void set_boundary_constraint(size_t pos,
                                       int boundary_constraint_type) = 0;
  virtual void set_feature_constraint(size_t begin_pos, size_t end_pos,
                                      const char *feature) = 0;

  virtual void set_result(const char *result) = 0;

  virtual const char *what() const = 0;
  virtual void set_what(const char *str) = 0;

#ifndef SWIG
  static Lattice *create();
#endif

  virtual ~Lattice() {}
};

/**
 * Abstract model interface; creates Tagger and Lattice objects.
 *
 * The order of the virtual members is part of the class layout; keep it.
 */
class MECAB_DLL_CLASS_EXTERN Model {
 public:
  virtual const DictionaryInfo *dictionary_info() const = 0;

  virtual int transition_cost(unsigned short rcAttr,
                              unsigned short lcAttr) const = 0;

  virtual Node *lookup(const char *begin, const char *end,
                       Lattice *lattice) const = 0;

  virtual Tagger *createTagger() const = 0;
  virtual Lattice *createLattice() const = 0;

  virtual bool swap(Model *model) = 0;

  static const char *version();

  virtual ~Model() {}

  /* Guard spelled SWIG, as everywhere else in this header; the previous
     spelling "SIWG" is never defined, so the guard could not take effect. */
#ifndef SWIG
  static Model *create(int argc, char **argv);
  static Model *create(const char *arg);
#endif
};

/**
 * Abstract tagger interface.
 *
 * The order of the virtual members is part of the class layout; keep it.
 */
class MECAB_DLL_CLASS_EXTERN Tagger {
 public:
  static bool parse(const Model &model, Lattice *lattice);

  virtual bool parse(Lattice *lattice) const = 0;
  virtual const char *parse(const char *str) = 0;
  virtual const Node *parseToNode(const char *str) = 0;
  virtual const char *parseNBest(size_t N, const char *str) = 0;
  virtual bool parseNBestInit(const char *str) = 0;
  virtual const Node *nextNode() = 0;
  virtual const char *next() = 0;
  virtual const char *formatNode(const Node *node) = 0;

#ifndef SWIG
  /* Overloads taking an explicit input length (`len`) and/or a
     caller-supplied output buffer (`ostr`, `olen`). */
  virtual const char *parse(const char *str, size_t len,
                            char *ostr, size_t olen) = 0;
  virtual const char *parse(const char *str, size_t len) = 0;
  virtual const Node *parseToNode(const char *str, size_t len) = 0;
  virtual const char *parseNBest(size_t N, const char *str, size_t len) = 0;
  virtual bool parseNBestInit(const char *str, size_t len) = 0;
  virtual const char *next(char *ostr, size_t olen) = 0;
  virtual const char *parseNBest(size_t N, const char *str,
                                 size_t len, char *ostr, size_t olen) = 0;
  virtual const char *formatNode(const Node *node,
                                 char *ostr, size_t olen) = 0;
#endif

  virtual void set_request_type(int request_type) = 0;
  virtual int request_type() const = 0;

  virtual bool partial() const = 0;
  virtual void set_partial(bool partial) = 0;

  virtual int lattice_level() const = 0;
  virtual void set_lattice_level(int level) = 0;

  virtual bool all_morphs() const = 0;
  virtual void set_all_morphs(bool all_morphs) = 0;

  virtual void set_theta(float theta) = 0;
  virtual float theta() const = 0;

  virtual const DictionaryInfo *dictionary_info() const = 0;

  virtual const char *what() const = 0;

  virtual ~Tagger() {}

  /* Guard spelled SWIG, as everywhere else in this header; the previous
     spelling "SIWG" is never defined, so the guard could not take effect. */
#ifndef SWIG
  static Tagger *create(int argc, char **argv);
  static Tagger *create(const char *arg);
#endif

  static const char *version();
};

#ifndef SWIG
/* Free-function factories, matching delete functions, and error accessors. */
MECAB_DLL_EXTERN Lattice *createLattice();
MECAB_DLL_EXTERN Model *createModel(int argc, char **argv);
MECAB_DLL_EXTERN Model *createModel(const char *arg);
MECAB_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
MECAB_DLL_EXTERN Tagger *createTagger(const char *arg);

MECAB_DLL_EXTERN void deleteLattice(Lattice *lattice);
MECAB_DLL_EXTERN void deleteModel(Model *model);
MECAB_DLL_EXTERN void deleteTagger(Tagger *tagger);

MECAB_DLL_EXTERN const char *getLastError();
MECAB_DLL_EXTERN const char *getTaggerError();
#endif
}  /* namespace MeCab */
#endif  /* __cplusplus */
#endif  /* MECAB_MECAB_H_ */