MeCab
/home/taku/proj/mecab/mecab/src/mecab.h
Go to the documentation of this file.
00001 /*
00002   MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
00003 
00004   Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
00005   Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
00006 */
00007 #ifndef MECAB_MECAB_H_
00008 #define MECAB_MECAB_H_
00009 
00010 /* C/C++ common data structures  */
00011 
/*
 * Metadata record for one dictionary.  Each record can point at another
 * record of the same type through `next`.  mecab_dictionary_info() and
 * mecab_model_dictionary_info() hand out const pointers to this type.
 * NOTE(review): the per-field notes are inferred from the member names
 * only -- confirm against the implementation before relying on them.
 */
00015 struct mecab_dictionary_info_t {
        /* presumably the path the dictionary was loaded from */
00020   const char                     *filename;
00021 
        /* presumably the name of the dictionary's character encoding */
00025   const char                     *charset;
00026 
        /* presumably the number of entries -- TODO confirm */
00030   unsigned int                    size;
00031 
        /* presumably one of MECAB_SYS_DIC / MECAB_USR_DIC / MECAB_UNK_DIC */
00036   int                             type;
00037 
        /* lsize / rsize: presumably the left / right context table sizes */
00041   unsigned int                    lsize;
00042 
00046   unsigned int                    rsize;
00047 
        /* presumably the dictionary format version */
00051   unsigned short                  version;
00052 
        /* link to a further record of the same type */
00056   struct mecab_dictionary_info_t *next;
00057 };
00058 
/*
 * Connection record between two mecab_node_t objects (`lnode`, `rnode`).
 * `rnext` / `lnext` link to further path records; mecab_node_t refers back
 * to paths through its `rpath` / `lpath` members.
 * NOTE(review): presumably lnode is the left (earlier) node and rnode the
 * right (later) one -- confirm.
 */
00062 struct mecab_path_t {
00066   struct mecab_node_t* rnode;
00067 
        /* another path record; presumably shares the same rnode -- confirm */
00071   struct mecab_path_t* rnext;
00072 
00076   struct mecab_node_t* lnode;
00077 
        /* another path record; presumably shares the same lnode -- confirm */
00082   struct mecab_path_t* lnext;
00083 
        /* integer cost attached to this connection */
00087   int                  cost;
00088 
        /* NOTE(review): presumably only meaningful with MECAB_MARGINAL_PROB */
00092   float                prob;
00093 };
00094 
/*
 * Node record.  Const pointers to it are returned by mecab_sparse_tonode()
 * and mecab_nbest_next_tonode(); mutable ones by the mecab_lattice_get_*
 * node accessors and mecab_lattice_new_node().
 * NOTE(review): the per-field notes are inferred from names and types
 * only -- confirm against the implementation.
 */
00098 struct mecab_node_t {
        /* prev / next: links to neighbouring nodes */
00102   struct mecab_node_t  *prev;
00103 
00107   struct mecab_node_t  *next;
00108 
        /* enext / bnext: presumably next node sharing the same end / begin position */
00112   struct mecab_node_t  *enext;
00113 
00117   struct mecab_node_t  *bnext;
00118 
        /* rpath / lpath: path records attached to this node (see mecab_path_t) */
00123   struct mecab_path_t  *rpath;
00124 
00129   struct mecab_path_t  *lpath;
00130 
        /* NOTE(review): presumably NOT NUL-terminated; read `length` bytes -- confirm */
00136   const char           *surface;
00137 
00141   const char           *feature;
00142 
00146   unsigned int          id;
00147 
        /* length / rlength: presumably byte lengths of the surface, rlength including leading whitespace */
00151   unsigned short        length;
00152 
00156   unsigned short        rlength;
00157 
        /* same types as the rcAttr / lcAttr arguments of mecab_model_transition_cost() */
00161   unsigned short        rcAttr;
00162 
00166   unsigned short        lcAttr;
00167 
00171   unsigned short        posid;
00172 
00176   unsigned char         char_type;
00177 
        /* presumably one of MECAB_NOR_NODE .. MECAB_EON_NODE */
00182   unsigned char         stat;
00183 
        /* presumably non-zero when the node is on the best path */
00187   unsigned char         isbest;
00188 
        /* alpha / beta / prob: presumably filled only with MECAB_MARGINAL_PROB */
00193   float                 alpha;
00194 
00199   float                 beta;
00200 
00205   float                 prob;
00206 
        /* wcost: presumably the word cost; cost: presumably the accumulated best cost */
00210   short                 wcost;
00211 
00215   long                  cost;
00216 };
00217 
/*
 * Node kind constants, values 0..4.
 * NOTE(review): presumably the values stored in mecab_node_t::stat, with
 * NOR = normal, UNK = unknown word, BOS / EOS = begin / end of sentence,
 * EON = end of N-best enumeration -- confirm.
 */
00221 enum {
00225   MECAB_NOR_NODE = 0,
00229   MECAB_UNK_NODE = 1,
00233   MECAB_BOS_NODE = 2,
00237   MECAB_EOS_NODE = 3,
00238 
00242   MECAB_EON_NODE = 4
00243 };
00244 
/*
 * Dictionary kind constants, values 0..2.
 * NOTE(review): presumably the values stored in
 * mecab_dictionary_info_t::type (system / user / unknown-word
 * dictionary) -- confirm.
 */
00248 enum {
00252   MECAB_SYS_DIC = 0,
00253 
00257   MECAB_USR_DIC = 1,
00258 
00262   MECAB_UNK_DIC = 2
00263 };
00264 
/*
 * Request-type flags.  Every value is a distinct power of two, so several
 * can be held in one int; the mecab_lattice_{get,has,set,add,remove}_request_type()
 * functions take and return such an int.
 * NOTE(review): the meaning of each flag is not visible in this header.
 */
00268 enum {
00272   MECAB_ONE_BEST          = 1,
00276   MECAB_NBEST             = 2,
00282   MECAB_PARTIAL           = 4,
00288   MECAB_MARGINAL_PROB     = 8,
00293   MECAB_ALTERNATIVE       = 16,
00298   MECAB_ALL_MORPHS        = 32,
00299 
00304   MECAB_ALLOCATE_SENTENCE = 64
00305 };
00306 
/*
 * Boundary constraint constants, values 0..2.
 * NOTE(review): presumably the `boundary_type` accepted by
 * mecab_lattice_set_boundary_constraint() and the value returned by
 * mecab_lattice_get_boundary_constraint() -- confirm.
 */
00310 enum {
00314   MECAB_ANY_BOUNDARY = 0,
00315 
00319   MECAB_TOKEN_BOUNDARY = 1,
00320 
00324   MECAB_INSIDE_TOKEN = 2
00325 };
00326 
00327 /* C interface  */
00328 #ifdef __cplusplus
00329 #include <cstdio>
00330 #else
00331 #include <stdio.h>
00332 #endif
00333 
00334 #ifdef __cplusplus
00335 extern "C" {
00336 #endif
00337 
/*
 * Linkage decoration for the public API.
 *   _WIN32 with DLL_EXPORT : both macros become __declspec(dllexport).
 *   _WIN32 without it      : MECAB_DLL_EXTERN becomes __declspec(dllimport);
 *                            MECAB_DLL_CLASS_EXTERN is not set in that
 *                            branch and takes the empty default below.
 *   otherwise              : MECAB_DLL_EXTERN defaults to `extern` and
 *                            MECAB_DLL_CLASS_EXTERN to nothing, unless the
 *                            includer defined them before this point.
 * NOTE(review): <windows.h> is pulled in after the extern "C" block has
 * been opened when compiling as C++.
 */
00338 #ifdef _WIN32
00339 #include <windows.h>
00340 #  ifdef DLL_EXPORT
00341 #    define MECAB_DLL_EXTERN  __declspec(dllexport)
00342 #    define MECAB_DLL_CLASS_EXTERN  __declspec(dllexport)
00343 #  else
00344 #    define MECAB_DLL_EXTERN  __declspec(dllimport)
00345 #  endif
00346 #endif
00347 
        /* defaults for every configuration not handled above */
00348 #ifndef MECAB_DLL_EXTERN
00349 #  define MECAB_DLL_EXTERN extern
00350 #endif
00351 
00352 #ifndef MECAB_DLL_CLASS_EXTERN
00353 #  define MECAB_DLL_CLASS_EXTERN
00354 #endif
00355 
/*
 * Tag-free aliases so C code can write `mecab_t *` instead of
 * `struct mecab_t *`.  mecab_t, mecab_model_t and mecab_lattice_t are only
 * named here -- this header contains no definition for them, so they are
 * usable through pointers only.  The other three alias the structs
 * defined earlier in this header.
 */
00356   typedef struct mecab_t                 mecab_t;
00357   typedef struct mecab_model_t           mecab_model_t;
00358   typedef struct mecab_lattice_t         mecab_lattice_t;
00359   typedef struct mecab_dictionary_info_t mecab_dictionary_info_t;
00360   typedef struct mecab_node_t            mecab_node_t;
00361   typedef struct mecab_path_t            mecab_path_t;
00362 
00363 #ifndef SWIG
00364   /* C interface */
00365 
00366   /* old mecab interface */
        /*
         * Functions operating on an opaque mecab_t handle.
         * Every function except mecab_new(), mecab_new2() and mecab_version()
         * takes the handle as its first argument.
         * NOTE(review): presumably mecab_new()/mecab_new2() create the handle
         * and mecab_destroy() releases it; variants with (ostr, olen)
         * presumably write into a caller-supplied buffer of olen bytes, and
         * variants with `len` take an explicit input length -- confirm
         * against the implementation.
         */
00370   MECAB_DLL_EXTERN mecab_t*      mecab_new(int argc, char **argv);
00371 
00375   MECAB_DLL_EXTERN mecab_t*      mecab_new2(const char *arg);
00376 
        /* (void): in C an empty parameter list is not a prototype */
00380   MECAB_DLL_EXTERN const char*   mecab_version(void);
00381 
00385   MECAB_DLL_EXTERN const char*   mecab_strerror(mecab_t *mecab);
00386 
00390   MECAB_DLL_EXTERN void          mecab_destroy(mecab_t *mecab);
00391 
        /* option getters / setters */
00395   MECAB_DLL_EXTERN int           mecab_get_partial(mecab_t *mecab);
00396 
00400   MECAB_DLL_EXTERN void          mecab_set_partial(mecab_t *mecab, int partial);
00401 
00405   MECAB_DLL_EXTERN float         mecab_get_theta(mecab_t *mecab);
00406 
00410   MECAB_DLL_EXTERN void          mecab_set_theta(mecab_t *mecab, float theta);
00411 
00415   MECAB_DLL_EXTERN int           mecab_get_lattice_level(mecab_t *mecab);
00416 
00420   MECAB_DLL_EXTERN void          mecab_set_lattice_level(mecab_t *mecab, int level);
00421 
00425   MECAB_DLL_EXTERN int           mecab_get_all_morphs(mecab_t *mecab);
00426 
00430   MECAB_DLL_EXTERN void          mecab_set_all_morphs(mecab_t *mecab, int all_morphs);
00431 
        /* parsing */
00435   MECAB_DLL_EXTERN int           mecab_parse_lattice(mecab_t *mecab, mecab_lattice_t *lattice);
00436 
00440   MECAB_DLL_EXTERN const char*   mecab_sparse_tostr(mecab_t *mecab, const char *str);
00441 
00445   MECAB_DLL_EXTERN const char*   mecab_sparse_tostr2(mecab_t *mecab, const char *str, size_t len);
00446 
00450   MECAB_DLL_EXTERN char*         mecab_sparse_tostr3(mecab_t *mecab, const char *str, size_t len,
00451                                                      char *ostr, size_t olen);
00452 
        /* parameter names match the mecab_sparse_tostr* siblings */
00456   MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode(mecab_t *mecab, const char *str);
00457 
00461   MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode2(mecab_t *mecab, const char *str, size_t len);
00462 
        /* N-best variants */
00466   MECAB_DLL_EXTERN const char*   mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N, const char *str);
00467 
00471   MECAB_DLL_EXTERN const char*   mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N,
00472                                                            const char *str, size_t len);
00473 
00477   MECAB_DLL_EXTERN char*         mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
00478                                                            const char *str, size_t len,
00479                                                            char *ostr, size_t olen);
00480 
00484   MECAB_DLL_EXTERN int           mecab_nbest_init(mecab_t *mecab, const char *str);
00485 
00489   MECAB_DLL_EXTERN int           mecab_nbest_init2(mecab_t *mecab, const char *str, size_t len);
00490 
00494   MECAB_DLL_EXTERN const char*   mecab_nbest_next_tostr(mecab_t *mecab);
00495 
00499   MECAB_DLL_EXTERN char*         mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr, size_t olen);
00500 
00504   MECAB_DLL_EXTERN const mecab_node_t* mecab_nbest_next_tonode(mecab_t *mecab);
00505 
00509   MECAB_DLL_EXTERN const char*   mecab_format_node(mecab_t *mecab, const mecab_node_t *node);
00510 
00514   MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_dictionary_info(mecab_t *mecab);
00515 
00516   /* lattice interface */
        /*
         * Functions operating on an opaque mecab_lattice_t handle; every
         * function except mecab_lattice_new() takes it as first argument.
         * The request_type ints carry the MECAB_ONE_BEST .. 
         * MECAB_ALLOCATE_SENTENCE flag values declared earlier in this header.
         * NOTE(review): presumably mecab_lattice_new() creates the handle and
         * mecab_lattice_destroy() releases it; the int-returning predicates
         * presumably return non-zero for "true" -- confirm.
         */
        /* (void): in C an empty parameter list is not a prototype */
00520   MECAB_DLL_EXTERN mecab_lattice_t *mecab_lattice_new(void);
00521 
00525   MECAB_DLL_EXTERN void             mecab_lattice_destroy(mecab_lattice_t *lattice);
00526 
00530   MECAB_DLL_EXTERN void             mecab_lattice_clear(mecab_lattice_t *lattice);
00531 
00536   MECAB_DLL_EXTERN int              mecab_lattice_is_available(mecab_lattice_t *lattice);
00537 
        /* node access */
00541   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_bos_node(mecab_lattice_t *lattice);
00542 
00546   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_eos_node(mecab_lattice_t *lattice);
00547 
00552   MECAB_DLL_EXTERN mecab_node_t   **mecab_lattice_get_all_begin_nodes(mecab_lattice_t *lattice);
00556   MECAB_DLL_EXTERN mecab_node_t   **mecab_lattice_get_all_end_nodes(mecab_lattice_t *lattice);
00557 
00561   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_begin_nodes(mecab_lattice_t *lattice, size_t pos);
00562 
00566   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_get_end_nodes(mecab_lattice_t *lattice, size_t pos);
00567 
        /* sentence */
00571   MECAB_DLL_EXTERN const char      *mecab_lattice_get_sentence(mecab_lattice_t *lattice);
00572 
00576   MECAB_DLL_EXTERN void             mecab_lattice_set_sentence(mecab_lattice_t *lattice, const char *sentence);
00577 
00582   MECAB_DLL_EXTERN void             mecab_lattice_set_sentence2(mecab_lattice_t *lattice, const char *sentence, size_t len);
00583 
00587   MECAB_DLL_EXTERN size_t           mecab_lattice_get_size(mecab_lattice_t *lattice);
00588 
        /* numeric parameters; note theta is double here but float in mecab_get_theta() */
00592   MECAB_DLL_EXTERN double           mecab_lattice_get_z(mecab_lattice_t *lattice);
00593 
00597   MECAB_DLL_EXTERN void             mecab_lattice_set_z(mecab_lattice_t *lattice, double Z);
00598 
00602   MECAB_DLL_EXTERN double           mecab_lattice_get_theta(mecab_lattice_t *lattice);
00603 
00608   MECAB_DLL_EXTERN void             mecab_lattice_set_theta(mecab_lattice_t *lattice, double theta);
00609 
00613   MECAB_DLL_EXTERN int              mecab_lattice_next(mecab_lattice_t *lattice);
00614 
        /* request-type flags */
00618   MECAB_DLL_EXTERN int              mecab_lattice_get_request_type(mecab_lattice_t *lattice);
00619 
00623   MECAB_DLL_EXTERN int              mecab_lattice_has_request_type(mecab_lattice_t *lattice, int request_type);
00624 
00628   MECAB_DLL_EXTERN void             mecab_lattice_set_request_type(mecab_lattice_t *lattice, int request_type);
00629 
00634   MECAB_DLL_EXTERN void             mecab_lattice_add_request_type(mecab_lattice_t *lattice, int request_type);
00635 
00639   MECAB_DLL_EXTERN void             mecab_lattice_remove_request_type(mecab_lattice_t *lattice, int request_type);
00640 
00644   MECAB_DLL_EXTERN mecab_node_t    *mecab_lattice_new_node(mecab_lattice_t *lattice);
00645 
        /* string output; the *2 variants take a caller-supplied (buf, size) pair */
00649   MECAB_DLL_EXTERN const char      *mecab_lattice_tostr(mecab_lattice_t *lattice);
00650 
00654   MECAB_DLL_EXTERN const char      *mecab_lattice_tostr2(mecab_lattice_t *lattice, char *buf, size_t size);
00655 
00659   MECAB_DLL_EXTERN const char      *mecab_lattice_nbest_tostr(mecab_lattice_t *lattice, size_t N);
00660 
00665   MECAB_DLL_EXTERN const char      *mecab_lattice_nbest_tostr2(mecab_lattice_t *lattice, size_t N, char *buf, size_t size);
00666 
        /* constraints */
00670   MECAB_DLL_EXTERN int             mecab_lattice_has_constraint(mecab_lattice_t *lattice);
00671 
00675   MECAB_DLL_EXTERN int             mecab_lattice_get_boundary_constraint(mecab_lattice_t *lattice, size_t pos);
00676 
00677 
00681   MECAB_DLL_EXTERN const char     *mecab_lattice_get_feature_constraint(mecab_lattice_t *lattice, size_t pos);
00682 
00686   MECAB_DLL_EXTERN void            mecab_lattice_set_boundary_constraint(mecab_lattice_t *lattice, size_t pos, int boundary_type);
00687 
00691   MECAB_DLL_EXTERN void            mecab_lattice_set_feature_constraint(mecab_lattice_t *lattice, size_t begin_pos, size_t end_pos, const char *feature);
00692 
00696   MECAB_DLL_EXTERN void            mecab_lattice_set_result(mecab_lattice_t *lattice, const char *result);
00697 
00701   MECAB_DLL_EXTERN const char      *mecab_lattice_strerror(mecab_lattice_t *lattice);
00702 
00703 
00704   /* model interface */
        /*
         * Functions operating on an opaque mecab_model_t handle.  A model can
         * hand out mecab_t and mecab_lattice_t handles (new_tagger /
         * new_lattice) and a const dictionary-info pointer.
         * NOTE(review): presumably mecab_model_new()/mecab_model_new2() create
         * the handle and mecab_model_destroy() releases it; ownership of the
         * handles returned by new_tagger / new_lattice is not visible in this
         * header -- confirm.
         */
00708   MECAB_DLL_EXTERN mecab_model_t   *mecab_model_new(int argc, char **argv);
00709 
00713   MECAB_DLL_EXTERN mecab_model_t   *mecab_model_new2(const char *arg);
00714 
00719   MECAB_DLL_EXTERN void             mecab_model_destroy(mecab_model_t *model);
00720 
00724   MECAB_DLL_EXTERN mecab_t         *mecab_model_new_tagger(mecab_model_t *model);
00725 
00729   MECAB_DLL_EXTERN mecab_lattice_t *mecab_model_new_lattice(mecab_model_t *model);
00730 
00734   MECAB_DLL_EXTERN int mecab_model_swap(mecab_model_t *model, mecab_model_t *new_model);
00735 
00739   MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_model_dictionary_info(mecab_model_t *model);
00740 
        /* argument types match mecab_node_t::rcAttr / lcAttr */
00744   MECAB_DLL_EXTERN int mecab_model_transition_cost(mecab_model_t *model,
00745                                                    unsigned short rcAttr,
00746                                                    unsigned short lcAttr);
00747 
        /* NOTE(review): [begin, end) presumably delimits the text to look up -- confirm */
00751   MECAB_DLL_EXTERN mecab_node_t *mecab_model_lookup(mecab_model_t *model,
00752                                                     const char *begin,
00753                                                     const char *end,
00754                                                     mecab_lattice_t *lattice);
00755 
00756   /* static functions */
        /*
         * Entry points taking a main()-style (argc, argv) pair and returning
         * int; none of them needs a mecab_t handle.
         * NOTE(review): presumably each backs one command-line tool and
         * returns its exit status -- confirm.
         */
00757   MECAB_DLL_EXTERN int           mecab_do(int argc, char **argv);
00758   MECAB_DLL_EXTERN int           mecab_dict_index(int argc, char **argv);
00759   MECAB_DLL_EXTERN int           mecab_dict_gen(int argc, char **argv);
00760   MECAB_DLL_EXTERN int           mecab_cost_train(int argc, char **argv);
00761   MECAB_DLL_EXTERN int           mecab_system_eval(int argc, char **argv);
00762   MECAB_DLL_EXTERN int           mecab_test_gen(int argc, char **argv);
00763 #endif
00764 
00765 #ifdef __cplusplus
00766 }
00767 #endif
00768 
00769 /* C++ interface */
00770 #ifdef __cplusplus
00771 
00772 namespace MeCab {
/*
 * C++ names for the C structs defined at the top of this header: the
 * MeCab:: types are aliases, not separate types.  Allocator and Tagger
 * are forward-declared because Lattice and Model mention them before
 * (or without) a definition being visible.
 */
00773 typedef struct mecab_dictionary_info_t DictionaryInfo;
00774 typedef struct mecab_path_t            Path;
00775 typedef struct mecab_node_t            Node;
00776 
00777 template <typename N, typename P> class Allocator;
00778 class Tagger;
00779 
/*
 * Abstract lattice interface.  Every non-static member except the
 * destructor is pure virtual, so a concrete object has to come from a
 * factory: Lattice::create(), createLattice() or Model::createLattice().
 * Members inside `#ifndef SWIG` sections are hidden from SWIG.
 * The C functions named mecab_lattice_* mirror most of these members.
 */
00783 class MECAB_DLL_CLASS_EXTERN Lattice {
00784 public:
00788   virtual void clear()              = 0;
00789 
00794   virtual bool is_available() const = 0;
00795 
00801   virtual Node *bos_node() const              = 0;
00802 
00807   virtual Node *eos_node() const              = 0;
00808 
00809 #ifndef SWIG
00810 
        /* whole-array accessors; the overloads taking `pos` return one entry */
00813   virtual Node **begin_nodes() const          = 0;
00814 
00818   virtual Node **end_nodes() const            = 0;
00819 #endif
00820 
00827   virtual Node *end_nodes(size_t pos) const   = 0;
00828 
00835   virtual Node *begin_nodes(size_t pos) const = 0;
00836 
00842   virtual const char *sentence() const = 0;
00843 
00848   virtual void set_sentence(const char *sentence)             = 0;
00849 
00850 #ifndef SWIG
00851 
00856   virtual void set_sentence(const char *sentence, size_t len) = 0;
00857 #endif
00858 
00863   virtual size_t size() const                                 = 0;
00864 
00869   virtual void   set_Z(double Z) = 0;
00870 
00875   virtual double Z() const = 0;
00876 
        /* theta is float here, while mecab_lattice_{get,set}_theta() use double */
00881   virtual void  set_theta(float theta) = 0;
00882 
00887   virtual float theta() const          = 0;
00888 
00895   virtual bool next() = 0;
00896 
        /* request_type ints carry the MECAB_ONE_BEST .. MECAB_ALLOCATE_SENTENCE flags */
00901   virtual int request_type() const                = 0;
00902 
00907   virtual bool has_request_type(int request_type) const = 0;
00908 
00913   virtual void set_request_type(int request_type) = 0;
00914 
00919   virtual void add_request_type(int request_type) = 0;
00920 
00925   virtual void remove_request_type(int request_type) = 0;
00926 
00927 #ifndef SWIG
00928 
00931   virtual Allocator<Node, Path> *allocator() const = 0;
00932 #endif
00933 
00938   virtual Node *newNode() = 0;
00939 
00946   virtual const char *toString()                = 0;
00947 
00955   virtual const char *toString(const Node *node) = 0;
00956 
00964   virtual const char *enumNBestAsString(size_t N) = 0;
00965 
00966 #ifndef SWIG
00967 
        /* overloads taking a caller-supplied (buf, size) pair */
00974   virtual const char *toString(char *buf, size_t size) = 0;
00975 
00984   virtual const char *toString(const Node *node,
00985                                char *buf, size_t size) = 0;
00986 
00995   virtual const char *enumNBestAsString(size_t N, char *buf, size_t size) = 0;
00996 #endif
00997 
01001   virtual bool has_constraint() const = 0;
01002 
01008   virtual int boundary_constraint(size_t pos) const = 0;
01009 
01015   virtual const char *feature_constraint(size_t pos) const = 0;
01016 
01022   virtual void set_boundary_constraint(size_t pos,
01023                                        int boundary_constraint_type) = 0;
01024 
01031   virtual void set_feature_constraint(
01032       size_t begin_pos, size_t end_pos,
01033       const char *feature) = 0;
01034 
01039   virtual void set_result(const char *result) = 0;
01040 
        /* NOTE(review): what() / set_what() presumably get / set the error text -- confirm */
01045   virtual const char *what() const            = 0;
01046 
01051   virtual void set_what(const char *str)        = 0;
01052 
01053 #ifndef SWIG
01054 
01058   static Lattice *create();
01059 #endif
01060 
01061   virtual ~Lattice() {}
01062 };
01063 
/*
 * Abstract model interface.  All non-static members except the destructor
 * are pure virtual; concrete objects come from Model::create() or
 * createModel().  A model hands out Tagger and Lattice objects
 * (createTagger / createLattice) and exposes dictionary metadata.
 * The C functions named mecab_model_* mirror these members.
 */
01067 class MECAB_DLL_CLASS_EXTERN Model {
01068 public:
01073   virtual const DictionaryInfo *dictionary_info() const = 0;
01074 
01079   virtual int transition_cost(unsigned short rcAttr,
01080                               unsigned short lcAttr) const = 0;
01081 
01087   virtual Node *lookup(const char *begin, const char *end,
01088                        Lattice *lattice) const = 0;
01089 
01096   virtual Tagger  *createTagger() const = 0;
01097 
01102   virtual Lattice *createLattice() const = 0;
01103 
01115   virtual bool swap(Model *model) = 0;
01116 
01121   static const char *version();
01122 
01123   virtual ~Model() {}
01124 
        /* factories are hidden from SWIG, like the other #ifndef SWIG sections of this header */
01125 #ifndef SWIG
01126 
01134   static Model* create(int argc, char **argv);
01135 
01144   static Model* create(const char *arg);
01145 #endif
01146 };
01147 
/*
 * Abstract tagger interface.  All non-static members except the destructor
 * are pure virtual; concrete objects come from Tagger::create(),
 * createTagger() or Model::createTagger().
 * Only parse(Lattice *) is const; the string-based parse / parseToNode /
 * parseNBest / next members are non-const.
 * Members inside `#ifndef SWIG` sections are hidden from SWIG.
 */
01151 class MECAB_DLL_CLASS_EXTERN Tagger {
01152 public:
01165   static bool  parse(const Model &model, Lattice *lattice);
01166 
01176   virtual bool parse(Lattice *lattice) const                = 0;
01177 
01186   virtual const char* parse(const char *str)                = 0;
01187 
01197   virtual const Node* parseToNode(const char *str)          = 0;
01198 
01209   virtual const char* parseNBest(size_t N, const char *str) = 0;
01210 
01220   virtual bool  parseNBestInit(const char *str)             = 0;
01221 
01229   virtual const Node* nextNode()                            = 0;
01230 
01238   virtual const char* next()                                = 0;
01239 
01250   virtual const char* formatNode(const Node *node)          = 0;
01251 
01252 #ifndef SWIG
01253 
        /* overloads with an explicit input length and/or a caller-supplied (ostr, olen) pair */
01263   virtual const char* parse(const char *str, size_t len, char *ostr, size_t olen) = 0;
01264 
01271   virtual const char* parse(const char *str, size_t len)                          = 0;
01272 
01279   virtual const Node* parseToNode(const char *str, size_t len)                    = 0;
01280 
01288   virtual const char* parseNBest(size_t N, const char *str, size_t len)           = 0;
01289 
01297   virtual bool  parseNBestInit(const char *str, size_t len)                  = 0;
01298 
01306   virtual const char* next(char *ostr , size_t olen)                        = 0;
01307 
01318   virtual const char* parseNBest(size_t N, const char *str,
01319                                  size_t len, char *ostr, size_t olen)       = 0;
01320 
01329   virtual const char* formatNode(const Node *node, char *ostr, size_t olen) = 0;
01330 #endif
01331 
        /* request_type ints carry the MECAB_ONE_BEST .. MECAB_ALLOCATE_SENTENCE flags */
01337   virtual void set_request_type(int request_type) = 0;
01338 
01344   virtual int  request_type() const = 0;
01345 
01351   virtual bool  partial() const                             = 0;
01352 
01358   virtual void  set_partial(bool partial)                   = 0;
01359 
01365   virtual int   lattice_level() const                       = 0;
01366 
01372   virtual void  set_lattice_level(int level)                = 0;
01373 
01379   virtual bool  all_morphs() const                          = 0;
01380 
01386   virtual void  set_all_morphs(bool all_morphs)             = 0;
01387 
01392   virtual void  set_theta(float theta)                      = 0;
01393 
01398   virtual float theta() const                               = 0;
01399 
01404   virtual const DictionaryInfo* dictionary_info() const = 0;
01405 
        /* NOTE(review): presumably returns the last error text -- confirm */
01410   virtual const char* what() const = 0;
01411 
01412   virtual ~Tagger() {}
01413 
        /* factories are hidden from SWIG, like the other #ifndef SWIG sections of this header */
01414 #ifndef SWIG
01415 
01423   static Tagger *create(int argc, char **argv);
01424 
01433   static Tagger *create(const char *arg);
01434 #endif
01435 
01440   static const char *version();
01441 };
01442 
01443 #ifndef SWIG
01444 
/*
 * Namespace-level factories, matching delete functions and error
 * accessors.  The enclosing `#ifndef SWIG` hides all of them from SWIG.
 * NOTE(review): presumably objects obtained from a create* function are
 * released with the matching delete* function, and getLastError() /
 * getTaggerError() return the most recent error text -- confirm.
 */
01447 MECAB_DLL_EXTERN Lattice     *createLattice();
01448 
01452 MECAB_DLL_EXTERN Model       *createModel(int argc, char **argv);
01453 
01457 MECAB_DLL_EXTERN Model       *createModel(const char *arg);
01458 
01462 MECAB_DLL_EXTERN Tagger      *createTagger(int argc, char **argv);
01463 
01467 MECAB_DLL_EXTERN Tagger      *createTagger(const char *arg);
01468 
01475 MECAB_DLL_EXTERN void        deleteLattice(Lattice *lattice);
01476 
01477 
01484 MECAB_DLL_EXTERN void        deleteModel(Model *model);
01485 
01492 MECAB_DLL_EXTERN void        deleteTagger(Tagger *tagger);
01493 
01498 MECAB_DLL_EXTERN const char*  getLastError();
01499 
01505 MECAB_DLL_EXTERN const char*  getTaggerError();
01506 #endif
01507 }
01508 #endif
01509 #endif  /* MECAB_MECAB_H_ */