Full Code of HIT-SCIR/ltp4j for AI

master 548bd38dffaa cached
39 files
84.0 KB
26.0k tokens
70 symbols
1 requests
Download .txt
Repository: HIT-SCIR/ltp4j
Branch: master
Commit: 548bd38dffaa
Files: 39
Total size: 84.0 KB

Directory structure:
gitextract_trfcfd8n/

├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── README.md
├── aol.properties
├── appveyor.yml
├── doc/
│   ├── api.rst
│   ├── background.rst
│   ├── conf.py
│   ├── index.rst
│   ├── install.rst
│   ├── ltp4j-document-1.0.md
│   ├── make.bat
│   ├── requirements.txt
│   └── run.rst
├── examples/
│   ├── Console.java
│   └── example
├── pom.xml
└── src/
    ├── main/
    │   ├── c++/
    │   │   ├── edu_hit_ir_ltp4j_NER.cpp
    │   │   ├── edu_hit_ir_ltp4j_Parser.cpp
    │   │   ├── edu_hit_ir_ltp4j_Postagger.cpp
    │   │   ├── edu_hit_ir_ltp4j_SRL.cpp
    │   │   ├── edu_hit_ir_ltp4j_Segmentor.cpp
    │   │   ├── edu_hit_ir_ltp4j_SplitSentence.cpp
    │   │   └── string_to_jstring.hpp
    │   └── java/
    │       └── edu/
    │           └── hit/
    │               └── ir/
    │                   └── ltp4j/
    │                       ├── NER.java
    │                       ├── Pair.java
    │                       ├── Parser.java
    │                       ├── Postagger.java
    │                       ├── SRL.java
    │                       ├── Segmentor.java
    │                       └── SplitSentence.java
    └── test/
        ├── c++/
        │   └── main.cpp
        └── java/
            └── edu/
                └── hit/
                    └── ir/
                        └── ltp4j/
                            └── test/
                                ├── NERTest.java
                                ├── ParserTest.java
                                ├── PostaggerTest.java
                                ├── SRLTest.java
                                └── SegmentorTest.java

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================

###########
# data    #
###########
ltp_data

###########
# outputs #
###########
output
CMakeFiles
cmake_install.cmake
# CMake writes "CMakeCache.txt"; the historical mis-cased spelling is kept so
# that nothing ignored before becomes tracked on case-insensitive checkouts.
CMakeCache.txt
CmakeCache.txt
Makefile
libs
target

###########
# IDEs    #
###########
.idea


================================================
FILE: .gitmodules
================================================
[submodule "src/main/c++/ltp"]
	path = src/main/c++/ltp
	url = https://github.com/HIT-SCIR/ltp.git


================================================
FILE: .travis.yml
================================================
# Travis CI configuration: builds the project with Maven on Linux and macOS.
language:
    - cpp
    - java

# Operating systems the build is run on.
os:
    - linux
    - osx

before_script:
    # On macOS only: when JAVA_HOME is unset and /usr/libexec/java_home is
    # executable, export JAVA_HOME from that helper's output.
    - if [[ "$TRAVIS_OS_NAME" == "osx" && -z "$JAVA_HOME" && -x "/usr/libexec/java_home" ]] ; then export JAVA_HOME=$(/usr/libexec/java_home); fi
    # Fetch the git submodules declared in .gitmodules before building.
    - git submodule init
    - git submodule update

script:
    # Tests are skipped. No goal is given on the command line, so this
    # presumably relies on a default goal configured in pom.xml - TODO confirm.
    - mvn -Dmaven.test.skip=true


================================================
FILE: CMakeLists.txt
================================================
# Stand-alone CMake build of the ltp4j JNI bridge libraries.
#
# 2.8.12 is the oldest release that accepts target_include_directories() and
# the PRIVATE keyword used below.  The "...3.10" policy ceiling is ignored by
# CMake older than 3.12 and lets current releases, which no longer accept
# projects that only declare pre-3.5 compatibility, configure this file.
cmake_minimum_required (VERSION 2.8.12...3.10)
project ("ltp4j")

# jni.h is needed by every target below, so fail at configure time with a
# clear message instead of at compile time with a missing-header error.
find_package (JNI REQUIRED)

# Root of the LTP tree that provides include/ and lib/.
# Override on the command line with -DLTP_HOME=<dir>.
set (LTP_HOME "${PROJECT_SOURCE_DIR}/ltp" CACHE PATH "Root directory of LTP (must contain include/ and lib/)")
set (LIBRARY_OUTPUT_PATH    "${PROJECT_SOURCE_DIR}/libs")
# NOTE(review): the repository listing shows the JNI sources under
# src/main/c++ with edu_hit_ir_ltp4j_*.cpp names; confirm that this directory
# and the *_jni.cpp file names used below still exist.
set (JNI_SOURCE_DIR         "${PROJECT_SOURCE_DIR}/jni")

if (APPLE)
    # Append to, rather than replace, any flags supplied by the user or the
    # toolchain (the previous assignment discarded them).
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
endif ()

# Directory searched for the prebuilt LTP libraries linked below.
# NOTE(review): on Windows only the Debug output directory is searched.
if (WIN32)
    link_directories ("${LTP_HOME}/lib/Debug")
else ()
    link_directories ("${LTP_HOME}/lib")
endif ()

# Declares one shared JNI bridge library.
#   target      - name of the shared library target to create
#   source      - C++ source file name, relative to JNI_SOURCE_DIR
#   ltp_library - LTP library the bridge is linked against
# Example: ltp4j_add_jni_library (segmentor_jni segment_jni.cpp segmentor)
function (ltp4j_add_jni_library target source ltp_library)
    add_library (${target} SHARED "${JNI_SOURCE_DIR}/${source}")
    target_include_directories (${target} PRIVATE
        "${JNI_SOURCE_DIR}"
        "${LTP_HOME}/include"
        ${JNI_INCLUDE_DIRS})
    target_link_libraries (${target} PRIVATE ${ltp_library})
endfunction ()

ltp4j_add_jni_library (split_sentence_jni split_sentence_jni.cpp splitsnt)
ltp4j_add_jni_library (segmentor_jni      segment_jni.cpp        segmentor)
ltp4j_add_jni_library (postagger_jni      postag_jni.cpp         postagger)
ltp4j_add_jni_library (ner_jni            ner_jni.cpp            ner)
ltp4j_add_jni_library (parser_jni         parser_jni.cpp         parser)
ltp4j_add_jni_library (srl_jni            srl_jni.cpp            srl)

# -----------------------------------------------
# TOOLKIT


================================================
FILE: README.md
================================================
ltp4j<sup>beta</sup>: Language Technology Platform For Java
============================================

[![Build Status](https://travis-ci.org/HIT-SCIR/ltp4j.svg?branch=integrate-test)](https://travis-ci.org/HIT-SCIR/ltp4j)
[![Documentation Status](https://readthedocs.org/projects/ltp4j/badge/?version=latest)](http://ltp4j.readthedocs.org/en/neoltp4j/?badge=neoltp4j)

# LTP 4.0
LTP 4.0 的支持请移步[libltp](https://github.com/HIT-SCIR/libltp)仓库

# 更新

1. ltp4j 现已经更新对 LTP 3.4.0的支持。
2. 项目改用 maven 构建、编译。具体使用方法参见文档。

# 简介

ltp4j是语言技术平台[(Language Technology Platform, LTP)](https://github.com/HIT-SCIR/ltp)接口的一个Java封装。
本项目旨在使Java用户可以本地调用LTP。
本项目仍在开发之中,欢迎反馈bug。

# 文档

请参考在线文档:[ltp4j使用文档](http://ltp4j.readthedocs.io)



================================================
FILE: aol.properties
================================================
amd64.Windows.msvc.cpp.defines=Windows WIN32 _WINDOWS NOMINMAX BOOST_ALL_NO_LIB
amd64.Windows.msvc.c.defines=Windows WIN32 _WINDOWS NOMINMAX BOOST_ALL_NO_LIB



================================================
FILE: appveyor.yml
================================================
# AppVeyor configuration: builds ltp4j with Maven on 64-bit Windows images.
version: '{build}'

image:
  - Visual Studio 2015
  - Visual Studio 2017

platform: x64

install:
  # Download and unpack Maven 3.3.9 unless a cached copy already exists.
  - ps: |
      Add-Type -AssemblyName System.IO.Compression.FileSystem
      if (!(Test-Path -Path "C:\maven" )) {
        # Superseded Maven releases are kept on archive.apache.org; fetch over
        # HTTPS and enable TLS 1.2 explicitly for older .NET defaults.
        [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
        (new-object System.Net.WebClient).DownloadFile(
          'https://archive.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.zip',
          'C:\maven-bin.zip'
        )
        [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven")
      }
  - cmd: SET PATH=C:\maven\apache-maven-3.3.9\bin;%JAVA_HOME%\bin;%PATH%
  # NOTE(review): -XX:MaxPermSize is a PermGen-era option; confirm the JDK
  # selected through JAVA_HOME still accepts it.
  - cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g
  - cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g

build_script:
  # Fetch the git submodules, then build with tests skipped.
  - git submodule init
  - git submodule update
  - mvn -Dmaven.test.skip=true

# Keep the unpacked Maven distribution and the local repository between builds.
cache:
  - C:\maven\
  - C:\Users\appveyor\.m2


================================================
FILE: doc/api.rst
================================================
编程接口
========

.. java:package:: edu.hit.ir.ltp4j

分词接口
--------

.. java:type:: public class Segmentor

分词主要提供三个接口:

.. java:method:: public final native int create(String modelPath)

    功能:

    读取模型文件,初始化分词器。

    参数:

    +---------------------+------------------------------------------------------------+
    | 参数名              | 参数描述                                                   |
    +=====================+============================================================+
    | String modelPath    | 指定模型文件的路径                                         |
    +---------------------+------------------------------------------------------------+
   

.. java:method:: public final native void release()

    功能:

    释放模型文件,销毁分词器。

.. java:method:: public final native int segment(String sent, List<String> words)

    功能:

    调用分词接口。

    参数:

    +---------------------+------------------------------------------------------------+
    | 参数名              | 参数描述                                                   |
    +=====================+============================================================+
    | String sent         | 待分词句子                                                 |
    +---------------------+------------------------------------------------------------+
    | List<String> words  | 结果分词序列                                               |
    +---------------------+------------------------------------------------------------+


**示例程序**

.. code:: java

    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;

    public class TestSegment {
      /**
       * Loads the segmentation model, segments one sentence and prints the
       * resulting words separated by tabs on a single line.
       */
      public static void main(String[] args) {
        // create(), segment() and release() are documented as instance
        // methods, so they are invoked on a Segmentor object.
        Segmentor segmentor = new Segmentor();
        if (segmentor.create("../../../ltp_data/cws.model") < 0) {
          System.err.println("load failed");
          return;
        }

        String sent = "我是中国人";
        List<String> words = new ArrayList<String>();
        // segment() fills `words`; its return value is used as the word count.
        int size = segmentor.segment(sent, words);

        for (int i = 0; i < size; i++) {
          System.out.print(words.get(i));
          if (i == size - 1) {
            System.out.println();
          } else {
            System.out.print("\t");
          }
        }
        // Release the loaded model once segmentation is finished.
        segmentor.release();
      }
    }


词性标注接口
--------------

.. java:type:: public class Postagger

词性标注主要提供四个接口:

.. java:method:: public final native int create(String modelPath)

    功能:

    读取模型文件,初始化词性标注器

    参数:

    +---------------------------------+--------------------------------------------------------------------+
    | 参数名                          | 参数描述                                                           |
    +=================================+====================================================================+
    | String modelPath                | 词性标注模型路径                                                   |
    +---------------------------------+--------------------------------------------------------------------+
    
    
.. java:method:: public final native int create(String modelPath, String lexiconPath)

    功能:

    读取模型文件,初始化词性标注器

    参数:
        
    +---------------------------+---------------------------+
    | 参数名                    | 参数描述                  |
    +===========================+===========================+
    | String modelPath          | 词性标注模型路径          |
    +---------------------------+---------------------------+
    | String lexiconPath        | 指定词性标注外部词典路径。|
    +---------------------------+---------------------------+

    lexiconPath参数指定的外部词典文件样例如下所示。每行指定一个词,第一列指定单词,第二列之后指定该词的候选词性(可以有多项,每一项占一列),列与列之间用空格区分::

        雷人 v a
        】 wp

        
.. java:method:: public final native void release()

    功能:

    释放模型文件,销毁词性标注器。


.. java:method:: public final native int postag(List<String> words, List<String> tags)

    功能:

    调用词性标注接口

    参数:

    +--------------------+--------------------------------------------------------------------+
    | 参数名             | 参数描述                                                           |
    +====================+====================================================================+
    | List<String> words | 待标注的词序列                                                     |
    +--------------------+--------------------------------------------------------------------+
    | List<String> tags  | 词性标注结果,序列中的第i个元素是第i个词的词性                     |
    +--------------------+--------------------------------------------------------------------+

**示例程序**

.. code:: java

    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;

    public class TestPostag {
      /**
       * Loads the part-of-speech model, tags a fixed word sequence and prints
       * each word with its tag as "word_tag" items separated by "|".
       */
      public static void main(String[] args) {
        // create(), postag() and release() are documented as instance
        // methods, so they are invoked on a Postagger object.
        Postagger postagger = new Postagger();
        if (postagger.create("../../../ltp_data/pos.model") < 0) {
          System.err.println("load failed");
          return;
        }

        List<String> words = new ArrayList<String>();
        words.add("我");   words.add("是");
        words.add("中国"); words.add("人");
        List<String> postags = new ArrayList<String>();

        // postag() fills `postags`; its return value is used as the item count.
        int size = postagger.postag(words, postags);
        for (int i = 0; i < size; i++) {
          System.out.print(words.get(i) + "_" + postags.get(i));
          if (i == size - 1) {
            System.out.println();
          } else {
            System.out.print("|");
          }
        }
        // Release the loaded model once tagging is finished.
        postagger.release();
      }
    }


命名实体识别接口
------------------

.. java:type:: public class NER

命名实体识别主要提供三个接口:

.. java:method:: public final native int create(String modelPath)

    功能:

    读取模型文件,初始化命名实体识别器

    参数:

    +----------------------------------------+--------------------------------------------------------------------+
    | 参数名                                 | 参数描述                                                           |
    +========================================+====================================================================+
    | String modelPath                       | 命名实体识别模型路径                                               |
    +----------------------------------------+--------------------------------------------------------------------+

    返回值:

    返回一个整数,小于0表示模型加载失败。

.. java:method:: public final native void release()

    功能:

    释放模型文件,销毁命名实体识别器。


.. java:method:: public final native int recognize(List<String> words, List<String> postags, List<String> ners)

    功能:

    调用命名实体识别接口

    参数:

    +----------------------+----------------------------------------------------------------------------------------+
    | 参数名               | 参数描述                                                                               |
    +======================+========================================================================================+
    | List<String> words   | 待识别的词序列                                                                         |
    +----------------------+----------------------------------------------------------------------------------------+
    | List<String> postags | 待识别的词的词性序列                                                                   |
    +----------------------+----------------------------------------------------------------------------------------+
    | List<String> ners    | | 命名实体识别结果,                                                                   |
    |                      | | 命名实体识别的结果为O时表示这个词不是命名实体,                                      |
    |                      | | 否则为{POS}-{TYPE}形式的标记,POS代表这个词在命名实体中的位置,TYPE表示命名实体类型  |
    +----------------------+----------------------------------------------------------------------------------------+


**示例程序**

.. code:: java

    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;

    public class TestNer {

      /**
       * Loads the named-entity model, runs recognition on a fixed sentence
       * given as words plus part-of-speech tags, and prints one NE tag per line.
       */
      public static void main(String[] args) {
        // create(), recognize() and release() are documented as instance
        // methods, so they are invoked on a NER object.
        NER ner = new NER();
        if (ner.create("../../../ltp_data/ner.model") < 0) {
          System.err.println("load failed");
          return;
        }

        List<String> words = new ArrayList<String>();
        List<String> tags = new ArrayList<String>();
        List<String> ners = new ArrayList<String>();
        // Each word is added together with its part-of-speech tag.
        words.add("中国");   tags.add("ns");
        words.add("国际");   tags.add("n");
        words.add("广播");   tags.add("n");
        words.add("电台");   tags.add("n");
        words.add("创办");   tags.add("v");
        words.add("于");     tags.add("p");
        words.add("1941年"); tags.add("m");
        words.add("12月");   tags.add("m");
        words.add("3日");    tags.add("m");
        words.add("。");     tags.add("wp");

        // recognize() fills `ners` with the recognition result.
        ner.recognize(words, tags, ners);

        for (int i = 0; i < words.size(); i++) {
          System.out.println(ners.get(i));
        }

        // Release the loaded model once recognition is finished.
        ner.release();
      }
    }

依存句法分析接口
-----------------

.. java:type:: public class Parser

依存句法分析主要提供三个接口:

.. java:method:: public final native int create(String modelPath)

    功能:

    读取模型文件,初始化依存句法分析器

    参数:

    +---------------------------------------+--------------------------------------------------------------------+
    | 参数名                                | 参数描述                                                           |
    +=======================================+====================================================================+
    | String modelPath                      | 依存句法分析模型路径                                               |
    +---------------------------------------+--------------------------------------------------------------------+

.. java:method:: public final native void release()

    功能:

    释放模型文件,销毁依存句法分析器。

.. java:method:: public final native int parse(List<String> words, List<String> tags, List<Integer> heads, List<String> deprels)

    功能:

    调用依存句法分析接口

    参数:

    +----------------------+--------------------------------------------------------------------+
    | 参数名               | 参数描述                                                           |
    +======================+====================================================================+
    | List<String> words   | 待分析的词序列                                                     |
    +----------------------+--------------------------------------------------------------------+
    | List<String> tags    | 待分析的词的词性序列                                               |
    +----------------------+--------------------------------------------------------------------+
    | List<Integer> heads  | 结果依存弧,heads[i]代表第i个词的父亲节点的编号                    |
    +----------------------+--------------------------------------------------------------------+
    | List<String> deprels | 结果依存弧关系类型                                                 |
    +----------------------+--------------------------------------------------------------------+


**示例程序**

.. code:: java

    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;

    public class TestParse {

        /**
         * Loads the dependency parser model, parses a fixed sentence given as
         * words plus part-of-speech tags, and prints each head index followed
         * by its dependency relation label.
         */
        public static void main(String[] args){
            Parser parser = new Parser();
            // A negative return value from create() is treated as a load failure.
            if(parser.create("./model/ltp_data/parser.model") < 0){
                throw new RuntimeException("fail to load parser model");
            }
            List<String> words = new ArrayList<>();
            List<String> postags = new ArrayList<>();
            // Each word is added together with its part-of-speech tag.
            words.add("一把手");    postags.add("n");
            words.add("亲自");      postags.add("d");
            words.add("过河");      postags.add("v");
            words.add("。");        postags.add("wp");

            // Output lists filled by parse().
            List<Integer> heads = new ArrayList<>();
            List<String> deprels = new ArrayList<>();

            parser.parse(words, postags, heads, deprels);

            for(int i=0; i<heads.size(); i++){
                System.out.println(heads.get(i));
                System.out.println(deprels.get(i));
            }
            // Release the loaded model once parsing is finished.
            parser.release();
        }
    }


语义角色标注接口
-------------------

.. java:type:: public class SRL

语义角色标注主要提供三个接口:

.. java:method:: public final native int create(String modelPath)

    功能:

    读取模型文件,初始化语义角色标注器

    参数:

    +----------------------------+--------------------------------------------------------------------+
    | 参数名                     | 参数描述                                                           |
    +============================+====================================================================+
    | String modelPath           | 语义角色标注模型文件所在路径                                       |
    +----------------------------+--------------------------------------------------------------------+

.. java:method:: public final native void release()

    功能:

    释放模型文件,销毁语义角色标注器。


.. java:method:: public final native int srl(List<String> words, List<String> tags, List<Integer> heads, List<String> deprels, List<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>> srls)

    功能:

    调用语义角色标注接口

    参数:

    +----------------------------------------------------------------------+-----------------------------------------------------------+
    | 参数名                                                               | 参数描述                                                  |
    +======================================================================+===========================================================+
    | List<String> words                                                   | 输入的词序列                                              |
    +----------------------------------------------------------------------+-----------------------------------------------------------+
    | List<String> tags                                                    | 输入的词性序列                                            |
    +----------------------------------------------------------------------+-----------------------------------------------------------+
    | List<Integer> heads                                                  | 这个词的父节点的编号 [#f1]_                               |
    +----------------------------------------------------------------------+-----------------------------------------------------------+
    | List<String> deprels                                                 | 这个词的父节点的依存关系类型                              |
    +----------------------------------------------------------------------+-----------------------------------------------------------+
    | List<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>> srls | 结果语义角色标注                                          |
    +----------------------------------------------------------------------+-----------------------------------------------------------+

常见问题
--------

.. rubric:: 注

.. [#f1] 编号从0记起

================================================
FILE: doc/background.rst
================================================
简介与背景知识
==============

ltp4j是 `语言技术平台 (Language Technology Platform, LTP) <http://ltp.readthedocs.org/zh_CN/latest/>`_ 接口的一个Java封装。 本项目旨在使Java用户可以本地调用LTP。

在使用ltp4j之前,您需要简要了解

* 什么是语言技术平台,它能否帮助您解决问题
* 如何安装语言技术平台
* 语言技术平台提供哪些编程接口

如果您对这些问题不了解,请首先阅读我们提供的有关语言技术平台的 `文档 <http://ltp.readthedocs.org/zh_CN/latest/>`_ 。在本文档的后续中,我们假定您已经阅读并成功编译并使用语言技术平台。


ltp4j的基本实现思路是依靠JNI技术实现在Java中调用C/C++动态库。我们建议您使用几分钟了解 `Java调用C/C++动态库 <http://www.cnblogs.com/icejoywoo/archive/2012/02/22/2363709.html>`_ 的实践方式。

ltp4j整个项目由两部分组成,它们分别是:

* ltp4j.jar:Java接口程序。
* C++代理程序:ltp4j加载的ltp动态库。



================================================
FILE: doc/conf.py
================================================
# -*- coding: utf-8 -*-
#
# LTP documentation build configuration file, created by
# sphinx-quickstart on Mon Jan 19 17:24:17 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['javasphinx']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'ltp4j'
copyright = u'2016, HIT-SCIR'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
# (Previously misspelled as 'SNAPSHORT'.)
release = '0.1-SNAPSHOT'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'LTPdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
  ('index', 'LTP.tex', u'LTP Documentation',
   u'HIT-SCIR', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'ltp', u'LTP Documentation',
     [u'HIT-SCIR'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the placeholder text left by sphinx-quickstart.
texinfo_documents = [
  ('index', 'ltp4j', u'ltp4j Documentation',
   u'HIT-SCIR', 'ltp4j',
   'Java (JNI) wrapper for the Language Technology Platform (LTP).',
   'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False




================================================
FILE: doc/index.rst
================================================
.. ltpdoctest documentation master file, created by
   sphinx-quickstart on Wed Jan 14 22:35:55 2015.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

.. include:: background.rst

目录
=====

.. toctree::
    :maxdepth: 2

    install
    run
    api


索引及表格
==================

* :ref:`genindex`
* :ref:`search`



================================================
FILE: doc/install.rst
================================================
.. _install-label:

编译ltp4j
=========

如果您需要使用ltp4j,必须拥有两部分内容

* ltp4j.jar与C++代理程序
* LTP模型文件

其中,LTP模型文件可以从 `百度云 <http://pan.baidu.com/share/link?shareid=1988562907&uk=2738088569>`_ 下载,当前ltp4j对应的模型版本为3.3.1。本文档将着重介绍如何编译ltp4j.jar与其C++代理程序。

安装Maven
---------

ltp4j使用 `apache maven <https://maven.apache.org/index.html>`_ 进行构建。在构建ltp4j之前,您首先需要安装maven。安装方法请参考: `安装apache maven <https://maven.apache.org/install.html>`_。

编译ltp4j
---------

在确保安装maven的前提下(即 ``mvn -h`` 具有输出结果),您可以按照如下方式构建ltp4j。

1. 在命令行下进入ltp4j所在文件夹
2. ``git submodule init``
3. ``git submodule update``
4. ``mvn -Dmaven.test.skip=true``

如果您编译提示成功同时项目根目录下包含 `target/ltp4j-{version}.jar`,证明已经编译成功。

nar-maven-plugin
~~~~~~~~~~~~~~~~

本部分将介绍编译ltp4j的一些技术考虑,与编译ltp4j无关。对这部分不感兴趣的用户可以忽略这部分文档。

ltp4j的基本技术考虑是 **使用户使用最简单的技术手段编译使用ltp4j** 。所以我们选择了maven作为构建工具,希望可以通过一条指令完成编译过程。
如前文所述,ltp4j需要ltp4j.jar及其C++代理程序两部分。
为了在maven中既能够使用java编译器编译jar又能够使用C++编译器编译C++代理程序,我们经过调研,决定使用 `nar-maven-plugin <https://github.com/maven-nar/nar-maven-plugin>`_ 。这一maven插件使我们可以在不同的系统架构下编译C++的代码 (AOL)。
在使用过程中,我们发现了这一插件的一系列bug,并通过贡献代码的方式进行了解决。


编译结果
--------

nar-maven-plugin的编译结果随操作系统的不同而存在差异。其生成的ltp4j.jar以及代理文件可以从如下路径找到

* jar:`./target/ltp4j-{version}.jar`
* 代理程序:`./target/nar/ltp4j-{version}-{AOL}-jni/`

其中,`version` 代表ltp4j的版本。`AOL` 代表 **体系结构-系统-链接器** 。
举例来讲,

* Windows 64位系统使用MSVC编译对应的AOL为:amd64-Windows-msvc
* Ubuntu 64位系统使用gnuc++编译对应的AOL为:amd64-Linux-gpp

编译结果示例
~~~~~~~~~~~~

**64位Linux g++**

.. code:: shell

    $ find target/ -type f -name "*.jar" -or -name "*.so"
    target/ltp4j-0.1.0-SNAPSHOT.jar
    target/nar/ltp4j-0.1.0-SNAPSHOT-amd64-Linux-gpp-jni/lib/amd64-Linux-gpp/jni/libltp4j-0.1.0-SNAPSHOT.so


**64位windows MSVC**

.. code:: shell

    $ find target/ -type f -name "*.jar" -or -name "*.dll"
    target/ltp4j-0.1.0-SNAPSHOT.jar
    target/nar/ltp4j-0.1.0-SNAPSHOT-amd64-Windows-msvc-jni/lib/amd64-Windows-msvc/jni/ltp4j-0.1.0-SNAPSHOT.dll


常见问题
--------



================================================
FILE: doc/ltp4j-document-1.0.md
================================================
ltp4j 文档
===============

ltp4j是[语言技术平台(Language Technology Platform, LTP)](https://github.com/HIT-SCIR/ltp)的Java封装。
关于LTP更多的信息,欢迎访问LTP项目主页。

### 作者
* 韩冰 << bhan@ir.hit.edu.cn >> 2014-05-15 创建文档

# 简介与背景知识
ltp4j是[语言技术平台(Language Technology Platform, LTP)](https://github.com/HIT-SCIR/ltp)接口的一个Java封装。
本项目旨在使Java用户可以本地调用LTP。

在使用ltp4j之前,您需要简要了解
* [什么是语言技术平台](https://github.com/HIT-SCIR/ltp/blob/master/doc/ltp-document-3.0.md#%E7%AE%80%E4%BB%8B),它能否帮助您解决问题
* [如何安装语言技术平台](https://github.com/HIT-SCIR/ltp/blob/master/doc/ltp-document-3.0.md#%E5%A6%82%E4%BD%95%E5%AE%89%E8%A3%85ltp)
* [语言技术平台提供哪些编程接口](https://github.com/HIT-SCIR/ltp/blob/master/doc/ltp-document-3.0.md#%E7%BC%96%E7%A8%8B%E6%8E%A5%E5%8F%A3)

如果您对这些问题不了解,请首先阅读我们提供的有关语言技术平台的文档。
在本文档的后续中,我们假定您已经阅读并成功编译并使用语言技术平台。

ltp4j主要依靠JNI实现。整个项目由两部分组成,他们分别是:

* __ltp4j.jar__:Java接口程序,利用ant能够直接编译构建为ltp4j.jar,方便用户导入使用。
* C++代理程序,在项目/jni/目录下实现Java接口中的功能,利用CMake编译构建为动态库。

# 安装

在这一章节中,我们假定您下载并将LTP放置于`/path/to/your/ltp-project`路径下;
而ltp4j放置于`/path/to/your/ltp4j-project`路径下。

## 编译ltp4j.jar

### 命令行方式

ltp4j.jar使用ant编译工具编译。 在命令行环境下,可以在项目根目录(`/path/to/your/ltp4j-project`)下使用

```
ant
```
命令编译。编译成功后,将在`build_jar/jar`下产生`ltp4j.jar`文件。

### Eclipse

如果使用Eclipse,可以按照_"File > New > Project... > Java Project from Existing Ant Buildfile"_的方式从build.xml中创建项目。 
选择next后,在Ant buildfile:一栏中填入build.xml的路径,`/path/to/your/ltp4j-project/build.xml`(window用户请添加盘符并将/改为\\),如下图所示。

![Eclipse](https://raw.githubusercontent.com/ruoshui1126/ltp4j/master/doc/java.png)

点击Finish就导入了项目。

在导入项目后,右键build.xml选择2 Ant Build。 在弹出的对话框中的选择main选项卡,并在`Base Directory:`中填入项目路径`/path/to/your/ltp4j-project/`。

![BaseDirectory](https://raw.githubusercontent.com/ruoshui1126/ltp4j/master/doc/BaseDirectory.png)

填好后执行run,build/jar下产生名为ltp4j.jar的jar文件。

### Intellij Idea

配置maven。点击右侧的MavenProject。导入pom.xml。

## 编译C++代理程序

代理程序jni动态库依赖于ltp的动态库,请先行编译LTP。

### 安装CMake
ltp4j使用的C++代理程序使用编译工具CMake构建项目。
在编译代理程序之前,你需要首先安装CMake。
CMake的网站在[这里](http://www.cmake.org)。如果你是Windows用户,请下载CMake的二进制安装包;
如果你是Linux,Mac OS或Cygwin的用户,可以通过编译源码的方式安装CMake,当然,你也可以使用Linux的软件源来安装。

### Windows(MSVC)编译

第一步:配置ltp的安装路径

因为jni依赖于ltp编译产生的动态库,所以在编译过程中需要给出ltp的路径。
请修改`/path/to/your/ltp4j-project/CMakeLists.txt`中的`LTP_HOME`的值为您的LTP项目的路径(`/path/to/your/ltp-project`),
对应修改的代码为:

```set (LTP_HOME "/path/to/your/ltp-project/")```

第二步:构建VC Project

在项目文件夹下新建一个名为build的文件夹,使用CMake Gui,在source code中填入项目文件夹,在binaries中填入build文件夹。然后Configure -> Generate。

![win-cmake](https://raw.githubusercontent.com/ruoshui1126/ltp4j/master/doc/cmake.png)

或者在命令行build 路径下运行

```
cmake ..
```

第三步:编译

### Linux,Mac OSX和Cygwin编译
Linux、Mac OSX(*)和Cygwin的用户,可以直接在项目根目录下使用命令

```
cmake .
make
```

进行编译。

编译成功后,会在libs文件夹下生成以下一些动态库(**)

| 程序名 | 说明 |
| ------ | ---- |
| split_sentence_jni.so | 分句动态库 |
| segmentor_jni.so | 分词动态库 |
| postagger_jni.so| 词性标注动态库 |
| parser_jni.so | 依存句法分析动态库 |
| ner_jni.so | 命名实体识别动态库 |
| srl_jni.so | 语义角色标注动态库 |

### 注意事项

* **该处编译需要设置Java环境变量JAVA_HOME**。
* **需要保持c++编译器与JDK同是32位或者64位,否则JVM不能加载生成的动态库**

# 开始使用

构建需要在本地使用ltp的工程
* 导入ltp4j.jar
* windows下将libs文件夹中生成的所有动态库、以及原ltp lib文件夹下的splitsnt、segmentor、postagger、ner、parser、srl 6个动态库拷贝到项目根目录
* linux下export LD_LIBRARY_PATH=#jni动态库路径#

接下来便可仿照下面各个接口的例子使用ltp啦。

# 编程接口

## 分词接口

edu.hit.ir.ltp4j.Segmentor

分词主要提供四个接口:

**int create(带外部词典)**

功能:

读取模型文件,初始化分词器。

参数:

| 参数名 | 参数描述 |
|--------|----------|
|String path | 指定模型文件的路径 |
|String lexicon_path | 指定外部词典路径。如果lexicon_path为NULL,则不加载外部词典 |

返回值:

成功加载模型返回1,否则返回-1。

**int create**

功能:

读取模型文件,初始化分词器。

参数:

| 参数名 | 参数描述 |
|--------|----------|
|String path | 指定模型文件的路径 |


返回值:

成功加载模型返回1,否则返回-1。


**void release**

功能:

释放模型文件,销毁分词器。

参数:无

返回值:无

**int Segment**

功能:

调用分词接口。

参数:

| 参数名 | 参数描述 |
|--------|----------|
|String | 待分词句子 |
|java.util.List< String > words| 结果分词序列 |

返回值:

返回结果中词的个数。

### 示例程序

一个简单的实例程序可以说明分词接口的用法:

```
import java.util.ArrayList;
import java.util.List;
import edu.hit.ir.ltp4j.*;

public class TestSegment {
  public static void main(String[] args) {
    if(Segmentor.create("../../../ltp_data/cws.model")<0){
      System.err.println("load failed");
      return;
    }

    String sent = "我是中国人";
    List<String> words = new ArrayList<String>();
    int size = Segmentor.segment(sent,words);

    for(int i = 0; i<size; i++) {
      System.out.print(words.get(i));
      if(i==size-1) {
        System.out.println();
      } else{  
        System.out.print("\t");
      }
    }
    Segmentor.release();
  }
}
```

## 词性标注接口
edu.hit.ir.ltp4j.Postagger

词性标注主要提供三个接口

**int create**

功能:

读取模型文件,初始化词性标注器

参数:

|参数名 | 参数描述 |
|-------|----------|
|String path | 词性标注模型路径 |

返回值:

成功加载模型返回1,否则返回-1。

**void release**

功能:

释放模型文件,销毁词性标注器。

参数:无

返回值:无

**int Postag**

功能:

调用词性标注接口

参数:

|参数名 | 参数描述 |
|-------|----------|
|java.util.List< String >  words | 待标注的词序列 |
|java.util.List< String >  tags | 词性标注结果,序列中的第i个元素是第i个词的词性 |

返回值:

返回结果中词的个数

### 示例程序

一个简单的实例程序可以说明词性标注接口的用法:

```
import java.util.ArrayList;
import java.util.List;
import edu.hit.ir.ltp4j.*;

public class TestPostag {
  public static void main(String[] args) {
    if(Postagger.create("../../../ltp_data/pos.model")<0) {
      System.err.println("load failed");
      return;
    }
    
    List<String> words= new ArrayList<String>();
    words.add("我");   words.add("是");
    words.add("中国"); words.add("人");
    List<String> postags= new ArrayList<String>();

    int size = Postagger.postag(words,postags);
    for(int i = 0; i < size; i++) {
      System.out.print(words.get(i)+"_"+postags.get(i));
      if(i==size-1) {
        System.out.println();
      } else {
        System.out.print("|");
      }
    }
    Postagger.release();
  }
}
```

## 命名实体识别接口

edu.hit.ir.ltp4j.NER

命名实体识别主要提供三个接口:

**int create**

功能:

读取模型文件,初始化命名实体识别器

参数:

| 参数名 | 参数描述 |
|-------|----------|
| String path | 命名实体识别模型路径 |

返回值:

成功加载模型返回1,否则返回-1。

**void release**

功能:

释放模型文件,销毁命名实体识别器。

参数:无

返回值:无

**int recognize**

功能:

调用命名实体识别接口

参数:

|参数名 | 参数描述 |
|-------|----------|
|java.util.List< String > words | 待识别的词序列 |
|java.util.List< String > postags | 待识别的词的词性序列 |
|java.util.List< String > tags | 命名实体识别结果,命名实体识别的结果为O时表示这个词不是命名实体,否则为{POS}-{TYPE}形式的标记,POS代表这个词在命名实体中的位置,TYPE表示命名实体类型 |

返回值:

返回结果中词的个数

### 示例程序
    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;

    public class TestNer {

     public static void main(String[] args) {
      if(NER.create("../../../ltp_data/ner.model")<0) {
       System.err.println("load failed");
        return;          
      }
       List<String> words = new ArrayList<String>();
       List<String> tags = new ArrayList<String>();
       List<String> ners = new ArrayList<String>();
       words.add("中国");tags.add("ns");
       words.add("国际");tags.add("n");
       words.add("广播");tags.add("n");
       words.add("电台");tags.add("n");
       words.add("创办");tags.add("v");
       words.add("于");tags.add("p");
       words.add("1941年");tags.add("m");
       words.add("12月");tags.add("m");
       words.add("3日");tags.add("m");
       words.add("。");tags.add("wp");

       NER.recognize(words, tags, ners);

      for (int i = 0; i < words.size(); i++) {
        System.out.println(ners.get(i));
       }

      NER.release();

     }
     }

## 依存句法分析接口
edu.hit.ir.ltp4j.Parser

依存句法分析主要提供三个接口:

**int create**

功能:

读取模型文件,初始化依存句法分析器

参数:

|参数名 | 参数描述 |
|---|---|
|String path | 依存句法分析模型路径 |

返回值:

成功加载模型返回1,否则返回-1。

**void release**

功能:

释放模型文件,销毁依存句法分析器。

参数:无

返回值:无

**int parse**

功能:

调用依存句法分析接口

参数:

|参数名 | 参数描述 |
|---|---|
|java.util.List< String > words | 待分析的词序列 |
|java.util.List< String > postags | 待分析的词的词性序列 |
|java.util.List< Integer > heads | 结果依存弧,heads[i]代表第i个词的父亲节点的编号 |
|java.util.List< String > deprels | 结果依存弧关系类型 |

返回值:

返回结果中词的个数

### 示例程序

一个简单的实例程序可以说明依存句法分析接口的用法:

    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;


    public class TestParser {

      public static void main(String[] args) {
        if(Parser.create("../../../ltp_data/parser.model")<0) {
          System.err.println("load failed");
          return;
        }
        List<String> words = new ArrayList<String>();
        List<String> tags = new ArrayList<String>();
        words.add("一把手");tags.add("n");
        words.add("亲自");tags.add("d");
        words.add("过问");tags.add("v");
        words.add("。");tags.add("wp");
        List<Integer> heads = new ArrayList<Integer>();
        List<String> deprels = new ArrayList<String>();

        int size = Parser.parse(words,tags,heads,deprels);

        for(int i = 0;i<size;i++) {
          System.out.print(heads.get(i)+":"+deprels.get(i));
          if(i==size-1) {
            System.out.println();
          }
          else{
            System.out.print("        ");
          }
        }

        Parser.release();
      }
    }




## 语义角色标注接口
edu.hit.ir.ltp4j.SRL

语义角色标注主要提供三个接口:

**int create**

功能:

读取模型文件,初始化语义角色标注分析器

参数:

|参数名 | 参数描述 |
|---|---|
|String path | 语义角色标注模型路径 |

返回值:

成功加载模型返回1,否则返回-1。

**void release**

功能:

释放模型文件,销毁语义角色标注分析器。

参数:无

返回值:无

**int srl**

功能:

调用语义角色标注接口

参数:

|参数名 | 参数描述 |
|---|---|
|java.util.List< String > words | 待分析的词序列 |
|java.util.List< String > postags | 待分析的词的词性序列 |
|java.util.List< String > ners | 待分析的命名实体序列 |
|java.util.List< Integer > heads | 待分析的依存弧,heads[i]代表第i个词的父亲节点的编号 |
|java.util.List< String > deprels | 待分析的依存弧关系类型 |
| List< Pair< Integer, List< Pair< String, Pair< Integer, Integer > > > > > srls | 结果语义角色标注 |

返回值:

返回角色个数

### 示例程序

一个简单的实例程序可以说明语义角色标注接口的用法:

    import java.util.ArrayList;
    import java.util.List;
    import edu.hit.ir.ltp4j.*;

    public class TestSrl {

    public static void main(String[] args) {
      SRL.create("../../../ltp_data/pisrl.model");
      ArrayList<String> words = new ArrayList<String>();
      words.add("一把手");
      words.add("亲自");
      words.add("过问");
      words.add("。");
      ArrayList<String> tags = new ArrayList<String>();
      tags.add("n");
      tags.add("d");
      tags.add("v");
      tags.add("wp");
      ArrayList<Integer> heads = new ArrayList<Integer>();
      heads.add(2);
      heads.add(2);
      heads.add(-1);
      heads.add(2);
      ArrayList<String> deprels = new ArrayList<String>();
      deprels.add("SBV");
      deprels.add("ADV");
      deprels.add("HED");
      deprels.add("WP");
      List<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>> srls = new ArrayList<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>>();
        SRL.srl(words, tags, heads, deprels, srls);
      for (int i = 0; i < srls.size(); ++i) {
        System.out.println(srls.get(i).first + ":");
          for (int j = 0; j < srls.get(i).second.size(); ++j) {
            System.out.println("   type = "+ srls.get(i).second.get(j).first + " beg = "+ srls.get(i).second.get(j).second.first + " end = "+ srls.get(i).second.get(j).second.second);
          }
        }
      SRL.release();
      }

    }
    
### 注意事项

* **对于一个包含N个词的句子,句法分析返回的父节点范围在0至N之间,而语义角色标注的输入需要在-1至N-1之间。因此,若要在句法分析后进行语义角色标注,需要把heads作减一操作。**


================================================
FILE: doc/make.bat
================================================
@ECHO OFF

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

if "%1" == "" goto help

REM Print the list of supported targets. Also reached via "goto help" when
REM the script is invoked without arguments.
if "%1" == "help" (
	:help
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  latexpdf   to make LaTeX files and run them through pdflatex
	echo.  latexpdfja to make LaTeX files and run them through platex/dvipdfmx
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  xml        to make Docutils-native XML files
	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	goto end
)

if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)


%SPHINXBUILD% 2> nul
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

REM Build the Qt help sources, then print the follow-up commands: the .qhcp
REM project is compiled by qcollectiongenerator into a .qhc collection, which
REM is the file Qt Assistant opens.
if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\LTP.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\LTP.qhc
	goto end
)

if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

REM Generate LaTeX sources and compile them to PDF with "make all-pdf".
REM pushd/popd restore the caller's directory reliably; the previous
REM "cd %BUILDDIR%/.." was resolved from inside %BUILDDIR%/latex and could
REM not return when BUILDDIR is a relative path.
if "%1" == "latexpdf" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	pushd %BUILDDIR%\latex
	make all-pdf
	popd
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

REM Generate LaTeX sources and compile them to PDF with "make all-pdf-ja".
REM pushd/popd restore the caller's directory reliably; the previous
REM "cd %BUILDDIR%/.." was resolved from inside %BUILDDIR%/latex and could
REM not return when BUILDDIR is a relative path.
if "%1" == "latexpdfja" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	pushd %BUILDDIR%\latex
	make all-pdf-ja
	popd
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

if "%1" == "xml" (
	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The XML files are in %BUILDDIR%/xml.
	goto end
)

if "%1" == "pseudoxml" (
	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
	goto end
)

:end


================================================
FILE: doc/requirements.txt
================================================
javasphinx


================================================
FILE: doc/run.rst
================================================
运行
====

以编译运行examples/Console.java为例。

概念
~~~~

**version与aol**

version是当前ltp4j的版本号,aol是使用ltp4j机器的`架构-系统-链接器`

在命令行条件下可以用如下命令获得

.. code:: shell

    # get version
    version=`egrep '<version>' pom.xml | head -1 | tr -d ' ' | sed 's/<version>//g' | sed 's/<\/version>//g'`
    
    # get aol
    aol=`ls target/ltp4j-${version}-*-jni.nar | sed "s/target\/ltp4j-${version}-//g" | sed "s/-jni.nar//g"`

    
在其他条件下可以根据编译步骤生成的nar文件进行判断。
具体来讲编译步骤生成的nar文件的格式为`target/ltp4j-${version}-${aol}-jni.nar`。

**运行jni的必要条件**

1. 添加ltp4j.jar到java项目的classpath里
2. 添加c++代理程序的路径到java.library.path里

命令行
~~~~~~

.. code:: shell

    # get version
    version=`egrep '<version>' pom.xml | head -1 | tr -d ' ' | sed 's/<version>//g' | sed 's/<\/version>//g'`
    
    # get aol
    aol=`ls target/ltp4j-${version}-*-jni.nar | sed "s/target\/ltp4j-${version}-//g" | sed "s/-jni.nar//g"`

    # compile
    javac -cp "target/ltp4j-${version}.jar" examples/Console.java
    
    # run, specifying java.library.path, Console accepts input from stdin
    cat examples/example | java -Djava.library.path=target/nar/ltp4j-$version-$aol-jni/lib/$aol/jni/ \
    -cp "target/ltp4j-${version}.jar:examples" Console \
    --segment-model=ltp_data/cws.model \
    --postag-model=ltp_data/pos.model \
    --ner-model=ltp_data/ner.model \
    --parser-model=ltp_data/parser.model \
    --srl-model=ltp_data/pisrl.model

Eclipse
~~~~~~~

1. File -> New -> Java Project, 在Project name处填入ltp4jtest
2. 右键ltp4jtest项目下的src文件夹,在弹出菜单下选择New -> Class,Name处填入Console
3. 将examples/Console.java的内容填入Console中
4. 右键ltp4jtest项目,选择Properties,

   1. 选择 `Java Build Path`
   2. 选择 `Libraries` 选项卡
   3. 选择 `Add External JAR...` 选择编译出的ltp4j-$version.jar文件 [添加ltp4j.jar]
   4. 点击 > 箭头展开添加的ltp4j-$version.jar,在Native library location中选择C++代理程序的路径 [添加java.library.path]

如图所示:

.. image:: _static/eclipse.gif
 
参考: `How to set the java.library.path from Eclipse <http://stackoverflow.com/questions/957700/how-to-set-the-java-library-path-from-eclipse>`_

Intellij
~~~~~~~~

参考:`How to set the java.library.path in intelliJ Idea <http://stackoverflow.com/questions/19308010/how-to-set-the-java-library-path-in-intellij-idea>`_

常见问题
~~~~~~~~



================================================
FILE: examples/Console.java
================================================
import java.util.Scanner;
import java.util.ArrayList;
import java.util.List;
import edu.hit.ir.ltp4j.SplitSentence;
import edu.hit.ir.ltp4j.Segmentor;
import edu.hit.ir.ltp4j.Postagger;
import edu.hit.ir.ltp4j.NER;
import edu.hit.ir.ltp4j.Parser;
import edu.hit.ir.ltp4j.SRL;
import edu.hit.ir.ltp4j.Pair;

/**
 * Interactive command line example for ltp4j.
 *
 * Reads text line by line from stdin, splits each line into sentences and
 * prints the segmentation, POS tagging, NER, dependency parsing and SRL
 * results of every sentence to stdout. All model paths are supplied through
 * "--xxx-model=<path>" command line options.
 */
public class Console {
  private String segmentModel;
  private String postagModel;
  private String NERModel;
  private String parserModel;
  private String SRLModel;

  private SplitSentence sentenceSplitApp;
  private Segmentor     segmentorApp;
  private Postagger     postaggerApp;
  private NER           nerApp;
  private Parser        parserApp;
  private SRL           srlApp;

  /**
   * Extracts the value of a "--name=value" option.
   *
   * Everything after the prefix is taken verbatim, so paths that themselves
   * contain '=' are kept intact.
   *
   * @param arg    the raw command line argument, known to start with prefix
   * @param prefix the option name including the trailing '='
   * @return the non-empty option value
   * @throws IllegalArgumentException if nothing follows the prefix
   */
  private static String optionValue(String arg, String prefix) {
    String value = arg.substring(prefix.length());
    if (value.length() == 0) {
      throw new IllegalArgumentException("Missing value for option " + prefix);
    }
    return value;
  }

  /**
   * Parses the command line and creates the analysers from the given models.
   *
   * @param args the command line arguments
   * @return false if only the usage text was requested, true once every
   *         analyser has been created
   * @throws IllegalArgumentException on an unknown option, an option without
   *         a value, or a missing model path (the usage text is printed first
   *         and the exception message is empty in the latter case)
   */
  private boolean ParseArguments(String[] args) {
    for (int i = 0; i < args.length; ++ i) {
      String arg = args[i];
      if (arg.equals("--help") || arg.equals("-h")) {
        Usage();
        return false;
      } else if (arg.startsWith("--segment-model=")) {
        segmentModel = optionValue(arg, "--segment-model=");
      } else if (arg.startsWith("--postag-model=")) {
        postagModel = optionValue(arg, "--postag-model=");
      } else if (arg.startsWith("--ner-model=")) {
        NERModel = optionValue(arg, "--ner-model=");
      } else if (arg.startsWith("--parser-model=")) {
        parserModel = optionValue(arg, "--parser-model=");
      } else if (arg.startsWith("--srl-model=")) {
        SRLModel = optionValue(arg, "--srl-model=");
      } else {
        throw new IllegalArgumentException("Unknown options " + arg);
      }
    }

    if (segmentModel == null || postagModel == null || NERModel == null ||
        parserModel == null || SRLModel == null) {
      Usage();
      // The usage text already explains the problem; keep the message empty.
      throw new IllegalArgumentException("");
    }

    sentenceSplitApp = new SplitSentence();

    // NOTE(review): the results of create() are not checked here; confirm
    // how the analysers report a model that failed to load.
    segmentorApp = new Segmentor();
    segmentorApp.create(segmentModel);

    postaggerApp = new Postagger();
    postaggerApp.create(postagModel);

    nerApp = new NER();
    nerApp.create(NERModel);

    parserApp = new Parser();
    parserApp.create(parserModel);

    srlApp = new SRL();
    srlApp.create(SRLModel);

    return true;
  }

  /** Prints the command line usage to stderr. */
  public void Usage() {
    System.err.println("A command line example for ltp4j - The Java embedding of LTP");
    System.err.println("Sentences are inputted from stdin.");
    System.err.println("");
    System.err.println("Usage:");
    System.err.println("");
    System.err.println("  java -cp <jar-path> Console --segment-model=<path> \\");
    System.err.println("                              --postag-model=<path> \\");
    System.err.println("                              --ner-model=<path> \\");
    System.err.println("                              --parser-model=<path> \\");
    System.err.println("                              --srl-model=<path>");
  }

  /**
   * Concatenates the items of payload, separated by conjunction.
   *
   * @param payload     the items to join; may be null or empty
   * @param conjunction the separator placed between two items
   * @return the joined string, or "" for a null or empty payload
   */
  private String join(ArrayList<String> payload, String conjunction) {
    StringBuilder sb = new StringBuilder();
    if (payload == null || payload.size() == 0) {
      return "";
    }
    sb.append(payload.get(0));
    for (int i = 1; i < payload.size(); ++ i) {
      sb.append(conjunction).append(payload.get(i));
    }
    return sb.toString();
  }


  /**
   * Runs the whole pipeline on one line of input and prints every result.
   *
   * The line is first split into sentences; each sentence is then segmented,
   * POS tagged, NER tagged, parsed and finally passed to the SRL analyser.
   *
   * @param sent one line of raw text
   */
  public void Analyse(String sent) {
    ArrayList<String> sents = new ArrayList<String>();
    sentenceSplitApp.splitSentence(sent, sents);

    for(int m = 0; m < sents.size(); m++) {
      ArrayList<String> words = new ArrayList<String>();
      ArrayList<String> postags = new ArrayList<String>();
      ArrayList<String> ners = new ArrayList<String>();
      ArrayList<Integer> heads = new ArrayList<Integer>();
      ArrayList<String> deprels = new ArrayList<String>();
      List<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>> srls =
        new ArrayList<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>>();

      System.out.println("#" + (m + 1));
      System.out.println("Sentence       : " + sents.get(m));

      segmentorApp.segment(sents.get(m), words);
      System.out.println("Segment Result : " + join(words, "\t"));

      postaggerApp.postag(words, postags);
      System.out.print("Postag Result  : ");
      System.out.println(join(postags, "\t"));

      nerApp.recognize(words, postags, ners);
      System.out.print("NER Result     : ");
      System.out.println(join(ners, "\t"));

      parserApp.parse(words, postags, heads, deprels);
      // Build "head:relation" pairs; an empty parse yields an empty line
      // instead of an IndexOutOfBoundsException.
      StringBuilder sb = new StringBuilder();
      int arcs = Math.min(heads.size(), deprels.size());
      for(int i = 0; i < arcs; i++) {
        if (i > 0) {
          sb.append("\t");
        }
        sb.append(heads.get(i)).append(":").append(deprels.get(i));
      }
      System.out.print("Parse Result   : ");
      System.out.println(sb.toString());

      // Shift every head index down by one before handing it to the SRL
      // analyser (the project manual describes the two index ranges).
      for (int i = 0; i < heads.size(); i++) {
        heads.set(i, heads.get(i) - 1);
      }

      srlApp.srl(words,postags,heads,deprels,srls);

      System.out.print("SRL Result     : ");
      if (srls.size() == 0) {
        System.out.println();
      }
      for (int i = 0; i < srls.size(); i++) {
        System.out.print(srls.get(i).first + " ->");
        for (int j = 0; j < srls.get(i).second.size(); j++) {
          System.out.print(srls.get(i).second.get(j).first
              + ": beg = " + srls.get(i).second.get(j).second.first
              + " end = " + srls.get(i).second.get(j).second.second + ";");
        }
        System.out.println();
      }
    }
  }

  /** Releases every analyser created by ParseArguments. */
  public void release(){
    segmentorApp.release();
    postaggerApp.release();
    nerApp.release();
    parserApp.release();
    srlApp.release();
  }

  /**
   * Entry point: parses the options, then analyses stdin line by line until
   * end of input.
   *
   * @param args the command line arguments, see Usage()
   */
  public static void main(String[] args) {
    Console console = new Console();

    try {
      if (!console.ParseArguments(args)) {
        return;
      }
    } catch (IllegalArgumentException e) {
      // Tell the user what was wrong instead of exiting silently. An empty
      // message means the usage text has already been printed.
      String message = e.getMessage();
      if (message != null && message.length() > 0) {
        System.err.println(message);
      }
      return;
    }

    Scanner input = new Scanner(System.in);
    try {
      System.out.print(">>> ");
      // hasNextLine() ends the loop cleanly at end of input.
      while (input.hasNextLine()) {
        String sent = input.nextLine();
        if (sent.length() > 0) {
          console.Analyse(sent);
        }
        System.out.print(">>> ");
      }
    } catch (Exception e) {
      // Keep the best-effort behaviour of ending the session, but report why.
      System.err.println("Analysis aborted: " + e);
    } finally {
      // Always release the analysers, whether input ended or analysis failed.
      console.release();
    }
  }
}


================================================
FILE: examples/example
================================================
中国进出口银行与中国银行加强合作


================================================
FILE: pom.xml
================================================
<project xmlns="http://maven.apache.org/POM/4.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
  http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>edu.hit.ir.ltp4j</groupId>
  <artifactId>ltp4j</artifactId>
  <version>0.1.0-SNAPSHOT</version>
  <packaging>nar</packaging>
  <name>hitscir-ltp4j</name>
  <description>Language Technology Platform for Java</description>
  <url>http://github.com/HIT-SCIR/ltp4j</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <skipTests>true</skipTests>
  </properties>

  <licenses>
    <license>
      <name>The Apache License, Version 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
    </license>
  </licenses>

  <distributionManagement>
    <snapshotRepository>
      <id>sonatype-nexus-snapshots</id>
      <name>Sonatype Nexus Snapshots</name>
      <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
    </snapshotRepository>
    <repository>
      <id>sonatype-nexus-staging</id>
      <name>Nexus Release Repository</name>
      <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
    </repository>
  </distributionManagement>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.7</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <defaultGoal>install</defaultGoal>
    <plugins>
      <plugin>
        <artifactId>maven-dependency-plugin</artifactId>
      </plugin>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
      </plugin>
      <plugin>
        <groupId>com.github.maven-nar</groupId>
        <artifactId>nar-maven-plugin</artifactId>
        <version>3.5.0</version>
        <extensions>true</extensions>
        <configuration>
          <!--<linker>-->
            <!--<libs>-->
              <!--<lib>-->
                <!--<name>boost_program_options</name>-->
                <!--<type>static</type>-->
                <!--<directory>src/main/c++/ltp/lib</directory>-->
              <!--</lib>-->
              <!--<lib>-->
                <!--<name>boost_regex</name>-->
                <!--<type>static</type>-->
                <!--<directory>src/main/c++/ltp/lib</directory>-->
              <!--</lib>-->
              <!--<lib>-->
                <!--<name>boost_serialization</name>-->
                <!--<type>static</type>-->
                <!--<directory>src/main/c++/ltp/lib</directory>-->
              <!--</lib>-->
              <!--<lib>-->
                <!--<name>boost_smart_ptr</name>-->
                <!--<type>static</type>-->
                <!--<directory>src/main/c++/ltp/lib</directory>-->
              <!--</lib>-->
            <!--</libs>-->
          <!--</linker>-->
          <cpp>
            <sourceDirectory>src/main/c++</sourceDirectory>
            <excludes>
              <exclude>ltp/examples/*</exclude>
              <exclude>ltp/src/console/*</exclude>
              <exclude>ltp/src/ltp/*</exclude>
              <exclude>ltp/src/segmentor/io.cpp</exclude>
              <exclude>ltp/src/segmentor/otcws.cpp</exclude>
              <exclude>ltp/src/segmentor/segmentor_frontend.cpp</exclude>
              <exclude>ltp/src/segmentor/customized_segmentor_frontend.cpp</exclude>
              <exclude>ltp/src/postagger/io.cpp</exclude>
              <exclude>ltp/src/postagger/otpos.cpp</exclude>
              <exclude>ltp/src/postagger/postagger_frontend.cpp</exclude>
              <exclude>ltp/src/ner/io.cpp</exclude>
              <exclude>ltp/src/ner/otner.cpp</exclude>
              <exclude>ltp/src/ner/ner_frontend.cpp</exclude>
              <exclude>ltp/src/parser/*</exclude>
              <exclude>ltp/src/parser.n/io.cpp</exclude>
              <exclude>ltp/src/parser.n/main.cpp</exclude>
              <exclude>ltp/src/parser.n/parser_frontend.cpp</exclude>
              <exclude>ltp/src/srl/**/process/*</exclude>
              <exclude>ltp/src/srl/*/pred.cpp</exclude>
              <exclude>ltp/src/srl/*/train.cpp</exclude>
              <exclude>ltp/src/srl/tool/merge.cpp</exclude>
              <exclude>ltp/src/server/*</exclude>
              <exclude>ltp/src/xml4nlp/*</exclude>
              <exclude>ltp/src/unittest/*</exclude>
              <exclude>ltp/thirdparty/eigen/unsupported/**/*</exclude>
              <exclude>ltp/thirdparty/dynet/dynet/cuda.cc</exclude>
              <exclude>ltp/thirdparty/tinyxml/*</exclude>
              <exclude>ltp/thirdparty/tinythreadpp/*</exclude>
              <exclude>ltp/thirdparty/maxent/train.cpp</exclude>
              <exclude>ltp/thirdparty/maxent/predict.cpp</exclude>
              <exclude>ltp/thirdparty/gtest/**/*</exclude>
            </excludes>
            <includePaths>
              <path>src/main/c++/ltp/src</path>
              <path>src/main/c++/ltp/src/srl</path>
              <path>src/main/c++/ltp/src/srl/common</path>
              <path>src/main/c++/ltp/src/srl/include</path>
              <path>src/main/c++/ltp/src/utils</path>
              <path>src/main/c++/ltp/thirdparty/boost/include</path>
              <path>src/main/c++/ltp/thirdparty/maxent</path>
              <path>src/main/c++/ltp/thirdparty/eigen</path>
              <path>src/main/c++/ltp/thirdparty/dynet</path>
              <path>src/main/c++/ltp/thirdparty/jsoncpp/include</path>
            </includePaths>
            <debug>false</debug>
            <options>
              <option>-std=c++0x</option>
              <option>-w</option>
              <!--<option>-Wall</option>
              <option>-Wno-sign-conversion</option>
              <option>-Wno-unused-variable</option>
              <option>-Wno-conversion</option>
              <option>-Wno-shorten-64-to-32</option>
              <option>-Wno-reorder</option>-->
              <!--<option>-v</option>-->
              <!--<option>-Wno-unknown-pragmas</option>-->
            </options>
          </cpp>
          <c>
            <excludes>
              <exclude>**/*</exclude>
            </excludes>
          </c>
          <libraries>
            <library>
              <type>jni</type>
              <narSystemPackage>edu.hit.ir.ltp4j</narSystemPackage>
              <linkCPP>false</linkCPP>
            </library>
          </libraries>
          <javah>
            <includes>
              <include></include>
            </includes>
          </javah>
          <!-- Native test declarations for the NAR plugin.
               NOTE(review): one <test> entry carries five <name> elements;
               presumably only a single name is honoured per <test>. Confirm
               against the nar-maven-plugin configuration reference and split
               into one <test> per name if all five are intended. -->
          <tests>
            <test>
              <name>Segmentor</name>
              <name>Postagger</name>
              <name>NER</name>
              <name>Parser</name>
              <name>SRL</name>
            </test>
          </tests>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: src/main/c++/edu_hit_ir_ltp4j_NER.cpp
================================================
#include "ner/ner_dll.h"
#include "edu_hit_ir_ltp4j_NER.h"
#include "string_to_jstring.hpp"
#include <string>
#include <vector>
#include <jni.h>

static void * ner = NULL;

/**
 * JNI entry point for NER.create(String): (re)loads the named-entity
 * recognizer model found at `model_path` into the file-level `ner` handle.
 *
 * A recognizer loaded by an earlier call is released first, so the call can
 * also be used to switch models.
 *
 * @param env        JNI environment of the calling thread.
 * @param obj        the Java NER instance (unused).
 * @param model_path path of the model file.
 * @return 1 when a recognizer was created, -1 otherwise (including a null
 *         path or a failure to obtain the path characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_create
  (JNIEnv * env, jobject obj, jstring model_path) {
  // A null jstring must never reach GetStringUTFChars.
  if (model_path == NULL) {
    return -1;
  }

  const char * path = env->GetStringUTFChars(model_path, NULL);
  if (path == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return -1;
  }

  // Drop the previously loaded model (if any) before loading the new one.
  if (ner) {
    ner_release_recognizer(ner);
    ner = NULL;
  }
  ner = ner_create_recognizer(path);

  env->ReleaseStringUTFChars(model_path, path);
  return ner ? 1 : -1;
}

JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_recognize
  (JNIEnv * env, jobject obj, jobject array_words, jobject array_tags, jobject array_ners) {

  jclass array_list = env->GetObjectClass(array_words);

  jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z");
  jmethodID list_get = env->GetMethodID(array_list,"get","(I)Ljava/lang/Object;");
  jmethodID list_size = env->GetMethodID(array_list,"size","()I");

  std::vector<std::string> words,tags,ners;

  int size_words = env->CallIntMethod(array_words,list_size);
  int size_tags = env->CallIntMethod(array_tags,list_size);

  if (size_words!=size_tags) {
    return 0;
  }

  for (int i = 0; i < size_words; i++) {
    jobject tmp = env->CallObjectMethod(array_words,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    words.push_back(s_s);
    env->ReleaseStringUTFChars( s, st);
  }

  for (int i = 0; i < size_tags; i++) {
    jobject tmp = env->CallObjectMethod(array_tags,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    tags.push_back(s_s);
    env->ReleaseStringUTFChars( s, st); 
  }

  int len = ner_recognize(ner,words,tags,ners);

  for (int i = 0; i < len; i++) {
    jobject tmp = stringToJstring(env,ners[i].c_str());
    env->CallBooleanMethod(array_ners,list_add,tmp);
  }
  return len;
}

/**
 * JNI entry point for NER.release(): frees the loaded recognizer.
 *
 * Calling it before create() or more than once is a no-op, so a NULL handle
 * is never passed to the native library.
 */
JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_NER_release
  (JNIEnv * env, jobject obj) {
  if (ner) {
    ner_release_recognizer(ner);
    ner = NULL;
  }
}




================================================
FILE: src/main/c++/edu_hit_ir_ltp4j_Parser.cpp
================================================
#include "edu_hit_ir_ltp4j_Parser.h"
#include "parser/parser_dll.h"
#include "string_to_jstring.hpp"
#include <vector>
#include <string>
#include <iostream>
#include <jni.h>

static void * parser = NULL;

/**
 * JNI entry point for Parser.create(String): (re)loads the dependency parser
 * model found at `model_path` into the file-level `parser` handle.
 *
 * A parser loaded by an earlier call is released and replaced, matching the
 * behaviour of the NER, Postagger and Segmentor create functions. Previously
 * a second call silently kept the old model and still reported success.
 *
 * @param env        JNI environment of the calling thread.
 * @param obj        the Java Parser instance (unused).
 * @param model_path path of the model file.
 * @return 1 when a parser was created, -1 otherwise (including a null path or
 *         a failure to obtain the path characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_create
  (JNIEnv * env, jobject obj, jstring model_path) {

  // A null jstring must never reach GetStringUTFChars.
  if (model_path == NULL) {
    return -1;
  }

  const char * path = env->GetStringUTFChars(model_path, NULL);
  if (path == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return -1;
  }

  // Replace any previously loaded model instead of keeping it.
  if (parser) {
    parser_release_parser(parser);
    parser = NULL;
  }
  parser = parser_create_parser(path);

  env->ReleaseStringUTFChars(model_path, path);

  return parser ? 1 : -1;
}

JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse
  (JNIEnv * env, jobject obj, jobject array_words, jobject array_tags, jobject array_heads, jobject array_deprels) {

  jclass array_list = env->GetObjectClass(array_words);
  jclass integer = env->FindClass("java/lang/Integer");

  jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z");
  jmethodID list_get = env->GetMethodID(array_list, "get", "(I)Ljava/lang/Object;");
  jmethodID list_size = env->GetMethodID(array_list, "size", "()I");
  jmethodID integer_init =env->GetMethodID(integer, "<init>", "(I)V");

  std::vector<std::string> words,tags,deprels;
  std::vector<int> heads;

  int size_words = env->CallIntMethod(array_words,list_size);
  int size_tags = env->CallIntMethod(array_tags,list_size);

  if (size_words!=size_tags) {
    return -1;
  }

  for (int i = 0; i < size_words; i++) {
    jobject tmp = env->CallObjectMethod(array_words,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    words.push_back(s_s);
    env->ReleaseStringUTFChars( s, st);
  }

  for (int i = 0; i < size_tags; i++) {
    jobject tmp = env->CallObjectMethod(array_tags,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    tags.push_back(s_s);
    env->ReleaseStringUTFChars( s, st); 
  }

  int len = parser_parse(parser,words,tags,heads,deprels);

  if (len < 0) {
    return -1;
  }

  size_t size = heads.size();
  for (size_t i = 0; i<size; i++){
    jobject integer_object = env->NewObject(integer,integer_init,heads.at(i));
    env->CallBooleanMethod(array_heads,list_add, integer_object);
  }

  for (size_t i = 0;i<size;i++){
    jobject tmp =  stringToJstring(env,deprels[i].c_str());
    env->CallBooleanMethod(array_deprels,list_add,tmp);
  }

  return len;
}

/**
 * JNI entry point for Parser.release(): frees the loaded parser.
 *
 * Calling it before create() or more than once is a no-op, so a NULL handle
 * is never passed to the native library.
 */
JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Parser_release
  (JNIEnv * env, jobject obj) {
  if (parser) {
    parser_release_parser(parser);
    parser = NULL;
  }
}



================================================
FILE: src/main/c++/edu_hit_ir_ltp4j_Postagger.cpp
================================================
#include "postagger/postag_dll.h"
#include "edu_hit_ir_ltp4j_Postagger.h"
#include "string_to_jstring.hpp"
#include <vector>
#include <string>
#include <iostream>
#include <jni.h>

static void * postagger = NULL;

/**
 * JNI entry point for Postagger.create(String): (re)loads the part-of-speech
 * tagger model found at `model_path` into the file-level `postagger` handle.
 *
 * A tagger loaded by an earlier call is released first.
 *
 * @param env        JNI environment of the calling thread.
 * @param obj        the Java Postagger instance (unused).
 * @param model_path path of the model file.
 * @return 1 when a tagger was created, -1 otherwise (including a null path or
 *         a failure to obtain the path characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String_2
  (JNIEnv * env, jobject obj, jstring model_path) {
  // A null jstring must never reach GetStringUTFChars.
  if (model_path == NULL) {
    return -1;
  }

  const char * path = env->GetStringUTFChars(model_path, NULL);
  if (path == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return -1;
  }

  // Drop the previously loaded model (if any) before loading the new one.
  if (postagger) {
    postagger_release_postagger(postagger);
    postagger = NULL;
  }
  postagger = postagger_create_postagger(path);

  env->ReleaseStringUTFChars(model_path, path);

  return postagger ? 1 : -1;
}

/**
 * JNI entry point for Postagger.create(String, String): (re)loads the tagger
 * from a model file together with an external lexicon file.
 *
 * A tagger loaded by an earlier call is released first.
 *
 * @param env          JNI environment of the calling thread.
 * @param obj          the Java Postagger instance (unused).
 * @param model_path   path of the model file.
 * @param lexicon_path path of the lexicon file.
 * @return 1 when a tagger was created, -1 otherwise (including a null path or
 *         a failure to obtain either path's characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String_2Ljava_lang_String_2
  (JNIEnv * env, jobject obj, jstring model_path, jstring lexicon_path) {

  // Null jstrings must never reach GetStringUTFChars.
  if (model_path == NULL || lexicon_path == NULL) {
    return -1;
  }

  const char * model = env->GetStringUTFChars(model_path, NULL);
  if (model == NULL) {
    return -1;
  }
  const char * lexicon = env->GetStringUTFChars(lexicon_path, NULL);
  if (lexicon == NULL) {
    // Do not leak the first string when the second one cannot be obtained.
    env->ReleaseStringUTFChars(model_path, model);
    return -1;
  }

  // Drop the previously loaded model (if any) before loading the new one.
  if (postagger) {
    postagger_release_postagger(postagger);
    postagger = NULL;
  }
  postagger = postagger_create_postagger(model, lexicon);

  env->ReleaseStringUTFChars(model_path, model);
  env->ReleaseStringUTFChars(lexicon_path, lexicon);

  return postagger ? 1 : -1;
}


/**
 * JNI entry point for Postagger.postag(words, tags): runs the loaded tagger
 * and appends one tag per word to `array_postags`.
 *
 * @param env           JNI environment of the calling thread.
 * @param obj           the Java Postagger instance (unused).
 * @param array_words   java.util.List<String> of words.
 * @param array_postags java.util.List<String> that receives the tags.
 * @return the value returned by postagger_postag, or -1 when the inputs are
 *         unusable (no model loaded, null list, null element).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_postag
  (JNIEnv * env, jobject obj, jobject array_words, jobject array_postags) {
  if (!postagger || !array_words || !array_postags) {
    return -1;
  }

  // Resolve the methods on the List interface itself: the input and output
  // lists may be instances of different List implementations.
  jclass list_class = env->FindClass("java/util/List");
  if (!list_class) return -1;
  jmethodID list_add = env->GetMethodID(list_class, "add", "(Ljava/lang/Object;)Z");
  if (!list_add) return -1;
  jmethodID list_get = env->GetMethodID(list_class, "get", "(I)Ljava/lang/Object;");
  if (!list_get) return -1;
  jmethodID list_size = env->GetMethodID(list_class, "size", "()I");
  if (!list_size) return -1;

  std::vector<std::string> words, postags;

  int size = env->CallIntMethod(array_words, list_size);
  for (int i = 0; i < size; i++) {
    jstring word = static_cast<jstring>(env->CallObjectMethod(array_words, list_get, i));
    if (word == NULL) {
      return -1;
    }
    const char * chars = env->GetStringUTFChars(word, NULL);
    if (chars == NULL) {
      return -1;
    }
    words.push_back(chars);
    env->ReleaseStringUTFChars(word, chars);
    // Release the local reference right away so long inputs cannot exhaust
    // the local reference slots of this native call.
    env->DeleteLocalRef(word);
  }

  int len = postagger_postag(postagger, words, postags);

  // Never read past what the tagger actually produced.
  for (int i = 0; i < len && i < (int)postags.size(); i++) {
    jstring tag = stringToJstring(env, postags[i].c_str());
    if (!tag) {
      break;
    }
    env->CallBooleanMethod(array_postags, list_add, tag);
    env->DeleteLocalRef(tag);
  }

  return len;
}

/**
 * JNI entry point for Postagger.release(): frees the loaded tagger.
 *
 * Calling it before create() or more than once is a no-op, so a NULL handle
 * is never passed to the native library.
 */
JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Postagger_release
  (JNIEnv * env, jobject obj) {
  if (postagger) {
    postagger_release_postagger(postagger);
    postagger = NULL;
  }
}





================================================
FILE: src/main/c++/edu_hit_ir_ltp4j_SRL.cpp
================================================
#include "edu_hit_ir_ltp4j_SRL.h"
#include "srl/SRL_DLL.h"
#include "string_to_jstring.hpp"
#include <vector>
#include <string>
#include <iostream>
#include <jni.h>

/**
 * JNI entry point for SRL.create(String): loads the semantic role labelling
 * resources from `model_path` via srl_load_resource.
 *
 * @param env        JNI environment of the calling thread.
 * @param obj        the Java SRL instance (unused).
 * @param model_path path handed to srl_load_resource.
 * @return 1 when srl_load_resource reports 0, -1 otherwise (including a null
 *         path or a failure to obtain the path characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_create
  (JNIEnv * env, jobject obj, jstring model_path){
  // A null jstring must never reach GetStringUTFChars.
  if (model_path == NULL) {
    return -1;
  }

  const char * chars = env->GetStringUTFChars(model_path, NULL);
  if (chars == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return -1;
  }

  // Copy first so the JVM buffer can be released before the (slow) load.
  const std::string path(chars);
  env->ReleaseStringUTFChars(model_path, chars);

  return srl_load_resource(path) == 0 ? 1 : -1;
}

JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_srl
(JNIEnv * env, jclass obj, jobject array_words, jobject array_tags, jobject array_heads, jobject array_deprels, jobject srl_result){
  jclass array_list = env->GetObjectClass(array_words);
  jmethodID list_construct = env->GetMethodID(array_list,"<init>","()V");
  jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z");
  jmethodID list_get = env->GetMethodID(array_list, "get", "(I)Ljava/lang/Object;");
  jmethodID list_size = env->GetMethodID(array_list, "size", "()I");

  jclass integer = env->FindClass("java/lang/Integer");
  jmethodID integer_construct =env->GetMethodID(integer,"<init>","(I)V");
  jmethodID integer_int =env->GetMethodID(integer,"intValue","()I");

  jclass pair = env->FindClass("edu/hit/ir/ltp4j/Pair");
  jmethodID pair_construct = env->GetMethodID(pair, "<init>",
      "(Ljava/lang/Object;Ljava/lang/Object;)V");

  std::vector<std::string> words,tags,deprels;
  std::vector<int> heads;
  std::vector<std::pair<int,std::string> > parsers;
  std::vector<
    std::pair<
      int,
      std::vector<
        std::pair<std::string, std::pair< int, int > >
      >
    >
  > srls;

  unsigned size_words = env->CallIntMethod(array_words, list_size);
  for(unsigned i = 0; i < size_words; i++){
    jobject tmp = env->CallObjectMethod(array_words,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    words.push_back(s_s);
    env->ReleaseStringUTFChars( s, st);
  }

  unsigned size_tags = env->CallIntMethod(array_tags, list_size);
  for(unsigned i = 0;i<size_tags;i++){
    jobject tmp = env->CallObjectMethod(array_tags,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    tags.push_back(s_s);
    env->ReleaseStringUTFChars( s, st); 
  }


  unsigned size_heads = env->CallIntMethod(array_heads,list_size);
  for(unsigned i = 0; i < size_heads; i++){
    jobject tmp = env->CallObjectMethod(array_heads,list_get,i);
    int digit = env->CallIntMethod(tmp,integer_int);
    heads.push_back(digit);
  }

  unsigned size_deprels = env->CallIntMethod(array_deprels,list_size);
  for(unsigned i = 0;i<size_deprels;i++){
    jobject tmp = env->CallObjectMethod(array_deprels,list_get,i);
    jstring s = reinterpret_cast<jstring> (tmp);
    const char * st = env->GetStringUTFChars(s,0);
    std::string s_s(st);
    deprels.push_back(s_s);
    env->ReleaseStringUTFChars( s, st); 
  }

  for(unsigned i = 0; i < size_heads; i++){
    parsers.push_back(make_pair(heads.at(i),deprels.at(i)));
  }

  int len = srl_dosrl(words,tags,parsers,srls);

  if(len<0)
    return -1;

  for(unsigned i = 0;i<srls.size();++i) {
    jobject trigger = env->NewObject(integer,integer_construct,srls[i].first);
    jobject args = env->NewObject(array_list,list_construct);

    for(unsigned j = 0;j<srls[i].second.size();++j) {
      jobject start = env->NewObject(integer,integer_construct,srls[i].second[j].second.first);
      jobject end = env->NewObject(integer,integer_construct,srls[i].second[j].second.second);

      jobject bound = env->NewObject(pair,pair_construct,start,end);

      jobject rels =  stringToJstring(env,srls[i].second[j].first.c_str());
      jobject inner = env->NewObject(pair,pair_construct,rels,bound);
      env->CallBooleanMethod(args,list_add,inner);
    }

    jobject outer = env->NewObject(pair,pair_construct,trigger,args);
    env->CallBooleanMethod(srl_result,list_add,outer);
  }

  return (int)srls.size();
}

/**
 * JNI entry point for SRL.release(): delegates to srl_release_resource().
 * Neither argument is used.
 */
JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SRL_release(JNIEnv *, jobject)
{
  srl_release_resource();
}



================================================
FILE: src/main/c++/edu_hit_ir_ltp4j_Segmentor.cpp
================================================
#include "segmentor/segment_dll.h"
#include "edu_hit_ir_ltp4j_Segmentor.h"
#include "string_to_jstring.hpp"
#include <iostream>
#include <string>
#include <vector>

static void* segmentor = NULL;

/**
 * JNI entry point for Segmentor.create(String): (re)loads the word segmentor
 * model found at `model_path` into the file-level `segmentor` handle.
 *
 * A segmentor loaded by an earlier call is released first.
 *
 * @param env        JNI environment of the calling thread.
 * @param obj        the Java Segmentor instance (unused).
 * @param model_path path of the model file.
 * @return 1 when a segmentor was created, -1 otherwise (including a null
 *         path or a failure to obtain the path characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String_2
  (JNIEnv* env, jobject obj, jstring model_path) {
  // A null jstring must never reach GetStringUTFChars.
  if (model_path == NULL) {
    return -1;
  }

  const char* path = env->GetStringUTFChars(model_path, NULL);
  if (path == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return -1;
  }

  // Drop the previously loaded model (if any) before loading the new one.
  if (segmentor) {
    segmentor_release_segmentor(segmentor);
    segmentor = NULL;
  }
  segmentor = segmentor_create_segmentor(path);

  env->ReleaseStringUTFChars(model_path, path);

  return segmentor ? 1 : -1;
}

/**
 * JNI entry point for Segmentor.create(String, String): (re)loads the
 * segmentor from a model file together with an external lexicon file.
 *
 * A segmentor loaded by an earlier call is released first.
 *
 * @param env          JNI environment of the calling thread.
 * @param obj          the Java Segmentor instance (unused).
 * @param model_path   path of the model file.
 * @param lexicon_path path of the lexicon file.
 * @return 1 when a segmentor was created, -1 otherwise (including a null
 *         path or a failure to obtain either path's characters).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String_2Ljava_lang_String_2
  (JNIEnv* env, jobject obj, jstring model_path, jstring lexicon_path) {

  // Null jstrings must never reach GetStringUTFChars.
  if (model_path == NULL || lexicon_path == NULL) {
    return -1;
  }

  const char* model = env->GetStringUTFChars(model_path, NULL);
  if (model == NULL) {
    return -1;
  }
  const char* lexicon = env->GetStringUTFChars(lexicon_path, NULL);
  if (lexicon == NULL) {
    // Do not leak the first string when the second one cannot be obtained.
    env->ReleaseStringUTFChars(model_path, model);
    return -1;
  }

  // Drop the previously loaded model (if any) before loading the new one.
  if (segmentor) {
    segmentor_release_segmentor(segmentor);
    segmentor = NULL;
  }
  segmentor = segmentor_create_segmentor(model, lexicon);

  env->ReleaseStringUTFChars(model_path, model);
  env->ReleaseStringUTFChars(lexicon_path, lexicon);

  return segmentor ? 1 : -1;
}

/**
 * JNI entry point for Segmentor.segment(sent, words): segments `sent` with
 * the loaded model and appends each word to `array_words`.
 *
 * @param env         JNI environment of the calling thread.
 * @param obj         the Java Segmentor instance (unused).
 * @param sent        sentence to segment.
 * @param array_words java.util.List<String> that receives the words.
 * @return the value returned by segmentor_segment, or -1 when the inputs are
 *         unusable (no model loaded, null sentence or list).
 */
JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_segment
  (JNIEnv* env, jobject obj, jstring sent, jobject array_words) {

  if (!segmentor || !sent || !array_words) {
    return -1;
  }

  jclass array_list = env->GetObjectClass(array_words);
  jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z");
  if (!list_add) {
    return -1;
  }

  const char* chars = env->GetStringUTFChars(sent, NULL);
  if (chars == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return -1;
  }
  // Copy first so the JVM buffer can be released before segmentation runs.
  const std::string sentence(chars);
  env->ReleaseStringUTFChars(sent, chars);

  std::vector<std::string> words;
  int len = segmentor_segment(segmentor, sentence, words);

  // Never read past what the segmentor actually produced.
  for (int i = 0; i < len && i < (int)words.size(); i++) {
    jstring word = stringToJstring(env, words[i].c_str());
    if (!word) {
      break;
    }
    env->CallBooleanMethod(array_words, list_add, word);
    // Release per word so long sentences cannot exhaust the local reference
    // slots of this native call.
    env->DeleteLocalRef(word);
  }
  return len;
}

/**
 * JNI entry point for Segmentor.release(): frees the loaded segmentor.
 *
 * Calling it before create() or more than once is a no-op, so a NULL handle
 * is never passed to the native library.
 */
JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Segmentor_release
  (JNIEnv* env, jobject obj) {
  if (segmentor) {
    segmentor_release_segmentor(segmentor);
    segmentor = NULL;
  }
}




================================================
FILE: src/main/c++/edu_hit_ir_ltp4j_SplitSentence.cpp
================================================
#include "edu_hit_ir_ltp4j_SplitSentence.h"
#include "splitsnt/SplitSentence.h"
#include "string_to_jstring.hpp"
#include <string>
#include <iostream>
#include <vector>
#include <jni.h>

using namespace std;

/**
 * JNI entry point for SplitSentence.splitSentence(sent, sents): splits `sent`
 * with SplitSentence() and appends every resulting sentence to `array_sents`.
 *
 * Does nothing when either argument is null or the text cannot be obtained.
 *
 * @param env         JNI environment of the calling thread.
 * @param obj         the Java SplitSentence instance (unused).
 * @param sent        text to split.
 * @param array_sents java.util.List<String> that receives the sentences.
 */
JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SplitSentence_splitSentence
  (JNIEnv * env, jobject obj, jstring sent, jobject array_sents){
  if (!sent || !array_sents) {
    return;
  }

  jclass array_list = env->GetObjectClass(array_sents);
  jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z");
  if (!list_add) {
    return;
  }

  const char * chars = env->GetStringUTFChars(sent, NULL);
  if (chars == NULL) {
    // The JVM could not hand out the characters; an exception is pending.
    return;
  }
  // Copy first so the JVM buffer can be released before splitting.
  const std::string text(chars);
  env->ReleaseStringUTFChars(sent, chars);

  std::vector<std::string> sents;
  SplitSentence(text, sents);

  for (size_t i = 0; i < sents.size(); i++) {
    jstring piece = stringToJstring(env, sents[i].c_str());
    if (!piece) {
      return;
    }
    env->CallBooleanMethod(array_sents, list_add, piece);
    // Release per sentence so long documents cannot exhaust the local
    // reference slots of this native call.
    env->DeleteLocalRef(piece);
  }
}


================================================
FILE: src/main/c++/string_to_jstring.hpp
================================================
#include <cstdlib>
#include <cstring>
#include <jni.h>

/**
 * Builds a java.lang.String from a NUL-terminated byte sequence by calling
 * `new String(byte[], "utf-8")` on the Java side.
 *
 * @param env JNI environment of the calling thread.
 * @param pat NUL-terminated bytes to decode; may be NULL.
 * @return a new local reference to the string, or NULL when `pat` is NULL or
 *         any JNI step fails (a Java exception may then be pending).
 */
inline jstring stringToJstring(JNIEnv* env, const char* pat) {
  if (pat == NULL) {
    return NULL;
  }

  // FindClass expects the plain class name; the descriptor form
  // "Ljava/lang/String;" is not a valid argument.
  jclass strClass = env->FindClass("java/lang/String");
  if (strClass == NULL) {
    return NULL;
  }

  const jsize length = (jsize)strlen(pat);
  jbyteArray bytes = NULL;
  jstring encoding = NULL;
  jstring result = NULL;

  // Each step runs only if the previous one succeeded, so no JNI call that
  // may raise is issued while an exception is already pending.
  jmethodID ctorID = env->GetMethodID(strClass, "<init>", "([BLjava/lang/String;)V");
  if (ctorID != NULL) {
    bytes = env->NewByteArray(length);
  }
  if (bytes != NULL) {
    env->SetByteArrayRegion(bytes, 0, length, reinterpret_cast<const jbyte*>(pat));
    encoding = env->NewStringUTF("utf-8");
  }
  if (encoding != NULL) {
    result = (jstring)env->NewObject(strClass, ctorID, bytes, encoding);
  }

  // This helper is called once per token inside loops; free the temporaries
  // so callers do not accumulate three local references per call.
  if (encoding != NULL) {
    env->DeleteLocalRef(encoding);
  }
  if (bytes != NULL) {
    env->DeleteLocalRef(bytes);
  }
  env->DeleteLocalRef(strClass);

  return result;
}


================================================
FILE: src/main/java/edu/hit/ir/ltp4j/NER.java
================================================
package edu.hit.ir.ltp4j;
import java.util.List;

/**
 * Java binding for the native named-entity recognizer.
 *
 * The native side keeps the loaded model in a single process-wide handle, so
 * all instances of this class share the same model.
 */
public class NER {
  // Loads the native library once, when the class is initialized.
  static {
    NarSystem.loadLibrary();
  }

  /**
   * Loads the recognizer model, replacing any model loaded earlier.
   *
   * @param modelPath path of the model file
   * @return 1 when the model was loaded, -1 otherwise
   */
  public final native int create(String modelPath);

  /**
   * Recognizes named entities and appends the resulting tags to {@code ners}.
   *
   * @param words   the words of the sentence
   * @param postags part-of-speech tags, one per word
   * @param ners    output list that receives the tags
   * @return the value reported by the native recognizer; 0 when
   *         {@code words} and {@code postags} differ in length
   */
  public final native int recognize(List<String> words,
      List<String> postags, List<String> ners);

  /** Releases the natively loaded model. */
  public final native void release();
}



================================================
FILE: src/main/java/edu/hit/ir/ltp4j/Pair.java
================================================
package edu.hit.ir.ltp4j;
public class Pair<F, S> {
  public final F first;
  public final S second;

  /**
   * Constructor for a Pair.
   * 
   * @param first
   *            the first object in the Pair
   * @param second
   *            the second object in the pair
   */
  public Pair(F first, S second) {
    this.first = first;
    this.second = second;
  }

  /**
   * Compute a hash code using the hash codes of the underlying objects
   * 
   * @return a hashcode of the Pair
   */
  @Override
  public int hashCode() {
    return (first == null ? 0 : first.hashCode())
      ^ (second == null ? 0 : second.hashCode());
  }

  public boolean equals(Object o) {
    if (!(o instanceof Pair)) {
      return false;
    }
    Pair<?, ?> p = (Pair<?, ?>) o;
    return (p.first.equals(first)) && (p.second.equals(second));
  }

  /**
   * Convenience method for creating an appropriately typed pair.
   * 
   * @param a
   *            the first object in the Pair
   * @param b
   *            the second object in the pair
   * @return a Pair that is templatized with the types of a and b
   */
  public static <A, B> Pair<A, B> create(A a, B b) {
    return new Pair<A, B>(a, b);
  }
}


================================================
FILE: src/main/java/edu/hit/ir/ltp4j/Parser.java
================================================
package edu.hit.ir.ltp4j;
import java.util.List;

/**
 * Java binding for the native dependency parser.
 *
 * The native side keeps the loaded model in a single process-wide handle, so
 * all instances of this class share the same model.
 */
public class Parser {
  // Loads the native library once, when the class is initialized.
  static {
    NarSystem.loadLibrary();
  }

  /**
   * Loads the parser model.
   *
   * @param modelPath path of the model file
   * @return 1 when a parser is available after the call, -1 otherwise
   */
  public final native int create(String modelPath);

  /**
   * Parses a tagged sentence and appends the result to the output lists.
   *
   * @param words   the words of the sentence
   * @param tags    part-of-speech tags, one per word
   * @param heads   output list that receives the head of each word
   * @param deprels output list that receives the relation of each word
   * @return the value reported by the native parser; -1 when {@code words}
   *         and {@code tags} differ in length or the parser reports a
   *         negative result
   */
  public final native int parse(List<String> words,
      List<String> tags, List<Integer> heads,
      List<String> deprels);

  /** Releases the natively loaded model. */
  public final native void release();
}



================================================
FILE: src/main/java/edu/hit/ir/ltp4j/Postagger.java
================================================
package edu.hit.ir.ltp4j;
import java.util.List;

/**
 * Java binding for the native part-of-speech tagger.
 *
 * The native side keeps the loaded model in a single process-wide handle, so
 * all instances of this class share the same model.
 */
public class Postagger {
  // Loads the native library once, when the class is initialized.
  static {
    NarSystem.loadLibrary();
  }

  /**
   * Loads the tagger model, replacing any model loaded earlier.
   *
   * @param modelPath path of the model file
   * @return 1 when the model was loaded, -1 otherwise
   */
  public final native int create(String modelPath);

  /**
   * Loads the tagger model together with an external lexicon, replacing any
   * model loaded earlier.
   *
   * @param modelPath   path of the model file
   * @param lexiconPath path of the lexicon file
   * @return 1 when the model was loaded, -1 otherwise
   */
  public final native int create(String modelPath, String lexiconPath);

  /**
   * Tags the given words and appends the tags to {@code tags}.
   *
   * @param words the words of the sentence
   * @param tags  output list that receives the tags
   * @return the value reported by the native tagger
   */
  public final native int postag(List<String> words,
      List<String> tags);

  /** Releases the natively loaded model. */
  public final native void release();
}



================================================
FILE: src/main/java/edu/hit/ir/ltp4j/SRL.java
================================================
package edu.hit.ir.ltp4j;
import java.util.List;

/**
 * Java binding for the native semantic role labeller.
 */
public class SRL {
  // Loads the native library once, when the class is initialized.
  static {
    NarSystem.loadLibrary();
  }

  /**
   * Loads the labelling resources.
   *
   * @param modelPath path handed to the native resource loader
   * @return 1 when the resources were loaded, -1 otherwise
   */
  public final native int create(String modelPath);

  /**
   * Labels semantic roles for a parsed sentence.
   *
   * Each entry appended to {@code srls} pairs a predicate index with a list
   * of arguments; every argument pairs a role name with a
   * {@code (begin, end)} pair of indices.
   *
   * @param words   the words of the sentence
   * @param tags    part-of-speech tags
   * @param heads   dependency heads
   * @param deprels dependency relations, one per head
   * @param srls    output list that receives the result entries
   * @return the number of entries appended to {@code srls}, or -1 when the
   *         native labeller reports a negative result
   */
  public final native int srl(
      List<String> words,
      List<String> tags,
      List<Integer> heads,
      List<String> deprels,
      List<Pair<Integer, List<Pair<String, Pair<Integer, Integer>>>>> srls);

  /** Releases the natively loaded resources. */
  public final native void release();
}



================================================
FILE: src/main/java/edu/hit/ir/ltp4j/Segmentor.java
================================================
package edu.hit.ir.ltp4j;
import java.util.List;

/**
 * Java binding for the native word segmentor.
 *
 * The native side keeps the loaded model in a single process-wide handle, so
 * all instances of this class share the same model.
 */
public class Segmentor {
  // Loads the native library once, when the class is initialized.
  static {
    NarSystem.loadLibrary();
  }

  /**
   * Loads the segmentor model, replacing any model loaded earlier.
   *
   * @param modelPath path of the model file
   * @return 1 when the model was loaded, -1 otherwise
   */
  public final native int create(String modelPath);

  /**
   * Loads the segmentor model together with an external lexicon, replacing
   * any model loaded earlier.
   *
   * @param modelPath   path of the model file
   * @param lexiconPath path of the lexicon file
   * @return 1 when the model was loaded, -1 otherwise
   */
  public final native int create(String modelPath, String lexiconPath);

  /**
   * Segments a sentence and appends the words to {@code words}.
   *
   * @param sent  the sentence to segment
   * @param words output list that receives the words
   * @return the value reported by the native segmentor
   */
  public final native int segment(String sent, List<String> words);

  /** Releases the natively loaded model. */
  public final native void release();
}



================================================
FILE: src/main/java/edu/hit/ir/ltp4j/SplitSentence.java
================================================
package edu.hit.ir.ltp4j;
import java.util.List;

/**
 * Java binding for the native sentence splitter.
 */
public class SplitSentence{
  // Loads the native library once, when the class is initialized.
  static {
    NarSystem.loadLibrary();
  }

  /**
   * Splits a text into sentences and appends each one to {@code sents}.
   *
   * @param sent  the text to split
   * @param sents output list that receives the sentences
   */
  public final native void splitSentence(String sent,List<String> sents);
}


================================================
FILE: src/test/c++/main.cpp
================================================
#include <iostream>

// Placeholder native test entry point: performs no checks and always exits
// with status 0.
int main(int argc, char* argv[]) {
  return 0;
}


================================================
FILE: src/test/java/edu/hit/ir/ltp4j/test/NERTest.java
================================================
package edu.hit.ir.ltp4j.test;

import edu.hit.ir.ltp4j.NER;
import java.util.List;
import java.util.ArrayList;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for the {@link NER} native binding. Requires the model file
 * {@code ltp_data/ner.model} relative to the working directory.
 */
public class NERTest {
  private static final String MODEL_PATH = "ltp_data/ner.model";

  /** Loading the model must report success (1). */
  @Test
  public final void testNERCreate() throws Exception {
    final NER recognizer = new NER();
    Assert.assertEquals(1, recognizer.create(MODEL_PATH));
  }

  /** A tagged sentence must yield the expected entity tag per word. */
  @Test
  public final void testNERRecognize() throws Exception {
    final NER recognizer = new NER();
    recognizer.create(MODEL_PATH);

    // Each row is {word, part-of-speech tag}.
    final String[][] tokens = {
      {"中国", "ns"},
      {"进出口", "n"},
      {"银行", "n"},
      {"与", "p"},
      {"中国", "ns"},
      {"银行", "n"},
      {"加强", "v"},
      {"合作", "v"},
    };
    final String[] expected = {
      "B-Ni", "I-Ni", "E-Ni", "O", "B-Ni", "E-Ni", "O", "O",
    };

    final List<String> words = new ArrayList<String>();
    final List<String> tags = new ArrayList<String>();
    for (String[] token : tokens) {
      words.add(token[0]);
      tags.add(token[1]);
    }

    final List<String> result = new ArrayList<String>();
    recognizer.recognize(words, tags, result);

    Assert.assertEquals(expected.length, result.size());
    for (int i = 0; i < expected.length; i++) {
      Assert.assertEquals(expected[i], result.get(i));
    }
  }
}


================================================
FILE: src/test/java/edu/hit/ir/ltp4j/test/ParserTest.java
================================================
package edu.hit.ir.ltp4j.test;

import edu.hit.ir.ltp4j.Parser;
import java.util.List;
import java.util.ArrayList;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for the {@link Parser} native binding. Requires the model file
 * {@code ltp_data/parser.model} relative to the working directory.
 */
public class ParserTest {
  private static final String MODEL_PATH = "ltp_data/parser.model";

  /** Loading the model must report success (1). */
  @Test
  public final void testParserCreate() throws Exception {
    final Parser parser = new Parser();
    Assert.assertEquals(1, parser.create(MODEL_PATH));
  }

  /** A tagged sentence must yield the expected head and relation per word. */
  @Test
  public final void testParserParse() throws Exception {
    final Parser parser = new Parser();
    parser.create(MODEL_PATH);

    // Each row is {word, part-of-speech tag}.
    final String[][] tokens = {
      {"中国", "ns"},
      {"进出口", "n"},
      {"银行", "n"},
      {"与", "p"},
      {"中国", "ns"},
      {"银行", "n"},
      {"加强", "v"},
      {"合作", "v"},
    };
    final int[] expectedHeads = {3, 3, 7, 3, 6, 4, 0, 7};
    final String[] expectedDeprels = {
      "ATT", "ATT", "SBV", "ADV", "ATT", "POB", "HED", "VOB",
    };

    final List<String> words = new ArrayList<String>();
    final List<String> tags = new ArrayList<String>();
    for (String[] token : tokens) {
      words.add(token[0]);
      tags.add(token[1]);
    }

    final List<Integer> heads = new ArrayList<Integer>();
    final List<String> deprels = new ArrayList<String>();

    parser.parse(words, tags, heads, deprels);

    Assert.assertEquals(8, heads.size());
    Assert.assertEquals(8, deprels.size());
    for (int i = 0; i < expectedHeads.length; i++) {
      Assert.assertTrue(expectedHeads[i] == heads.get(i));
      Assert.assertEquals(expectedDeprels[i], deprels.get(i));
    }
  }
}


================================================
FILE: src/test/java/edu/hit/ir/ltp4j/test/PostaggerTest.java
================================================
package edu.hit.ir.ltp4j.test;

import edu.hit.ir.ltp4j.Postagger;
import java.util.List;
import java.util.ArrayList;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for the {@link Postagger} native binding. Requires the model file
 * {@code ltp_data/pos.model} relative to the working directory.
 */
public class PostaggerTest {
  private static final String MODEL_PATH = "ltp_data/pos.model";

  /** Loading the model must report success (1). */
  @Test
  public final void testPostaggerCreate() throws Exception {
    final Postagger tagger = new Postagger();
    Assert.assertEquals(1, tagger.create(MODEL_PATH));
  }

  /** A segmented sentence must yield the expected tag per word. */
  @Test
  public final void testPostaggerPostag() throws Exception {
    final Postagger tagger = new Postagger();
    tagger.create(MODEL_PATH);

    final String[] sentence = {
      "中国", "进出口", "银行", "与", "中国", "银行", "加强", "合作",
    };
    final String[] expected = {
      "ns", "n", "n", "p", "ns", "n", "v", "v",
    };

    final List<String> words = new ArrayList<String>();
    for (String word : sentence) {
      words.add(word);
    }

    final List<String> result = new ArrayList<String>();
    tagger.postag(words, result);

    Assert.assertEquals(expected.length, result.size());
    for (int i = 0; i < expected.length; i++) {
      Assert.assertEquals(expected[i], result.get(i));
    }
  }
}


================================================
FILE: src/test/java/edu/hit/ir/ltp4j/test/SRLTest.java
================================================
package edu.hit.ir.ltp4j.test;

import edu.hit.ir.ltp4j.SRL;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for the {@link SRL} native binding. Requires
 * {@code ltp_data/pisrl.model} relative to the working directory.
 */
public class SRLTest {
  /** Loading the resources must report success (1). */
  @Test
  public final void testSRLCreate() throws Exception {
    final SRL labeller = new SRL();
    final int status = labeller.create("ltp_data/pisrl.model");
    Assert.assertEquals(1, status);
  }
}


================================================
FILE: src/test/java/edu/hit/ir/ltp4j/test/SegmentorTest.java
================================================
package edu.hit.ir.ltp4j.test;

import edu.hit.ir.ltp4j.Segmentor;
import java.util.List;
import java.util.ArrayList;
import org.junit.Assert;
import org.junit.Test;

/**
 * JUnit tests for the {@link Segmentor} native binding.
 *
 * <p>Every test loads the model from {@code ltp_data/cws.model}, a path that
 * is relative to the working directory of the test run, and releases it again
 * before returning.
 */
public class SegmentorTest {
  /** Relative path of the word-segmentation model shared by all tests. */
  private static final String MODEL_PATH = "ltp_data/cws.model";

  /**
   * Loading the model must yield 1, the value this suite treats as a
   * successful {@code create}.
   */
  @Test public final void testSegmentorCreate()
    throws Exception
  {
    Segmentor app = new Segmentor();
    try {
      Assert.assertEquals( 1, app.create(MODEL_PATH) );
    } finally {
      // Hand the native model back even when the assertion above fails.
      // NOTE(review): assumes release() is harmless after a failed create —
      // confirm against the native implementation.
      app.release();
    }
  }

  /**
   * Segments a fixed sentence and checks both the number of words produced
   * and the word found at every position.
   */
  @Test public final void testSegmentorSegment()
    throws Exception
  {
    // Expected segmentation of the sentence passed to segment() below.
    String[] expectedWords = { "中国", "进出口", "银行", "与", "中国", "银行", "加强", "合作" };

    Segmentor app = new Segmentor();
    try {
      // Fail right here if the model is missing, instead of surfacing later
      // as a confusing size or word mismatch.
      Assert.assertEquals( "model failed to load", 1, app.create(MODEL_PATH) );

      List<String> result = new ArrayList<String>();
      app.segment("中国进出口银行与中国银行加强合作", result);

      Assert.assertEquals( expectedWords.length, result.size() );
      for (int i = 0; i < expectedWords.length; ++i) {
        Assert.assertEquals( "word at index " + i, expectedWords[i], result.get(i) );
      }
    } finally {
      // Release the native model regardless of the test outcome.
      app.release();
    }
  }
}
Download .txt
gitextract_trfcfd8n/

├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── README.md
├── aol.properties
├── appveyor.yml
├── doc/
│   ├── api.rst
│   ├── background.rst
│   ├── conf.py
│   ├── index.rst
│   ├── install.rst
│   ├── ltp4j-document-1.0.md
│   ├── make.bat
│   ├── requirements.txt
│   └── run.rst
├── examples/
│   ├── Console.java
│   └── example
├── pom.xml
└── src/
    ├── main/
    │   ├── c++/
    │   │   ├── edu_hit_ir_ltp4j_NER.cpp
    │   │   ├── edu_hit_ir_ltp4j_Parser.cpp
    │   │   ├── edu_hit_ir_ltp4j_Postagger.cpp
    │   │   ├── edu_hit_ir_ltp4j_SRL.cpp
    │   │   ├── edu_hit_ir_ltp4j_Segmentor.cpp
    │   │   ├── edu_hit_ir_ltp4j_SplitSentence.cpp
    │   │   └── string_to_jstring.hpp
    │   └── java/
    │       └── edu/
    │           └── hit/
    │               └── ir/
    │                   └── ltp4j/
    │                       ├── NER.java
    │                       ├── Pair.java
    │                       ├── Parser.java
    │                       ├── Postagger.java
    │                       ├── SRL.java
    │                       ├── Segmentor.java
    │                       └── SplitSentence.java
    └── test/
        ├── c++/
        │   └── main.cpp
        └── java/
            └── edu/
                └── hit/
                    └── ir/
                        └── ltp4j/
                            └── test/
                                ├── NERTest.java
                                ├── ParserTest.java
                                ├── PostaggerTest.java
                                ├── SRLTest.java
                                └── SegmentorTest.java
Download .txt
SYMBOL INDEX (70 symbols across 21 files)

FILE: examples/Console.java
  class Console (line 12) | public class Console {
    method ParseArguments (line 26) | private boolean ParseArguments(String[] args) {
    method Usage (line 74) | public void Usage() {
    method join (line 87) | private String join(ArrayList<String> payload, String conjunction) {
    method Analyse (line 100) | public void Analyse(String sent) {
    method release (line 160) | public void release(){
    method main (line 168) | public static void main(String[] args) {

FILE: src/main/c++/edu_hit_ir_ltp4j_NER.cpp
  function JNIEXPORT (line 10) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_create
  function JNIEXPORT (line 28) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_recognize
  function JNIEXPORT (line 73) | JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_NER_release

FILE: src/main/c++/edu_hit_ir_ltp4j_Parser.cpp
  function JNIEXPORT (line 11) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_create
  function JNIEXPORT (line 29) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse
  function JNIEXPORT (line 88) | JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Parser_release

FILE: src/main/c++/edu_hit_ir_ltp4j_Postagger.cpp
  function JNIEXPORT (line 11) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lan...
  function JNIEXPORT (line 30) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lan...
  function JNIEXPORT (line 53) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_postag
  function JNIEXPORT (line 84) | JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Postagger_release

FILE: src/main/c++/edu_hit_ir_ltp4j_SRL.cpp
  function JNIEXPORT (line 9) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_create
  function JNIEXPORT (line 21) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_srl
  function JNIEXPORT (line 118) | JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SRL_release

FILE: src/main/c++/edu_hit_ir_ltp4j_Segmentor.cpp
  function JNIEXPORT (line 10) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lan...
  function JNIEXPORT (line 30) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lan...
  function JNIEXPORT (line 52) | JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_segment
  function JNIEXPORT (line 72) | JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Segmentor_release

FILE: src/main/c++/edu_hit_ir_ltp4j_SplitSentence.cpp
  function JNIEXPORT (line 11) | JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SplitSentence_splitSentence

FILE: src/main/c++/string_to_jstring.hpp
  function jstring (line 5) | inline jstring stringToJstring(JNIEnv* env, const char* pat) {

FILE: src/main/java/edu/hit/ir/ltp4j/NER.java
  class NER (line 4) | public class NER {
    method create (line 9) | public final native int create(String modelPath);
    method recognize (line 10) | public final native int recognize(List<String> words,
    method release (line 12) | public final native void release();

FILE: src/main/java/edu/hit/ir/ltp4j/Pair.java
  class Pair (line 2) | public class Pair<F, S> {
    method Pair (line 14) | public Pair(F first, S second) {
    method hashCode (line 24) | @Override
    method equals (line 30) | public boolean equals(Object o) {
    method create (line 47) | public static <A, B> Pair<A, B> create(A a, B b) {

FILE: src/main/java/edu/hit/ir/ltp4j/Parser.java
  class Parser (line 4) | public class Parser {
    method create (line 9) | public final native int create(String modelPath);
    method parse (line 10) | public final native int parse(List<String> words,
    method release (line 13) | public final native void release();

FILE: src/main/java/edu/hit/ir/ltp4j/Postagger.java
  class Postagger (line 4) | public class Postagger {
    method create (line 9) | public final native int create(String modelPath);
    method create (line 10) | public final native int create(String modelPath, String lexiconPath);
    method postag (line 11) | public final native int postag(List<String> words,
    method release (line 13) | public final native void release();

FILE: src/main/java/edu/hit/ir/ltp4j/SRL.java
  class SRL (line 4) | public class SRL {
    method create (line 9) | public final native int create(String modelPath);
    method srl (line 10) | public final native int srl(
    method release (line 16) | public final native void release();

FILE: src/main/java/edu/hit/ir/ltp4j/Segmentor.java
  class Segmentor (line 4) | public class Segmentor {
    method create (line 9) | public final native int create(String modelPath);
    method create (line 10) | public final native int create(String modelPath, String lexiconPath);
    method segment (line 11) | public final native int segment(String sent, List<String> words);
    method release (line 12) | public final native void release();

FILE: src/main/java/edu/hit/ir/ltp4j/SplitSentence.java
  class SplitSentence (line 4) | public class SplitSentence{
    method splitSentence (line 9) | public final native void splitSentence(String sent,List<String> sents);

FILE: src/test/c++/main.cpp
  function main (line 3) | int main(int argc, char* argv[]) {

FILE: src/test/java/edu/hit/ir/ltp4j/test/NERTest.java
  class NERTest (line 9) | public class NERTest {
    method testNERCreate (line 10) | @Test public final void testNERCreate()
    method testNERRecognize (line 17) | @Test public final void testNERRecognize()

FILE: src/test/java/edu/hit/ir/ltp4j/test/ParserTest.java
  class ParserTest (line 9) | public class ParserTest {
    method testParserCreate (line 10) | @Test public final void testParserCreate()
    method testParserParse (line 17) | @Test public final void testParserParse()

FILE: src/test/java/edu/hit/ir/ltp4j/test/PostaggerTest.java
  class PostaggerTest (line 9) | public class PostaggerTest {
    method testPostaggerCreate (line 10) | @Test public final void testPostaggerCreate()
    method testPostaggerPostag (line 17) | @Test public final void testPostaggerPostag()

FILE: src/test/java/edu/hit/ir/ltp4j/test/SRLTest.java
  class SRLTest (line 7) | public class SRLTest {
    method testSRLCreate (line 8) | @Test public final void testSRLCreate()

FILE: src/test/java/edu/hit/ir/ltp4j/test/SegmentorTest.java
  class SegmentorTest (line 9) | public class SegmentorTest {
    method testSegmentorCreate (line 10) | @Test public final void testSegmentorCreate()
    method testSegmentorSegment (line 17) | @Test public final void testSegmentorSegment()
Condensed preview — 39 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (101K chars).
[
  {
    "path": ".gitignore",
    "chars": 200,
    "preview": "\n###########\n# data    #\n###########\nltp_data\n\n###########\n# outputs #\n###########\noutput\nCMakeFiles\ncmake_install.cmake"
  },
  {
    "path": ".gitmodules",
    "chars": 99,
    "preview": "[submodule \"src/main/c++/ltp\"]\n\tpath = src/main/c++/ltp\n\turl = https://github.com/HIT-SCIR/ltp.git\n"
  },
  {
    "path": ".travis.yml",
    "chars": 314,
    "preview": "language:\n    - cpp\n    - java\n\nos:\n    - linux\n    - osx\n\nbefore_script:\n    - if [[ \"$TRAVIS_OS_NAME\" == \"osx\" && -z \""
  },
  {
    "path": "CMakeLists.txt",
    "chars": 1469,
    "preview": "cmake_minimum_required (VERSION 2.8.0)\nproject (\"ltp4j\")\n\nfind_package(JNI)\nset (LTP_HOME \"${PROJECT_SOURCE_DIR}/ltp\" CA"
  },
  {
    "path": "README.md",
    "chars": 712,
    "preview": "ltp4j<sup>beta</sup>: Language Technology Platform For Java\n============================================\n\n[![Build Statu"
  },
  {
    "path": "aol.properties",
    "chars": 159,
    "preview": "amd64.Windows.msvc.cpp.defines=Windows WIN32 _WINDOWS NOMINMAX BOOST_ALL_NO_LIB\namd64.Windows.msvc.c.defines=Windows WIN"
  },
  {
    "path": "appveyor.yml",
    "chars": 822,
    "preview": "version: '{build}'\n\nimage:\n  - Visual Studio 2015\n  - Visual Studio 2017\n\nplatform: x64\n\ninstall:\n  - ps: |\n      Add-Ty"
  },
  {
    "path": "doc/api.rst",
    "chars": 14154,
    "preview": "编程接口\n========\n\n.. java:package:: edu.hit.ir.ltp4j\n\n分词接口\n--------\n\n.. java:type:: public class Segmentor\n\n分词主要提供三个接口:\n\n."
  },
  {
    "path": "doc/background.rst",
    "chars": 567,
    "preview": "简介与背景知识\n==============\n\nltp4j是 `语言技术平台 (Language Technology Platform, LTP) <http://ltp.readthedocs.org/zh_CN/latest/>`_ "
  },
  {
    "path": "doc/conf.py",
    "chars": 8117,
    "preview": "# -*- coding: utf-8 -*-\n#\n# LTP documentation build configuration file, created by\n# sphinx-quickstart on Mon Jan 19 17:"
  },
  {
    "path": "doc/index.rst",
    "chars": 382,
    "preview": ".. ltpdoctest documentation master file, created by\n   sphinx-quickstart on Wed Jan 14 22:35:55 2015.\n   You can adapt t"
  },
  {
    "path": "doc/install.rst",
    "chars": 1905,
    "preview": ".. _install-label:\n\n编译ltp4j\n=========\n\n如果您需要使用ltp4j,必须拥有两部分内容\n\n* ltp4j.jar与C++代理程序\n* LTP模型文件\n\n其中,LTP模型文件可以从 `百度云 <http:/"
  },
  {
    "path": "doc/ltp4j-document-1.0.md",
    "chars": 11059,
    "preview": "ltp4j 文档\n===============\n\nltp4j是[语言技术平台(Language Technology Platform, LTP)](https://github.com/HIT-SCIR/ltp)的Java封装。\n关于L"
  },
  {
    "path": "doc/make.bat",
    "chars": 6453,
    "preview": "@ECHO OFF\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-build\n)\nset BUI"
  },
  {
    "path": "doc/requirements.txt",
    "chars": 11,
    "preview": "javasphinx\n"
  },
  {
    "path": "doc/run.rst",
    "chars": 2142,
    "preview": "运行\n====\n\n以编译运行examples/Test.java为例。\n\n概念\n~~~~\n\n**version与aol**\n\nversion是当前ltp4j的版本号,aol是使用ltp4j机器的`架构-系统-链接器`\n\n在命令行条件下可以用"
  },
  {
    "path": "examples/Console.java",
    "chars": 6069,
    "preview": "import java.util.Scanner;\nimport java.util.ArrayList;\nimport java.util.List;\nimport edu.hit.ir.ltp4j.SplitSentence;\nimpo"
  },
  {
    "path": "examples/example",
    "chars": 17,
    "preview": "中国进出口银行与中国银行加强合作\n"
  },
  {
    "path": "pom.xml",
    "chars": 6893,
    "preview": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n  xsi:schemaL"
  },
  {
    "path": "src/main/c++/edu_hit_ir_ltp4j_NER.cpp",
    "chars": 2194,
    "preview": "#include \"ner/ner_dll.h\"\n#include \"edu_hit_ir_ltp4j_NER.h\"\n#include \"string_to_jstring.hpp\"\n#include <string>\n#include <"
  },
  {
    "path": "src/main/c++/edu_hit_ir_ltp4j_Parser.cpp",
    "chars": 2617,
    "preview": "#include \"edu_hit_ir_ltp4j_Parser.h\"\n#include \"parser/parser_dll.h\"\n#include \"string_to_jstring.hpp\"\n#include <vector>\n#"
  },
  {
    "path": "src/main/c++/edu_hit_ir_ltp4j_Postagger.cpp",
    "chars": 2575,
    "preview": "#include \"postagger/postag_dll.h\"\n#include \"edu_hit_ir_ltp4j_Postagger.h\"\n#include \"string_to_jstring.hpp\"\n#include <vec"
  },
  {
    "path": "src/main/c++/edu_hit_ir_ltp4j_SRL.cpp",
    "chars": 4235,
    "preview": "#include \"edu_hit_ir_ltp4j_SRL.h\"\n#include \"srl/SRL_DLL.h\"\n#include \"string_to_jstring.hpp\"\n#include <vector>\n#include <"
  },
  {
    "path": "src/main/c++/edu_hit_ir_ltp4j_Segmentor.cpp",
    "chars": 2271,
    "preview": "#include \"segmentor/segment_dll.h\"\r\n#include \"edu_hit_ir_ltp4j_Segmentor.h\"\r\n#include \"string_to_jstring.hpp\"\r\n#include "
  },
  {
    "path": "src/main/c++/edu_hit_ir_ltp4j_SplitSentence.cpp",
    "chars": 820,
    "preview": "#include \"edu_hit_ir_ltp4j_SplitSentence.h\"\n#include \"splitsnt/SplitSentence.h\"\n#include \"string_to_jstring.hpp\"\n#includ"
  },
  {
    "path": "src/main/c++/string_to_jstring.hpp",
    "chars": 517,
    "preview": "#include <cstdlib>\n#include <cstring>\n#include <jni.h>\n\ninline jstring stringToJstring(JNIEnv* env, const char* pat) {\n "
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/NER.java",
    "chars": 311,
    "preview": "package edu.hit.ir.ltp4j;\nimport java.util.List;\n\npublic class NER {\n  static {\n    NarSystem.loadLibrary();\n  }\n\n  publ"
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/Pair.java",
    "chars": 1196,
    "preview": "package edu.hit.ir.ltp4j;\npublic class Pair<F, S> {\n  public final F first;\n  public final S second;\n\n  /**\n   * Constru"
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/Parser.java",
    "chars": 337,
    "preview": "package edu.hit.ir.ltp4j;\nimport java.util.List;\n\npublic class Parser {\n  static {\n    NarSystem.loadLibrary();\n  }\n\n  p"
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/Postagger.java",
    "chars": 364,
    "preview": "package edu.hit.ir.ltp4j;\nimport java.util.List;\n\npublic class Postagger {\n  static {\n    NarSystem.loadLibrary();\n  }\n\n"
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/SRL.java",
    "chars": 421,
    "preview": "package edu.hit.ir.ltp4j;\nimport java.util.List;\n\npublic class SRL {\n  static {\n    NarSystem.loadLibrary();\n  }\n\n  publ"
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/Segmentor.java",
    "chars": 353,
    "preview": "package edu.hit.ir.ltp4j;\nimport java.util.List;\n\npublic class Segmentor {\n  static {\n    NarSystem.loadLibrary();\n  }\n\n"
  },
  {
    "path": "src/main/java/edu/hit/ir/ltp4j/SplitSentence.java",
    "chars": 199,
    "preview": "package edu.hit.ir.ltp4j;\nimport java.util.List;\n\npublic class SplitSentence{\n  static {\n    NarSystem.loadLibrary();\n  "
  },
  {
    "path": "src/test/c++/main.cpp",
    "chars": 70,
    "preview": "#include <iostream>\n\nint main(int argc, char* argv[]) {\n  return 0;\n}\n"
  },
  {
    "path": "src/test/java/edu/hit/ir/ltp4j/test/NERTest.java",
    "chars": 1455,
    "preview": "package edu.hit.ir.ltp4j.test;\n\nimport edu.hit.ir.ltp4j.NER;\nimport java.util.List;\nimport java.util.ArrayList;\nimport o"
  },
  {
    "path": "src/test/java/edu/hit/ir/ltp4j/test/ParserTest.java",
    "chars": 1917,
    "preview": "package edu.hit.ir.ltp4j.test;\n\nimport edu.hit.ir.ltp4j.Parser;\nimport java.util.List;\nimport java.util.ArrayList;\nimpor"
  },
  {
    "path": "src/test/java/edu/hit/ir/ltp4j/test/PostaggerTest.java",
    "chars": 1258,
    "preview": "package edu.hit.ir.ltp4j.test;\n\nimport edu.hit.ir.ltp4j.Postagger;\nimport java.util.List;\nimport java.util.ArrayList;\nim"
  },
  {
    "path": "src/test/java/edu/hit/ir/ltp4j/test/SRLTest.java",
    "chars": 298,
    "preview": "package edu.hit.ir.ltp4j.test;\n\nimport edu.hit.ir.ltp4j.SRL;\nimport org.junit.Assert;\nimport org.junit.Test;\n\npublic cla"
  },
  {
    "path": "src/test/java/edu/hit/ir/ltp4j/test/SegmentorTest.java",
    "chars": 1058,
    "preview": "package edu.hit.ir.ltp4j.test;\n\nimport edu.hit.ir.ltp4j.Segmentor;\nimport java.util.List;\nimport java.util.ArrayList;\nim"
  }
]

About this extraction

This page contains the full source code of the HIT-SCIR/ltp4j GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 39 files (84.0 KB), approximately 26.0k tokens, and a symbol index with 70 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!